[INFO] fetching crate is-it-slop-preprocessing 0.5.0... [INFO] testing is-it-slop-preprocessing-0.5.0 against master#562dee4820c458d823175268e41601d4c060588a for pr-154210-1 [INFO] extracting crate is-it-slop-preprocessing 0.5.0 into /workspace/builds/worker-6-tc1/source [INFO] started tweaking crates.io crate is-it-slop-preprocessing 0.5.0 [INFO] removed 0 missing examples [INFO] finished tweaking crates.io crate is-it-slop-preprocessing 0.5.0 [INFO] tweaked toml for crates.io crate is-it-slop-preprocessing 0.5.0 written to /workspace/builds/worker-6-tc1/source/Cargo.toml [INFO] validating manifest of crates.io crate is-it-slop-preprocessing 0.5.0 on toolchain 562dee4820c458d823175268e41601d4c060588a [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+562dee4820c458d823175268e41601d4c060588a" "metadata" "--manifest-path" "Cargo.toml" "--no-deps", kill_on_drop: false }` [INFO] crate crates.io crate is-it-slop-preprocessing 0.5.0 already has a lockfile, it will not be regenerated [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+562dee4820c458d823175268e41601d4c060588a" "fetch" "--manifest-path" "Cargo.toml", kill_on_drop: false }` [INFO] [stderr] Updating crates.io index [INFO] [stderr] Downloading crates ... [INFO] [stderr] Downloaded alga v0.9.3 [INFO] [stderr] Downloaded pyo3-macros v0.28.2 [INFO] [stderr] Downloaded approx v0.3.2 [INFO] [stderr] Downloaded unit-prefix v0.5.2 [INFO] [stderr] Downloaded target-lexicon v0.13.5 [INFO] [stderr] Downloaded console v0.16.2 [INFO] [stderr] Downloaded aneubeck-daachorse v1.1.1 [INFO] [stderr] Downloaded pyo3-build-config v0.28.2 [INFO] [stderr] Downloaded indicatif v0.18.4 [INFO] [stderr] Downloaded pyo3-macros-backend v0.28.2 [INFO] [stderr] Downloaded bumpalo v3.20.0 [INFO] [stderr] Downloaded pyo3-ffi v0.28.2 [INFO] [stderr] Downloaded sprs v0.11.4 [INFO] [stderr] Downloaded bpe v0.2.1 [INFO] [stderr] Downloaded pyo3 v0.28.2 [INFO] [stderr] Downloaded bpe-openai v0.3.0 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+562dee4820c458d823175268e41601d4c060588a" "metadata" "--no-deps" "--format-version=1", kill_on_drop: false }` [INFO] [stdout] b91224235d1f30c2855e9edefb9b769171c8a3ac713d922f3f5083f81209c312 [INFO] running `Command { std: "docker" "start" "-a" "b91224235d1f30c2855e9edefb9b769171c8a3ac713d922f3f5083f81209c312", kill_on_drop: false }` [INFO] running `Command { std: "docker" "inspect" "b91224235d1f30c2855e9edefb9b769171c8a3ac713d922f3f5083f81209c312", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "b91224235d1f30c2855e9edefb9b769171c8a3ac713d922f3f5083f81209c312", kill_on_drop: false }` [INFO] [stdout] b91224235d1f30c2855e9edefb9b769171c8a3ac713d922f3f5083f81209c312 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=forbid" "-e" "RUSTDOCFLAGS=--cap-lints=forbid" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+562dee4820c458d823175268e41601d4c060588a" "build" "--frozen" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] ca99acda6ab3b3dd4820cb46d7d03ae095361987bca4ea15b31d6fc631743045 [INFO] running `Command { std: "docker" "start" "-a" "ca99acda6ab3b3dd4820cb46d7d03ae095361987bca4ea15b31d6fc631743045", kill_on_drop: false }` [INFO] [stderr] Compiling libm v0.2.16 [INFO] [stderr] Compiling libc v0.2.182 [INFO] [stderr] Compiling aneubeck-daachorse v1.1.1 [INFO] [stderr] Compiling either v1.15.0 [INFO] [stderr] Compiling num-complex v0.2.4 [INFO] [stderr] Compiling num-traits v0.2.19 [INFO] [stderr] Compiling matrixmultiply v0.3.10 [INFO] [stderr] Compiling syn v2.0.116 [INFO] [stderr] Compiling flate2 v1.1.9 [INFO] [stderr] Compiling zerocopy v0.8.39 [INFO] [stderr] Compiling regex-syntax v0.8.9 [INFO] [stderr] Compiling simdutf8 v0.1.5 [INFO] [stderr] Compiling ahash v0.8.12 [INFO] [stderr] Compiling itertools v0.14.0 [INFO] [stderr] Compiling hashbrown v0.16.1 [INFO] [stderr] Compiling rmp v0.8.15 [INFO] [stderr] Compiling parking_lot_core v0.9.12 [INFO] [stderr] Compiling num_cpus v1.17.0 [INFO] [stderr] Compiling getrandom v0.3.4 [INFO] [stderr] Compiling dashmap v6.1.0 [INFO] [stderr] Compiling regex-automata v0.4.14 [INFO] [stderr] Compiling num-complex v0.4.6 [INFO] [stderr] Compiling approx v0.3.2 [INFO] [stderr] Compiling num-integer v0.1.46 [INFO] [stderr] Compiling ndarray v0.17.2 [INFO] [stderr] Compiling alga v0.9.3 [INFO] [stderr] Compiling serde_derive v1.0.228 [INFO] [stderr] Compiling ptr_meta_derive v0.3.1 [INFO] [stderr] Compiling bytecheck_derive v0.8.2 [INFO] [stderr] Compiling munge_macro v0.4.7 [INFO] [stderr] Compiling tracing-attributes v0.1.31 [INFO] [stderr] Compiling rkyv_derive v0.8.15 [INFO] [stderr] Compiling ptr_meta v0.3.1 [INFO] [stderr] Compiling rancor v0.1.1 [INFO] [stderr] Compiling munge v0.4.7 [INFO] [stderr] Compiling bytecheck v0.8.2 [INFO] [stderr] Compiling tracing v0.1.44 [INFO] [stderr] Compiling rend v0.5.3 [INFO] [stderr] Compiling regex v1.12.3 [INFO] [stderr] Compiling sprs v0.11.4 [INFO] [stderr] Compiling serde v1.0.228 [INFO] [stderr] Compiling rmp-serde v1.3.1 [INFO] [stderr] Compiling bpe v0.2.1 [INFO] [stderr] Compiling bpe-openai v0.3.0 [INFO] [stderr] Compiling rkyv v0.8.15 [INFO] [stderr] Compiling is-it-slop-preprocessing v0.5.0 (/opt/rustwide/workdir) [INFO] [stderr] Finished `dev` profile [unoptimized + debuginfo] target(s) in 1m 18s [INFO] running `Command { std: "docker" "inspect" "ca99acda6ab3b3dd4820cb46d7d03ae095361987bca4ea15b31d6fc631743045", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "ca99acda6ab3b3dd4820cb46d7d03ae095361987bca4ea15b31d6fc631743045", kill_on_drop: false }` [INFO] [stdout] ca99acda6ab3b3dd4820cb46d7d03ae095361987bca4ea15b31d6fc631743045 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=forbid" "-e" "RUSTDOCFLAGS=--cap-lints=forbid" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+562dee4820c458d823175268e41601d4c060588a" "test" "--frozen" "--no-run" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] b3cfa4c8c80675a710a62846750f2166e20625b6dbf51c68332cc028ca2c8ed5 [INFO] running `Command { std: "docker" "start" "-a" "b3cfa4c8c80675a710a62846750f2166e20625b6dbf51c68332cc028ca2c8ed5", kill_on_drop: false }` [INFO] [stderr] Compiling serde_core v1.0.228 [INFO] [stderr] Compiling tracing-core v0.1.36 [INFO] [stderr] Compiling cc v1.2.56 [INFO] [stderr] Compiling serde v1.0.228 [INFO] [stderr] Compiling matchers v0.2.0 [INFO] [stderr] Compiling csv-core v0.1.13 [INFO] [stderr] Compiling ryu v1.0.23 [INFO] [stderr] Compiling tracing v0.1.44 [INFO] [stderr] Compiling tracing-log v0.2.0 [INFO] [stderr] Compiling tracing-subscriber v0.3.22 [INFO] [stderr] Compiling rmp-serde v1.3.1 [INFO] [stderr] Compiling bpe v0.2.1 [INFO] [stderr] Compiling libmimalloc-sys v0.1.44 [INFO] [stderr] Compiling bpe-openai v0.3.0 [INFO] [stderr] Compiling csv v1.4.0 [INFO] [stderr] Compiling mimalloc v0.1.48 [INFO] [stderr] Compiling ahash v0.8.12 [INFO] [stderr] Compiling is-it-slop-preprocessing v0.5.0 (/opt/rustwide/workdir) [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 1m 00s [INFO] running `Command { std: "docker" "inspect" "b3cfa4c8c80675a710a62846750f2166e20625b6dbf51c68332cc028ca2c8ed5", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "b3cfa4c8c80675a710a62846750f2166e20625b6dbf51c68332cc028ca2c8ed5", kill_on_drop: false }` [INFO] [stdout] b3cfa4c8c80675a710a62846750f2166e20625b6dbf51c68332cc028ca2c8ed5 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-6-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=forbid" "-e" "RUSTDOCFLAGS=--cap-lints=forbid" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+562dee4820c458d823175268e41601d4c060588a" "test" "--frozen", kill_on_drop: false }` [INFO] [stdout] e35b23699b222af777080c4c74e0439c40105aa285817e394a383f006cebe56b [INFO] running `Command { std: "docker" "start" "-a" "e35b23699b222af777080c4c74e0439c40105aa285817e394a383f006cebe56b", kill_on_drop: false }` [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 0.24s [INFO] [stderr] Running unittests src/lib.rs (/opt/rustwide/target/debug/deps/is_it_slop_preprocessing-a32b4dbcb19a957f) [INFO] [stdout] [INFO] [stdout] running 161 tests [INFO] [stdout] test pre_processor::chunker::tests::test_calculate_num_chunks ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_small_input_unchanged ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_chunking_with_custom_parameters ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_no_dropped_tokens_in_chunks ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_overlap_validation_between_adjacent_chunks ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_zero_overlap_no_shared_tokens ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_chunk_edge_cases ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_even_chunk_distribution ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_invalid_config_min_chunk_greater_than_chunk_size ... ok [INFO] [stdout] test pre_processor::chunker::tests::test_no_dropped_content ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_citation_with_newline ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_academic_keywords ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_malformed_citations ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_academic_prompts ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_description_headers ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_dateline_cities ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_raid_bench_artifacts ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_dateline_cities_after_newline ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_dateline_endings ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_em_dash_parentheses ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_numbered_lists ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_mixed_quote_styles ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_time_patterns ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_no_over_trimming ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_newlines_preserved_correctly ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_wikipedia_headers_at_start ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_accepts_string_ownership ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_timezone_abbreviations ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_accepts_str_reference ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_inference_mode_universal_only ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_nested_html_tags ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_preserves_legitimate_content ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_real_world_academic_paper ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_consecutive_artifacts ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_empty_after_cleaning ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_complex_mixed_artifacts ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_trimming ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_only_whitespace_and_artifacts ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_unicode_preservation ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_real_world_news_article ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_long_text_performance ... ok [INFO] [stdout] test pre_processor::cleaner::tests::text_cleaner::test_training_mode_both_cleaners ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_accepts_str ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_html_entities ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_academic_section_headers ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_accepts_string ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_html_numeric_entities ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_malformed_utf8_quotes ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_citation_markers ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_non_breaking_spaces ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_malformed_utf8_dashes ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_complex_academic_abstract ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_news_agency_parentheses ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_news_agency_dash ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_partial_html_entities ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_count_ngrams_capacity_estimate ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_count_ngrams_empty ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_count_ngrams_basic ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_count_ngrams_repeated ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_build_vocabulary ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_complex_news_article ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_no_changes_needed ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_multiple_issues ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_build_vocabulary_parallel_deterministic ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_build_vocabulary_dashmap_no_race_conditions ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_count_ngrams_no_excessive_allocations ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_state_abbreviations ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_extended_dateline_cities ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_all_same_token ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_key_trailing_zeros_3gram ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_single_token ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_unique_ngrams ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_unique_ngrams_no_duplicates ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_empty_tokens ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_insufficient_tokens_for_size ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_key_trailing_zeros_2gram ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_reverse_tokenize_empty ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_key_exactly_4_tokens ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_key_as_slice_correct_length ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_key_zero_token_value ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_empty_and_whitespace_only ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_min_texts_threshold ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_html_tags ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_key_round_trip ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_whitespace_normalization ... ok [INFO] [stdout] test pre_processor::cleaner::tests::dataset_artifact_cleaner::test_wikipedia_headers ... ok [INFO] [stdout] test pre_processor::cleaner::tests::universal_cleaner::test_zero_width_spaces ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_should_use_parallel_edge_cases ... ok [INFO] [stdout] test pre_processor::ngrams::tests::test_ngram_range_ordering ... ok [INFO] [stdout] test pre_processor::cleaner::tests::edge_cases::test_extremely_long_citation ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_reverse_tokenize_single_token ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_empty_string ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_empty ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_emojis_and_special_chars ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_reverse_tokenize_round_trip ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_combining_diacritics ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_basic ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_parallel_sequential_same_result ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_deterministic ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_whitespace_only ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_zero_width_characters ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_mixed_scripts ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_repeated_characters ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_999_texts_sequential ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_csr_format_sorted_indices ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_vocabulary_no_decode_artifacts ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_fit_transform_equivalence ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_empty_text_transform ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_csr_matrix_indices_sorted ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_max_df_absolute_filtering ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_max_df_all_filtered ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_min_df_proportion ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_fit_basic ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_csr_matrix_indptr_validity ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_csr_matrix_indices_in_bounds ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_csr_matrix_no_duplicate_indices ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_transform_shape ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_transform_with_unseen_ngrams ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_vectorizer_with_single_word_texts ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_min_df_absolute ... ok [INFO] [stdout] test pre_processor::vectorizer::params::tests::test_default_params ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_vocabulary_determinism ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_vectorizer_with_moderately_long_text ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_max_df_no_filtering ... ok [INFO] [stdout] test pre_processor::vectorizer::params::tests::test_ngram_counts ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_determinism ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_parallel_sequential_threshold_1000 ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_parallel_threshold_with_byte_check ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_parallel_sequential_threshold_999 ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_fit_transform_basic ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_deterministic_parallel ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_max_df_proportion_filtering ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_fit_transform_equivalence ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_idf_formula ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_exactly_1000_texts_parallel ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_num_features_matches_vocabulary ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_l2_normalization ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_idf_produces_valid_output ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_parallel_transform_determinism ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_rkyv_roundtrip_determinism ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_rare_term_vs_common_term ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_tfidf_with_single_document ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_sublinear_tf_with_high_counts ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_sublinear_tf_effect ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_rkyv_serialization ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_tfidf_very_sparse_input ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_tfidf_empty_after_filtering ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_rkyv_unaligned_bytes ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_universal_term_produces_consistent_output ... ok [INFO] [stdout] test pre_processor::vectorizer::count_vectorizer::tests::test_parallel_sequential_equivalence ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_vectorize_from_tokens_equivalence ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_transform_preserves_input_order ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_vocabulary_access ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_vectorize_from_tokens_empty ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_vectorize_from_tokens_batch ... ok [INFO] [stdout] test pre_processor::vectorizer::tfidf_vectorizer::tests::test_rkyv_large_vectorizer ... ok [INFO] [stdout] test pre_processor::vectorizer::params::tests::test_invalid_min_df - should panic ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_batch_sizes_crossing_threshold ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_below_1mb_sequential ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_1mb_single_text_parallel ... ok [INFO] [stdout] test pre_processor::tokenizer::tests::test_tokenize_very_long_single_text ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 161 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 13.19s [INFO] [stdout] [INFO] [stderr] Doc-tests is_it_slop_preprocessing [INFO] [stdout] [INFO] [stdout] running 5 tests [INFO] [stdout] test src/pre_processor/chunker.rs - pre_processor::chunker (line 24) ... ok [INFO] [stdout] test src/pre_processor/cleaner.rs - pre_processor::cleaner::TextCleaner (line 339) ... ok [INFO] [stdout] test src/pre_processor/cleaner.rs - pre_processor::cleaner (line 23) ... ok [INFO] [stdout] test src/pre_processor/vectorizer/mod.rs - pre_processor::vectorizer (line 22) ... ok [INFO] [stdout] test src/lib.rs - (line 24) ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 5 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 1.73s [INFO] [stdout] [INFO] [stdout] all doctests ran in 4.04s; merged doctests compilation took 2.25s [INFO] running `Command { std: "docker" "inspect" "e35b23699b222af777080c4c74e0439c40105aa285817e394a383f006cebe56b", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "e35b23699b222af777080c4c74e0439c40105aa285817e394a383f006cebe56b", kill_on_drop: false }` [INFO] [stdout] e35b23699b222af777080c4c74e0439c40105aa285817e394a383f006cebe56b