[INFO] fetching crate rusty-llm-jury 0.1.0... [INFO] testing rusty-llm-jury-0.1.0 against 1.91.0 for beta-1.92-2 [INFO] extracting crate rusty-llm-jury 0.1.0 into /workspace/builds/worker-4-tc1/source [INFO] started tweaking crates.io crate rusty-llm-jury 0.1.0 [INFO] removed 0 missing tests [INFO] finished tweaking crates.io crate rusty-llm-jury 0.1.0 [INFO] tweaked toml for crates.io crate rusty-llm-jury 0.1.0 written to /workspace/builds/worker-4-tc1/source/Cargo.toml [INFO] validating manifest of crates.io crate rusty-llm-jury 0.1.0 on toolchain 1.91.0 [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+1.91.0" "metadata" "--manifest-path" "Cargo.toml" "--no-deps", kill_on_drop: false }` [INFO] crate crates.io crate rusty-llm-jury 0.1.0 already has a lockfile, it will not be regenerated [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+1.91.0" "fetch" "--manifest-path" "Cargo.toml", kill_on_drop: false }` [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:4848fb76d95f26979359cc7e45710b1dbc8f3acb7aeedee7c460d7702230f228" "/opt/rustwide/cargo-home/bin/cargo" "+1.91.0" "metadata" "--no-deps" "--format-version=1", kill_on_drop: false }` [INFO] [stdout] 61bb25069032470edcf567336fb4b706259c9f72598669b1789d358752b3a3e4 [INFO] running `Command { std: "docker" "start" "-a" "61bb25069032470edcf567336fb4b706259c9f72598669b1789d358752b3a3e4", kill_on_drop: false }` [INFO] running `Command { std: "docker" "inspect" "61bb25069032470edcf567336fb4b706259c9f72598669b1789d358752b3a3e4", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "61bb25069032470edcf567336fb4b706259c9f72598669b1789d358752b3a3e4", kill_on_drop: false }` [INFO] [stdout] 61bb25069032470edcf567336fb4b706259c9f72598669b1789d358752b3a3e4 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=warn" "-e" "RUSTDOCFLAGS=--cap-lints=warn" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:4848fb76d95f26979359cc7e45710b1dbc8f3acb7aeedee7c460d7702230f228" "/opt/rustwide/cargo-home/bin/cargo" "+1.91.0" "build" "--frozen" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] 977aded222c494e85ead81e64ad35daca9dce69a3c699f1bd7a7c89ec9ba24cc [INFO] running `Command { std: "docker" "start" "-a" "977aded222c494e85ead81e64ad35daca9dce69a3c699f1bd7a7c89ec9ba24cc", kill_on_drop: false }` [INFO] [stderr] Compiling libc v0.2.172 [INFO] [stderr] Compiling zerocopy v0.8.25 [INFO] [stderr] Compiling num-traits v0.2.19 [INFO] [stderr] Compiling matrixmultiply v0.3.10 [INFO] [stderr] Compiling csv-core v0.1.12 [INFO] [stderr] Compiling syn v2.0.101 [INFO] [stderr] Compiling clap_builder v4.5.39 [INFO] [stderr] Compiling num-integer v0.1.46 [INFO] [stderr] Compiling num-complex v0.4.6 [INFO] [stderr] Compiling getrandom v0.2.16 [INFO] [stderr] Compiling rand_core v0.6.4 [INFO] [stderr] Compiling ndarray v0.15.6 [INFO] [stderr] Compiling ppv-lite86 v0.2.21 [INFO] [stderr] Compiling rand_chacha v0.3.1 [INFO] [stderr] Compiling serde_derive v1.0.219 [INFO] [stderr] Compiling thiserror-impl v1.0.69 [INFO] [stderr] Compiling clap_derive v4.5.32 [INFO] [stderr] Compiling rand v0.8.5 [INFO] [stderr] Compiling thiserror v1.0.69 [INFO] [stderr] Compiling clap v4.5.39 [INFO] [stderr] Compiling serde v1.0.219 [INFO] [stderr] Compiling csv v1.3.1 [INFO] [stderr] Compiling serde_json v1.0.140 [INFO] [stderr] Compiling rusty-llm-jury v0.1.0 (/opt/rustwide/workdir) [INFO] [stderr] Finished `dev` profile [unoptimized + debuginfo] target(s) in 23.85s [INFO] running `Command { std: "docker" "inspect" "977aded222c494e85ead81e64ad35daca9dce69a3c699f1bd7a7c89ec9ba24cc", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "977aded222c494e85ead81e64ad35daca9dce69a3c699f1bd7a7c89ec9ba24cc", kill_on_drop: false }` [INFO] [stdout] 977aded222c494e85ead81e64ad35daca9dce69a3c699f1bd7a7c89ec9ba24cc [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=warn" "-e" "RUSTDOCFLAGS=--cap-lints=warn" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:4848fb76d95f26979359cc7e45710b1dbc8f3acb7aeedee7c460d7702230f228" "/opt/rustwide/cargo-home/bin/cargo" "+1.91.0" "test" "--frozen" "--no-run" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] 4d97449253380b6ba1d26ed762bcb244216feae1e9c9d6598780827c8550b892 [INFO] running `Command { std: "docker" "start" "-a" "4d97449253380b6ba1d26ed762bcb244216feae1e9c9d6598780827c8550b892", kill_on_drop: false }` [INFO] [stderr] Compiling approx v0.5.1 [INFO] [stderr] Compiling getrandom v0.3.3 [INFO] [stderr] Compiling tempfile v3.20.0 [INFO] [stderr] Compiling rusty-llm-jury v0.1.0 (/opt/rustwide/workdir) [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 2.55s [INFO] running `Command { std: "docker" "inspect" "4d97449253380b6ba1d26ed762bcb244216feae1e9c9d6598780827c8550b892", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "4d97449253380b6ba1d26ed762bcb244216feae1e9c9d6598780827c8550b892", kill_on_drop: false }` [INFO] [stdout] 4d97449253380b6ba1d26ed762bcb244216feae1e9c9d6598780827c8550b892 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-4-tc1/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=warn" "-e" "RUSTDOCFLAGS=--cap-lints=warn" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:4848fb76d95f26979359cc7e45710b1dbc8f3acb7aeedee7c460d7702230f228" "/opt/rustwide/cargo-home/bin/cargo" "+1.91.0" "test" "--frozen", kill_on_drop: false }` [INFO] [stdout] bbe1bbdfd3e0418c7d220615273f51d443aba1e1f0753474d01a9d94327852dd [INFO] running `Command { std: "docker" "start" "-a" "bbe1bbdfd3e0418c7d220615273f51d443aba1e1f0753474d01a9d94327852dd", kill_on_drop: false }` [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 0.10s [INFO] [stderr] Running unittests src/lib.rs (/opt/rustwide/target/debug/deps/llmjury-8474503c8e2e0011) [INFO] [stdout] [INFO] [stdout] running 44 tests [INFO] [stdout] test bias_correction::tests::test_input_validation_invalid_confidence_level ... ok [INFO] [stdout] test bias_correction::tests::test_input_validation_empty_arrays ... ok [INFO] [stdout] test bias_correction::tests::test_estimate_success_rate_basic ... ok [INFO] [stdout] test bias_correction::tests::test_input_validation_non_binary ... ok [INFO] [stdout] test bias_correction::tests::test_input_validation_mismatched_lengths ... ok [INFO] [stdout] test bias_correction::tests::test_judge_accuracy_too_low ... ok [INFO] [stdout] test bias_correction::tests::test_judge_metrics_perfect_judge ... ok [INFO] [stdout] test bias_correction::tests::test_judge_metrics_random_judge ... ok [INFO] [stdout] test bias_correction::tests::test_no_negative_examples ... ok [INFO] [stdout] test bias_correction::tests::test_no_positive_examples ... ok [INFO] [stdout] test cli::tests::test_estimate_args_load_data_from_strings ... ok [INFO] [stdout] test cli::tests::test_synth_experiment_args_create_config ... ok [INFO] [stdout] test cli::tests::test_estimate_args_load_data_from_files ... ok [INFO] [stdout] test synthetic::tests::test_create_example_dataset_invalid_scenario ... ok [INFO] [stdout] test bias_correction::tests::test_estimate_success_rate_perfect_judge ... ok [INFO] [stdout] test synthetic::tests::test_generate_test_data_reproducibility ... ok [INFO] [stdout] test synthetic::tests::test_create_example_dataset_reproducibility ... ok [INFO] [stdout] test synthetic::tests::test_generate_test_data_basic ... ok [INFO] [stdout] test synthetic::tests::test_generate_test_data_input_validation ... ok [INFO] [stdout] test synthetic::tests::test_generate_test_data_perfect_accuracy ... ok [INFO] [stdout] test cli::tests::test_estimate_args_validation ... ok [INFO] [stdout] test synthetic::tests::test_generate_test_data_zero_accuracy ... ok [INFO] [stdout] test synthetic::tests::test_generate_unlabeled_data_basic ... ok [INFO] [stdout] test synthetic::tests::test_generate_unlabeled_data_extreme_pass_rates ... ok [INFO] [stdout] test synthetic::tests::test_generate_unlabeled_data_input_validation ... ok [INFO] [stdout] test tests::test_version_is_set ... ok [INFO] [stdout] test utils::tests::test_load_binary_from_csv_with_empty_lines ... ok [INFO] [stdout] test utils::tests::test_load_binary_from_csv_invalid_data ... ok [INFO] [stdout] test utils::tests::test_load_binary_from_csv ... ok [INFO] [stdout] test utils::tests::test_parse_binary_string_invalid ... ok [INFO] [stdout] test utils::tests::test_parse_range ... ok [INFO] [stdout] test utils::tests::test_validate_probability ... ok [INFO] [stdout] test utils::tests::test_parse_binary_string_empty ... ok [INFO] [stdout] test utils::tests::test_load_binary_from_csv_with_header ... ok [INFO] [stdout] test bias_correction::tests::test_different_confidence_levels ... ok [INFO] [stdout] test utils::tests::test_parse_binary_string_valid ... ok [INFO] [stdout] test synthetic::tests::test_create_example_dataset_all_scenarios ... ok [INFO] [stdout] test synthetic::tests::test_scenario_accuracy_properties ... ok [INFO] [stdout] test utils::tests::test_format_float ... ok [INFO] [stdout] test utils::tests::test_format_percentage ... ok [INFO] [stdout] test utils::tests::test_load_binary_from_csv_nonexistent_file ... ok [INFO] [stdout] test synthetic::tests::test_run_sensitivity_experiment_tnr ... ok [INFO] [stdout] test synthetic::tests::test_run_sensitivity_experiment_tpr ... ok [INFO] [stdout] test synthetic::tests::test_create_example_dataset_different_scenarios_differ ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 44 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.07s [INFO] [stdout] [INFO] [stderr] Running unittests src/main.rs (/opt/rustwide/target/debug/deps/llm_jury-c3331dea74d7c03e) [INFO] [stderr] Running tests/cli_tests.rs (/opt/rustwide/target/debug/deps/cli_tests-8488958538bc1ebb) [INFO] [stdout] [INFO] [stdout] running 0 tests [INFO] [stdout] [INFO] [stdout] test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s [INFO] [stdout] [INFO] [stdout] [INFO] [stdout] running 6 tests [INFO] [stdout] test test_cli_help ... ok [INFO] [stdout] test test_cli_estimate_basic ... ok [INFO] [stdout] test test_cli_synth_experiment ... ok [INFO] [stdout] test test_cli_estimate_with_files ... ok [INFO] [stdout] test test_cli_version ... ok [INFO] [stdout] test test_cli_error_handling ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.44s [INFO] [stdout] [INFO] [stderr] Running tests/integration_test.rs (/opt/rustwide/target/debug/deps/integration_test-a9aff611830fb6e3) [INFO] [stdout] [INFO] [stdout] running 11 tests [INFO] [stdout] test test_boundary_conditions ... ok [INFO] [stdout] test test_csv_file_loading ... ok [INFO] [stdout] test test_error_handling ... ok [INFO] [stdout] test test_performance_benchmark ... ignored [INFO] [stdout] test test_utility_functions ... ok [INFO] [stdout] test test_confidence_intervals ... ok [INFO] [stdout] test test_reproducibility ... ok [INFO] [stdout] test test_complete_workflow ... ok [INFO] [stdout] test test_judge_metrics ... ok [INFO] [stdout] test test_example_scenarios ... ok [INFO] [stdout] test test_large_dataset ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 10 passed; 0 failed; 1 ignored; 0 measured; 0 filtered out; finished in 0.40s [INFO] [stdout] [INFO] [stderr] Doc-tests llmjury [INFO] [stdout] [INFO] [stdout] running 7 tests [INFO] [stdout] test src/utils.rs - utils::load_binary_from_csv (line 50) - compile ... ok [INFO] [stdout] test src/synthetic.rs - synthetic::generate_unlabeled_data (line 178) ... ok [INFO] [stdout] test src/synthetic.rs - synthetic::generate_test_data (line 104) ... ok [INFO] [stdout] test src/synthetic.rs - synthetic::create_example_dataset (line 385) ... ok [INFO] [stdout] test src/synthetic.rs - synthetic::run_sensitivity_experiment (line 268) ... ok [INFO] [stdout] test src/bias_correction.rs - bias_correction::estimate_success_rate (line 124) ... ok [INFO] [stdout] test src/utils.rs - utils::parse_binary_string (line 11) ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 7 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 1.23s [INFO] [stdout] [INFO] running `Command { std: "docker" "inspect" "bbe1bbdfd3e0418c7d220615273f51d443aba1e1f0753474d01a9d94327852dd", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "bbe1bbdfd3e0418c7d220615273f51d443aba1e1f0753474d01a9d94327852dd", kill_on_drop: false }` [INFO] [stdout] bbe1bbdfd3e0418c7d220615273f51d443aba1e1f0753474d01a9d94327852dd