[INFO] fetching crate smoleval 0.2.0... [INFO] testing smoleval-0.2.0 against try#dec9417b8611e34e787a3e4c37686b5131f9e5c5 for pr-154210-2 [INFO] extracting crate smoleval 0.2.0 into /workspace/builds/worker-5-tc2/source [INFO] started tweaking crates.io crate smoleval 0.2.0 [INFO] removed 0 missing tests [INFO] finished tweaking crates.io crate smoleval 0.2.0 [INFO] tweaked toml for crates.io crate smoleval 0.2.0 written to /workspace/builds/worker-5-tc2/source/Cargo.toml [INFO] validating manifest of crates.io crate smoleval 0.2.0 on toolchain dec9417b8611e34e787a3e4c37686b5131f9e5c5 [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+dec9417b8611e34e787a3e4c37686b5131f9e5c5" "metadata" "--manifest-path" "Cargo.toml" "--no-deps", kill_on_drop: false }` [INFO] crate crates.io crate smoleval 0.2.0 already has a lockfile, it will not be regenerated [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+dec9417b8611e34e787a3e4c37686b5131f9e5c5" "fetch" "--manifest-path" "Cargo.toml", kill_on_drop: false }` [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+dec9417b8611e34e787a3e4c37686b5131f9e5c5" "metadata" "--no-deps" "--format-version=1", kill_on_drop: false }` [INFO] [stdout] f96b1285849eee6d630e219e0acbc110d184c6b55dbcce936186b66c40b03480 [INFO] running `Command { std: "docker" "start" "-a" "f96b1285849eee6d630e219e0acbc110d184c6b55dbcce936186b66c40b03480", kill_on_drop: false }` [INFO] running `Command { std: "docker" "inspect" "f96b1285849eee6d630e219e0acbc110d184c6b55dbcce936186b66c40b03480", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "f96b1285849eee6d630e219e0acbc110d184c6b55dbcce936186b66c40b03480", kill_on_drop: false }` [INFO] [stdout] f96b1285849eee6d630e219e0acbc110d184c6b55dbcce936186b66c40b03480 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=forbid" "-e" "RUSTDOCFLAGS=--cap-lints=forbid" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+dec9417b8611e34e787a3e4c37686b5131f9e5c5" "build" "--frozen" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] 6545f8a5c1cffb469e5e254ab076f5faa71b1b933f2fc4f362afff3ac819119a [INFO] running `Command { std: "docker" "start" "-a" "6545f8a5c1cffb469e5e254ab076f5faa71b1b933f2fc4f362afff3ac819119a", kill_on_drop: false }` [INFO] [stderr] Compiling syn v2.0.117 [INFO] [stderr] Compiling libc v0.2.183 [INFO] [stderr] Compiling futures-core v0.3.32 [INFO] [stderr] Compiling fs_extra v1.3.0 [INFO] [stderr] Compiling dunce v1.0.5 [INFO] [stderr] Compiling futures-sink v0.3.32 [INFO] [stderr] Compiling aws-lc-rs v1.16.1 [INFO] [stderr] Compiling zeroize v1.8.2 [INFO] [stderr] Compiling http v1.4.0 [INFO] [stderr] Compiling litemap v0.8.1 [INFO] [stderr] Compiling writeable v0.6.2 [INFO] [stderr] Compiling futures-task v0.3.32 [INFO] [stderr] Compiling futures-io v0.3.32 [INFO] [stderr] Compiling icu_properties_data v2.1.2 [INFO] [stderr] Compiling icu_normalizer_data v2.1.1 [INFO] [stderr] Compiling indexmap v2.13.0 [INFO] [stderr] Compiling rustls v0.23.37 [INFO] [stderr] Compiling futures-channel v0.3.32 [INFO] [stderr] Compiling tracing v0.1.44 [INFO] [stderr] Compiling percent-encoding v2.3.2 [INFO] [stderr] Compiling rustls-pki-types v1.14.0 [INFO] [stderr] Compiling zmij v1.0.21 [INFO] [stderr] Compiling sync_wrapper v1.0.2 [INFO] [stderr] Compiling ipnet v2.12.0 [INFO] [stderr] Compiling openssl-probe v0.2.1 [INFO] [stderr] Compiling bitflags v2.11.0 [INFO] [stderr] Compiling iri-string v0.7.10 [INFO] [stderr] Compiling form_urlencoded v1.2.2 [INFO] [stderr] Compiling encoding_rs v0.8.35 [INFO] [stderr] Compiling ryu v1.0.23 [INFO] [stderr] Compiling rustls-native-certs v0.8.3 [INFO] [stderr] Compiling serde_json v1.0.149 [INFO] [stderr] Compiling http-body v1.0.1 [INFO] [stderr] Compiling http-body-util v0.1.3 [INFO] [stderr] Compiling jobserver v0.1.34 [INFO] [stderr] Compiling mio v1.1.1 [INFO] [stderr] Compiling socket2 v0.6.3 [INFO] [stderr] Compiling cc v1.2.56 [INFO] [stderr] Compiling tokio v1.50.0 [INFO] [stderr] Compiling cmake v0.1.57 [INFO] [stderr] Compiling aws-lc-sys v0.38.0 [INFO] [stderr] Compiling synstructure v0.13.2 [INFO] [stderr] Compiling zerofrom-derive v0.1.6 [INFO] [stderr] Compiling yoke-derive v0.8.1 [INFO] [stderr] Compiling zerovec-derive v0.11.2 [INFO] [stderr] Compiling displaydoc v0.2.5 [INFO] [stderr] Compiling futures-macro v0.3.32 [INFO] [stderr] Compiling serde_derive v1.0.228 [INFO] [stderr] Compiling thiserror-impl v2.0.18 [INFO] [stderr] Compiling futures-util v0.3.32 [INFO] [stderr] Compiling zerofrom v0.1.6 [INFO] [stderr] Compiling thiserror v2.0.18 [INFO] [stderr] Compiling yoke v0.8.1 [INFO] [stderr] Compiling zerotrie v0.2.3 [INFO] [stderr] Compiling zerovec v0.11.5 [INFO] [stderr] Compiling tinystr v0.8.2 [INFO] [stderr] Compiling potential_utf v0.1.4 [INFO] [stderr] Compiling icu_locale_core v2.1.1 [INFO] [stderr] Compiling icu_collections v2.1.1 [INFO] [stderr] Compiling serde v1.0.228 [INFO] [stderr] Compiling tokio-util v0.7.18 [INFO] [stderr] Compiling serde_yaml v0.9.34+deprecated [INFO] [stderr] Compiling icu_provider v2.1.1 [INFO] [stderr] Compiling h2 v0.4.13 [INFO] [stderr] Compiling icu_normalizer v2.1.1 [INFO] [stderr] Compiling icu_properties v2.1.2 [INFO] [stderr] Compiling tower v0.5.3 [INFO] [stderr] Compiling futures-executor v0.3.32 [INFO] [stderr] Compiling futures v0.3.32 [INFO] [stderr] Compiling tower-http v0.6.8 [INFO] [stderr] Compiling idna_adapter v1.2.1 [INFO] [stderr] Compiling idna v1.1.0 [INFO] [stderr] Compiling url v2.5.8 [INFO] [stderr] Compiling hyper v1.8.1 [INFO] [stderr] Compiling hyper-util v0.1.20 [INFO] [stderr] Compiling rustls-webpki v0.103.9 [INFO] [stderr] Compiling tokio-rustls v0.26.4 [INFO] [stderr] Compiling rustls-platform-verifier v0.6.2 [INFO] [stderr] Compiling hyper-rustls v0.27.7 [INFO] [stderr] Compiling reqwest v0.13.2 [INFO] [stderr] Compiling smoleval v0.2.0 (/opt/rustwide/workdir) [INFO] [stderr] Finished `dev` profile [unoptimized + debuginfo] target(s) in 1m 00s [INFO] running `Command { std: "docker" "inspect" "6545f8a5c1cffb469e5e254ab076f5faa71b1b933f2fc4f362afff3ac819119a", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "6545f8a5c1cffb469e5e254ab076f5faa71b1b933f2fc4f362afff3ac819119a", kill_on_drop: false }` [INFO] [stdout] 6545f8a5c1cffb469e5e254ab076f5faa71b1b933f2fc4f362afff3ac819119a [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=forbid" "-e" "RUSTDOCFLAGS=--cap-lints=forbid" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+dec9417b8611e34e787a3e4c37686b5131f9e5c5" "test" "--frozen" "--no-run" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] 95560e7b0bef623cafee8b6b7563a2aad460effbd43e861e6b532bce896543fb [INFO] running `Command { std: "docker" "start" "-a" "95560e7b0bef623cafee8b6b7563a2aad460effbd43e861e6b532bce896543fb", kill_on_drop: false }` [INFO] [stderr] Compiling parking_lot_core v0.9.12 [INFO] [stderr] Compiling errno v0.3.14 [INFO] [stderr] Compiling tokio-macros v2.6.1 [INFO] [stderr] Compiling signal-hook-registry v1.4.8 [INFO] [stderr] Compiling parking_lot v0.12.5 [INFO] [stderr] Compiling tokio v1.50.0 [INFO] [stderr] Compiling tokio-util v0.7.18 [INFO] [stderr] Compiling tokio-rustls v0.26.4 [INFO] [stderr] Compiling tower v0.5.3 [INFO] [stderr] Compiling tower-http v0.6.8 [INFO] [stderr] Compiling h2 v0.4.13 [INFO] [stderr] Compiling hyper v1.8.1 [INFO] [stderr] Compiling hyper-util v0.1.20 [INFO] [stderr] Compiling hyper-rustls v0.27.7 [INFO] [stderr] Compiling reqwest v0.13.2 [INFO] [stderr] Compiling smoleval v0.2.0 (/opt/rustwide/workdir) [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 24.12s [INFO] running `Command { std: "docker" "inspect" "95560e7b0bef623cafee8b6b7563a2aad460effbd43e861e6b532bce896543fb", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "95560e7b0bef623cafee8b6b7563a2aad460effbd43e861e6b532bce896543fb", kill_on_drop: false }` [INFO] [stdout] 95560e7b0bef623cafee8b6b7563a2aad460effbd43e861e6b532bce896543fb [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-5-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=forbid" "-e" "RUSTDOCFLAGS=--cap-lints=forbid" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+dec9417b8611e34e787a3e4c37686b5131f9e5c5" "test" "--frozen", kill_on_drop: false }` [INFO] [stdout] cf0bf2d41c66613c004323a5795fbd4397ad8ab70e77eb99cbba9649f67e669a [INFO] running `Command { std: "docker" "start" "-a" "cf0bf2d41c66613c004323a5795fbd4397ad8ab70e77eb99cbba9649f67e669a", kill_on_drop: false }` [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 0.21s [INFO] [stderr] Running unittests src/lib.rs (/opt/rustwide/target/debug/deps/smoleval-2cd9b8b66f86aada) [INFO] [stdout] [INFO] [stdout] running 93 tests [INFO] [stdout] test agent::tests::agent_response_serialize_roundtrip ... ok [INFO] [stdout] test agent::tests::tool_call_deserialize_with_arguments ... ok [INFO] [stdout] test agent::tests::agent_response_missing_text_fails ... ok [INFO] [stdout] test agent::tests::tool_call_deserialize_without_arguments ... ok [INFO] [stdout] test agent::tests::agent_response_deserialize_no_tool_calls ... ok [INFO] [stdout] test agent::tests::tool_call_serialize_roundtrip ... ok [INFO] [stdout] test agent::tests::agent_response_deserialize_full ... ok [INFO] [stdout] test check::tests::check_result_build_boundary_one ... ok [INFO] [stdout] test check::tests::check_result_build_boundary_zero ... ok [INFO] [stdout] test check::tests::check_result_build_valid ... ok [INFO] [stdout] test check::tests::check_result_build_invalid ... ok [INFO] [stdout] test check::tests::check_result_reason_preserved ... ok [INFO] [stdout] test check::tests::check_spec_deserialize ... ok [INFO] [stdout] test check::tests::check_result_fail ... ok [INFO] [stdout] test check::tests::check_result_pass ... ok [INFO] [stdout] test check::tests::contains_all_case_sensitive ... ok [INFO] [stdout] test check::tests::exact_match_pass ... ok [INFO] [stdout] test check::tests::not_contains_empty_values ... ok [INFO] [stdout] test check::tests::not_contains_pass ... ok [INFO] [stdout] test check::tests::not_contains_case_sensitive ... ok [INFO] [stdout] test check::tests::contains_all_fail ... ok [INFO] [stdout] test check::tests::contains_all_invalid_config ... ok [INFO] [stdout] test check::tests::contains_any_case_sensitive ... ok [INFO] [stdout] test check::tests::contains_any_empty_values ... ok [INFO] [stdout] test check::tests::contains_any_fail ... ok [INFO] [stdout] test check::tests::contains_any_pass ... ok [INFO] [stdout] test check::tests::registry_custom_check ... ok [INFO] [stdout] test check::tests::exact_match_empty_string ... ok [INFO] [stdout] test check::tests::registry_empty_cannot_create ... ok [INFO] [stdout] test check::tests::registry_tool_used_at_least_from_config ... ok [INFO] [stdout] test check::tests::registry_tools_used_in_order_from_config ... ok [INFO] [stdout] test check::tests::tool_used_at_least_fail_insufficient ... ok [INFO] [stdout] test check::tests::registry_unknown_type ... ok [INFO] [stdout] test check::tests::tool_used_at_least_pass_multiple ... ok [INFO] [stdout] test check::tests::tool_used_at_least_fail_not_present ... ok [INFO] [stdout] test check::tests::tool_used_at_most_fail ... ok [INFO] [stdout] test check::tests::tool_used_at_least_with_params_fail ... ok [INFO] [stdout] test check::tests::tool_used_at_most_pass_zero_calls ... ok [INFO] [stdout] test check::tests::tool_used_at_most_pass_exact ... ok [INFO] [stdout] test check::tests::tool_used_at_most_with_params ... ok [INFO] [stdout] test check::tests::tool_used_at_least_pass_default_times ... ok [INFO] [stdout] test check::tests::tool_used_at_least_with_params_pass ... ok [INFO] [stdout] test check::tests::exact_match_fail ... ok [INFO] [stdout] test check::tests::tool_used_exactly_config_requires_times ... ok [INFO] [stdout] test check::tests::tool_used_exactly_fail_too_few ... ok [INFO] [stdout] test check::tests::tool_used_exactly_fail_too_many ... ok [INFO] [stdout] test check::tests::tool_used_exactly_zero_pass ... ok [INFO] [stdout] test check::tests::tools_used_in_order_fail_missing ... ok [INFO] [stdout] test check::tests::tools_used_in_order_empty_passes ... ok [INFO] [stdout] test check::tests::tools_used_in_order_pass_exact ... ok [INFO] [stdout] test check::tests::tools_used_in_order_fail_wrong_order ... ok [INFO] [stdout] test check::tests::tools_used_in_order_pass_with_extras ... ok [INFO] [stdout] test dataset::tests::from_file_nonexistent ... ok [INFO] [stdout] test dataset::tests::parse_invalid_yaml ... ok [INFO] [stdout] test dataset::tests::parse_minimal_dataset ... ok [INFO] [stdout] test dataset::tests::check_spec_preserves_config ... ok [INFO] [stdout] test check::tests::tool_used_exactly_pass ... ok [INFO] [stdout] test dataset::tests::parse_missing_required_field_name ... ok [INFO] [stdout] test check::tests::tools_used_in_order_repeated ... ok [INFO] [stdout] test dataset::tests::parse_empty_tests_list ... ok [INFO] [stdout] test dataset::tests::parse_duplicate_test_names ... ok [INFO] [stdout] test error::tests::agent_error_display ... ok [INFO] [stdout] test error::tests::check_config_display ... ok [INFO] [stdout] test dataset::tests::parse_full_dataset ... ok [INFO] [stdout] test dataset::tests::parse_missing_required_field_prompt ... ok [INFO] [stdout] test dataset::tests::parse_missing_required_field_tests ... ok [INFO] [stdout] test check::tests::contains_all_empty_values ... ok [INFO] [stdout] test error::tests::dataset_io_error_display ... ok [INFO] [stdout] test error::tests::invalid_score_display ... ok [INFO] [stdout] test error::tests::io_error_converts ... ok [INFO] [stdout] test error::tests::unknown_check_display ... ok [INFO] [stdout] test error::tests::yaml_error_converts ... ok [INFO] [stdout] test eval::tests::eval_report_all_pass ... ok [INFO] [stdout] test eval::tests::mean_score_empty ... ok [INFO] [stdout] test eval::tests::run_checks_multiple_mixed ... ok [INFO] [stdout] test eval::tests::mean_score_partial ... ok [INFO] [stdout] test eval::tests::mean_score_mixed ... ok [INFO] [stdout] test eval::tests::run_checks_no_checks ... ok [INFO] [stdout] test eval::tests::eval_report_empty ... ok [INFO] [stdout] test eval::tests::eval_report_mixed ... ok [INFO] [stdout] test eval::tests::evaluate_agent_error_propagates ... ok [INFO] [stdout] test eval::tests::evaluate_empty_dataset ... ok [INFO] [stdout] test eval::tests::evaluate_no_checks_scores_one ... ok [INFO] [stdout] test eval::tests::run_checks_unknown_type_errors ... ok [INFO] [stdout] test eval::tests::run_checks_single_passing ... ok [INFO] [stdout] test eval::tests::mean_score_single_fail ... ok [INFO] [stdout] test eval::tests::mean_score_single_pass ... ok [INFO] [stdout] test check::tests::not_contains_fail ... ok [INFO] [stdout] test check::tests::registry_default_is_empty ... ok [INFO] [stdout] test eval::tests::evaluate_preserves_response ... ok [INFO] [stdout] test check::tests::registry_builtins_resolve ... ok [INFO] [stdout] test check::tests::contains_all_pass ... ok [INFO] [stdout] test dataset::tests::serialize_roundtrip ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 93 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.21s [INFO] [stdout] [INFO] [stderr] Running tests/integration.rs (/opt/rustwide/target/debug/deps/integration-565cf8a61712d7c7) [INFO] [stdout] [INFO] [stdout] running 28 tests [INFO] [stdout] test custom_check_registration ... ok [INFO] [stdout] test evaluate_concurrent_captures_agent_errors ... ok [INFO] [stdout] test contains_all_case_sensitive ... ok [INFO] [stdout] test multiple_custom_checks_in_one_test ... ok [INFO] [stdout] test evaluate_concurrent_produces_same_results ... ok [INFO] [stdout] test evaluate_unknown_check_fails_fast ... ok [INFO] [stdout] test evaluate_unknown_check_captured_without_fail_fast ... ok [INFO] [stdout] test on_result_callback_invoked_concurrently ... ok [INFO] [stdout] test on_result_callback_invoked_for_each_test ... ok [INFO] [stdout] test evaluate_fail_fast_aborts_on_agent_error ... ok [INFO] [stdout] test evaluation_preserves_test_case_metadata ... ok [INFO] [stdout] test evaluate_no_fail_fast_captures_errors ... ok [INFO] [stdout] test report_has_nonzero_duration ... ok [INFO] [stdout] test response_not_contains_fails_when_present ... ok [INFO] [stdout] test tool_used_at_least_with_zero_times ... ok [INFO] [stdout] test report_metrics_with_mixed_outcomes ... ok [INFO] [stdout] test tool_used_exactly_fail_wrong_tool ... ok [INFO] [stdout] test test_case_labels_match_scores ... ok [INFO] [stdout] test tool_used_exactly_pass ... ok [INFO] [stdout] test parse_yaml_from_string ... ok [INFO] [stdout] test partial_score_with_mixed_checks ... ok [INFO] [stdout] test load_yaml_and_evaluate ... ok [INFO] [stdout] test validate_dataset_standalone ... ok [INFO] [stdout] test preflight_catches_unknown_check_before_agent_runs ... ok [INFO] [stdout] test preflight_collects_multiple_errors ... ok [INFO] [stdout] test preflight_invalid_config_caught ... ok [INFO] [stdout] test contains_any_fails_when_none_match ... ok [INFO] [stdout] test preflight_valid_dataset_runs_normally ... ok [INFO] [stdout] [INFO] [stdout] test result: ok. 28 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.06s [INFO] [stdout] [INFO] [stderr] Doc-tests smoleval [INFO] [stdout] [INFO] [stdout] running 0 tests [INFO] [stdout] [INFO] [stdout] test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s [INFO] [stdout] [INFO] running `Command { std: "docker" "inspect" "cf0bf2d41c66613c004323a5795fbd4397ad8ab70e77eb99cbba9649f67e669a", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "cf0bf2d41c66613c004323a5795fbd4397ad8ab70e77eb99cbba9649f67e669a", kill_on_drop: false }` [INFO] [stdout] cf0bf2d41c66613c004323a5795fbd4397ad8ab70e77eb99cbba9649f67e669a