[INFO] fetching crate koho 0.1.1... [INFO] testing koho-0.1.1 against beta-2026-04-21 for beta-1.96-1 [INFO] extracting crate koho 0.1.1 into /workspace/builds/worker-0-tc2/source [INFO] started tweaking crates.io crate koho 0.1.1 [INFO] finished tweaking crates.io crate koho 0.1.1 [INFO] tweaked toml for crates.io crate koho 0.1.1 written to /workspace/builds/worker-0-tc2/source/Cargo.toml [INFO] validating manifest of crates.io crate koho 0.1.1 on toolchain beta-2026-04-21 [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+beta-2026-04-21" "metadata" "--manifest-path" "Cargo.toml" "--no-deps", kill_on_drop: false }` [INFO] crate crates.io crate koho 0.1.1 already has a lockfile, it will not be regenerated [INFO] running `Command { std: CARGO_HOME="/workspace/cargo-home" RUSTUP_HOME="/workspace/rustup-home" "/workspace/cargo-home/bin/cargo" "+beta-2026-04-21" "fetch" "--manifest-path" "Cargo.toml", kill_on_drop: false }` [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+beta-2026-04-21" "metadata" "--no-deps" "--format-version=1", kill_on_drop: false }` [INFO] [stdout] 7b3f38a3e6d9cdb20afd7acefce51821ad1ba68286326e8750dcc74eb5a6a61a [INFO] running `Command { std: "docker" "start" "-a" "7b3f38a3e6d9cdb20afd7acefce51821ad1ba68286326e8750dcc74eb5a6a61a", kill_on_drop: false }` [INFO] running `Command { std: "docker" "inspect" "7b3f38a3e6d9cdb20afd7acefce51821ad1ba68286326e8750dcc74eb5a6a61a", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "7b3f38a3e6d9cdb20afd7acefce51821ad1ba68286326e8750dcc74eb5a6a61a", kill_on_drop: false }` [INFO] [stdout] 7b3f38a3e6d9cdb20afd7acefce51821ad1ba68286326e8750dcc74eb5a6a61a [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=warn" "-e" "RUSTDOCFLAGS=--cap-lints=warn" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+beta-2026-04-21" "build" "--frozen" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] 8f88572894e73afb8f7de8fa18eee139262cddc3f24c90eb532c9ab4a0aaf053 [INFO] running `Command { std: "docker" "start" "-a" "8f88572894e73afb8f7de8fa18eee139262cddc3f24c90eb532c9ab4a0aaf053", kill_on_drop: false }` [INFO] [stderr] Compiling getrandom v0.3.2 [INFO] [stderr] Compiling zerocopy v0.8.25 [INFO] [stderr] Compiling reborrow v0.5.5 [INFO] [stderr] Compiling seq-macro v0.3.6 [INFO] [stderr] Compiling num-traits v0.2.19 [INFO] [stderr] Compiling pulp v0.21.5 [INFO] [stderr] Compiling hashbrown v0.15.3 [INFO] [stderr] Compiling raw-cpuid v11.5.0 [INFO] [stderr] Compiling raw-cpuid v10.7.0 [INFO] [stderr] Compiling toml_datetime v0.6.9 [INFO] [stderr] Compiling winnow v0.7.10 [INFO] [stderr] Compiling bytemuck_derive v1.9.3 [INFO] [stderr] Compiling rayon v1.10.0 [INFO] [stderr] Compiling stable_deref_trait v1.2.0 [INFO] [stderr] Compiling memchr v2.7.4 [INFO] [stderr] Compiling tracing-core v0.1.33 [INFO] [stderr] Compiling zip v1.1.4 [INFO] [stderr] Compiling memmap2 v0.9.5 [INFO] [stderr] Compiling yoke v0.7.5 [INFO] [stderr] Compiling num_cpus v1.16.0 [INFO] [stderr] Compiling libloading v0.8.7 [INFO] [stderr] Compiling rand_core v0.9.3 [INFO] [stderr] Compiling tracing v0.1.41 [INFO] [stderr] Compiling serde_json v1.0.140 [INFO] [stderr] Compiling indexmap v2.9.0 [INFO] [stderr] Compiling bytemuck v1.23.0 [INFO] [stderr] Compiling dyn-stack v0.13.0 [INFO] [stderr] Compiling dyn-stack v0.10.0 [INFO] [stderr] Compiling num-complex v0.4.6 [INFO] [stderr] Compiling num-integer v0.1.46 [INFO] [stderr] Compiling safetensors v0.4.5 [INFO] [stderr] Compiling num-bigint v0.4.6 [INFO] [stderr] Compiling num-iter v0.1.45 [INFO] [stderr] Compiling ppv-lite86 v0.2.21 [INFO] [stderr] Compiling pulp v0.18.22 [INFO] [stderr] Compiling toml_edit v0.22.26 [INFO] [stderr] Compiling rand_chacha v0.9.0 [INFO] [stderr] Compiling rand v0.9.1 [INFO] [stderr] Compiling num-rational v0.4.2 [INFO] [stderr] Compiling num v0.4.3 [INFO] [stderr] Compiling rand_distr v0.5.1 [INFO] [stderr] Compiling half v2.6.0 [INFO] [stderr] Compiling proc-macro-crate v3.3.0 [INFO] [stderr] Compiling gemm-common v0.18.2 [INFO] [stderr] Compiling gemm-common v0.17.1 [INFO] [stderr] Compiling num_enum_derive v0.7.3 [INFO] [stderr] Compiling gemm-f32 v0.17.1 [INFO] [stderr] Compiling gemm-f64 v0.17.1 [INFO] [stderr] Compiling gemm-c64 v0.17.1 [INFO] [stderr] Compiling gemm-c32 v0.17.1 [INFO] [stderr] Compiling gemm-f32 v0.18.2 [INFO] [stderr] Compiling gemm-c64 v0.18.2 [INFO] [stderr] Compiling gemm-c32 v0.18.2 [INFO] [stderr] Compiling gemm-f64 v0.18.2 [INFO] [stderr] Compiling gemm-f16 v0.17.1 [INFO] [stderr] Compiling num_enum v0.7.3 [INFO] [stderr] Compiling gemm-f16 v0.18.2 [INFO] [stderr] Compiling gemm v0.17.1 [INFO] [stderr] Compiling gemm v0.18.2 [INFO] [stderr] Compiling ug v0.4.0 [INFO] [stderr] Compiling candle-core v0.9.1 [INFO] [stderr] Compiling candle-nn v0.9.1 [INFO] [stderr] Compiling koho v0.1.1 (/opt/rustwide/workdir) [INFO] [stderr] Finished `dev` profile [unoptimized + debuginfo] target(s) in 44.74s [INFO] running `Command { std: "docker" "inspect" "8f88572894e73afb8f7de8fa18eee139262cddc3f24c90eb532c9ab4a0aaf053", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "8f88572894e73afb8f7de8fa18eee139262cddc3f24c90eb532c9ab4a0aaf053", kill_on_drop: false }` [INFO] [stdout] 8f88572894e73afb8f7de8fa18eee139262cddc3f24c90eb532c9ab4a0aaf053 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=warn" "-e" "RUSTDOCFLAGS=--cap-lints=warn" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+beta-2026-04-21" "test" "--frozen" "--no-run" "--message-format=json", kill_on_drop: false }` [INFO] [stdout] ce0fa3b881fcca4d611f2b606b30dacdb1394d1ec91c88930fc82a472e249f28 [INFO] running `Command { std: "docker" "start" "-a" "ce0fa3b881fcca4d611f2b606b30dacdb1394d1ec91c88930fc82a472e249f28", kill_on_drop: false }` [INFO] [stderr] Compiling koho v0.1.1 (/opt/rustwide/workdir) [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 2.71s [INFO] running `Command { std: "docker" "inspect" "ce0fa3b881fcca4d611f2b606b30dacdb1394d1ec91c88930fc82a472e249f28", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "ce0fa3b881fcca4d611f2b606b30dacdb1394d1ec91c88930fc82a472e249f28", kill_on_drop: false }` [INFO] [stdout] ce0fa3b881fcca4d611f2b606b30dacdb1394d1ec91c88930fc82a472e249f28 [INFO] running `Command { std: "docker" "create" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/target:/opt/rustwide/target:rw,Z" "-v" "/var/lib/crater-agent-workspace/builds/worker-0-tc2/source:/opt/rustwide/workdir:ro,Z" "-v" "/var/lib/crater-agent-workspace/cargo-home:/opt/rustwide/cargo-home:ro,Z" "-v" "/var/lib/crater-agent-workspace/rustup-home:/opt/rustwide/rustup-home:ro,Z" "-e" "SOURCE_DIR=/opt/rustwide/workdir" "-e" "CARGO_TARGET_DIR=/opt/rustwide/target" "-e" "CARGO_INCREMENTAL=0" "-e" "RUST_BACKTRACE=full" "-e" "RUSTFLAGS=--cap-lints=warn" "-e" "RUSTDOCFLAGS=--cap-lints=warn" "-e" "CARGO_HOME=/opt/rustwide/cargo-home" "-e" "RUSTUP_HOME=/opt/rustwide/rustup-home" "-w" "/opt/rustwide/workdir" "-m" "1610612736" "--user" "0:0" "--network" "none" "ghcr.io/rust-lang/crates-build-env/linux@sha256:d429b63d4308055ea97f60fb1d3dfca48854a00942f1bd2ad806beaf015945ec" "/opt/rustwide/cargo-home/bin/cargo" "+beta-2026-04-21" "test" "--frozen", kill_on_drop: false }` [INFO] [stdout] bda2f3d42daf17c444b0b88461d295ec835528a508a3c84d3adfb76902200f62 [INFO] running `Command { std: "docker" "start" "-a" "bda2f3d42daf17c444b0b88461d295ec835528a508a3c84d3adfb76902200f62", kill_on_drop: false }` [INFO] [stderr] Finished `test` profile [unoptimized + debuginfo] target(s) in 0.14s [INFO] [stderr] Running unittests src/lib.rs (/opt/rustwide/target/debug/deps/koho-8d732d59edc7325e) [INFO] [stdout] [INFO] [stdout] running 39 tests [INFO] [stdout] test math::cell::tests::test_invalid_cell_idx_in_incidences ... ok [INFO] [stdout] test math::cell::tests::test_attach_edge_and_incidence_relations ... ok [INFO] [stdout] test math::cell::tests::test_attach_zero_cells ... ok [INFO] [stdout] test math::sheaf::adjoint_and_laplacian_tests::test_adjoint_with_zero_restriction ... ok [INFO] [stdout] test math::cell::tests::test_incidences_empty_and_invalid ... ok [INFO] [stdout] test math::cell::tests::test_dimension_mismatch_on_attach ... ok [INFO] [stdout] test math::sheaf::adjoint_and_laplacian_tests::test_k_hodge_laplacian_up_only ... ok [INFO] [stdout] test math::sheaf::generated_restrictions_tests::test_different_noise_levels ... ok [INFO] [stdout] test math::sheaf::adjoint_and_laplacian_tests::test_hodge_laplacian_triangle ... ok [INFO] [stdout] test math::sheaf::adjoint_and_laplacian_tests::test_k_coboundary_simple ... ok [INFO] [stdout] test math::sheaf::generated_restrictions_tests::test_zero_noise_gives_identity ... ok [INFO] [stdout] test math::sheaf::generated_restrictions_tests::test_triangle_with_generated_restrictions ... ok [INFO] [stdout] test math::sheaf::tests::test_set_restriction_errors_and_success ... ok [INFO] [stdout] test math::sheaf::tests::test_get_k_cochain_and_errors ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_frobenius_norm ... ok [INFO] [stdout] test math::sheaf::generated_restrictions_tests::test_generated_restrictions_hodge_laplacian ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_identity_rectangular_tall ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_identity_rectangular_wide ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_identity_like_method ... ok [INFO] [stdout] test math::sheaf::tests::test_attach_sections_and_cells ... ok [INFO] [stdout] test math::sheaf::tests::test_coboundary_computation ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_add ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_from_slice ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_matmul ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_scale_numeric ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_new_and_shape ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_transpose ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_vector_multiplication ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_matrix_vector_dimension_mismatch ... ok [INFO] [stdout] test math::sheaf::tests::test_init_empty_sheaf ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_identity_single_element ... ok [INFO] [stdout] test math::sheaf::adjoint_and_laplacian_tests::test_k_adjoint_coboundary_simple ... ok [INFO] [stdout] test math::sheaf::generated_restrictions_tests::test_generated_restrictions_coboundary_computation ... ok [INFO] [stdout] test math::sheaf::adjoint_and_laplacian_tests::test_dimensions_preserved ... ok [INFO] [stdout] test math::tensors::matrix_tests::test_identity_square ... ok [INFO] [stdout] test integration_tests::test_triangle_diffusion_learning ... ok [INFO] [stdout] test integration_tests::test_learned_vs_fixed_restrictions ... ok [INFO] [stdout] test integration_tests::test_edge_diffusion_learning ... FAILED [INFO] [stdout] test integration_tests::test_multiple_diffusion_layers ... ok [INFO] [stdout] [INFO] [stdout] failures: [INFO] [stdout] [INFO] [stdout] ---- integration_tests::test_edge_diffusion_learning stdout ---- [INFO] [stdout] uppers: [0, 2] [INFO] [stdout] got edges [INFO] [stdout] === Training Debug Info === [INFO] [stdout] Total parameters: 10 [INFO] [stdout] Parameter 0: shape=[1, 1], first_few_values=[-2.701618] [INFO] [stdout] Parameter 1: shape=[1, 1], first_few_values=[1.0523381] [INFO] [stdout] Parameter 2: shape=[1, 1], first_few_values=[0.95656186] [INFO] [stdout] Parameter 3: shape=[1, 1], first_few_values=[0.9418041] [INFO] [stdout] Parameter 4: shape=[1, 1], first_few_values=[0.98669875] [INFO] [stdout] Parameter 5: shape=[1, 1], first_few_values=[1.0675156] [INFO] [stdout] Parameter 6: shape=[1, 1], first_few_values=[0.9112029] [INFO] [stdout] Parameter 7: shape=[1, 1], first_few_values=[1.2749774] [INFO] [stdout] Parameter 8: shape=[1, 1], first_few_values=[1.1141753] [INFO] [stdout] Parameter 9: shape=[1, 1], first_few_values=[1.1580805] [INFO] [stdout] [INFO] [stdout] Epoch 1, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] --- End batch 0 --- [INFO] [stdout] Epoch 1: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 2, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] --- End batch 0 --- [INFO] [stdout] Epoch 2: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 3, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] --- End batch 0 --- [INFO] [stdout] Epoch 3: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 4, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 4: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 5, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 5: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 6, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 6: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 7, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 7: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 8, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 8: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 9, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 9: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 10, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 10: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 11, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 12, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 13, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 14, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 15, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 16, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 17, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 18, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 19, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 20, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 20: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 21, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 22, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 23, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 24, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 25, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 26, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 27, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 28, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 29, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 30, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 30: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 31, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 32, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 33, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 34, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 35, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 36, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 37, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 38, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 39, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 40, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 40: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 41, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 42, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 43, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 44, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 45, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 46, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 47, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 48, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 49, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 50, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 50: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 51, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 52, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 53, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 54, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 55, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 56, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 57, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 58, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 59, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 60, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 60: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 61, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 62, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 63, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 64, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 65, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 66, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 67, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 68, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 69, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 70, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 70: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 71, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 72, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 73, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 74, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 75, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 76, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 77, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 78, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 79, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 80, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 80: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 81, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 82, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 83, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 84, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 85, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 86, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 87, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 88, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 89, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 90, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 90: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 91, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 92, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 93, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 94, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 95, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 96, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 97, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 98, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 99, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 100, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 100: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 101, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 102, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 103, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 104, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 105, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 106, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 107, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 108, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 109, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 110, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 110: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 111, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 112, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 113, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 114, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 115, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 116, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 117, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 118, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 119, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 120, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 120: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 121, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 122, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 123, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 124, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 125, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 126, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 127, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 128, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 129, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 130, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 130: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 131, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 132, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 133, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 134, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 135, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 136, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 137, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 138, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stderr] error: test failed, to rerun pass `--lib` [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 139, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 140, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 140: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 141, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 142, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 143, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 144, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 145, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 146, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 147, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 148, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 149, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 150, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 150: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 151, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 152, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 153, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 154, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 155, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 156, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 157, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 158, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 159, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 160, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 160: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 161, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 162, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 163, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 164, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 165, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 166, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 167, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 168, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 169, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 170, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 170: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 171, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 172, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 173, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 174, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 175, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 176, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 177, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 178, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 179, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 180, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 180: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 181, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 182, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 183, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 184, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 185, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 186, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 187, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 188, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 189, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 190, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 190: avg_loss = 2.2766666 [INFO] [stdout] [INFO] [stdout] Epoch 191, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 192, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 193, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 194, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 195, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 196, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 197, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 198, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 199, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] [INFO] [stdout] Epoch 200, Batch 0 [INFO] [stdout] Input: [0.5, 0.5, 0.5] [INFO] [stdout] Target: [0.5, 0.3, 0.7] [INFO] [stdout] Output: [-1.0, -1.0, -1.0] [INFO] [stdout] Loss: 2.2766666 [INFO] [stdout] Loss tensor shape: [] [INFO] [stdout] Loss tensor dtype: F32 [INFO] [stdout] Computing gradients... [INFO] [stdout] Checking gradients for 10 parameters: [INFO] [stdout] Param 0: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 1: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 2: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 3: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 4: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 5: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 6: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 7: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 8: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Param 9: grad_norm=0, first_few_grads=[0.0] [INFO] [stdout] Applying optimizer step... [INFO] [stdout] Param 0 change norm: 0 [INFO] [stdout] Param 1 change norm: 0 [INFO] [stdout] Param 2 change norm: 0 [INFO] [stdout] Param 3 change norm: 0 [INFO] [stdout] Param 4 change norm: 0 [INFO] [stdout] Param 5 change norm: 0 [INFO] [stdout] Param 6 change norm: 0 [INFO] [stdout] Param 7 change norm: 0 [INFO] [stdout] Param 8 change norm: 0 [INFO] [stdout] Param 9 change norm: 0 [INFO] [stdout] Epoch 200: avg_loss = 2.2766666 [INFO] [stdout] Edge diffusion final loss: 2.2766666 [INFO] [stdout] [INFO] [stdout] thread 'integration_tests::test_edge_diffusion_learning' (18) panicked at src/lib.rs:442:9: [INFO] [stdout] Loss should be reasonable [INFO] [stdout] stack backtrace: [INFO] [stdout] 0: 0x60300efe6b2a - std[128c3efe2914e152]::backtrace_rs::backtrace::libunwind::trace [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/../../backtrace/src/backtrace/libunwind.rs:117:9 [INFO] [stdout] 1: 0x60300efe6b2a - std[128c3efe2914e152]::backtrace_rs::backtrace::trace_unsynchronized:: [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/../../backtrace/src/backtrace/mod.rs:66:14 [INFO] [stdout] 2: 0x60300efe6b2a - std[128c3efe2914e152]::sys::backtrace::_print_fmt [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/sys/backtrace.rs:74:9 [INFO] [stdout] 3: 0x60300efe6b2a - <::print::DisplayBacktrace as core[6771d259883166e6]::fmt::Display>::fmt [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/sys/backtrace.rs:44:26 [INFO] [stdout] 4: 0x60300effcaea - ::fmt [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/fmt/rt.rs:152:76 [INFO] [stdout] 5: 0x60300effcaea - core[6771d259883166e6]::fmt::write [INFO] [stdout] 6: 0x60300efebb32 - std[128c3efe2914e152]::io::default_write_fmt::> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/io/mod.rs:621:11 [INFO] [stdout] 7: 0x60300efebb32 - as std[128c3efe2914e152]::io::Write>::write_fmt [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/io/mod.rs:1976:13 [INFO] [stdout] 8: 0x60300efc4b8f - ::print [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/sys/backtrace.rs:47:9 [INFO] [stdout] 9: 0x60300efc4b8f - std[128c3efe2914e152]::panicking::default_hook::{closure#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:292:27 [INFO] [stdout] 10: 0x60300efde779 - std[128c3efe2914e152]::panicking::default_hook [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:316:9 [INFO] [stdout] 11: 0x60300e08735c - core[6771d259883166e6]::ops::function::Fn<(&'a std[128c3efe2914e152]::panic::PanicHookInfo<'b>,), Output = ()> + core[6771d259883166e6]::marker::Sync + core[6771d259883166e6]::marker::Send> as core[6771d259883166e6]::ops::function::Fn<(&std[128c3efe2914e152]::panic::PanicHookInfo,)>>::call [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/alloc/src/boxed.rs:2263:9 [INFO] [stdout] 12: 0x60300e08735c - test[b137923399915ecf]::test_main_with_exit_callback::::{closure#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/test/src/lib.rs:145:21 [INFO] [stdout] 13: 0x60300efde9f2 - core[6771d259883166e6]::ops::function::Fn<(&'a std[128c3efe2914e152]::panic::PanicHookInfo<'b>,), Output = ()> + core[6771d259883166e6]::marker::Sync + core[6771d259883166e6]::marker::Send> as core[6771d259883166e6]::ops::function::Fn<(&std[128c3efe2914e152]::panic::PanicHookInfo,)>>::call [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/alloc/src/boxed.rs:2263:9 [INFO] [stdout] 14: 0x60300efde9f2 - std[128c3efe2914e152]::panicking::panic_with_hook [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:833:13 [INFO] [stdout] 15: 0x60300efc4c7a - std[128c3efe2914e152]::panicking::panic_handler::{closure#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:691:13 [INFO] [stdout] 16: 0x60300efb9719 - std[128c3efe2914e152]::sys::backtrace::__rust_end_short_backtrace:: [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/sys/backtrace.rs:182:18 [INFO] [stdout] 17: 0x60300efc5a4d - __rustc[752cc74e29381ccc]::rust_begin_unwind [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:689:5 [INFO] [stdout] 18: 0x60300effd27c - core[6771d259883166e6]::panicking::panic_fmt [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/panicking.rs:80:14 [INFO] [stdout] 19: 0x60300e033491 - koho::integration_tests::test_edge_diffusion_learning::h9e01a119ad63714a [INFO] [stdout] at /opt/rustwide/workdir/src/lib.rs:442:9 [INFO] [stdout] 20: 0x60300e0335fc - koho::integration_tests::test_edge_diffusion_learning::{{closure}}::h10104b703e07aa7d [INFO] [stdout] at /opt/rustwide/workdir/src/lib.rs:416:42 [INFO] [stdout] 21: 0x60300e06d9f6 - core::ops::function::FnOnce::call_once::h05c5096f6d4c4057 [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/ops/function.rs:250:5 [INFO] [stdout] 22: 0x60300e07b45b - core[6771d259883166e6]::result::Result<(), alloc[fef50e8eecedd288]::string::String> as core[6771d259883166e6]::ops::function::FnOnce<()>>::call_once [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/ops/function.rs:250:5 [INFO] [stdout] 23: 0x60300e07b45b - test[b137923399915ecf]::__rust_begin_short_backtrace::, fn() -> core[6771d259883166e6]::result::Result<(), alloc[fef50e8eecedd288]::string::String>> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/test/src/lib.rs:663:18 [INFO] [stdout] 24: 0x60300e087e2b - test[b137923399915ecf]::run_test_in_process::{closure#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/test/src/lib.rs:686:74 [INFO] [stdout] 25: 0x60300e087e2b - as core[6771d259883166e6]::ops::function::FnOnce<()>>::call_once [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/panic/unwind_safe.rs:275:9 [INFO] [stdout] 26: 0x60300e087e2b - std[128c3efe2914e152]::panicking::catch_unwind::do_call::, core[6771d259883166e6]::result::Result<(), alloc[fef50e8eecedd288]::string::String>> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:581:40 [INFO] [stdout] 27: 0x60300e087e2b - std[128c3efe2914e152]::panicking::catch_unwind::, core[6771d259883166e6]::panic::unwind_safe::AssertUnwindSafe> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:544:19 [INFO] [stdout] 28: 0x60300e087e2b - std[128c3efe2914e152]::panic::catch_unwind::, core[6771d259883166e6]::result::Result<(), alloc[fef50e8eecedd288]::string::String>> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panic.rs:359:14 [INFO] [stdout] 29: 0x60300e087e2b - test[b137923399915ecf]::run_test_in_process [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/test/src/lib.rs:686:27 [INFO] [stdout] 30: 0x60300e087e2b - test[b137923399915ecf]::run_test::{closure#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/test/src/lib.rs:607:43 [INFO] [stdout] 31: 0x60300e082284 - test[b137923399915ecf]::run_test::{closure#1} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/test/src/lib.rs:637:41 [INFO] [stdout] 32: 0x60300e082284 - std[128c3efe2914e152]::sys::backtrace::__rust_begin_short_backtrace:: [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/sys/backtrace.rs:166:18 [INFO] [stdout] 33: 0x60300e08aa32 - std[128c3efe2914e152]::thread::lifecycle::spawn_unchecked::::{closure#1}::{closure#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/thread/lifecycle.rs:91:13 [INFO] [stdout] 34: 0x60300e08aa32 - ::{closure#1}::{closure#0}> as core[6771d259883166e6]::ops::function::FnOnce<()>>::call_once [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/panic/unwind_safe.rs:275:9 [INFO] [stdout] 35: 0x60300e08aa32 - std[128c3efe2914e152]::panicking::catch_unwind::do_call::::{closure#1}::{closure#0}>, ()> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:581:40 [INFO] [stdout] 36: 0x60300e08aa32 - std[128c3efe2914e152]::panicking::catch_unwind::<(), core[6771d259883166e6]::panic::unwind_safe::AssertUnwindSafe::{closure#1}::{closure#0}>> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panicking.rs:544:19 [INFO] [stdout] 37: 0x60300e08aa32 - std[128c3efe2914e152]::panic::catch_unwind::::{closure#1}::{closure#0}>, ()> [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/panic.rs:359:14 [INFO] [stdout] 38: 0x60300e08aa32 - std[128c3efe2914e152]::thread::lifecycle::spawn_unchecked::::{closure#1} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/thread/lifecycle.rs:89:26 [INFO] [stdout] 39: 0x60300e08aa32 - ::{closure#1} as core[6771d259883166e6]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0} [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/core/src/ops/function.rs:250:5 [INFO] [stdout] 40: 0x60300efe609f - + core[6771d259883166e6]::marker::Send> as core[6771d259883166e6]::ops::function::FnOnce<()>>::call_once [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/alloc/src/boxed.rs:2249:9 [INFO] [stdout] 41: 0x60300efe609f - ::new::thread_start [INFO] [stdout] at /rustc/8489f1adbe1c8f8921cf18b9962cf565237a5489/library/std/src/sys/thread/unix.rs:118:17 [INFO] [stdout] 42: 0x732282321aa4 - [INFO] [stdout] 43: 0x7322823aea64 - clone [INFO] [stdout] 44: 0x0 - [INFO] [stdout] [INFO] [stdout] [INFO] [stdout] failures: [INFO] [stdout] integration_tests::test_edge_diffusion_learning [INFO] [stdout] [INFO] [stdout] test result: FAILED. 38 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out; finished in 1.51s [INFO] [stdout] [INFO] running `Command { std: "docker" "inspect" "bda2f3d42daf17c444b0b88461d295ec835528a508a3c84d3adfb76902200f62", kill_on_drop: false }` [INFO] running `Command { std: "docker" "rm" "-f" "bda2f3d42daf17c444b0b88461d295ec835528a508a3c84d3adfb76902200f62", kill_on_drop: false }` [INFO] [stdout] bda2f3d42daf17c444b0b88461d295ec835528a508a3c84d3adfb76902200f62