[{"ts": "2026-06-21T22:25:28.888414+00:00", "task": "exp-tdd-intervals", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3470142, "tokens_total": 658985, "input_tokens": 653162, "output_tokens": 5823, "num_turns": 26, "duration_ms": 105616, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 9, "total": 9, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:25:57.475813+00:00", "task": "exp-tdd-intervals", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.0841731, "tokens_total": 133703, "input_tokens": 132712, "output_tokens": 991, "num_turns": 8, "duration_ms": 19617, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 8, "total": 40, "score": 0.2, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:26:30.054296+00:00", "task": "exp-tdd-intervals", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.10852049999999999, "tokens_total": 177928, "input_tokens": 176286, "output_tokens": 1642, "num_turns": 8, "duration_ms": 29654, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 6, "total": 6, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:27:04.228978+00:00", "task": "exp-tdd-intervals", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.0884617, "tokens_total": 135027, "input_tokens": 133948, "output_tokens": 1079, "num_turns": 8, "duration_ms": 23745, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 6, "total": 40, "score": 0.15, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:28:42.938411+00:00", "task": "exp-tdd-intervals", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.31439549999999994, "tokens_total": 434846, "input_tokens": 429029, "output_tokens": 5817, "num_turns": 16, "duration_ms": 95429, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 6, "total": 6, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:30:25.513198+00:00", "task": "exp-tdd-intervals", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3793993, "tokens_total": 582804, "input_tokens": 577432, "output_tokens": 5372, "num_turns": 22, "duration_ms": 92707, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 6, "total": 40, "score": 0.15, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:31:02.416828+00:00", "task": "exp-tdd-stats", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.10726770000000001, "tokens_total": 177900, "input_tokens": 176371, "output_tokens": 1529, "num_turns": 8, "duration_ms": 32208, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 15, "total": 15, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:31:52.488619+00:00", "task": "exp-tdd-stats", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1285854, "tokens_total": 187821, "input_tokens": 185641, "output_tokens": 2180, "num_turns": 10, "duration_ms": 39911, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 14, "total": 40, "score": 0.35, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}, {"ts": "2026-06-21T22:32:29.430068+00:00", "task": "exp-tdd-stats", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.11064249999999999, "tokens_total": 177689, "input_tokens": 175938, "output_tokens": 1751, "num_turns": 8, "duration_ms": 32024, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 16, "total": 16, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:33:08.404177+00:00", "task": "exp-tdd-stats", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.10746119999999998, "tokens_total": 144486, "input_tokens": 143101, "output_tokens": 1385, "num_turns": 8, "duration_ms": 28611, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 13, "total": 40, "score": 0.325, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}, {"ts": "2026-06-21T22:34:32.757078+00:00", "task": "exp-tdd-stats", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2263985, "tokens_total": 315223, "input_tokens": 311524, "output_tokens": 3699, "num_turns": 12, "duration_ms": 79086, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 16, "total": 16, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:35:17.808453+00:00", "task": "exp-tdd-stats", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1426439, "tokens_total": 207530, "input_tokens": 205764, "output_tokens": 1766, "num_turns": 10, "duration_ms": 34696, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 16, "total": 40, "score": 0.4, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}, {"ts": "2026-06-21T22:24:13.883587+00:00", "task": "exp-tdd-money", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1055879, "tokens_total": 177534, "input_tokens": 176066, "output_tokens": 1468, "num_turns": 8, "duration_ms": 31754, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 4, "total": 4, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:24:45.990825+00:00", "task": "exp-tdd-money", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.10098929999999999, "tokens_total": 159120, "input_tokens": 157825, "output_tokens": 1295, "num_turns": 9, "duration_ms": 26764, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 8, "total": 20, "score": 0.4, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:25:27.348493+00:00", "task": "exp-tdd-money", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.10343440000000001, "tokens_total": 176536, "input_tokens": 175132, "output_tokens": 1404, "num_turns": 8, "duration_ms": 38504, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 6, "total": 6, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:25:54.696155+00:00", "task": "exp-tdd-money", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1008841, "tokens_total": 142595, "input_tokens": 141414, "output_tokens": 1181, "num_turns": 8, "duration_ms": 21569, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 10, "total": 22, "score": 0.455, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:27:57.721401+00:00", "task": "exp-tdd-money", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4199019499999999, "tokens_total": 465119, "input_tokens": 459002, "output_tokens": 6117, "num_turns": 19, "duration_ms": 118809, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 7, "total": 7, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:31:11.784808+00:00", "task": "exp-tdd-money", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.5755518500000001, "tokens_total": 752987, "input_tokens": 743535, "output_tokens": 9452, "num_turns": 27, "duration_ms": 186639, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 13, "total": 25, "score": 0.52, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:33:03.083122+00:00", "task": "exp-tdd-timeparse", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.36242460000000004, "tokens_total": 732003, "input_tokens": 726907, "output_tokens": 5096, "num_turns": 30, "duration_ms": 105625, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 97.6, "tests_passed": true}, "mutation": {"killed": 15, "total": 15, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:34:06.528829+00:00", "task": "exp-tdd-timeparse", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1243175, "tokens_total": 187568, "input_tokens": 185669, "output_tokens": 1899, "num_turns": 10, "duration_ms": 54231, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 49.0, "tests_passed": true}, "mutation": {"killed": 20, "total": 34, "score": 0.588, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:34:54.619531+00:00", "task": "exp-tdd-timeparse", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.12158440000000001, "tokens_total": 181031, "input_tokens": 178802, "output_tokens": 2229, "num_turns": 8, "duration_ms": 41698, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 19, "total": 19, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:35:39.706599+00:00", "task": "exp-tdd-timeparse", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.112027, "tokens_total": 140860, "input_tokens": 138853, "output_tokens": 2007, "num_turns": 9, "duration_ms": 34425, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 26, "total": 40, "score": 0.65, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:36:55.692133+00:00", "task": "exp-tdd-timeparse", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2412175, "tokens_total": 344816, "input_tokens": 340861, "output_tokens": 3955, "num_turns": 13, "duration_ms": 70324, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 18, "total": 18, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:37:49.796719+00:00", "task": "exp-tdd-timeparse", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.16805779999999998, "tokens_total": 237379, "input_tokens": 234858, "output_tokens": 2521, "num_turns": 12, "duration_ms": 43815, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 25, "total": 39, "score": 0.641, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}, {"ts": "2026-06-21T22:55:45.110523+00:00", "task": "exp-tdd-matrix", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1074911, "tokens_total": 178399, "input_tokens": 176882, "output_tokens": 1517, "num_turns": 8, "duration_ms": 34488, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 4, "total": 4, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:56:23.144353+00:00", "task": "exp-tdd-matrix", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.11385289999999999, "tokens_total": 168061, "input_tokens": 166659, "output_tokens": 1402, "num_turns": 9, "duration_ms": 28597, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 5, "total": 40, "score": 0.125, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}, {"ts": "2026-06-21T22:56:55.106282+00:00", "task": "exp-tdd-matrix", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1061336, "tokens_total": 176992, "input_tokens": 175456, "output_tokens": 1536, "num_turns": 8, "duration_ms": 29427, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 4, "total": 4, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:57:33.610968+00:00", "task": "exp-tdd-matrix", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1051588, "tokens_total": 143402, "input_tokens": 142044, "output_tokens": 1358, "num_turns": 8, "duration_ms": 29485, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 5, "total": 40, "score": 0.125, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}, {"ts": "2026-06-21T22:55:46.408594+00:00", "task": "exp-tdd-matrix", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1260395, "tokens_total": 176767, "input_tokens": 175032, "output_tokens": 1735, "num_turns": 7, "duration_ms": 35468, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 4, "total": 4, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T22:56:27.377784+00:00", "task": "exp-tdd-matrix", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1382533, "tokens_total": 182072, "input_tokens": 180107, "output_tokens": 1965, "num_turns": 10, "duration_ms": 31235, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 5, "total": 40, "score": 0.125, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}, {"ts": "2026-06-21T22:56:43.940318+00:00", "task": "exp-tdd-csvlite", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3253345, "tokens_total": 647582, "input_tokens": 642925, "output_tokens": 4657, "num_turns": 27, "duration_ms": 89813, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 16, "total": 20, "score": 0.8, "baseline_green": true, "survivors": [{"file": "csvlite.py", "site": 16}, {"file": "csvlite.py", "site": 17}, {"file": "csvlite.py", "site": 18}, {"file": "csvlite.py", "site": 19}]}, "contamination": []}, {"ts": "2026-06-21T23:06:06.252134+00:00", "task": "exp-tdd-csvlite", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.11786310000000001, "tokens_total": 169681, "input_tokens": 168152, "output_tokens": 1529, "num_turns": 9, "duration_ms": 27971, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 23, "total": 28, "score": 0.821, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}]}, "contamination": []}, {"ts": "2026-06-21T23:17:12.066851+00:00", "task": "exp-tdd-csvlite", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.13631749999999998, "tokens_total": 205481, "input_tokens": 202906, "output_tokens": 2575, "num_turns": 9, "duration_ms": 46696, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.3, "tests_passed": true}, "mutation": {"killed": 32, "total": 32, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}, {"ts": "2026-06-21T23:32:58.135013+00:00", "task": "exp-tdd-csvlite", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.10554749999999999, "tokens_total": 139342, "input_tokens": 137615, "output_tokens": 1727, "num_turns": 8, "duration_ms": 31340, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 46.1, "tests_passed": true}, "mutation": {"killed": 35, "total": 40, "score": 0.875, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}]}, "contamination": []}, {"ts": "2026-06-21T23:07:33.741161+00:00", "task": "exp-tdd-csvlite", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.68712625, "tokens_total": 1039921, "input_tokens": 1031454, "output_tokens": 8467, "num_turns": 33, "duration_ms": 267311, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 17, "total": 20, "score": 0.85, "baseline_green": true, "survivors": [{"file": "csvlite.py", "site": 17}, {"file": "csvlite.py", "site": 18}, {"file": "csvlite.py", "site": 19}]}, "contamination": []}, {"ts": "2026-06-21T23:16:32.521067+00:00", "task": "exp-tdd-csvlite", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.42114899999999994, "tokens_total": 682141, "input_tokens": 676035, "output_tokens": 6106, "num_turns": 26, "duration_ms": 118437, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 50.0, "tests_passed": true}, "mutation": {"killed": 23, "total": 28, "score": 0.821, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}]}, "contamination": []}]