{"ts": "2026-06-22T13:00:09.371079+00:00", "task": "exp-tdd-jsonptr", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4074863, "tokens_total": 466581, "input_tokens": 456917, "output_tokens": 9664, "num_turns": 17, "duration_ms": 142579, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.7, "tests_passed": true}, "mutation": {"killed": 15, "total": 15, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:05:09.965419+00:00", "task": "exp-tdd-jsonptr", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.5085149, "tokens_total": 489032, "input_tokens": 474737, "output_tokens": 14295, "num_turns": 18, "duration_ms": 236956, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 68.7, "tests_passed": true}, "mutation": {"killed": 9, "total": 40, "score": 0.225, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:07:26.694720+00:00", "task": "exp-tdd-jsonptr", "arm": "build-pipeline", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2996134999999999, "tokens_total": 346740, "input_tokens": 340638, "output_tokens": 6102, "num_turns": 14, "duration_ms": 94596, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.2, "tests_passed": true}, "mutation": {"killed": 23, "total": 32, "score": 0.719, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 9}, {"file": "patch.py", "site": 11}, {"file": "patch.py", "site": 12}, {"file": "patch.py", "site": 17}, {"file": "patch.py", "site": 18}, {"file": "patch.py", "site": 21}, {"file": "patch.py", "site": 22}, {"file": "patch.py", "site": 23}]}, "contamination": []}
{"ts": "2026-06-22T13:09:43.760248+00:00", "task": "exp-tdd-jsonptr", "arm": "build-pipeline", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3848033, "tokens_total": 512317, "input_tokens": 505030, "output_tokens": 7287, "num_turns": 20, "duration_ms": 121697, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 69.5, "tests_passed": true}, "mutation": {"killed": 8, "total": 40, "score": 0.2, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 16}, {"file": "acc.py", "site": 18}]}, "contamination": []}
{"ts": "2026-06-22T13:15:36.934832+00:00", "task": "exp-tdd-jsonptr", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3331198, "tokens_total": 368166, "input_tokens": 359073, "output_tokens": 9093, "num_turns": 15, "duration_ms": 132734, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 93.8, "tests_passed": true}, "mutation": {"killed": 17, "total": 21, "score": 0.81, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 0}, {"file": "patch.py", "site": 14}, {"file": "patch.py", "site": 15}, {"file": "patch.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:17:40.218191+00:00", "task": "exp-tdd-jsonptr", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.29683049999999994, "tokens_total": 376254, "input_tokens": 369808, "output_tokens": 6446, "num_turns": 16, "duration_ms": 102930, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 71.3, "tests_passed": true}, "mutation": {"killed": 7, "total": 40, "score": 0.175, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:20:15.436895+00:00", "task": "exp-tdd-jsonptr", "arm": "test-after", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.31806969999999996, "tokens_total": 278091, "input_tokens": 268145, "output_tokens": 9946, "num_turns": 12, "duration_ms": 146210, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 93.6, "tests_passed": true}, "mutation": {"killed": 20, "total": 22, "score": 0.909, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 0}, {"file": "patch.py", "site": 10}]}, "contamination": []}
{"ts": "2026-06-22T13:22:16.707717+00:00", "task": "exp-tdd-jsonptr", "arm": "test-after", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.30482639999999994, "tokens_total": 405331, "input_tokens": 398911, "output_tokens": 6420, "num_turns": 18, "duration_ms": 99728, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 71.0, "tests_passed": true}, "mutation": {"killed": 9, "total": 40, "score": 0.225, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:25:35.480015+00:00", "task": "exp-tdd-jsonptr", "arm": "test-after", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.42960699999999996, "tokens_total": 356149, "input_tokens": 341907, "output_tokens": 14242, "num_turns": 13, "duration_ms": 190488, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.2, "tests_passed": true}, "mutation": {"killed": 18, "total": 19, "score": 0.947, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 12}]}, "contamination": []}
{"ts": "2026-06-22T13:28:02.478684+00:00", "task": "exp-tdd-jsonptr", "arm": "test-after", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3812068, "tokens_total": 520959, "input_tokens": 512717, "output_tokens": 8242, "num_turns": 21, "duration_ms": 132140, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 69.5, "tests_passed": true}, "mutation": {"killed": 8, "total": 40, "score": 0.2, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T12:59:27.316809+00:00", "task": "exp-tdd-jsonptr", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2903574, "tokens_total": 347951, "input_tokens": 340981, "output_tokens": 6970, "num_turns": 14, "duration_ms": 108125, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.7, "tests_passed": true}, "mutation": {"killed": 16, "total": 17, "score": 0.941, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 11}]}, "contamination": []}
{"ts": "2026-06-22T13:02:36.370911+00:00", "task": "exp-tdd-jsonptr", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.33715970000000006, "tokens_total": 420042, "input_tokens": 412875, "output_tokens": 7167, "num_turns": 16, "duration_ms": 161266, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 69.6, "tests_passed": true}, "mutation": {"killed": 8, "total": 40, "score": 0.2, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:05:34.110304+00:00", "task": "exp-tdd-jsonptr", "arm": "test-first", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.35927339999999997, "tokens_total": 419790, "input_tokens": 410881, "output_tokens": 8909, "num_turns": 16, "duration_ms": 162112, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 94.2, "tests_passed": true}, "mutation": {"killed": 21, "total": 35, "score": 0.6, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 0}, {"file": "patch.py", "site": 1}, {"file": "patch.py", "site": 3}, {"file": "patch.py", "site": 4}, {"file": "patch.py", "site": 8}, {"file": "patch.py", "site": 9}, {"file": "patch.py", "site": 10}, {"file": "patch.py", "site": 17}]}, "contamination": []}
{"ts": "2026-06-22T13:08:55.875143+00:00", "task": "exp-tdd-jsonptr", "arm": "test-first", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.40809059999999997, "tokens_total": 441091, "input_tokens": 430112, "output_tokens": 10979, "num_turns": 19, "duration_ms": 186430, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 70.9, "tests_passed": true}, "mutation": {"killed": 10, "total": 40, "score": 0.25, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 16}, {"file": "acc.py", "site": 19}]}, "contamination": []}
{"ts": "2026-06-22T13:10:30.547677+00:00", "task": "exp-tdd-jsonptr", "arm": "test-first", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2256531, "tokens_total": 287099, "input_tokens": 282085, "output_tokens": 5014, "num_turns": 13, "duration_ms": 85171, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 90.9, "tests_passed": true}, "mutation": {"killed": 17, "total": 18, "score": 0.944, "baseline_green": true, "survivors": [{"file": "patch.py", "site": 11}]}, "contamination": []}
{"ts": "2026-06-22T13:13:11.481520+00:00", "task": "exp-tdd-jsonptr", "arm": "test-first", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.35158319999999993, "tokens_total": 456418, "input_tokens": 448406, "output_tokens": 8012, "num_turns": 19, "duration_ms": 142158, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 69.4, "tests_passed": true}, "mutation": {"killed": 8, "total": 40, "score": 0.2, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:00:00.179055+00:00", "task": "exp-tdd-ledger", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.40106840000000005, "tokens_total": 452333, "input_tokens": 443164, "output_tokens": 9169, "num_turns": 16, "duration_ms": 133429, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 12, "total": 14, "score": 0.857, "baseline_green": true, "survivors": [{"file": "report.py", "site": 5}, {"file": "report.py", "site": 6}]}, "contamination": []}
{"ts": "2026-06-22T13:03:54.237537+00:00", "task": "exp-tdd-ledger", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.44244250000000007, "tokens_total": 468553, "input_tokens": 457756, "output_tokens": 10797, "num_turns": 17, "duration_ms": 205628, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 12, "total": 40, "score": 0.3, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T13:05:29.931059+00:00", "task": "exp-tdd-ledger", "arm": "build-pipeline", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.21932449999999998, "tokens_total": 280548, "input_tokens": 276448, "output_tokens": 4100, "num_turns": 12, "duration_ms": 88246, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 98.1, "tests_passed": true}, "mutation": {"killed": 12, "total": 14, "score": 0.857, "baseline_green": true, "survivors": [{"file": "report.py", "site": 5}, {"file": "report.py", "site": 6}]}, "contamination": []}
{"ts": "2026-06-22T13:08:14.614820+00:00", "task": "exp-tdd-ledger", "arm": "build-pipeline", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.34799900000000006, "tokens_total": 390277, "input_tokens": 382329, "output_tokens": 7948, "num_turns": 16, "duration_ms": 141339, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 73.8, "tests_passed": true}, "mutation": {"killed": 13, "total": 40, "score": 0.325, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T13:10:51.038723+00:00", "task": "exp-tdd-ledger", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.1846176, "tokens_total": 223637, "input_tokens": 219539, "output_tokens": 4098, "num_turns": 11, "duration_ms": 64957, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 14, "total": 14, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:13:33.675894+00:00", "task": "exp-tdd-ledger", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.37041799999999997, "tokens_total": 343689, "input_tokens": 332889, "output_tokens": 10800, "num_turns": 14, "duration_ms": 147140, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 11, "total": 40, "score": 0.275, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T13:14:47.009165+00:00", "task": "exp-tdd-ledger", "arm": "test-after", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2062823, "tokens_total": 251612, "input_tokens": 246876, "output_tokens": 4736, "num_turns": 12, "duration_ms": 66175, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 14, "total": 14, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:17:03.137853+00:00", "task": "exp-tdd-ledger", "arm": "test-after", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2971728, "tokens_total": 279787, "input_tokens": 272389, "output_tokens": 7398, "num_turns": 13, "duration_ms": 122122, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 11, "total": 40, "score": 0.275, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T13:18:07.614787+00:00", "task": "exp-tdd-ledger", "arm": "test-after", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.17529909999999999, "tokens_total": 221907, "input_tokens": 218243, "output_tokens": 3664, "num_turns": 11, "duration_ms": 56863, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 13, "total": 14, "score": 0.929, "baseline_green": true, "survivors": [{"file": "report.py", "site": 6}]}, "contamination": []}
{"ts": "2026-06-22T13:20:06.842608+00:00", "task": "exp-tdd-ledger", "arm": "test-after", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2773008, "tokens_total": 297636, "input_tokens": 291026, "output_tokens": 6610, "num_turns": 13, "duration_ms": 105871, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 13, "total": 40, "score": 0.325, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T12:58:56.646122+00:00", "task": "exp-tdd-ledger", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2114076, "tokens_total": 258850, "input_tokens": 254095, "output_tokens": 4755, "num_turns": 12, "duration_ms": 76631, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 12, "total": 14, "score": 0.857, "baseline_green": true, "survivors": [{"file": "report.py", "site": 5}, {"file": "report.py", "site": 6}]}, "contamination": []}
{"ts": "2026-06-22T13:02:03.630725+00:00", "task": "exp-tdd-ledger", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3095409, "tokens_total": 405472, "input_tokens": 399449, "output_tokens": 6023, "num_turns": 17, "duration_ms": 149192, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 12, "total": 40, "score": 0.3, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}
{"ts": "2026-06-22T13:03:40.814581+00:00", "task": "exp-tdd-ledger", "arm": "test-first", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.20340929999999996, "tokens_total": 256023, "input_tokens": 251514, "output_tokens": 4509, "num_turns": 10, "duration_ms": 84973, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 100.0, "tests_passed": true}, "mutation": {"killed": 12, "total": 14, "score": 0.857, "baseline_green": true, "survivors": [{"file": "report.py", "site": 5}, {"file": "report.py", "site": 6}]}, "contamination": []}
{"ts": "2026-06-22T13:05:52.549484+00:00", "task": "exp-tdd-ledger", "arm": "test-first", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.31766159999999993, "tokens_total": 257489, "input_tokens": 248323, "output_tokens": 9166, "num_turns": 11, "duration_ms": 112963, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 11, "total": 40, "score": 0.275, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T13:07:34.784058+00:00", "task": "exp-tdd-ledger", "arm": "test-first", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2085, "tokens_total": 258919, "input_tokens": 254357, "output_tokens": 4562, "num_turns": 12, "duration_ms": 94505, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.0, "tests_passed": true}, "mutation": {"killed": 13, "total": 16, "score": 0.812, "baseline_green": true, "survivors": [{"file": "report.py", "site": 5}, {"file": "report.py", "site": 6}, {"file": "report.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:09:39.671771+00:00", "task": "exp-tdd-ledger", "arm": "test-first", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3052098, "tokens_total": 314871, "input_tokens": 307473, "output_tokens": 7398, "num_turns": 13, "duration_ms": 109955, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 75.0, "tests_passed": true}, "mutation": {"killed": 11, "total": 40, "score": 0.275, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}]}, "contamination": []}
{"ts": "2026-06-22T12:59:22.555045+00:00", "task": "exp-tdd-router", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3201299, "tokens_total": 447701, "input_tokens": 442160, "output_tokens": 5541, "num_turns": 17, "duration_ms": 94467, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 90.5, "tests_passed": true}, "mutation": {"killed": 38, "total": 40, "score": 0.95, "baseline_green": true, "survivors": [{"file": "pattern.py", "site": 26}, {"file": "pattern.py", "site": 31}]}, "contamination": []}
{"ts": "2026-06-22T13:03:06.370256+00:00", "task": "exp-tdd-router", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4710989, "tokens_total": 437983, "input_tokens": 424764, "output_tokens": 13219, "num_turns": 15, "duration_ms": 205882, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 68.3, "tests_passed": true}, "mutation": {"killed": 22, "total": 40, "score": 0.55, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 15}, {"file": "acc.py", "site": 18}]}, "contamination": []}
{"ts": "2026-06-22T13:06:11.163925+00:00", "task": "exp-tdd-router", "arm": "build-pipeline", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3674434, "tokens_total": 464046, "input_tokens": 456447, "output_tokens": 7599, "num_turns": 17, "duration_ms": 167942, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 96.3, "tests_passed": true}, "mutation": {"killed": 36, "total": 37, "score": 0.973, "baseline_green": true, "survivors": [{"file": "pattern.py", "site": 0}]}, "contamination": []}
{"ts": "2026-06-22T13:12:03.553769+00:00", "task": "exp-tdd-router", "arm": "build-pipeline", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.9218739, "tokens_total": 1187692, "input_tokens": 1173189, "output_tokens": 14503, "num_turns": 33, "duration_ms": 337675, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 73.3, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}]}, "contamination": []}
{"ts": "2026-06-22T13:17:56.764250+00:00", "task": "exp-tdd-router", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4011623000000001, "tokens_total": 323315, "input_tokens": 310078, "output_tokens": 13237, "num_turns": 12, "duration_ms": 177479, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 96.3, "tests_passed": true}, "mutation": {"killed": 31, "total": 38, "score": 0.816, "baseline_green": true, "survivors": [{"file": "pattern.py", "site": 11}, {"file": "pattern.py", "site": 12}, {"file": "pattern.py", "site": 13}, {"file": "pattern.py", "site": 14}, {"file": "pattern.py", "site": 20}, {"file": "pattern.py", "site": 21}, {"file": "pattern.py", "site": 22}]}, "contamination": []}
{"ts": "2026-06-22T13:21:07.170173+00:00", "task": "exp-tdd-router", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.45681099999999997, "tokens_total": 660321, "input_tokens": 651414, "output_tokens": 8907, "num_turns": 22, "duration_ms": 174511, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 73.3, "tests_passed": true}, "mutation": {"killed": 17, "total": 40, "score": 0.425, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}]}, "contamination": []}
{"ts": "2026-06-22T13:24:05.820004+00:00", "task": "exp-tdd-router", "arm": "test-after", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.38403740000000003, "tokens_total": 375333, "input_tokens": 363790, "output_tokens": 11543, "num_turns": 14, "duration_ms": 163343, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 96.7, "tests_passed": true}, "mutation": {"killed": 38, "total": 40, "score": 0.95, "baseline_green": true, "survivors": [{"file": "pattern.py", "site": 9}, {"file": "pattern.py", "site": 11}]}, "contamination": []}
{"ts": "2026-06-22T13:25:56.432343+00:00", "task": "exp-tdd-router", "arm": "test-after", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.31302420000000003, "tokens_total": 457133, "input_tokens": 451716, "output_tokens": 5417, "num_turns": 18, "duration_ms": 95545, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 73.0, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:30:26.007299+00:00", "task": "exp-tdd-router", "arm": "test-after", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.5524759, "tokens_total": 453999, "input_tokens": 435324, "output_tokens": 18675, "num_turns": 14, "duration_ms": 255147, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.1, "tests_passed": true}, "mutation": {"killed": 36, "total": 40, "score": 0.9, "baseline_green": true, "survivors": [{"file": "core.py", "site": 23}, {"file": "pattern.py", "site": 34}, {"file": "pattern.py", "site": 35}, {"file": "pattern.py", "site": 37}]}, "contamination": []}
{"ts": "2026-06-22T13:33:00.121499+00:00", "task": "exp-tdd-router", "arm": "test-after", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3890823, "tokens_total": 509764, "input_tokens": 501605, "output_tokens": 8159, "num_turns": 18, "duration_ms": 137758, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 68.4, "tests_passed": true}, "mutation": {"killed": 24, "total": 40, "score": 0.6, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 15}, {"file": "acc.py", "site": 18}]}, "contamination": []}
{"ts": "2026-06-22T13:00:07.752633+00:00", "task": "exp-tdd-router", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3402988, "tokens_total": 457456, "input_tokens": 449934, "output_tokens": 7522, "num_turns": 19, "duration_ms": 128218, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 97.3, "tests_passed": true}, "mutation": {"killed": 38, "total": 40, "score": 0.95, "baseline_green": true, "survivors": [{"file": "pattern.py", "site": 3}, {"file": "pattern.py", "site": 12}]}, "contamination": []}
{"ts": "2026-06-22T13:03:48.456798+00:00", "task": "exp-tdd-router", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4081701, "tokens_total": 523243, "input_tokens": 514795, "output_tokens": 8448, "num_turns": 18, "duration_ms": 191534, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 73.8, "tests_passed": true}, "mutation": {"killed": 26, "total": 40, "score": 0.65, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:06:03.526006+00:00", "task": "exp-tdd-router", "arm": "test-first", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.28362299999999996, "tokens_total": 333891, "input_tokens": 326908, "output_tokens": 6983, "num_turns": 14, "duration_ms": 115673, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 95.4, "tests_passed": true}, "mutation": {"killed": 33, "total": 40, "score": 0.825, "baseline_green": true, "survivors": [{"file": "pattern.py", "site": 4}, {"file": "pattern.py", "site": 5}, {"file": "pattern.py", "site": 13}, {"file": "pattern.py", "site": 14}, {"file": "pattern.py", "site": 15}, {"file": "pattern.py", "site": 16}, {"file": "pattern.py", "site": 19}]}, "contamination": []}
{"ts": "2026-06-22T13:08:09.681213+00:00", "task": "exp-tdd-router", "arm": "test-first", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.27844019999999997, "tokens_total": 397164, "input_tokens": 392395, "output_tokens": 4769, "num_turns": 16, "duration_ms": 106729, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 73.4, "tests_passed": true}, "mutation": {"killed": 20, "total": 40, "score": 0.5, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}]}, "contamination": []}
{"ts": "2026-06-22T13:12:28.323242+00:00", "task": "exp-tdd-router", "arm": "test-first", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.47560889999999995, "tokens_total": 449303, "input_tokens": 434524, "output_tokens": 14779, "num_turns": 14, "duration_ms": 245022, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 98.5, "tests_passed": true}, "mutation": {"killed": 32, "total": 32, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:14:40.817909+00:00", "task": "exp-tdd-router", "arm": "test-first", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.36574590000000007, "tokens_total": 492866, "input_tokens": 485755, "output_tokens": 7111, "num_turns": 18, "duration_ms": 115832, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 22, "total": 40, "score": 0.55, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}]}, "contamination": []}
{"ts": "2026-06-22T12:58:58.933593+00:00", "task": "exp-tdd-scheduler", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.22181049999999997, "tokens_total": 281386, "input_tokens": 277151, "output_tokens": 4235, "num_turns": 12, "duration_ms": 74704, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.2, "tests_passed": true}, "mutation": {"killed": 22, "total": 24, "score": 0.917, "baseline_green": true, "survivors": [{"file": "order.py", "site": 14}, {"file": "order.py", "site": 15}]}, "contamination": []}
{"ts": "2026-06-22T13:02:09.649386+00:00", "task": "exp-tdd-scheduler", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": false, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 1, "stderr_first_line": "Traceback (most recent call last):"}], "cost": {"cost_usd": 0.2943608, "tokens_total": 351589, "input_tokens": 345866, "output_tokens": 5723, "num_turns": 13, "duration_ms": 148615, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 21, "total": 40, "score": 0.525, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}
{"ts": "2026-06-22T13:05:07.350470+00:00", "task": "exp-tdd-scheduler", "arm": "build-pipeline", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3769692999999999, "tokens_total": 441824, "input_tokens": 433696, "output_tokens": 8128, "num_turns": 17, "duration_ms": 122302, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.3, "tests_passed": true}, "mutation": {"killed": 22, "total": 25, "score": 0.88, "baseline_green": true, "survivors": [{"file": "order.py", "site": 6}, {"file": "order.py", "site": 14}, {"file": "order.py", "site": 15}]}, "contamination": []}
{"ts": "2026-06-22T13:07:30.158375+00:00", "task": "exp-tdd-scheduler", "arm": "build-pipeline", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3027557, "tokens_total": 371278, "input_tokens": 365065, "output_tokens": 6213, "num_turns": 16, "duration_ms": 123427, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 24, "total": 40, "score": 0.6, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}
{"ts": "2026-06-22T13:15:14.864317+00:00", "task": "exp-tdd-scheduler", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.27303669999999997, "tokens_total": 272363, "input_tokens": 264647, "output_tokens": 7716, "num_turns": 12, "duration_ms": 99226, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.3, "tests_passed": true}, "mutation": {"killed": 17, "total": 20, "score": 0.85, "baseline_green": true, "survivors": [{"file": "order.py", "site": 4}, {"file": "order.py", "site": 5}, {"file": "order.py", "site": 11}]}, "contamination": []}
{"ts": "2026-06-22T13:16:49.775908+00:00", "task": "exp-tdd-scheduler", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2440193, "tokens_total": 280306, "input_tokens": 274912, "output_tokens": 5394, "num_turns": 13, "duration_ms": 79613, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 21, "total": 40, "score": 0.525, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:19:29.051521+00:00", "task": "exp-tdd-scheduler", "arm": "test-after", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3650425, "tokens_total": 367193, "input_tokens": 356508, "output_tokens": 10685, "num_turns": 15, "duration_ms": 149492, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.3, "tests_passed": true}, "mutation": {"killed": 21, "total": 23, "score": 0.913, "baseline_green": true, "survivors": [{"file": "order.py", "site": 6}, {"file": "order.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:21:19.528993+00:00", "task": "exp-tdd-scheduler", "arm": "test-after", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2943761, "tokens_total": 311804, "input_tokens": 305020, "output_tokens": 6784, "num_turns": 13, "duration_ms": 94731, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 8}]}, "contamination": []}
{"ts": "2026-06-22T13:23:34.172348+00:00", "task": "exp-tdd-scheduler", "arm": "test-after", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.27341839999999995, "tokens_total": 275683, "input_tokens": 267972, "output_tokens": 7711, "num_turns": 12, "duration_ms": 116818, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.2, "tests_passed": true}, "mutation": {"killed": 22, "total": 23, "score": 0.957, "baseline_green": true, "survivors": [{"file": "order.py", "site": 14}]}, "contamination": []}
{"ts": "2026-06-22T13:24:57.838707+00:00", "task": "exp-tdd-scheduler", "arm": "test-after", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": false, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 1, "stderr_first_line": "Traceback (most recent call last):"}], "cost": {"cost_usd": 0.2229888, "tokens_total": 272278, "input_tokens": 267654, "output_tokens": 4624, "num_turns": 13, "duration_ms": 69707, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:02:10.617624+00:00", "task": "exp-tdd-scheduler", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.5054966, "tokens_total": 610192, "input_tokens": 596776, "output_tokens": 13416, "num_turns": 23, "duration_ms": 255753, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.2, "tests_passed": true}, "mutation": {"killed": 19, "total": 21, "score": 0.905, "baseline_green": true, "survivors": [{"file": "order.py", "site": 6}, {"file": "order.py", "site": 14}]}, "contamination": []}
{"ts": "2026-06-22T13:04:08.189418+00:00", "task": "exp-tdd-scheduler", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": false, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 1, "stderr_first_line": "Traceback (most recent call last):"}], "cost": {"cost_usd": 0.2604731, "tokens_total": 287254, "input_tokens": 281221, "output_tokens": 6033, "num_turns": 13, "duration_ms": 92384, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 20, "total": 40, "score": 0.5, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:08:23.645874+00:00", "task": "exp-tdd-scheduler", "arm": "test-first", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.50247, "tokens_total": 534122, "input_tokens": 519566, "output_tokens": 14556, "num_turns": 19, "duration_ms": 241751, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.3, "tests_passed": true}, "mutation": {"killed": 19, "total": 22, "score": 0.864, "baseline_green": true, "survivors": [{"file": "order.py", "site": 6}, {"file": "order.py", "site": 7}, {"file": "order.py", "site": 14}]}, "contamination": []}
{"ts": "2026-06-22T13:10:04.570953+00:00", "task": "exp-tdd-scheduler", "arm": "test-first", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": false, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 1, "stderr_first_line": "Traceback (most recent call last):"}], "cost": {"cost_usd": 0.24824610000000003, "tokens_total": 271679, "input_tokens": 266490, "output_tokens": 5189, "num_turns": 12, "duration_ms": 86041, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 20, "total": 40, "score": 0.5, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:12:05.401325+00:00", "task": "exp-tdd-scheduler", "arm": "test-first", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2868075, "tokens_total": 349196, "input_tokens": 342274, "output_tokens": 6922, "num_turns": 15, "duration_ms": 111879, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.3, "tests_passed": true}, "mutation": {"killed": 20, "total": 22, "score": 0.909, "baseline_green": true, "survivors": [{"file": "order.py", "site": 6}, {"file": "order.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:13:27.202371+00:00", "task": "exp-tdd-scheduler", "arm": "test-first", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": false, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 1, "stderr_first_line": "Traceback (most recent call last):"}], "cost": {"cost_usd": 0.2160191, "tokens_total": 294932, "input_tokens": 290964, "output_tokens": 3968, "num_turns": 15, "duration_ms": 65352, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 7}]}, "contamination": []}
{"ts": "2026-06-22T13:02:13.794017+00:00", "task": "exp-tdd-spreadsheet", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.5441183, "tokens_total": 616781, "input_tokens": 603978, "output_tokens": 12803, "num_turns": 19, "duration_ms": 225936, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 95.3, "tests_passed": true}, "mutation": {"killed": 40, "total": 40, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:05:19.008323+00:00", "task": "exp-tdd-spreadsheet", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3738610999999999, "tokens_total": 435789, "input_tokens": 428402, "output_tokens": 7387, "num_turns": 14, "duration_ms": 107271, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 71.6, "tests_passed": true}, "mutation": {"killed": 26, "total": 40, "score": 0.65, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 17}, {"file": "acc.py", "site": 20}]}, "contamination": []}
{"ts": "2026-06-22T13:08:37.018551+00:00", "task": "exp-tdd-spreadsheet", "arm": "build-pipeline", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.41097279999999997, "tokens_total": 482260, "input_tokens": 473108, "output_tokens": 9152, "num_turns": 17, "duration_ms": 171210, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 93.1, "tests_passed": true}, "mutation": {"killed": 36, "total": 40, "score": 0.9, "baseline_green": true, "survivors": [{"file": "core.py", "site": 28}, {"file": "core.py", "site": 30}, {"file": "core.py", "site": 32}, {"file": "parse.py", "site": 20}]}, "contamination": []}
{"ts": "2026-06-22T13:10:30.191950+00:00", "task": "exp-tdd-spreadsheet", "arm": "build-pipeline", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3421348, "tokens_total": 390580, "input_tokens": 384154, "output_tokens": 6426, "num_turns": 13, "duration_ms": 84979, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 70.0, "tests_passed": true}, "mutation": {"killed": 28, "total": 40, "score": 0.7, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 15}, {"file": "acc.py", "site": 19}, {"file": "acc.py", "site": 22}]}, "contamination": []}
{"ts": "2026-06-22T14:34:32.579423+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.42231050000000003, "tokens_total": 381231, "input_tokens": 368025, "output_tokens": 13206, "num_turns": 14, "duration_ms": 154159, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 95.9, "tests_passed": true}, "mutation": {"killed": 32, "total": 40, "score": 0.8, "baseline_green": true, "survivors": [{"file": "core.py", "site": 7}, {"file": "parse.py", "site": 12}, {"file": "parse.py", "site": 21}, {"file": "parse.py", "site": 23}, {"file": "parse.py", "site": 40}, {"file": "parse.py", "site": 42}, {"file": "parse.py", "site": 44}, {"file": "parse.py", "site": 47}]}, "contamination": []}
{"ts": "2026-06-22T14:36:53.329791+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.28984109999999996, "tokens_total": 319551, "input_tokens": 313503, "output_tokens": 6048, "num_turns": 13, "duration_ms": 90858, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 72.2, "tests_passed": true}, "mutation": {"killed": 28, "total": 40, "score": 0.7, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 13}, {"file": "acc.py", "site": 16}, {"file": "acc.py", "site": 20}, {"file": "acc.py", "site": 23}]}, "contamination": []}
{"ts": "2026-06-22T14:40:46.985395+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-after", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4311064, "tokens_total": 323112, "input_tokens": 308429, "output_tokens": 14683, "num_turns": 12, "duration_ms": 182790, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 94.9, "tests_passed": true}, "mutation": {"killed": 37, "total": 40, "score": 0.925, "baseline_green": true, "survivors": [{"file": "core.py", "site": 13}, {"file": "core.py", "site": 15}, {"file": "core.py", "site": 17}]}, "contamination": []}
{"ts": "2026-06-22T14:43:45.725867+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-after", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.31675, "tokens_total": 296290, "input_tokens": 288667, "output_tokens": 7623, "num_turns": 12, "duration_ms": 98535, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 71.4, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 17}, {"file": "acc.py", "site": 20}]}, "contamination": []}
{"ts": "2026-06-22T14:47:21.459774+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-after", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.35835310000000004, "tokens_total": 300825, "input_tokens": 289356, "output_tokens": 11469, "num_turns": 12, "duration_ms": 137156, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 91.1, "tests_passed": true}, "mutation": {"killed": 35, "total": 40, "score": 0.875, "baseline_green": true, "survivors": [{"file": "parse.py", "site": 24}, {"file": "parse.py", "site": 37}, {"file": "parse.py", "site": 40}, {"file": "parse.py", "site": 43}, {"file": "parse.py", "site": 46}]}, "contamination": []}
{"ts": "2026-06-22T14:49:28.962407+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-after", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2532303, "tokens_total": 270787, "input_tokens": 265500, "output_tokens": 5287, "num_turns": 12, "duration_ms": 75435, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 68.5, "tests_passed": true}, "mutation": {"killed": 26, "total": 40, "score": 0.65, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 16}, {"file": "acc.py", "site": 21}, {"file": "acc.py", "site": 25}, {"file": "acc.py", "site": 29}]}, "contamination": []}
{"ts": "2026-06-22T13:05:04.254019+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4314908, "tokens_total": 383803, "input_tokens": 370450, "output_tokens": 13353, "num_turns": 14, "duration_ms": 245525, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 95.1, "tests_passed": true}, "mutation": {"killed": 35, "total": 40, "score": 0.875, "baseline_green": true, "survivors": [{"file": "core.py", "site": 16}, {"file": "parse.py", "site": 13}, {"file": "parse.py", "site": 16}, {"file": "parse.py", "site": 61}, {"file": "parse.py", "site": 64}]}, "contamination": []}
{"ts": "2026-06-22T13:09:19.740572+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2700833, "tokens_total": 288361, "input_tokens": 282794, "output_tokens": 5567, "num_turns": 12, "duration_ms": 83427, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 71.4, "tests_passed": true}, "mutation": {"killed": 29, "total": 40, "score": 0.725, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 15}, {"file": "acc.py", "site": 19}, {"file": "acc.py", "site": 22}, {"file": "acc.py", "site": 26}]}, "contamination": []}
{"ts": "2026-06-22T13:13:13.799916+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-first", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.476937, "tokens_total": 330611, "input_tokens": 314010, "output_tokens": 16601, "num_turns": 12, "duration_ms": 208791, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 96.5, "tests_passed": true}, "mutation": {"killed": 37, "total": 40, "score": 0.925, "baseline_green": true, "survivors": [{"file": "core.py", "site": 7}, {"file": "core.py", "site": 10}, {"file": "core.py", "site": 13}]}, "contamination": []}
{"ts": "2026-06-22T13:14:56.742729+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-first", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2604165, "tokens_total": 300001, "input_tokens": 294691, "output_tokens": 5310, "num_turns": 13, "duration_ms": 82416, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 72.5, "tests_passed": true}, "mutation": {"killed": 30, "total": 40, "score": 0.75, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 15}, {"file": "acc.py", "site": 18}, {"file": "acc.py", "site": 22}, {"file": "acc.py", "site": 26}]}, "contamination": []}
{"ts": "2026-06-22T13:19:08.575150+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-first", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3632809, "tokens_total": 369937, "input_tokens": 359627, "output_tokens": 10310, "num_turns": 14, "duration_ms": 134955, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 95.9, "tests_passed": true}, "mutation": {"killed": 36, "total": 40, "score": 0.9, "baseline_green": true, "survivors": [{"file": "core.py", "site": 8}, {"file": "parse.py", "site": 3}, {"file": "parse.py", "site": 64}, {"file": "parse.py", "site": 67}]}, "contamination": []}
{"ts": "2026-06-22T13:23:30.131284+00:00", "task": "exp-tdd-spreadsheet", "arm": "test-first", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.31773290000000004, "tokens_total": 364839, "input_tokens": 358145, "output_tokens": 6694, "num_turns": 14, "duration_ms": 102599, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 72.1, "tests_passed": true}, "mutation": {"killed": 24, "total": 40, "score": 0.6, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 18}, {"file": "acc.py", "site": 22}, {"file": "acc.py", "site": 25}]}, "contamination": []}
{"ts": "2026-06-22T13:01:54.065667+00:00", "task": "exp-tdd-template-engine", "arm": "build-pipeline", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.49701239999999985, "tokens_total": 662243, "input_tokens": 651652, "output_tokens": 10591, "num_turns": 22, "duration_ms": 225417, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.1, "tests_passed": true}, "mutation": {"killed": 34, "total": 34, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:15:40.287856+00:00", "task": "exp-tdd-template-engine", "arm": "build-pipeline", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 3.24815585, "tokens_total": 4308438, "input_tokens": 4274253, "output_tokens": 34185, "num_turns": 88, "duration_ms": 799542, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 12}]}, "contamination": ["high_turn_count=88"]}
{"ts": "2026-06-22T13:18:08.948809+00:00", "task": "exp-tdd-template-engine", "arm": "build-pipeline", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.32613680000000006, "tokens_total": 333163, "input_tokens": 324480, "output_tokens": 8683, "num_turns": 13, "duration_ms": 124978, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.6, "tests_passed": true}, "mutation": {"killed": 40, "total": 40, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:20:29.779899+00:00", "task": "exp-tdd-template-engine", "arm": "build-pipeline", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.36318709999999993, "tokens_total": 471274, "input_tokens": 464477, "output_tokens": 6797, "num_turns": 16, "duration_ms": 123694, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 74.7, "tests_passed": true}, "mutation": {"killed": 25, "total": 40, "score": 0.625, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}, {"file": "acc.py", "site": 16}]}, "contamination": []}
{"ts": "2026-06-22T13:15:42.763486+00:00", "task": "exp-tdd-template-engine", "arm": "test-after", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3082343, "tokens_total": 290067, "input_tokens": 280836, "output_tokens": 9231, "num_turns": 12, "duration_ms": 125094, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.2, "tests_passed": true}, "mutation": {"killed": 37, "total": 40, "score": 0.925, "baseline_green": true, "survivors": [{"file": "parser.py", "site": 25}, {"file": "parser.py", "site": 26}, {"file": "parser.py", "site": 40}]}, "contamination": []}
{"ts": "2026-06-22T13:17:36.316434+00:00", "task": "exp-tdd-template-engine", "arm": "test-after", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.26688, "tokens_total": 339376, "input_tokens": 333659, "output_tokens": 5717, "num_turns": 15, "duration_ms": 94845, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 23, "total": 40, "score": 0.575, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 15}, {"file": "acc.py", "site": 17}]}, "contamination": []}
{"ts": "2026-06-22T13:20:46.480361+00:00", "task": "exp-tdd-template-engine", "arm": "test-after", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3987610999999999, "tokens_total": 324625, "input_tokens": 311454, "output_tokens": 13171, "num_turns": 12, "duration_ms": 173927, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 98.8, "tests_passed": true}, "mutation": {"killed": 40, "total": 40, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:23:09.997348+00:00", "task": "exp-tdd-template-engine", "arm": "test-after", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.32712209999999997, "tokens_total": 359414, "input_tokens": 351656, "output_tokens": 7758, "num_turns": 14, "duration_ms": 128279, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.2, "tests_passed": true}, "mutation": {"killed": 24, "total": 40, "score": 0.6, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}]}, "contamination": []}
{"ts": "2026-06-22T13:25:27.195301+00:00", "task": "exp-tdd-template-engine", "arm": "test-after", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3024817000000001, "tokens_total": 285446, "input_tokens": 276429, "output_tokens": 9017, "num_turns": 12, "duration_ms": 124072, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.5, "tests_passed": true}, "mutation": {"killed": 31, "total": 32, "score": 0.969, "baseline_green": true, "survivors": [{"file": "render.py", "site": 13}]}, "contamination": []}
{"ts": "2026-06-22T13:27:26.696195+00:00", "task": "exp-tdd-template-engine", "arm": "test-after", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.29311149999999997, "tokens_total": 352353, "input_tokens": 345746, "output_tokens": 6607, "num_turns": 16, "duration_ms": 103575, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.7, "tests_passed": true}, "mutation": {"killed": 21, "total": 40, "score": 0.525, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}]}, "contamination": []}
{"ts": "2026-06-22T13:00:03.165711+00:00", "task": "exp-tdd-template-engine", "arm": "test-first", "trial": 1, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3016249, "tokens_total": 411685, "input_tokens": 404990, "output_tokens": 6695, "num_turns": 17, "duration_ms": 127759, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.1, "tests_passed": true}, "mutation": {"killed": 31, "total": 38, "score": 0.816, "baseline_green": true, "survivors": [{"file": "parser.py", "site": 8}, {"file": "parser.py", "site": 9}, {"file": "parser.py", "site": 10}, {"file": "parser.py", "site": 18}, {"file": "parser.py", "site": 19}, {"file": "parser.py", "site": 20}, {"file": "parser.py", "site": 21}]}, "contamination": []}
{"ts": "2026-06-22T13:02:37.047321+00:00", "task": "exp-tdd-template-engine", "arm": "test-first", "trial": 1, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2820716, "tokens_total": 370350, "input_tokens": 365124, "output_tokens": 5226, "num_turns": 15, "duration_ms": 123541, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 20, "total": 40, "score": 0.5, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 1}, {"file": "acc.py", "site": 3}, {"file": "acc.py", "site": 5}, {"file": "acc.py", "site": 7}, {"file": "acc.py", "site": 9}, {"file": "acc.py", "site": 11}, {"file": "acc.py", "site": 13}]}, "contamination": []}
{"ts": "2026-06-22T13:05:56.540806+00:00", "task": "exp-tdd-template-engine", "arm": "test-first", "trial": 2, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.4021140000000001, "tokens_total": 396189, "input_tokens": 384170, "output_tokens": 12019, "num_turns": 13, "duration_ms": 178804, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.1, "tests_passed": true}, "mutation": {"killed": 37, "total": 37, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:08:23.902012+00:00", "task": "exp-tdd-template-engine", "arm": "test-first", "trial": 2, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.30658650000000004, "tokens_total": 380541, "input_tokens": 373627, "output_tokens": 6914, "num_turns": 16, "duration_ms": 125635, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.5, "tests_passed": true}, "mutation": {"killed": 24, "total": 40, "score": 0.6, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}]}, "contamination": []}
{"ts": "2026-06-22T13:10:25.982520+00:00", "task": "exp-tdd-template-engine", "arm": "test-first", "trial": 3, "stage": "build", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.2831814, "tokens_total": 403414, "input_tokens": 397324, "output_tokens": 6090, "num_turns": 15, "duration_ms": 106255, "is_error": false, "parsed": true}, "agent_test_files": 2, "self_coverage": {"percent": 99.1, "tests_passed": true}, "mutation": {"killed": 36, "total": 36, "score": 1.0, "baseline_green": true, "survivors": []}, "contamination": []}
{"ts": "2026-06-22T13:13:18.265469+00:00", "task": "exp-tdd-template-engine", "arm": "test-first", "trial": 3, "stage": "change", "model": "claude-sonnet-4-6", "passed": true, "results": [{"command": "python3 acc.py", "exit_code": 0, "stderr_first_line": ""}, {"command": "python3 acc_change.py", "exit_code": 0, "stderr_first_line": ""}], "cost": {"cost_usd": 0.3387634, "tokens_total": 340592, "input_tokens": 331286, "output_tokens": 9306, "num_turns": 14, "duration_ms": 147645, "is_error": false, "parsed": true}, "agent_test_files": 4, "self_coverage": {"percent": 74.4, "tests_passed": true}, "mutation": {"killed": 24, "total": 40, "score": 0.6, "baseline_green": true, "survivors": [{"file": "acc.py", "site": 0}, {"file": "acc.py", "site": 2}, {"file": "acc.py", "site": 4}, {"file": "acc.py", "site": 6}, {"file": "acc.py", "site": 8}, {"file": "acc.py", "site": 10}, {"file": "acc.py", "site": 12}, {"file": "acc.py", "site": 14}]}, "contamination": []}
