{ "benchmark": "PatentBench-Mini v0.1.0", "model": "ABIGAIL v3", "run_date": "2026-03-21T02:40:47.894Z", "summary": { "overall_accuracy": "100.0%", "total_tests": 298, "total_points": "1020/1020", "tests_with_errors": 0, "tests_perfect": 298 }, "by_task_type": { "deadline_calculation": { "accuracy": "100.0%", "tests": 125, "points": "419/419" }, "action_classification": { "accuracy": "100.0%", "tests": 82, "points": "328/328" }, "timeline_analysis": { "accuracy": "100.0%", "tests": 81, "points": "243/243" }, "fee_computation": { "accuracy": "100.0%", "tests": 10, "points": "30/30" } }, "by_technology_center": { "TC2400_Networking": { "accuracy": "100.0%", "tests": 10 }, "TC2800_Electrical": { "accuracy": "100.0%", "tests": 19 }, "TC3700_Mechanical": { "accuracy": "100.0%", "tests": 14 }, "TC3600_Business": { "accuracy": "100.0%", "tests": 11 }, "TC1700_Chemical": { "accuracy": "100.0%", "tests": 10 }, "TC2600_Communications": { "accuracy": "100.0%", "tests": 9 }, "TC1600_Biotech": { "accuracy": "100.0%", "tests": 6 }, "TC2100_Software": { "accuracy": "100.0%", "tests": 3 } }, "detailed_results": [ { "test_id": "deadline_nf_16100000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100001", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100001", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100002", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100002", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100003", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100003", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100006", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100007", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100009", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100009", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100010", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100011", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100012", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100013", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100015", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100016", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100018", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100018", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100020", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100020", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100022", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16100022", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16100023", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16100024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500001", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500002", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500002", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500003", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500003", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500004", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500004", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500005", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500006", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500007", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500007", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500008", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500008", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500009", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500009", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500011", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500011", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500012", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500012", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500013", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500013", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500015", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500016", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500018", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_16500019", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500019", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500021", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500021", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500023", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500023", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_16500024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_16500024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100002", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100003", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100003", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100004", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100005", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100005", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100006", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100008", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100008", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100012", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100013", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100015", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100016", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100016", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100019", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100021", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100021", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100022", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100022", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17100023", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17100024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17100024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500000", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500001", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500002", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500004", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500004", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500006", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500007", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500009", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500010", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500011", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500011", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500012", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500013", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500013", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500014", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500015", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500015", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500017", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500018", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500019", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500020", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500021", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_nf_17500022", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500022", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500023", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500023", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "deadline_nf_17500024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT" ] }, { "test_id": "deadline_fr_17500024", "task_type": "deadline_calculation", "tier": 1, "score": "100.0%", "details": [ "shortened_deadline: CORRECT", "max_deadline: CORRECT", "action_type: CORRECT", "options: 100% coverage (5/5)" ] }, { "test_id": "classify_16100000", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100001", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100002", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100003", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100006", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100007", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100009", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100010", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100011", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100012", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100013", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100014", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100015", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100016", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100017", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100018", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100020", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100022", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100023", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16100024", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500000", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500001", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500002", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500003", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500004", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500005", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500006", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500007", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500008", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500009", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500011", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500012", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500013", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500014", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500015", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500016", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500017", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500018", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500019", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500021", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500023", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_16500024", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100000", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100002", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100003", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100004", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100005", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100006", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100008", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100012", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100013", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100014", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100015", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100016", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100017", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100019", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100021", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100022", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100023", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17100024", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500000", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500001", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500002", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500004", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500006", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500007", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500009", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500010", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500011", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500012", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500013", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500014", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500015", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500016", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500017", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500018", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500019", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500020", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500021", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500022", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500023", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "classify_17500024", "task_type": "action_classification", "tier": 1, "score": "100.0%", "details": [ "has_non_final: CORRECT", "has_final: CORRECT", "has_allowance: CORRECT", "total_oa_rounds: CORRECT" ] }, { "test_id": "timeline_16100000", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100001", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100002", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100003", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100006", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100007", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100009", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100010", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100011", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100012", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100013", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100014", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100015", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100016", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100017", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100018", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100020", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100022", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100023", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16100024", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500000", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500001", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500002", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500003", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500004", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500005", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500006", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500007", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500008", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500009", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500011", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500012", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500013", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500014", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500015", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500016", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500017", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500018", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500019", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500021", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500023", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_16500024", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100000", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100002", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100003", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100004", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100005", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100006", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100008", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100012", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100013", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100014", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100015", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100016", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100017", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100019", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100021", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100022", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100023", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17100024", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500000", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500001", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500002", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500004", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500006", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500007", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500009", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500010", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500011", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500012", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500013", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500014", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500015", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500017", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500018", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500019", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500020", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500021", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500022", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500023", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "timeline_17500024", "task_type": "timeline_analysis", "tier": 2, "score": "100.0%", "details": [ "total_events: CORRECT", "first_event: CORRECT", "duration: CORRECT" ] }, { "test_id": "fee_16100000", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100001", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100002", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100003", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100006", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100007", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100009", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100010", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100011", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] }, { "test_id": "fee_16100012", "task_type": "fee_computation", "tier": 1, "score": "100.0%", "details": [ "extension_fee: CORRECT", "rce_fee: CORRECT", "issue_fee: CORRECT" ] } ] }