[ { "benchmark": "swe-bench-multimodal", "score": 35.9, "metric": "solveable_accuracy", "cost_per_instance": 2.97, "average_runtime": 1434.0, "full_archive": "https://results.eval.all-hands.dev/eval-21383028167-gpt-5-2-co_litellm_proxy-gpt-5-2-codex_26-01-27-09-13.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T23:24:40.642068+00:00", "component_scores": { "solveable_accuracy": 35.9, "unsolveable_accuracy": 0.0, "combined_accuracy": 24.0, "solveable_resolved": 23, "solveable_total": 64, "unsolveable_resolved": 0, "unsolveable_total": 32 } }, { "benchmark": "gaia", "score": 70.9, "metric": "accuracy", "cost_per_instance": 0.55, "average_runtime": 799.0, "full_archive": "https://results.eval.all-hands.dev/eval-21377864374-gpt-5-2-co_litellm_proxy-gpt-5-2-codex_26-01-27-04-20.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T23:24:40.642068+00:00" }, { "benchmark": "swe-bench", "score": 73.8, "metric": "accuracy", "cost_per_instance": 0.94, "average_runtime": 438.0, "full_archive": "https://results.eval.all-hands.dev/eval-21386738547-gpt-5-2-co_litellm_proxy-gpt-5-2-codex_26-01-27-12-57.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T23:24:40.642068+00:00" }, { "benchmark": "commit0", "score": 43.8, "metric": "accuracy", "cost_per_instance": 5.5, "average_runtime": 1559.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-gpt-5-2-codex/24787401776/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-22T20:22:24+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/57c6395a-4cd4-4346-a0c0-4b1209275523" }, { "benchmark": "swt-bench", "score": 67.0, "metric": "accuracy", "cost_per_instance": 0.66, "average_runtime": 344.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-gpt-5-2-codex/23816423289/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-03-31T22:17:38+00:00" } ]