[ { "benchmark": "swe-bench", "score": 74.6, "metric": "accuracy", "cost_per_instance": 0.86, "average_runtime": 476.0, "full_archive": "https://results.eval.all-hands.dev/eval-21010530639-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-15-04-24.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T15:55:26.395894+00:00" }, { "benchmark": "gaia", "score": 65.5, "metric": "accuracy", "cost_per_instance": 0.48, "average_runtime": 189.0, "full_archive": "https://results.eval.all-hands.dev/eval-21041979432-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-15-20-40.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T15:55:26.395894+00:00" }, { "benchmark": "commit0", "score": 50.0, "metric": "accuracy", "cost_per_instance": 4.7, "average_runtime": 1580.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-gpt-5-2-2025-12-11/25033142930/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-28T08:17:00+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/c343765e-1fdd-494f-96b7-de843c272a36" }, { "benchmark": "swt-bench", "score": 73.2, "metric": "accuracy", "cost_per_instance": 0.56, "average_runtime": 347.0, "full_archive": "https://results.eval.all-hands.dev/eval-21146218648-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-19-23-58.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T15:55:26.395894+00:00" }, { "benchmark": "swe-bench-multimodal", "score": 30.9, "metric": "solveable_accuracy", "cost_per_instance": 2.77, "average_runtime": 1571.0, "full_archive": "https://results.eval.all-hands.dev/eval-21320837315-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-25-04-55.tar.gz", "tags": [ "swe-bench-multimodal" ], "component_scores": { "solveable_accuracy": 30.9, "unsolveable_accuracy": 0.0, "combined_accuracy": 20.6, "solveable_resolved": 21, "solveable_total": 68, "unsolveable_resolved": 0, "unsolveable_total": 34 }, "agent_version": "v1.8.3", "submission_time": "2026-01-26T15:55:26.395894+00:00" } ]