[ { "benchmark": "swe-bench", "score": 74.2, "metric": "accuracy", "cost_per_instance": 1.19, "average_runtime": 534.0, "full_archive": "https://results.eval.all-hands.dev/eval-21104232299-claude-son_litellm_proxy-claude-sonnet-4-5-20250929_26-01-18-07-25.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T16:02:48.428351+00:00" }, { "benchmark": "commit0", "score": 12.5, "metric": "accuracy", "cost_per_instance": 3.23, "average_runtime": 756.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-claude-sonnet-4-5-20250929/25079432235/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-28T23:43:06+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/17fc5d57-6aae-4dda-8f56-f6275df7db8e" }, { "benchmark": "gaia", "score": 72.7, "metric": "accuracy", "cost_per_instance": 0.87, "average_runtime": 258.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-claude-sonnet-4-5-20250929/23440285883/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.15.0", "submission_time": "2026-03-23T15:26:18+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/b39f4bcc-7db3-435d-b7d1-517c269c16f2" }, { "benchmark": "swt-bench", "score": 68.8, "metric": "accuracy", "cost_per_instance": 0.98, "average_runtime": 488.0, "full_archive": "https://results.eval.all-hands.dev/eval-21146174206-claude-son_litellm_proxy-claude-sonnet-4-5-20250929_26-01-19-23-25.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-26T16:02:48.428351+00:00" }, { "benchmark": "swe-bench-multimodal", "score": 36.8, "metric": "solveable_accuracy", "cost_per_instance": 1.89, "average_runtime": 787.0, "full_archive": "https://results.eval.all-hands.dev/eval-21318221216-claude-son_litellm_proxy-claude-sonnet-4-5-20250929_26-01-24-19-46.tar.gz", "tags": [ "swe-bench-multimodal" ], "component_scores": { "solveable_accuracy": 36.8, "unsolveable_accuracy": 2.9, "combined_accuracy": 25.5, "solveable_resolved": 25, "solveable_total": 68, "unsolveable_resolved": 1, "unsolveable_total": 34 }, "agent_version": "v1.8.3", "submission_time": "2026-01-26T16:02:48.428351+00:00" } ]