[ { "benchmark": "swe-bench", "score": 73.8, "metric": "accuracy", "cost_per_instance": 1.54, "average_runtime": 536.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-claude-sonnet-4-5-20250929/23402268176/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-03-22T16:50:02+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/c7c8ffb1-1501-4eb0-9cc5-0e7f750fccf6" }, { "benchmark": "swe-bench-multimodal", "score": 38.2, "metric": "solveable_accuracy", "cost_per_instance": 2.76, "average_runtime": 945.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-claude-sonnet-4-5-20250929/23560474118/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.15.0", "submission_time": "2026-03-26T04:35:35+00:00", "component_scores": { "solveable_accuracy": 38.2, "unsolveable_accuracy": 0.0, "combined_accuracy": 25.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/cc1a7f58-d661-4b02-9cb4-bc24183ed047" }, { "benchmark": "gaia", "score": 70.9, "metric": "accuracy", "cost_per_instance": 0.58, "average_runtime": 294.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-claude-sonnet-4-5-20250929/23132892437/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.15.0", "submission_time": "2026-03-16T08:56:41+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/9a0af189-2253-4aff-8364-283cdc32817f" }, { "benchmark": "commit0", "score": 12.5, "metric": "accuracy", "cost_per_instance": 1.66, "average_runtime": 1060.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-claude-sonnet-4-5-20250929/22995629877/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.15.0", "submission_time": "2026-03-12T10:47:40+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/12034b3d-5e75-47e9-82a8-9a1a2a486c59" }, { "benchmark": "swt-bench", "score": 67.4, "metric": "accuracy", "cost_per_instance": 1.33, "average_runtime": 477.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-claude-sonnet-4-5-20250929/23783483121/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-03-31T12:10:34+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/b12d5afa-a4ce-4ab4-a3f2-3b6e5be6fa94" } ]