[ { "benchmark": "swe-bench", "score": 74.4, "metric": "accuracy", "cost_per_instance": 1.14, "average_runtime": 261.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-anthropic-claude-opus-4-6/1778132365/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-03-27T08:40:20+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/f842f21c-2cac-4b07-8e96-0ebf33be4881" }, { "benchmark": "commit0", "score": 37.5, "metric": "accuracy", "cost_per_instance": 10.39, "average_runtime": 1522.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-6/1778132344/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.15.0", "submission_time": "2026-03-26T02:23:58+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/f8dfe954-95d3-4f05-85d8-2429838ee4d4" }, { "benchmark": "gaia", "score": 66.1, "metric": "accuracy", "cost_per_instance": 1.79, "average_runtime": 323.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-anthropic-claude-opus-4-6/1778132351/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.15.0", "submission_time": "2026-03-27T06:13:49+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/7f5c1575-08d1-44ac-b561-d60e4ac21028" }, { "benchmark": "swe-bench-multimodal", "score": 32.4, "metric": "solveable_accuracy", "cost_per_instance": 1.36, "average_runtime": 547.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-anthropic-claude-opus-4-6/1778132358/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.15.0", "submission_time": "2026-04-06T22:05:51+00:00", "component_scores": { "solveable_accuracy": 32.4, "unsolveable_accuracy": 0.0, "combined_accuracy": 21.6 }, "eval_visualization_page": "https://laminar.sh/shared/evals/7b67fe45-3c68-4102-b573-bb2d144182fc" }, { "benchmark": "swt-bench", "score": 66.7, "metric": "accuracy", "cost_per_instance": 1.04, "average_runtime": 220.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-anthropic-claude-opus-4-6/1778132372/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-03-28T19:13:08+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/70bcf9b9-e251-4250-b07e-5eaae508bb71" } ]