[ { "benchmark": "commit0", "score": 37.5, "metric": "accuracy", "cost_per_instance": 9.11, "average_runtime": 1264.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-5-20251101/25079416757/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-29T01:34:39+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/d15f9cba-5c46-4413-b71d-f7b5d15e6efb" }, { "benchmark": "gaia", "score": 69.1, "metric": "accuracy", "cost_per_instance": 0.55, "average_runtime": 97.0, "full_archive": "https://results.eval.all-hands.dev/eval-20805719842-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-08-06-24.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T01:24:15.735789+00:00" }, { "benchmark": "swe-bench", "score": 76.6, "metric": "accuracy", "cost_per_instance": 1.82, "average_runtime": 325.0, "full_archive": "https://results.eval.all-hands.dev/eval-21370451733-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-26-23-59.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T01:24:15.735789+00:00" }, { "benchmark": "swt-bench", "score": 78.5, "metric": "accuracy", "cost_per_instance": 1.38, "average_runtime": 268.0, "full_archive": "https://results.eval.all-hands.dev/eval-21173239168-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-20-19-27.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T01:24:15.735789+00:00" }, { "benchmark": "swe-bench-multimodal", "score": 41.2, "metric": "solveable_accuracy", "cost_per_instance": 2.54, "average_runtime": 671.0, "full_archive": "https://results.eval.all-hands.dev/eval-21323385943-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-25-04-21.tar.gz", "tags": [ "swe-bench-multimodal" ], "component_scores": { "solveable_accuracy": 41.2, "unsolveable_accuracy": 0.0, "combined_accuracy": 27.5, "solveable_resolved": 28, "solveable_total": 68, "unsolveable_resolved": 0, "unsolveable_total": 34 }, "agent_version": "v1.8.3", "submission_time": "2026-01-27T01:24:15.735789+00:00" } ]