[ { "benchmark": "commit0", "score": 43.8, "metric": "accuracy", "cost_per_instance": 7.34, "average_runtime": 1433.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-7/1778132345/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.18.1", "acp_agent_name": "@agentclientprotocol/claude-agent-acp", "acp_agent_version": "v0.30.0", "submission_time": "2026-04-27T17:03:39.913993+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/22382ee0-7c69-466c-9cb3-21d31c11d85f" }, { "benchmark": "swe-bench", "score": 75.8, "metric": "accuracy", "cost_per_instance": 1.15, "average_runtime": 260.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-anthropic-claude-opus-4-7/1778132366/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.18.0", "acp_agent_name": "@agentclientprotocol/claude-agent-acp", "acp_agent_version": "v0.30.0", "submission_time": "2026-04-23T20:42:16+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/2a4b6a0b-75dd-42c8-ab9c-003fd869369f" }, { "benchmark": "swt-bench", "score": 64.4, "metric": "accuracy", "cost_per_instance": 1.19, "average_runtime": 247.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-anthropic-claude-opus-4-7/1778132373/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.18.0", "acp_agent_name": "@agentclientprotocol/claude-agent-acp", "acp_agent_version": "v0.30.0", "submission_time": "2026-04-24T03:39:42+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/7417c7f5-9b2a-44a8-b1a8-80a7154aa352" }, { "benchmark": "swe-bench-multimodal", "score": 35.3, "metric": "solveable_accuracy", "cost_per_instance": 2.67, "average_runtime": 641.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-anthropic-claude-opus-4-7/1778132359/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.18.0", "acp_agent_name": "@agentclientprotocol/claude-agent-acp", "acp_agent_version": "v0.27.0", "submission_time": "2026-04-23T05:49:26+00:00", "component_scores": { "solveable_accuracy": 35.3, "unsolveable_accuracy": 0.0, "combined_accuracy": 23.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/7cc7c378-84c5-4844-9bd6-f379702f8623" }, { "benchmark": "gaia", "score": 74.5, "metric": "accuracy", "cost_per_instance": 0.87, "average_runtime": 264.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-anthropic-claude-opus-4-7/1778132352/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.18.1", "submission_time": "2026-04-23T18:56:24+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/39ba5dfd-749a-444c-82a2-9da63303db83" } ]