[ { "benchmark": "swe-bench-multimodal", "score": 48.5, "metric": "solveable_accuracy", "cost_per_instance": 2.83, "average_runtime": 372.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-anthropic-claude-opus-4-7/24642030719/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.17.0", "submission_time": "2026-04-20T03:16:57+00:00", "component_scores": { "solveable_accuracy": 48.5, "unsolveable_accuracy": 0.0, "combined_accuracy": 32.4 }, "eval_visualization_page": "https://laminar.sh/shared/evals/cfe39593-1e41-44ba-bf2f-a29e64a51432" }, { "benchmark": "swt-bench", "score": 80.8, "metric": "accuracy", "cost_per_instance": 0.82, "average_runtime": 143.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-anthropic-claude-opus-4-7/24612239545/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.17.0", "submission_time": "2026-04-18T21:45:55+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/96a4370b-a553-4c91-951b-79ae3a93979d" }, { "benchmark": "swe-bench", "score": 74.2, "metric": "accuracy", "cost_per_instance": 1.08, "average_runtime": 183.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-anthropic-claude-opus-4-7/24583721908/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.17.0", "submission_time": "2026-04-18T04:25:43+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/9464c828-db85-45f7-b4e9-b6526cb7d2f3" }, { "benchmark": "gaia", "score": 81.2, "metric": "accuracy", "cost_per_instance": 0.89, "average_runtime": 129.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-anthropic-claude-opus-4-7/24585074284/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.17.0", "submission_time": "2026-04-17T22:13:04+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/734845e8-b910-414f-a4b7-ad60d464131c" }, { "benchmark": "commit0", "score": 56.2, "metric": "accuracy", "cost_per_instance": 5.69, "average_runtime": 636.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-7/24805652683/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-23T02:56:45+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/825aa22d-16cb-49c1-8080-97a46db81099" } ]