[ { "benchmark": "swe-bench-multimodal", "score": 36.8, "metric": "solveable_accuracy", "cost_per_instance": 1.45, "average_runtime": 458.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openai-gpt-5-4/24796045694/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.18.0", "submission_time": "2026-04-22T20:38:49+00:00", "component_scores": { "solveable_accuracy": 36.8, "unsolveable_accuracy": 0.0, "combined_accuracy": 24.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/b8e55418-a747-461b-92b8-2981612e5650" }, { "benchmark": "commit0", "score": 56.2, "metric": "accuracy", "cost_per_instance": 4.04, "average_runtime": 1173.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-gpt-5-4/25079308429/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-28T22:47:10+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/04c309cc-564d-465f-8e84-e0b114044609" }, { "benchmark": "swe-bench", "score": 75.6, "metric": "accuracy", "cost_per_instance": 0.63, "average_runtime": 284.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-openai-gpt-5-4/24890465655/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.18.1", "submission_time": "2026-04-24T21:15:07+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/5f6faa00-117b-4471-b635-5d830fcac6d5" }, { "benchmark": "gaia", "score": 82.4, "metric": "accuracy", "cost_per_instance": 0.61, "average_runtime": 224.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-openai-gpt-5-4/24777631212/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.18.0", "submission_time": "2026-04-22T15:33:51+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/9eaf668d-bfbe-4525-a0f3-d29cb6479a98" }, { "benchmark": "swt-bench", "score": 70.4, "metric": "accuracy", "cost_per_instance": 0.47, "average_runtime": 228.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-openai-gpt-5-4/24890492621/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.18.1", "submission_time": "2026-04-24T22:01:01+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/90cebe80-49bb-40c1-b65b-78f5e40461f9" } ]