[ { "benchmark": "swe-bench", "score": 74.4, "metric": "accuracy", "cost_per_instance": 0.7, "average_runtime": 258.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-claude-sonnet-4-5-20250929/1778132367/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-03-26T21:42:42+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/cce78b51-685d-4936-a217-ff9fd8b85fe4" }, { "benchmark": "swe-bench-multimodal", "score": 35.3, "metric": "solveable_accuracy", "cost_per_instance": 1.41, "average_runtime": 603.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-claude-sonnet-4-5-20250929/1778132360/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.15.0", "submission_time": "2026-04-07T01:42:34+00:00", "component_scores": { "solveable_accuracy": 35.3, "unsolveable_accuracy": 0.0, "combined_accuracy": 23.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/f2f96de1-9714-4eaf-8412-938eca58072f" }, { "benchmark": "gaia", "score": 63.0, "metric": "accuracy", "cost_per_instance": 0.81, "average_runtime": 270.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-claude-sonnet-4-5-20250929/1778132353/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.15.0", "submission_time": "2026-03-26T17:51:20+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/f0c349f4-5e88-48ae-89ea-563610c04d74" }, { "benchmark": "commit0", "score": 31.2, "metric": "accuracy", "cost_per_instance": 2.19, "average_runtime": 717.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-claude-sonnet-4-5-20250929/1778132346/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.15.0", "submission_time": "2026-03-27T04:52:48+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/a6e01f43-7b3f-4e4b-85ab-1ef74bb0df8a" }, { "benchmark": "swt-bench", "score": 69.3, "metric": "accuracy", "cost_per_instance": 0.62, "average_runtime": 234.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-claude-sonnet-4-5-20250929/1778132374/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-03-28T19:01:13+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/b60702c8-5ca7-453a-b175-58e52ec56540" } ]