[ { "benchmark": "commit0", "score": 12.5, "metric": "accuracy", "cost_per_instance": 4.3309, "average_runtime": 4027.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/22443226509/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.11.5", "submission_time": "2026-02-26T15:30:23+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/2416b7ce-7841-4d7b-b8fb-83ad5c992826" }, { "benchmark": "swt-bench", "score": 38.3, "metric": "accuracy", "cost_per_instance": 1.7107, "average_runtime": 3083.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24086513973/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.16.1", "submission_time": "2026-04-09T07:27:29+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/4ef023af-364e-4ba6-ac3d-19336bd4af18" }, { "benchmark": "gaia", "score": 49.7, "metric": "accuracy", "cost_per_instance": 0.3422, "average_runtime": 847.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24193141939/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.16.1", "submission_time": "2026-04-10T00:23:12+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/9a6fe5b9-dc75-41b4-a7e5-6c4817ae4c50" }, { "benchmark": "swe-bench", "score": 62.0, "metric": "accuracy", "cost_per_instance": 2.1639, "average_runtime": 4132.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24193134639/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.16.1", "submission_time": "2026-04-12T21:40:52+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/828c5288-5e15-4660-9f22-c6bfe6e819e5" }, { "benchmark": "swe-bench-multimodal", "score": 27.9, "metric": "solveable_accuracy", "cost_per_instance": 2.6218, "average_runtime": 2459.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24426485940/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.17.0", "submission_time": "2026-04-15T15:18:37+00:00", "component_scores": { "solveable_accuracy": 27.9, "unsolveable_accuracy": 0.0, "combined_accuracy": 18.6 }, "eval_visualization_page": "https://laminar.sh/shared/evals/84c8dc14-d082-430d-b19c-e142de615385" } ]