[ { "benchmark": "gaia", "score": 72.1, "metric": "accuracy", "cost_per_instance": 0.3377, "average_runtime": 308.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-dashscope-qwen3-6-plus/24043637505/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.16.1", "submission_time": "2026-04-06T21:35:31+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/b26a4b3d-0bc2-41d8-b8bf-09453cf2fb53" }, { "benchmark": "swt-bench", "score": 62.1, "metric": "accuracy", "cost_per_instance": 2.085, "average_runtime": 851.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-dashscope-qwen3-6-plus/24043642717/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.16.1", "submission_time": "2026-04-07T14:26:45+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/d5c3aa56-4cbd-4bac-9dfa-8d85cc25e994" }, { "benchmark": "swe-bench", "score": 74.2, "metric": "accuracy", "cost_per_instance": 1.5379, "average_runtime": 664.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-dashscope-qwen3-6-plus/23957434539/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.16.1", "submission_time": "2026-04-04T12:26:22+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/b9cb5405-1049-459f-8d4d-c256091a5549" }, { "benchmark": "commit0", "score": 25.0, "metric": "accuracy", "cost_per_instance": 7.4936, "average_runtime": 1037.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-dashscope-qwen3-6-plus/25079439356/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-29T03:13:01+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/c5f31dca-5471-4ce3-830a-938de77baece" }, { "benchmark": "swe-bench-multimodal", "score": 30.9, "metric": "solveable_accuracy", "cost_per_instance": 2.3158, "average_runtime": 638.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-dashscope-qwen3-6-plus/23949324373/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.16.1", "submission_time": "2026-04-04T09:25:12+00:00", "component_scores": { "solveable_accuracy": 30.9, "unsolveable_accuracy": 0.0, "combined_accuracy": 20.6 }, "eval_visualization_page": "https://laminar.sh/shared/evals/7c984e26-6009-4208-819f-a9ba57d5476f" } ]