[ { "benchmark": "commit0", "score": 18.8, "metric": "accuracy", "cost_per_instance": 1.26, "average_runtime": 1878.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-moonshot-kimi-k2-5/25033201272/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-28T06:11:37+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/7f44e8ae-607d-46e7-8f5d-b43abacf8c1b" }, { "benchmark": "swe-bench", "score": 68.8, "metric": "accuracy", "cost_per_instance": 0.4063, "average_runtime": 707.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-moonshot-kimi-k2-5/21417485547/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-30T23:37:19.341897+00:00" }, { "benchmark": "swt-bench", "score": 61.9, "metric": "accuracy", "cost_per_instance": 0.4246, "average_runtime": 385.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-moonshot-kimi-k2-5/21535132257/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-30T23:37:19.341897+00:00" }, { "benchmark": "gaia", "score": 63.6, "metric": "accuracy", "cost_per_instance": 0.3781, "average_runtime": 602.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-moonshot-kimi-k2-5/21497407856/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.8.3", "submission_time": "2026-01-30T23:37:19.341897+00:00" }, { "benchmark": "swe-bench-multimodal", "score": 32.8, "metric": "solveable_accuracy", "cost_per_instance": 1.6213, "average_runtime": 921.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-moonshot-kimi-k2-5/21492411890/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.8.3", "submission_time": "2026-01-30T23:37:19.341897+00:00", "component_scores": { "solveable_accuracy": 32.8, "unsolveable_accuracy": 3.0, "combined_accuracy": 22.7, "solveable_resolved": 21, "solveable_total": 64, "unsolveable_resolved": 1, "unsolveable_total": 33 } } ]