[ { "benchmark": "commit0", "score": 12.5, "metric": "accuracy", "cost_per_instance": 0.49, "average_runtime": 1730.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-minimax-MiniMax-M2-5/24902079478/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-24T20:44:14+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/c06b32df-bf9e-4d10-9e81-93f2c01955a5" }, { "benchmark": "gaia", "score": 47.9, "metric": "accuracy", "cost_per_instance": 0.0587, "average_runtime": 716.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-jade-spark-2862/21896759788/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.11.3", "submission_time": "2026-02-11T11:51:48+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/ac28bd0e-7131-47b4-ba01-e64d1c49df45" }, { "benchmark": "swe-bench", "score": 72.6, "metric": "accuracy", "cost_per_instance": 0.3066, "average_runtime": 455.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-jade-spark-2862/21885618644/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.11.3", "submission_time": "2026-02-11T05:21:25+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/5f0703c7-f946-414b-b3ba-f4ce77aad1b5" }, { "benchmark": "swt-bench", "score": 68.1, "metric": "accuracy", "cost_per_instance": 0.2239, "average_runtime": 389.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-jade-spark-2862/21870831025/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.11.3", "submission_time": "2026-02-10T23:49:41+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/9bbf1e2a-6085-4a0a-b68f-357f417f4012" }, { "benchmark": "swe-bench-multimodal", "score": 25.0, "metric": "solveable_accuracy", "cost_per_instance": 0.5941, "average_runtime": 611.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-jade-spark-2862/21900356665/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.11.3", "submission_time": "2026-02-11T11:36:41+00:00", "component_scores": { "solveable_accuracy": 25.0, "unsolveable_accuracy": 0.0, "combined_accuracy": 16.7 }, "eval_visualization_page": "https://laminar.sh/shared/evals/194e3e71-5874-45a5-96b8-220155ea1e24" } ]