[ { "benchmark": "swe-bench-multimodal", "score": 27.9, "metric": "solveable_accuracy", "cost_per_instance": 0.388, "average_runtime": 1084.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-minimax-MiniMax-M2-7/23369175877/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.14.0", "submission_time": "2026-03-21T07:29:33+00:00", "component_scores": { "solveable_accuracy": 27.9, "unsolveable_accuracy": 2.9, "combined_accuracy": 19.6 }, "eval_visualization_page": "https://laminar.sh/shared/evals/161dd621-b70d-4089-afcf-ea2e3d13e874" }, { "benchmark": "commit0", "score": 18.8, "metric": "accuracy", "cost_per_instance": 0.8926, "average_runtime": 1621.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-minimax-MiniMax-M2-7/25008721830/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-27T22:04:27+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/081a0b7f-fef2-4dad-a267-49f90b2fc866" }, { "benchmark": "swe-bench", "score": 75.6, "metric": "accuracy", "cost_per_instance": 0.1795, "average_runtime": 529.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-minimax-MiniMax-M2-7/23463806447/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.14.0", "submission_time": "2026-03-23T23:35:12+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/a248d81f-d8d0-4823-9853-0378eed4dcc4" }, { "benchmark": "gaia", "score": 25.5, "metric": "accuracy", "cost_per_instance": 0.2163, "average_runtime": 676.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-minimax-MiniMax-M2-7/23855930236/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.16.0", "submission_time": "2026-04-01T22:22:23+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/6839ba33-64fc-4b43-8ec2-e0c18f2f94bf" }, { "benchmark": "swt-bench", "score": 69.1, "metric": "accuracy", "cost_per_instance": 0.1283, "average_runtime": 352.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-minimax-MiniMax-M2-7/24039895569/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.15.0", "submission_time": "2026-04-06T18:38:26+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/f42deebe-f174-41f7-96e8-3e2fbc8a61d7" } ]