[ { "benchmark": "swt-bench", "score": 70.2, "metric": "accuracy", "cost_per_instance": 0.76, "average_runtime": 280.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-openrouter-z-ai-glm-5-1/24214829814/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.16.1", "submission_time": "2026-04-10T06:58:18+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/edae3592-9be7-4da5-83c3-b6ebe10e1bf6" }, { "benchmark": "gaia", "score": 67.3, "metric": "accuracy", "cost_per_instance": 0.47, "average_runtime": 280.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-openrouter-z-ai-glm-5-1/24376299760/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.17.0", "submission_time": "2026-04-14T04:36:03+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/0d35046a-cbee-473b-b316-d4f57ff7f9d2" }, { "benchmark": "commit0", "score": 37.5, "metric": "accuracy", "cost_per_instance": 5.31, "average_runtime": 2498.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-openrouter-z-ai-glm-5-1/24902095932/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-24T22:03:39+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/069f1865-a4b0-4a67-9451-3043c2ab138c" }, { "benchmark": "swe-bench", "score": 75.0, "metric": "accuracy", "cost_per_instance": 1.54, "average_runtime": 748.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-openrouter-z-ai-glm-5-1/24697829798/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.17.0", "submission_time": "2026-04-21T06:01:18+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/6543c602-c721-48e6-a0fe-a1abb08fab4b" }, { "benchmark": "swe-bench-multimodal", "score": 41.2, "metric": "solveable_accuracy", "cost_per_instance": 6.92, "average_runtime": 3031.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-z-ai-glm-5-1/24730025059/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.17.0", "submission_time": "2026-04-21T19:14:21+00:00", "component_scores": { "solveable_accuracy": 41.2, "unsolveable_accuracy": 0.0, "combined_accuracy": 27.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/26dfa179-9ab7-4b15-988d-b7ed2bf91328" } ]