[ { "benchmark": "swe-bench", "score": 73.4, "metric": "accuracy", "cost_per_instance": 1.06, "average_runtime": 1435.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-openrouter-z-ai-glm-5/22118959539/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.11.5", "submission_time": "2026-02-18T19:24:52+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/58f92ff8-e886-40ab-a636-8337fc8c9bfe" }, { "benchmark": "commit0", "score": 31.2, "metric": "accuracy", "cost_per_instance": 2.23, "average_runtime": 1725.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-openrouter-z-ai-glm-5/25008748071/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-27T22:07:02+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/76ea5306-b580-47c0-a9ad-67a1c995bfba" }, { "benchmark": "swt-bench", "score": 47.3, "metric": "accuracy", "cost_per_instance": 0.91, "average_runtime": 1085.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-openrouter-z-ai-glm-5/22240291718/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.11.5", "submission_time": "2026-02-21T09:26:31+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/a7d29286-6810-4ca8-8a11-21d6ae33822b" }, { "benchmark": "gaia", "score": 60.0, "metric": "accuracy", "cost_per_instance": 0.36, "average_runtime": 1328.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-openrouter-z-ai-glm-5/22117141473/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.11.5", "submission_time": "2026-02-18T03:25:43+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/c37cd7c9-3839-402d-bdf6-a0c10c4c085b" }, { "benchmark": "swe-bench-multimodal", "score": 35.3, "metric": "solveable_accuracy", "cost_per_instance": 0.58, "average_runtime": 1140.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-z-ai-glm-5/22881368139/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.11.5", "submission_time": "2026-03-10T15:12:57+00:00", "component_scores": { "solveable_accuracy": 35.3, "unsolveable_accuracy": 8.8, "combined_accuracy": 26.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/756a9904-f7f3-4c23-a663-8a7dd853d6dd" } ]