[ { "benchmark": "swt-bench", "score": 71.8, "metric": "accuracy", "cost_per_instance": 0.8, "average_runtime": 265.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-gemini-3-1-pro-preview/1778132376/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.16.1", "acp_agent_name": "gemini-cli", "acp_agent_version": "v0.36.0", "submission_time": "2026-04-06T01:37:00.436584+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/a6cd4b36-f020-4cd2-b765-003fa8d133a6" }, { "benchmark": "swe-bench", "score": 79.8, "metric": "accuracy", "cost_per_instance": 1.4, "average_runtime": 403.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-gemini-3-1-pro-preview/1778132369/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.16.1", "acp_agent_name": "gemini-cli", "acp_agent_version": "v0.36.0", "submission_time": "2026-04-04T23:18:19+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/1130583d-352c-48b3-aa03-5f81dbed1833" }, { "benchmark": "gaia", "score": 88.5, "metric": "accuracy", "cost_per_instance": 0.61, "average_runtime": 324.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-gemini-3-1-pro-preview/1778132355/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.16.1", "acp_agent_name": "gemini-cli", "acp_agent_version": "v0.36.0", "submission_time": "2026-04-04T18:25:27+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/124609e8-75f5-4ef4-9bcf-b09d631142c8" }, { "benchmark": "commit0", "score": 37.5, "metric": "accuracy", "cost_per_instance": 7.55, "average_runtime": 1072.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-gemini-3-1-pro-preview/1778132348/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.19.0", "acp_agent_name": "gemini-cli", "acp_agent_version": "v0.38.0", "submission_time": "2026-04-28T10:01:02+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/4a760cca-c1e2-4b81-af5e-7a97d4bb2394" }, { "benchmark": "swe-bench-multimodal", "score": 41.2, "metric": "solveable_accuracy", "cost_per_instance": 2.42, "average_runtime": 546.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-gemini-3-1-pro-preview/1778132362/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.16.1", "acp_agent_name": "gemini-cli", "acp_agent_version": "v0.36.0", "submission_time": "2026-04-04T21:16:23+00:00", "component_scores": { "solveable_accuracy": 41.2, "unsolveable_accuracy": 2.9, "combined_accuracy": 28.4 }, "eval_visualization_page": "https://laminar.sh/shared/evals/aca23b2d-36bd-42e5-9024-3344c38fe4d7" } ]