[ { "benchmark": "swe-bench-multimodal", "score": 35.3, "metric": "solveable_accuracy", "cost_per_instance": 1.88, "average_runtime": 221.0, "full_archive": "https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openai-gpt-5-5/1778132364/results.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.19.1", "acp_agent_name": "codex-acp", "acp_agent_version": "v0.11.1", "submission_time": "2026-05-01T22:57:01+00:00", "component_scores": { "solveable_accuracy": 35.3, "unsolveable_accuracy": 0.0, "combined_accuracy": 23.5 }, "eval_visualization_page": "https://laminar.sh/shared/evals/7979f19b-67f3-4dff-b5a6-877398a8afe5" }, { "benchmark": "swe-bench", "score": 78.0, "metric": "accuracy", "cost_per_instance": 1.17, "average_runtime": 171.0, "full_archive": "https://results.eval.all-hands.dev/swebench/litellm_proxy-openai-gpt-5-5/1778132371/results.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.19.1", "acp_agent_name": "codex-acp", "acp_agent_version": "v0.11.1", "submission_time": "2026-05-04T06:52:30+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/ee96327c-b067-4dd2-adc0-72204b08fbb3" }, { "benchmark": "gaia", "score": 84.8, "metric": "accuracy", "cost_per_instance": 0.28, "average_runtime": 111.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-openai-gpt-5-5/1778132357/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.19.1", "acp_agent_name": "codex-acp", "acp_agent_version": "v0.11.1", "submission_time": "2026-05-04T03:11:56+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/d635f611-1e95-4934-a8b6-02eee6fb3fed" }, { "benchmark": "swt-bench", "score": 80.4, "metric": "accuracy", "cost_per_instance": 1.11, "average_runtime": 146.0, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-openai-gpt-5-5/1778132378/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.19.0", "acp_agent_name": "codex-acp", "acp_agent_version": "v0.11.1", "submission_time": "2026-04-30T23:18:25+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/d4ba8910-78eb-43bc-954b-2b5589d079e8" }, { "benchmark": "commit0", "score": 37.5, "metric": "accuracy", "cost_per_instance": 5.57, "average_runtime": 425.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-gpt-5-5/1778132350/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.19.1", "acp_agent_name": "codex-acp", "acp_agent_version": "v0.11.1", "submission_time": "2026-05-04T03:13:32+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/cef29694-8467-418d-bc74-859870453732" } ]