[ { "benchmark": "swe-bench", "score": 71.6, "metric": "accuracy", "cost_per_instance": 0.16, "average_runtime": 1429.0, "full_archive": "https://results.eval.all-hands.dev/eval-21386741317-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-27-17-23.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T18:40:51.252521+00:00" }, { "benchmark": "gaia", "score": 50.3, "metric": "accuracy", "cost_per_instance": 0.06, "average_runtime": 427.0, "full_archive": "https://results.eval.all-hands.dev/eval-21070491317-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-16-16-39.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T18:40:51.252521+00:00" }, { "benchmark": "swt-bench", "score": 53.6, "metric": "accuracy", "cost_per_instance": 0.12, "average_runtime": 1215.0, "full_archive": "https://results.eval.all-hands.dev/eval-21233988879-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-24-02-34.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T18:40:51.252521+00:00" }, { "benchmark": "commit0", "score": 25.0, "metric": "accuracy", "cost_per_instance": 0.57, "average_runtime": 1683.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-deepseek-deepseek-reasoner/25033123976/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-28T08:57:17+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/df76145a-e319-468e-ae2e-07f71208496f" }, { "benchmark": "swe-bench-multimodal", "score": 27.9, "metric": "solveable_accuracy", "cost_per_instance": 0.19, "average_runtime": 1515.0, "full_archive": "https://results.eval.all-hands.dev/eval-21345780997-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-26-11-33.tar.gz", "tags": [ "swe-bench-multimodal" ], "component_scores": { "solveable_accuracy": 27.9, "unsolveable_accuracy": 0.0, "combined_accuracy": 18.6 }, "agent_version": "v1.8.3", "submission_time": "2026-01-27T18:40:51.252521+00:00" } ]