[ { "benchmark": "swe-bench", "score": 34.2, "metric": "accuracy", "cost_per_instance": 0.24, "average_runtime": 1344.0, "full_archive": "https://results.eval.all-hands.dev/eval-21370501716-nemotron-3_litellm_proxy-openai-NVIDIA-Nemotron-3-Nano-30B-A3B-FP8_26-01-27-05-06.tar.gz", "tags": [ "swe-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T20:00:30.624153+00:00" }, { "benchmark": "gaia", "score": 8.5, "metric": "accuracy", "cost_per_instance": 0.04, "average_runtime": 794.0, "full_archive": "https://results.eval.all-hands.dev/gaia/litellm_proxy-openai-NVIDIA-Nemotron-3-Nano-30B-A3B-FP8/21412718442/results.tar.gz", "tags": [ "gaia" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T20:00:30.624153+00:00" }, { "benchmark": "commit0", "score": 6.2, "metric": "accuracy", "cost_per_instance": 0.03, "average_runtime": 21.0, "full_archive": "https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-NVIDIA-Nemotron-3-Nano-30B-A3B-FP8/25079507282/results.tar.gz", "tags": [ "commit0" ], "agent_version": "v1.17.0", "submission_time": "2026-04-28T22:20:32+00:00", "eval_visualization_page": "https://laminar.sh/shared/evals/fb317122-02be-4878-a1be-7465af2fb5e7" }, { "benchmark": "swe-bench-multimodal", "score": 20.9, "metric": "solveable_accuracy", "cost_per_instance": 0.4, "average_runtime": 1179.0, "full_archive": "https://results.eval.all-hands.dev/21402472780-nemotron-3_litellm_proxy-openai-NVIDIA-Nemotron-3-Nano-30B-A3B-FP8_26-01-27-17-53.tar.gz", "tags": [ "swe-bench-multimodal" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T20:00:30.624153+00:00", "component_scores": { "solveable_accuracy": 20.9, "unsolveable_accuracy": 6.1, "combined_accuracy": 16.0, "solveable_resolved": 14, "solveable_total": 67, "unsolveable_resolved": 2, "unsolveable_total": 33 } }, { "benchmark": "swt-bench", "score": 7.6, "metric": "accuracy", "cost_per_instance": 0.19, "average_runtime": 896.3, "full_archive": "https://results.eval.all-hands.dev/swtbench/litellm_proxy-openai-NVIDIA-Nemotron-3-Nano-30B-A3B-FP8/21413263674/results.tar.gz", "tags": [ "swt-bench" ], "agent_version": "v1.8.3", "submission_time": "2026-01-27T20:00:30.624153+00:00" } ]