{ "tasks": [ "Astronomy_000", "Astronomy_001", "Astronomy_002", "Astronomy_003", "Chemistry_000", "Chemistry_001", "Chemistry_002", "Chemistry_003", "Earth_000", "Earth_001", "Earth_002", "Earth_003", "Energy_000", "Energy_001", "Energy_002", "Energy_003", "Information_000", "Information_001", "Information_002", "Information_003", "Life_000", "Life_001", "Life_002", "Life_003", "Material_000", "Material_001", "Material_002", "Material_003", "Math_000", "Math_001", "Math_002", "Math_003", "Neuroscience_000", "Neuroscience_001", "Neuroscience_002", "Neuroscience_003", "Physics_000", "Physics_001", "Physics_002", "Physics_003" ], "agents": [ "Claude Code", "Codex CLI", "ARIS Codex", "OpenClaw", "Nanobot", "EvoScientist", "ResearchClaw", "ResearchHarness (Claude-Opus-4.6)", "ResearchHarness (Claude-Opus-4.7)", "ResearchHarness (DeepSeek-V4-Pro)", "ResearchHarness (GLM-5.1)", "ResearchHarness (GPT-5.4)", "ResearchHarness (GPT-5.5)", "ResearchHarness (Gemini-3.1-Pro)", "ResearchHarness (Gemini-3.5-Flash)", "ResearchHarness (Grok-4.1)", "ResearchHarness (Grok-4.3)", "ResearchHarness (Kimi-K2.5)", "ResearchHarness (Kimi-K2.6)", "ResearchHarness (MiMo-V2-Pro)", "ResearchHarness (MiMo-V2.5)", "ResearchHarness (Qwen3.5-397B-A17B)", "ResearchHarness (Qwen3.6-Plus)", "ResearchHarness (Qwen3.7-Max)" ], "scores": { "Claude Code": { "Astronomy_000": { "score": 23.5, "run_id": "Astronomy_000_20260319_184609", "duration_seconds": 563, "cost_usd": 1.876667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Astronomy_001": { "score": 27.4, "run_id": "Astronomy_001_20260401_141304", "duration_seconds": 308, "cost_usd": 1.026667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Astronomy_002": { "score": 23.5, "run_id": "Astronomy_002_20260401_141959", "duration_seconds": 799, "cost_usd": 2.663333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Astronomy_003": { "score": 46.5, "run_id": "Astronomy_003_20260401_143610", "duration_seconds": 1078, "cost_usd": 3.593333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Chemistry_000": { "score": 14.75, "run_id": "Chemistry_000_20260325_131753", "duration_seconds": 2560, "cost_usd": 8.533333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Chemistry_001": { "score": 3.5, "run_id": "Chemistry_001_20260401_145553", "duration_seconds": 3032, "cost_usd": 10.106667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260401_154822", "duration_seconds": 3258, "cost_usd": 10.86, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Chemistry_003": { "score": 18.0, "run_id": "Chemistry_003_20260401_190857", "duration_seconds": 7930, "cost_usd": 26.433333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Earth_000": { "score": 24.6, "run_id": "Earth_000_20260320_014954", "duration_seconds": 604, "cost_usd": 2.013333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Earth_001": { "score": 33.56, "run_id": "Earth_001_20260401_191305", "duration_seconds": 455, "cost_usd": 1.516667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Earth_002": { "score": 31.7, "run_id": "Earth_002_20260401_191349", "duration_seconds": 690, "cost_usd": 2.3, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Earth_003": { "score": 1.0, "run_id": "Earth_003_20260401_191636", "duration_seconds": 1047, "cost_usd": 3.49, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Energy_000": { "score": 11.3, "run_id": "Energy_000_20260320_205623", "duration_seconds": 1043, "cost_usd": 3.476667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Energy_001": { "score": 21.7, "run_id": "Energy_001_20260401_191819", "duration_seconds": 463, "cost_usd": 1.543333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Energy_002": { "score": 38.6, "run_id": "Energy_002_20260401_191836", "duration_seconds": 1443, "cost_usd": 4.81, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Energy_003": { "score": 15.3, "run_id": "Energy_003_20260401_200259", "duration_seconds": 3649, "cost_usd": 12.163333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Information_000": { "score": 48.0, "run_id": "Information_000_20260325_155748", "duration_seconds": 747, "cost_usd": 2.49, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Information_001": { "score": 7.0, "run_id": "Information_001_20260401_192424", "duration_seconds": 591, "cost_usd": 1.97, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Information_002": { "score": 27.1, "run_id": "Information_002_20260401_192455", "duration_seconds": 315, "cost_usd": 1.05, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Information_003": { "score": 17.75, "run_id": "Information_003_20260401_224112", "duration_seconds": 3104, "cost_usd": 10.346667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Life_000": { "score": 8.2, "run_id": "Life_000_20260326_142039", "duration_seconds": 455, "cost_usd": 1.516667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Life_001": { "score": 19.55, "run_id": "Life_001_20260321_181849", "duration_seconds": 318, "cost_usd": 1.06, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Life_002": { "score": 6.9, "run_id": "Life_002_20260401_200700", "duration_seconds": 1072, "cost_usd": 3.573333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Life_003": { "score": 27.6, "run_id": "Life_003_20260401_200717", "duration_seconds": 278, "cost_usd": 0.926667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Material_000": { "score": 14.5, "run_id": "Material_000_20260326_144303", "duration_seconds": 12684, "cost_usd": 42.28, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Material_001": { "score": 23.6, "run_id": "Material_001_20260401_224352", "duration_seconds": 1111, "cost_usd": 3.703333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Material_002": { "score": 35.1, "run_id": "Material_002_20260401_231210", "duration_seconds": 814, "cost_usd": 2.713333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Material_003": { "score": 28.8, "run_id": "Material_003_20260401_232634", "duration_seconds": 2248, "cost_usd": 7.493333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Math_000": { "score": 26.2, "run_id": "Math_000_20260401_233652", "duration_seconds": 559, "cost_usd": 1.863333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Math_001": { "score": 44.1, "run_id": "Math_001_20260401_235039", "duration_seconds": 1846, "cost_usd": 6.153333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Math_002": { "score": 10.2, "run_id": "Math_002_20260401_235043", "duration_seconds": 1781, "cost_usd": 5.936667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Math_003": { "score": 29.6, "run_id": "Math_003_20260401_235138", "duration_seconds": 1159, "cost_usd": 3.863333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_000": { "score": 8.6, "run_id": "Neuroscience_000_20260321_010354", "duration_seconds": 595, "cost_usd": 1.983333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_001": { "score": 4.95, "run_id": "Neuroscience_001_20260321_182515", "duration_seconds": 669, "cost_usd": 2.23, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_002": { "score": 0.75, "run_id": "Neuroscience_002_20260402_002234", "duration_seconds": 999, "cost_usd": 3.33, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_003": { "score": 7.7, "run_id": "Neuroscience_003_20260402_011020", "duration_seconds": 713, "cost_usd": 2.376667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Physics_000": { "score": 27.4, "run_id": "Physics_000_20260326_150535", "duration_seconds": 333, "cost_usd": 1.11, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Physics_001": { "score": 30.25, "run_id": "Physics_001_20260402_002321", "duration_seconds": 947, "cost_usd": 3.156667, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Physics_002": { "score": 25.05, "run_id": "Physics_002_20260402_002341", "duration_seconds": 376, "cost_usd": 1.253333, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" }, "Physics_003": { "score": 46.5, "run_id": "Physics_003_20260321_135653", "duration_seconds": 378, "cost_usd": 1.26, "model": "claude-opus-4-6", "model_display": "Claude-Opus-4.6" } }, "Codex CLI": { "Astronomy_000": { "score": 29.4, "run_id": "Astronomy_000_20260319_145223", "duration_seconds": 978, "cost_usd": 1.83375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 20.0, "run_id": "Astronomy_001_20260401_120316", "duration_seconds": 646, "cost_usd": 1.21125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 12.7, "run_id": "Astronomy_002_20260401_135149", "duration_seconds": 716, "cost_usd": 1.3425, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 44.0, "run_id": "Astronomy_003_20260402_011445", "duration_seconds": 385, "cost_usd": 0.721875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 11.0, "run_id": "Chemistry_000_20260325_133117", "duration_seconds": 2038, "cost_usd": 3.82125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 6.2, "run_id": "Chemistry_001_20260402_133326", "duration_seconds": 1479, "cost_usd": 2.773125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260402_133337", "duration_seconds": 529, "cost_usd": 0.991875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 12.3, "run_id": "Chemistry_003_20260402_133344", "duration_seconds": 3267, "cost_usd": 6.125625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 25.3, "run_id": "Earth_000_20260325_142223", "duration_seconds": 622, "cost_usd": 1.16625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 40.87, "run_id": "Earth_001_20260402_020635", "duration_seconds": 622, "cost_usd": 1.16625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 22.6, "run_id": "Earth_002_20260321_014305", "duration_seconds": 1367, "cost_usd": 2.563125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 0.0, "run_id": "Earth_003_20260402_020613", "duration_seconds": 478, "cost_usd": 0.89625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 13.2, "run_id": "Energy_000_20260325_145840", "duration_seconds": 747, "cost_usd": 1.400625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 24.5, "run_id": "Energy_001_20260402_021036", "duration_seconds": 1855, "cost_usd": 3.478125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 33.0, "run_id": "Energy_002_20260402_021822", "duration_seconds": 637, "cost_usd": 1.194375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 21.5, "run_id": "Energy_003_20260402_021829", "duration_seconds": 781, "cost_usd": 1.464375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 10.0, "run_id": "Information_000_20260321_012226", "duration_seconds": 633, "cost_usd": 1.186875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 5.6, "run_id": "Information_001_20260402_023436", "duration_seconds": 695, "cost_usd": 1.303125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 36.8, "run_id": "Information_002_20260402_023445", "duration_seconds": 518, "cost_usd": 0.97125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 15.6, "run_id": "Information_003_20260402_023453", "duration_seconds": 1895, "cost_usd": 3.553125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 7.55, "run_id": "Life_000_20260320_211701", "duration_seconds": 871, "cost_usd": 1.633125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 12.7, "run_id": "Life_001_20260402_024829", "duration_seconds": 426, "cost_usd": 0.79875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 1.5, "run_id": "Life_002_20260402_024836", "duration_seconds": 1173, "cost_usd": 2.199375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 36.0, "run_id": "Life_003_20260402_025011", "duration_seconds": 1088, "cost_usd": 2.04, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 17.5, "run_id": "Material_000_20260326_151955", "duration_seconds": 608, "cost_usd": 1.14, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 11.55, "run_id": "Material_001_20260402_112556", "duration_seconds": 932, "cost_usd": 1.7475, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 0.99, "run_id": "Material_002_20260402_113105", "duration_seconds": 782, "cost_usd": 1.46625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 21.8, "run_id": "Material_003_20260402_112258", "duration_seconds": 3645, "cost_usd": 6.834375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 26.65, "run_id": "Math_000_20260321_013909", "duration_seconds": 498, "cost_usd": 0.93375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 39.0, "run_id": "Math_001_20260402_122551", "duration_seconds": 512, "cost_usd": 0.96, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 7.4, "run_id": "Math_002_20260402_122607", "duration_seconds": 2451, "cost_usd": 4.595625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260402_122648", "duration_seconds": 921, "cost_usd": 1.726875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 14.0, "run_id": "Neuroscience_000_20260402_122827", "duration_seconds": 827, "cost_usd": 1.550625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 2.55, "run_id": "Neuroscience_001_20260402_122849", "duration_seconds": 625, "cost_usd": 1.171875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 2.0, "run_id": "Neuroscience_002_20260402_134508", "duration_seconds": 3363, "cost_usd": 6.305625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 15.8, "run_id": "Neuroscience_003_20260402_124451", "duration_seconds": 1041, "cost_usd": 1.951875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 32.1, "run_id": "Physics_000_20260321_011953", "duration_seconds": 521, "cost_usd": 0.976875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 25.7, "run_id": "Physics_001_20260402_124607", "duration_seconds": 566, "cost_usd": 1.06125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 21.4, "run_id": "Physics_002_20260402_124626", "duration_seconds": 638, "cost_usd": 1.19625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 45.0, "run_id": "Physics_003_20260402_125047", "duration_seconds": 548, "cost_usd": 1.0275, "model": "gpt-5.4", "model_display": "GPT-5.4" } }, "ARIS Codex": { "Astronomy_000": { "score": 16.5, "run_id": "Astronomy_000_20260408_123003", "duration_seconds": 338, "cost_usd": 0.63375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 10.4, "run_id": "Astronomy_001_20260409_012219", "duration_seconds": 276, "cost_usd": 0.5175, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 11.0, "run_id": "Astronomy_002_20260409_012231", "duration_seconds": 297, "cost_usd": 0.556875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 47.4, "run_id": "Astronomy_003_20260409_012239", "duration_seconds": 212, "cost_usd": 0.3975, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 15.4, "run_id": "Chemistry_000_20260408_152441", "duration_seconds": 1784, "cost_usd": 3.345, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 2.5, "run_id": "Chemistry_001_20260409_091054", "duration_seconds": 390, "cost_usd": 0.73125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260409_091108", "duration_seconds": 384, "cost_usd": 0.72, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 11.7, "run_id": "Chemistry_003_20260409_091136", "duration_seconds": 252, "cost_usd": 0.4725, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 14.5, "run_id": "Earth_000_20260409_004459", "duration_seconds": 303, "cost_usd": 0.568125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 34.48, "run_id": "Earth_001_20260409_092349", "duration_seconds": 238, "cost_usd": 0.44625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 11.4, "run_id": "Earth_002_20260409_092357", "duration_seconds": 296, "cost_usd": 0.555, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 0.0, "run_id": "Earth_003_20260409_092406", "duration_seconds": 209, "cost_usd": 0.391875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 10.7, "run_id": "Energy_000_20260409_001855", "duration_seconds": 239, "cost_usd": 0.448125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 3.6, "run_id": "Energy_001_20260409_093008", "duration_seconds": 628, "cost_usd": 1.1775, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 32.55, "run_id": "Energy_002_20260409_093014", "duration_seconds": 319, "cost_usd": 0.598125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 7.5, "run_id": "Energy_003_20260409_093023", "duration_seconds": 244, "cost_usd": 0.4575, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 1.2, "run_id": "Information_000_20260409_002925", "duration_seconds": 248, "cost_usd": 0.465, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 3.6, "run_id": "Information_001_20260409_094219", "duration_seconds": 261, "cost_usd": 0.489375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 11.4, "run_id": "Information_002_20260409_094249", "duration_seconds": 217, "cost_usd": 0.406875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 7.8, "run_id": "Information_003_20260409_094306", "duration_seconds": 657, "cost_usd": 1.231875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 5.95, "run_id": "Life_000_20260409_005147", "duration_seconds": 340, "cost_usd": 0.6375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 14.0, "run_id": "Life_001_20260409_095447", "duration_seconds": 259, "cost_usd": 0.485625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 7.3, "run_id": "Life_002_20260409_095510", "duration_seconds": 238, "cost_usd": 0.44625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 40.5, "run_id": "Life_003_20260409_095535", "duration_seconds": 312, "cost_usd": 0.585, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 11.5, "run_id": "Material_000_20260409_005742", "duration_seconds": 679, "cost_usd": 1.273125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 8.5, "run_id": "Material_001_20260409_100107", "duration_seconds": 231, "cost_usd": 0.433125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 11.75, "run_id": "Material_002_20260409_113816", "duration_seconds": 298, "cost_usd": 0.55875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 18.0, "run_id": "Material_003_20260409_100203", "duration_seconds": 564, "cost_usd": 1.0575, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 7.8, "run_id": "Math_000_20260409_011005", "duration_seconds": 308, "cost_usd": 0.5775, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 31.3, "run_id": "Math_001_20260409_101638", "duration_seconds": 711, "cost_usd": 1.333125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 6.5, "run_id": "Math_002_20260409_101644", "duration_seconds": 522, "cost_usd": 0.97875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 0.0, "run_id": "Math_003_20260409_103114", "duration_seconds": 249, "cost_usd": 0.466875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 5.0, "run_id": "Neuroscience_000_20260409_011039", "duration_seconds": 447, "cost_usd": 0.838125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 2.0, "run_id": "Neuroscience_001_20260409_104144", "duration_seconds": 532, "cost_usd": 0.9975, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260409_104202", "duration_seconds": 693, "cost_usd": 1.299375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 19.65, "run_id": "Neuroscience_003_20260409_104225", "duration_seconds": 280, "cost_usd": 0.525, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 18.2, "run_id": "Physics_000_20260409_011045", "duration_seconds": 327, "cost_usd": 0.613125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 25.9, "run_id": "Physics_001_20260409_105940", "duration_seconds": 365, "cost_usd": 0.684375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 20.0, "run_id": "Physics_002_20260409_110111", "duration_seconds": 280, "cost_usd": 0.525, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 34.6, "run_id": "Physics_003_20260409_110326", "duration_seconds": 430, "cost_usd": 0.80625, "model": "gpt-5.4", "model_display": "GPT-5.4" } }, "OpenClaw": { "Astronomy_000": { "score": 18.6, "run_id": "Astronomy_000_20260325_124153", "duration_seconds": 186, "cost_usd": 0.34875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 36.0, "run_id": "Astronomy_001_20260402_210925", "duration_seconds": 314, "cost_usd": 0.58875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 11.6, "run_id": "Astronomy_002_20260321_184042", "duration_seconds": 242, "cost_usd": 0.45375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 47.3, "run_id": "Astronomy_003_20260402_211549", "duration_seconds": 301, "cost_usd": 0.564375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 12.85, "run_id": "Chemistry_000_20260325_142150", "duration_seconds": 461, "cost_usd": 0.864375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 0.5, "run_id": "Chemistry_001_20260402_212107", "duration_seconds": 294, "cost_usd": 0.55125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260402_212616", "duration_seconds": 356, "cost_usd": 0.6675, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 9.5, "run_id": "Chemistry_003_20260402_213222", "duration_seconds": 533, "cost_usd": 0.999375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 20.1, "run_id": "Earth_000_20260325_144124", "duration_seconds": 281, "cost_usd": 0.526875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 31.38, "run_id": "Earth_001_20260402_214137", "duration_seconds": 276, "cost_usd": 0.5175, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 16.8, "run_id": "Earth_002_20260402_214628", "duration_seconds": 449, "cost_usd": 0.841875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 0.9, "run_id": "Earth_003_20260402_215412", "duration_seconds": 286, "cost_usd": 0.53625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 16.0, "run_id": "Energy_000_20260325_151822", "duration_seconds": 363, "cost_usd": 0.680625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 15.5, "run_id": "Energy_001_20260402_215912", "duration_seconds": 609, "cost_usd": 1.141875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 31.0, "run_id": "Energy_002_20260321_150347", "duration_seconds": 334, "cost_usd": 0.62625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 25.5, "run_id": "Energy_003_20260402_220933", "duration_seconds": 322, "cost_usd": 0.60375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 35.1, "run_id": "Information_000_20260325_163554", "duration_seconds": 228, "cost_usd": 0.4275, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 0.0, "run_id": "Information_001_20260402_221510", "duration_seconds": 309, "cost_usd": 0.579375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 15.8, "run_id": "Information_002_20260402_222052", "duration_seconds": 222, "cost_usd": 0.41625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 5.2, "run_id": "Information_003_20260402_222436", "duration_seconds": 570, "cost_usd": 1.06875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 5.4, "run_id": "Life_000_20260326_142858", "duration_seconds": 361, "cost_usd": 0.676875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 13.35, "run_id": "Life_001_20260402_223418", "duration_seconds": 285, "cost_usd": 0.534375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 9.7, "run_id": "Life_002_20260321_145056", "duration_seconds": 438, "cost_usd": 0.82125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 34.5, "run_id": "Life_003_20260402_223914", "duration_seconds": 356, "cost_usd": 0.6675, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 12.0, "run_id": "Material_000_20260326_145735", "duration_seconds": 374, "cost_usd": 0.70125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 13.45, "run_id": "Material_001_20260402_224523", "duration_seconds": 275, "cost_usd": 0.515625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 6.12, "run_id": "Material_002_20260402_225007", "duration_seconds": 398, "cost_usd": 0.74625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 20.1, "run_id": "Material_003_20260402_225655", "duration_seconds": 305, "cost_usd": 0.571875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 24.7, "run_id": "Math_000_20260402_230222", "duration_seconds": 549, "cost_usd": 1.029375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 23.9, "run_id": "Math_001_20260402_231143", "duration_seconds": 299, "cost_usd": 0.560625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 8.6, "run_id": "Math_002_20260402_231652", "duration_seconds": 387, "cost_usd": 0.725625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 0.0, "run_id": "Math_003_20260402_232335", "duration_seconds": 281, "cost_usd": 0.526875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 13.2, "run_id": "Neuroscience_000_20260402_232819", "duration_seconds": 336, "cost_usd": 0.63, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 3.75, "run_id": "Neuroscience_001_20260402_233406", "duration_seconds": 368, "cost_usd": 0.69, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260402_234030", "duration_seconds": 1043, "cost_usd": 1.955625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 15.05, "run_id": "Neuroscience_003_20260402_235804", "duration_seconds": 367, "cost_usd": 0.688125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 7.5, "run_id": "Physics_000_20260326_151120", "duration_seconds": 272, "cost_usd": 0.51, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 32.0, "run_id": "Physics_001_20260403_000425", "duration_seconds": 392, "cost_usd": 0.735, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 27.45, "run_id": "Physics_002_20260403_001108", "duration_seconds": 327, "cost_usd": 0.613125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 43.3, "run_id": "Physics_003_20260403_001651", "duration_seconds": 279, "cost_usd": 0.523125, "model": "gpt-5.4", "model_display": "GPT-5.4" } }, "Nanobot": { "Astronomy_000": { "score": 14.5, "run_id": "Astronomy_000_20260325_124221", "duration_seconds": 259, "cost_usd": 0.485625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 20.4, "run_id": "Astronomy_001_20260331_192927", "duration_seconds": 154, "cost_usd": 0.28875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 10.0, "run_id": "Astronomy_002_20260331_193218", "duration_seconds": 146, "cost_usd": 0.27375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 44.2, "run_id": "Astronomy_003_20260331_193503", "duration_seconds": 101, "cost_usd": 0.189375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 16.85, "run_id": "Chemistry_000_20260320_193804", "duration_seconds": 2033, "cost_usd": 3.811875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 0.5, "run_id": "Chemistry_001_20260331_194645", "duration_seconds": 248, "cost_usd": 0.465, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260331_195151", "duration_seconds": 90, "cost_usd": 0.16875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 7.5, "run_id": "Chemistry_003_20260331_195455", "duration_seconds": 223, "cost_usd": 0.418125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 21.5, "run_id": "Earth_000_20260325_135520", "duration_seconds": 441, "cost_usd": 0.826875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 25.3, "run_id": "Earth_001_20260331_201226", "duration_seconds": 79, "cost_usd": 0.148125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 9.9, "run_id": "Earth_002_20260331_201408", "duration_seconds": 262, "cost_usd": 0.49125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 0.6, "run_id": "Earth_003_20260331_201851", "duration_seconds": 86, "cost_usd": 0.16125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 9.5, "run_id": "Energy_000_20260325_151836", "duration_seconds": 483, "cost_usd": 0.905625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 7.6, "run_id": "Energy_001_20260331_200332", "duration_seconds": 229, "cost_usd": 0.429375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 27.85, "run_id": "Energy_002_20260331_200739", "duration_seconds": 166, "cost_usd": 0.31125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 9.0, "run_id": "Energy_003_20260331_201029", "duration_seconds": 101, "cost_usd": 0.189375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 22.7, "run_id": "Information_000_20260325_164339", "duration_seconds": 271, "cost_usd": 0.508125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 1.0, "run_id": "Information_001_20260331_202137", "duration_seconds": 137, "cost_usd": 0.256875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 11.5, "run_id": "Information_002_20260331_202504", "duration_seconds": 146, "cost_usd": 0.27375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 9.2, "run_id": "Information_003_20260331_202744", "duration_seconds": 888, "cost_usd": 1.665, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 7.95, "run_id": "Life_000_20260401_111247", "duration_seconds": 326, "cost_usd": 0.61125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 9.65, "run_id": "Life_001_20260401_111910", "duration_seconds": 98, "cost_usd": 0.18375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 4.9, "run_id": "Life_002_20260401_112219", "duration_seconds": 161, "cost_usd": 0.301875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 29.3, "run_id": "Life_003_20260401_112646", "duration_seconds": 228, "cost_usd": 0.4275, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 23.0, "run_id": "Material_000_20260321_132712", "duration_seconds": 538, "cost_usd": 1.00875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 1.75, "run_id": "Material_001_20260401_113052", "duration_seconds": 89, "cost_usd": 0.166875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 14.9, "run_id": "Material_002_20260401_115629", "duration_seconds": 157, "cost_usd": 0.294375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 12.35, "run_id": "Material_003_20260401_115948", "duration_seconds": 147, "cost_usd": 0.275625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 17.7, "run_id": "Math_000_20260401_120618", "duration_seconds": 221, "cost_usd": 0.414375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 30.6, "run_id": "Math_001_20260401_121718", "duration_seconds": 88, "cost_usd": 0.165, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 0.0, "run_id": "Math_002_20260401_122108", "duration_seconds": 125, "cost_usd": 0.234375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 0.0, "run_id": "Math_003_20260401_122803", "duration_seconds": 130, "cost_usd": 0.24375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 7.4, "run_id": "Neuroscience_000_20260401_123535", "duration_seconds": 125, "cost_usd": 0.234375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 1.2, "run_id": "Neuroscience_001_20260401_123958", "duration_seconds": 100, "cost_usd": 0.1875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 0.4, "run_id": "Neuroscience_002_20260401_124612", "duration_seconds": 295, "cost_usd": 0.553125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 4.25, "run_id": "Neuroscience_003_20260401_125606", "duration_seconds": 155, "cost_usd": 0.290625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 7.8, "run_id": "Physics_000_20260326_151123", "duration_seconds": 345, "cost_usd": 0.646875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 27.45, "run_id": "Physics_001_20260401_130120", "duration_seconds": 213, "cost_usd": 0.399375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 10.35, "run_id": "Physics_002_20260401_130518", "duration_seconds": 194, "cost_usd": 0.36375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 31.8, "run_id": "Physics_003_20260401_131002", "duration_seconds": 142, "cost_usd": 0.26625, "model": "gpt-5.4", "model_display": "GPT-5.4" } }, "EvoScientist": { "Astronomy_000": { "score": 11.5, "run_id": "Astronomy_000_20260330_094408", "duration_seconds": 683, "cost_usd": 1.280625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 21.2, "run_id": "Astronomy_001_20260403_194127", "duration_seconds": 420, "cost_usd": 0.7875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 23.9, "run_id": "Astronomy_002_20260404_132455", "duration_seconds": 410, "cost_usd": 0.76875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 47.3, "run_id": "Astronomy_003_20260404_133145", "duration_seconds": 208, "cost_usd": 0.39, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 7.5, "run_id": "Chemistry_000_20260330_095531", "duration_seconds": 2706, "cost_usd": 5.07375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 7.3, "run_id": "Chemistry_001_20260404_133513", "duration_seconds": 366, "cost_usd": 0.68625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 1.4, "run_id": "Chemistry_002_20260404_134119", "duration_seconds": 341, "cost_usd": 0.639375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 1.5, "run_id": "Chemistry_003_20260404_134700", "duration_seconds": 727, "cost_usd": 1.363125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 12.8, "run_id": "Earth_000_20260330_104037", "duration_seconds": 279, "cost_usd": 0.523125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 39.97, "run_id": "Earth_001_20260404_135907", "duration_seconds": 229, "cost_usd": 0.429375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 15.1, "run_id": "Earth_002_20260404_140256", "duration_seconds": 488, "cost_usd": 0.915, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 0.0, "run_id": "Earth_003_20260404_141105", "duration_seconds": 497, "cost_usd": 0.931875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 12.4, "run_id": "Energy_000_20260330_104517", "duration_seconds": 1488, "cost_usd": 2.79, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 20.9, "run_id": "Energy_001_20260404_141923", "duration_seconds": 404, "cost_usd": 0.7575, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 36.4, "run_id": "Energy_002_20260404_142606", "duration_seconds": 304, "cost_usd": 0.57, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 26.3, "run_id": "Energy_003_20260404_143111", "duration_seconds": 336, "cost_usd": 0.63, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 7.2, "run_id": "Information_000_20260330_111006", "duration_seconds": 421, "cost_usd": 0.789375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 3.6, "run_id": "Information_001_20260404_143647", "duration_seconds": 547, "cost_usd": 1.025625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 14.1, "run_id": "Information_002_20260404_144554", "duration_seconds": 311, "cost_usd": 0.583125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 4.75, "run_id": "Information_003_20260404_145105", "duration_seconds": 1406, "cost_usd": 2.63625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 7.05, "run_id": "Life_000_20260330_111707", "duration_seconds": 702, "cost_usd": 1.31625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 13.6, "run_id": "Life_001_20260404_151432", "duration_seconds": 333, "cost_usd": 0.624375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 9.1, "run_id": "Life_002_20260404_152005", "duration_seconds": 206, "cost_usd": 0.38625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 36.0, "run_id": "Life_003_20260404_152331", "duration_seconds": 388, "cost_usd": 0.7275, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 17.5, "run_id": "Material_000_20260330_112849", "duration_seconds": 2047, "cost_usd": 3.838125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 6.3, "run_id": "Material_001_20260404_152959", "duration_seconds": 323, "cost_usd": 0.605625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 10.05, "run_id": "Material_002_20260404_153523", "duration_seconds": 239, "cost_usd": 0.448125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 19.95, "run_id": "Material_003_20260404_153922", "duration_seconds": 977, "cost_usd": 1.831875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 8.0, "run_id": "Math_000_20260330_120257", "duration_seconds": 572, "cost_usd": 1.0725, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 35.2, "run_id": "Math_001_20260404_155539", "duration_seconds": 416, "cost_usd": 0.78, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 14.1, "run_id": "Math_002_20260404_160235", "duration_seconds": 588, "cost_usd": 1.1025, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 0.0, "run_id": "Math_003_20260404_161228", "duration_seconds": 341, "cost_usd": 0.639375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 13.6, "run_id": "Neuroscience_000_20260330_121229", "duration_seconds": 533, "cost_usd": 0.999375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 0.75, "run_id": "Neuroscience_001_20260404_161809", "duration_seconds": 478, "cost_usd": 0.89625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260404_162607", "duration_seconds": 2282, "cost_usd": 4.27875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 5.95, "run_id": "Neuroscience_003_20260404_170410", "duration_seconds": 625, "cost_usd": 1.171875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 19.2, "run_id": "Physics_000_20260330_122122", "duration_seconds": 390, "cost_usd": 0.73125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 24.5, "run_id": "Physics_001_20260404_171435", "duration_seconds": 354, "cost_usd": 0.66375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 24.85, "run_id": "Physics_002_20260404_172029", "duration_seconds": 794, "cost_usd": 1.48875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 36.8, "run_id": "Physics_003_20260404_173350", "duration_seconds": 321, "cost_usd": 0.601875, "model": "gpt-5.4", "model_display": "GPT-5.4" } }, "ResearchClaw": { "Astronomy_000": { "score": 20.8, "run_id": "Astronomy_000_20260409_104742", "duration_seconds": 285, "cost_usd": 0.534375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 14.4, "run_id": "Astronomy_001_20260408_180204", "duration_seconds": 255, "cost_usd": 0.478125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 11.1, "run_id": "Astronomy_002_20260408_182333", "duration_seconds": 390, "cost_usd": 0.73125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 45.9, "run_id": "Astronomy_003_20260408_183133", "duration_seconds": 213, "cost_usd": 0.399375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 18.25, "run_id": "Chemistry_000_20260409_105911", "duration_seconds": 1869, "cost_usd": 3.504375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 2.15, "run_id": "Chemistry_001_20260409_113212", "duration_seconds": 223, "cost_usd": 0.418125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260409_114123", "duration_seconds": 228, "cost_usd": 0.4275, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 13.5, "run_id": "Chemistry_003_20260409_114635", "duration_seconds": 346, "cost_usd": 0.64875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 14.7, "run_id": "Earth_000_20260409_115522", "duration_seconds": 297, "cost_usd": 0.556875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 32.97, "run_id": "Earth_001_20260409_120559", "duration_seconds": 366, "cost_usd": 0.68625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 20.6, "run_id": "Earth_002_20260409_130041", "duration_seconds": 291, "cost_usd": 0.545625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 0.0, "run_id": "Earth_003_20260409_130946", "duration_seconds": 173, "cost_usd": 0.324375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 3.5, "run_id": "Energy_000_20260409_134154", "duration_seconds": 257, "cost_usd": 0.481875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 15.4, "run_id": "Energy_001_20260409_135102", "duration_seconds": 277, "cost_usd": 0.519375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 37.65, "run_id": "Energy_002_20260409_135612", "duration_seconds": 301, "cost_usd": 0.564375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 19.5, "run_id": "Energy_003_20260409_142519", "duration_seconds": 172, "cost_usd": 0.3225, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 10.0, "run_id": "Information_000_20260409_150604", "duration_seconds": 191, "cost_usd": 0.358125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 0.0, "run_id": "Information_001_20260409_154703", "duration_seconds": 242, "cost_usd": 0.45375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 39.8, "run_id": "Information_002_20260409_155531", "duration_seconds": 224, "cost_usd": 0.42, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 9.8, "run_id": "Information_003_20260409_161445", "duration_seconds": 1072, "cost_usd": 2.01, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 10.05, "run_id": "Life_000_20260409_173006", "duration_seconds": 867, "cost_usd": 1.625625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 5.7, "run_id": "Life_001_20260409_171658", "duration_seconds": 265, "cost_usd": 0.496875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 6.6, "run_id": "Life_002_20260409_184513", "duration_seconds": 252, "cost_usd": 0.4725, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 33.6, "run_id": "Life_003_20260409_192448", "duration_seconds": 306, "cost_usd": 0.57375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 12.1, "run_id": "Material_000_20260410_135328", "duration_seconds": 458, "cost_usd": 0.85875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 12.95, "run_id": "Material_001_20260410_141525", "duration_seconds": 325, "cost_usd": 0.609375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 35.75, "run_id": "Material_002_20260410_142401", "duration_seconds": 1108, "cost_usd": 2.0775, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 16.25, "run_id": "Material_003_20260410_145941", "duration_seconds": 270, "cost_usd": 0.50625, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 7.2, "run_id": "Math_000_20260410_150535", "duration_seconds": 196, "cost_usd": 0.3675, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 37.7, "run_id": "Math_001_20260410_151201", "duration_seconds": 172, "cost_usd": 0.3225, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 6.3, "run_id": "Math_002_20260410_151706", "duration_seconds": 1568, "cost_usd": 2.94, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 0.0, "run_id": "Math_003_20260410_154450", "duration_seconds": 208, "cost_usd": 0.39, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 7.8, "run_id": "Neuroscience_000_20260410_155023", "duration_seconds": 273, "cost_usd": 0.511875, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 0.5, "run_id": "Neuroscience_001_20260410_155818", "duration_seconds": 207, "cost_usd": 0.388125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260410_160729", "duration_seconds": 2214, "cost_usd": 4.15125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 7.35, "run_id": "Neuroscience_003_20260410_164642", "duration_seconds": 245, "cost_usd": 0.459375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 22.7, "run_id": "Physics_000_20260410_165157", "duration_seconds": 150, "cost_usd": 0.28125, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 29.25, "run_id": "Physics_001_20260410_170205", "duration_seconds": 261, "cost_usd": 0.489375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 24.25, "run_id": "Physics_002_20260410_171139", "duration_seconds": 226, "cost_usd": 0.42375, "model": "gpt-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 44.3, "run_id": "Physics_003_20260410_171712", "duration_seconds": 171, "cost_usd": 0.320625, "model": "gpt-5.4", "model_display": "GPT-5.4" } }, "ResearchHarness (Claude-Opus-4.6)": { "Astronomy_000": { "score": 33.1, "run_id": "Astronomy_000_20260416_171148", "duration_seconds": 1160, "cost_usd": 3.866667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Astronomy_002": { "score": 24.5, "run_id": "Astronomy_002_20260415_121118", "duration_seconds": 2558, "cost_usd": 8.526667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Astronomy_003": { "score": 47.6, "run_id": "Astronomy_003_20260415_125416", "duration_seconds": 578, "cost_usd": 1.926667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Chemistry_000": { "score": 9.35, "run_id": "Chemistry_000_20260416_173125", "duration_seconds": 1965, "cost_usd": 6.55, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Chemistry_001": { "score": 9.0, "run_id": "Chemistry_001_20260415_134024", "duration_seconds": 1742, "cost_usd": 5.806667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Chemistry_002": { "score": 3.4, "run_id": "Chemistry_002_20260416_175027", "duration_seconds": 3323, "cost_usd": 11.076667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Chemistry_003": { "score": 7.2, "run_id": "Chemistry_003_20260416_180425", "duration_seconds": 931, "cost_usd": 3.103333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Earth_000": { "score": 21.5, "run_id": "Earth_000_20260417_110902", "duration_seconds": 603, "cost_usd": 2.01, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Earth_001": { "score": 40.94, "run_id": "Earth_001_20260417_103256", "duration_seconds": 426, "cost_usd": 1.42, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Earth_002": { "score": 28.8, "run_id": "Earth_002_20260417_110902", "duration_seconds": 874, "cost_usd": 2.913333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Energy_000": { "score": 12.3, "run_id": "Energy_000_20260416_182016", "duration_seconds": 2154, "cost_usd": 7.18, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Energy_001": { "score": 25.1, "run_id": "Energy_001_20260416_184602", "duration_seconds": 764, "cost_usd": 2.546667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Energy_002": { "score": 42.45, "run_id": "Energy_002_20260416_185623", "duration_seconds": 733, "cost_usd": 2.443333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Energy_003": { "score": 24.0, "run_id": "Energy_003_20260416_185858", "duration_seconds": 516, "cost_usd": 1.72, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Information_000": { "score": 23.2, "run_id": "Information_000_20260416_190754", "duration_seconds": 2510, "cost_usd": 8.366667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Information_001": { "score": 3.6, "run_id": "Information_001_20260416_190849", "duration_seconds": 2244, "cost_usd": 7.48, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Information_002": { "score": 20.1, "run_id": "Information_002_20260416_194642", "duration_seconds": 506, "cost_usd": 1.686667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Information_003": { "score": 4.35, "run_id": "Information_003_20260416_195002", "duration_seconds": 8470, "cost_usd": 28.233333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Life_000": { "score": 9.35, "run_id": "Life_000_20260416_195517", "duration_seconds": 729, "cost_usd": 2.43, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Life_001": { "score": 3.6, "run_id": "Life_001_20260416_200743", "duration_seconds": 501, "cost_usd": 1.67, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Life_002": { "score": 5.7, "run_id": "Life_002_20260416_201623", "duration_seconds": 761, "cost_usd": 2.536667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Life_003": { "score": 32.1, "run_id": "Life_003_20260416_202913", "duration_seconds": 729, "cost_usd": 2.43, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Material_000": { "score": 25.1, "run_id": "Material_000_20260416_204143", "duration_seconds": 5640, "cost_usd": 18.8, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Material_001": { "score": 1.75, "run_id": "Material_001_20260416_221126", "duration_seconds": 3458, "cost_usd": 11.526667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Material_002": { "score": 39.08, "run_id": "Material_002_20260416_221556", "duration_seconds": 8363, "cost_usd": 27.876667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Material_003": { "score": 14.0, "run_id": "Material_003_20260416_230923", "duration_seconds": 3295, "cost_usd": 10.983333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Math_000": { "score": 23.0, "run_id": "Math_000_20260417_000449", "duration_seconds": 1245, "cost_usd": 4.15, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Math_001": { "score": 34.3, "run_id": "Math_001_20260417_002550", "duration_seconds": 883, "cost_usd": 2.943333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Math_002": { "score": 20.5, "run_id": "Math_002_20260417_112354", "duration_seconds": 1904, "cost_usd": 6.346667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Math_003": { "score": 11.75, "run_id": "Math_003_20260417_003641", "duration_seconds": 557, "cost_usd": 1.856667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_000": { "score": 10.4, "run_id": "Neuroscience_000_20260417_004047", "duration_seconds": 1198, "cost_usd": 3.993333, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_001": { "score": 2.7, "run_id": "Neuroscience_001_20260417_004607", "duration_seconds": 689, "cost_usd": 2.296667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_002": { "score": 0.75, "run_id": "Neuroscience_002_20260417_005802", "duration_seconds": 986, "cost_usd": 3.286667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Neuroscience_003": { "score": 15.55, "run_id": "Neuroscience_003_20260417_010102", "duration_seconds": 681, "cost_usd": 2.27, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Physics_000": { "score": 26.8, "run_id": "Physics_000_20260417_011243", "duration_seconds": 941, "cost_usd": 3.136667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Physics_002": { "score": 32.4, "run_id": "Physics_002_20260417_012839", "duration_seconds": 495, "cost_usd": 1.65, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" }, "Physics_003": { "score": 45.8, "run_id": "Physics_003_20260417_013739", "duration_seconds": 2015, "cost_usd": 6.716667, "model": "Claude-Opus-4.6", "model_display": "Claude-Opus-4.6" } }, "ResearchHarness (Claude-Opus-4.7)": { "Astronomy_000": { "score": 25.6, "run_id": "Astronomy_000_20260427_114915", "duration_seconds": 748, "cost_usd": 2.493333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Astronomy_001": { "score": 26.6, "run_id": "Astronomy_001_20260427_114915", "duration_seconds": 471, "cost_usd": 1.57, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Astronomy_002": { "score": 32.0, "run_id": "Astronomy_002_20260427_115721", "duration_seconds": 572, "cost_usd": 1.906667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Astronomy_003": { "score": 47.4, "run_id": "Astronomy_003_20260427_120156", "duration_seconds": 265, "cost_usd": 0.883333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Chemistry_001": { "score": 7.4, "run_id": "Chemistry_001_20260427_120701", "duration_seconds": 4208, "cost_usd": 14.026667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260427_131724", "duration_seconds": 3976, "cost_usd": 13.253333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Earth_000": { "score": 15.5, "run_id": "Earth_000_20260427_143635", "duration_seconds": 809, "cost_usd": 2.696667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Earth_001": { "score": 38.94, "run_id": "Earth_001_20260427_145034", "duration_seconds": 475, "cost_usd": 1.583333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Earth_002": { "score": 34.0, "run_id": "Earth_002_20260427_145841", "duration_seconds": 942, "cost_usd": 3.14, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Earth_003": { "score": 1.6, "run_id": "Earth_003_20260427_151445", "duration_seconds": 791, "cost_usd": 2.636667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Energy_001": { "score": 17.1, "run_id": "Energy_001_20260427_153415", "duration_seconds": 913, "cost_usd": 3.043333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Energy_002": { "score": 41.0, "run_id": "Energy_002_20260427_163143", "duration_seconds": 1164, "cost_usd": 3.88, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Energy_003": { "score": 11.5, "run_id": "Energy_003_20260427_163143", "duration_seconds": 525, "cost_usd": 1.75, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Information_000": { "score": 0.8, "run_id": "Information_000_20260427_164043", "duration_seconds": 1820, "cost_usd": 6.066667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Information_001": { "score": 7.6, "run_id": "Information_001_20260427_165119", "duration_seconds": 4385, "cost_usd": 14.616667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Information_002": { "score": 32.3, "run_id": "Information_002_20260427_171113", "duration_seconds": 673, "cost_usd": 2.243333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Information_003": { "score": 14.9, "run_id": "Information_003_20260427_172234", "duration_seconds": 3684, "cost_usd": 12.28, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Life_000": { "score": 5.45, "run_id": "Life_000_20260427_180448", "duration_seconds": 1286, "cost_usd": 4.286667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Life_001": { "score": 12.45, "run_id": "Life_001_20260427_182412", "duration_seconds": 350, "cost_usd": 1.166667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Life_002": { "score": 6.5, "run_id": "Life_002_20260427_182632", "duration_seconds": 1700, "cost_usd": 5.666667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Life_003": { "score": 26.9, "run_id": "Life_003_20260427_183017", "duration_seconds": 490, "cost_usd": 1.633333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Material_000": { "score": 21.6, "run_id": "Material_000_20260427_183838", "duration_seconds": 1806, "cost_usd": 6.02, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Material_001": { "score": 18.0, "run_id": "Material_001_20260427_185459", "duration_seconds": 902, "cost_usd": 3.006667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Material_002": { "score": 35.81, "run_id": "Material_002_20260427_190855", "duration_seconds": 3824, "cost_usd": 12.746667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Material_003": { "score": 21.15, "run_id": "Material_003_20260427_191010", "duration_seconds": 4679, "cost_usd": 15.596667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Math_000": { "score": 22.95, "run_id": "Math_000_20260427_201256", "duration_seconds": 1031, "cost_usd": 3.436667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Math_001": { "score": 24.4, "run_id": "Math_001_20260427_202826", "duration_seconds": 710, "cost_usd": 2.366667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Math_002": { "score": 9.4, "run_id": "Math_002_20260427_203016", "duration_seconds": 3106, "cost_usd": 10.353333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Neuroscience_000": { "score": 10.6, "run_id": "Neuroscience_000_20260427_211138", "duration_seconds": 961, "cost_usd": 3.203333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Neuroscience_002": { "score": 3.75, "run_id": "Neuroscience_002_20260427_212314", "duration_seconds": 1546, "cost_usd": 5.153333, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Neuroscience_003": { "score": 14.7, "run_id": "Neuroscience_003_20260428_111256", "duration_seconds": 1020, "cost_usd": 3.4, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Physics_000": { "score": 14.2, "run_id": "Physics_000_20260428_113010", "duration_seconds": 665, "cost_usd": 2.216667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Physics_001": { "score": 39.95, "run_id": "Physics_001_20260428_114127", "duration_seconds": 1239, "cost_usd": 4.13, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Physics_002": { "score": 38.6, "run_id": "Physics_002_20260428_120221", "duration_seconds": 516, "cost_usd": 1.72, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" }, "Physics_003": { "score": 44.1, "run_id": "Physics_003_20260428_121142", "duration_seconds": 893, "cost_usd": 2.976667, "model": "Claude-Opus-4.7", "model_display": "Claude-Opus-4.7" } }, "ResearchHarness (DeepSeek-V4-Pro)": { "Astronomy_000": { "score": 25.6, "run_id": "Astronomy_000_20260515_153403", "duration_seconds": 1025, "cost_usd": 0.185781, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Astronomy_001": { "score": 6.0, "run_id": "Astronomy_001_20260515_155108", "duration_seconds": 515, "cost_usd": 0.093344, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Astronomy_002": { "score": 22.1, "run_id": "Astronomy_002_20260515_155943", "duration_seconds": 1160, "cost_usd": 0.21025, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Astronomy_003": { "score": 47.1, "run_id": "Astronomy_003_20260515_161903", "duration_seconds": 693, "cost_usd": 0.125606, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Chemistry_000": { "score": 15.1, "run_id": "Chemistry_000_20260518_001006", "duration_seconds": 6700, "cost_usd": 1.214375, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Chemistry_001": { "score": 5.0, "run_id": "Chemistry_001_20260515_190038", "duration_seconds": 1022, "cost_usd": 0.185237, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260515_191740", "duration_seconds": 691, "cost_usd": 0.125244, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Chemistry_003": { "score": 10.0, "run_id": "Chemistry_003_20260515_192911", "duration_seconds": 2886, "cost_usd": 0.523087, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Earth_000": { "score": 16.7, "run_id": "Earth_000_20260515_201717", "duration_seconds": 2653, "cost_usd": 0.480856, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Earth_001": { "score": 37.38, "run_id": "Earth_001_20260515_210130", "duration_seconds": 581, "cost_usd": 0.105306, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Earth_002": { "score": 25.5, "run_id": "Earth_002_20260515_211111", "duration_seconds": 1165, "cost_usd": 0.211156, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Earth_003": { "score": 1.0, "run_id": "Earth_003_20260515_213036", "duration_seconds": 1031, "cost_usd": 0.186869, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Energy_000": { "score": 17.5, "run_id": "Energy_000_20260515_214747", "duration_seconds": 1516, "cost_usd": 0.274775, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Energy_001": { "score": 25.0, "run_id": "Energy_001_20260515_221303", "duration_seconds": 1397, "cost_usd": 0.253206, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Energy_002": { "score": 38.65, "run_id": "Energy_002_20260515_223620", "duration_seconds": 997, "cost_usd": 0.180706, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Energy_003": { "score": 6.0, "run_id": "Energy_003_20260515_225258", "duration_seconds": 887, "cost_usd": 0.160769, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Information_000": { "score": 0.0, "run_id": "Information_000_20260515_230745", "duration_seconds": 988, "cost_usd": 0.179075, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Information_001": { "score": 1.0, "run_id": "Information_001_20260515_232413", "duration_seconds": 725, "cost_usd": 0.131406, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Information_002": { "score": 12.9, "run_id": "Information_002_20260515_233618", "duration_seconds": 769, "cost_usd": 0.139381, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Information_003": { "score": 4.6, "run_id": "Information_003_20260515_234907", "duration_seconds": 2904, "cost_usd": 0.52635, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Life_000": { "score": 10.15, "run_id": "Life_000_20260516_003731", "duration_seconds": 1533, "cost_usd": 0.277856, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Life_001": { "score": 9.5, "run_id": "Life_001_20260516_010304", "duration_seconds": 646, "cost_usd": 0.117087, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Life_002": { "score": 1.0, "run_id": "Life_002_20260516_011350", "duration_seconds": 1431, "cost_usd": 0.259369, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Life_003": { "score": 32.5, "run_id": "Life_003_20260516_013741", "duration_seconds": 958, "cost_usd": 0.173638, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Material_000": { "score": 24.0, "run_id": "Material_000_20260516_015340", "duration_seconds": 5376, "cost_usd": 0.9744, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Material_001": { "score": 21.0, "run_id": "Material_001_20260516_032315", "duration_seconds": 785, "cost_usd": 0.142281, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Material_002": { "score": 38.4, "run_id": "Material_002_20260516_033621", "duration_seconds": 1922, "cost_usd": 0.348362, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Material_003": { "score": 15.15, "run_id": "Material_003_20260516_040823", "duration_seconds": 2020, "cost_usd": 0.366125, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Math_000": { "score": 23.35, "run_id": "Math_000_20260516_044203", "duration_seconds": 2965, "cost_usd": 0.537406, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Math_001": { "score": 22.9, "run_id": "Math_001_20260516_053128", "duration_seconds": 691, "cost_usd": 0.125244, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Math_002": { "score": 7.5, "run_id": "Math_002_20260516_054259", "duration_seconds": 2476, "cost_usd": 0.448775, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Neuroscience_000": { "score": 10.0, "run_id": "Neuroscience_000_20260516_062458", "duration_seconds": 990, "cost_usd": 0.179437, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Neuroscience_001": { "score": 7.05, "run_id": "Neuroscience_001_20260516_064127", "duration_seconds": 754, "cost_usd": 0.136662, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260516_065402", "duration_seconds": 1465, "cost_usd": 0.265531, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Neuroscience_003": { "score": 13.65, "run_id": "Neuroscience_003_20260516_071828", "duration_seconds": 764, "cost_usd": 0.138475, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Physics_000": { "score": 14.1, "run_id": "Physics_000_20260516_073111", "duration_seconds": 921, "cost_usd": 0.166931, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Physics_001": { "score": 38.6, "run_id": "Physics_001_20260516_074632", "duration_seconds": 1732, "cost_usd": 0.313925, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Physics_002": { "score": 28.2, "run_id": "Physics_002_20260516_081523", "duration_seconds": 797, "cost_usd": 0.144456, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" }, "Physics_003": { "score": 33.6, "run_id": "Physics_003_20260516_082845", "duration_seconds": 1258, "cost_usd": 0.228012, "model": "DeepSeek-V4-Pro", "model_display": "DeepSeek-V4-Pro" } }, "ResearchHarness (GLM-5.1)": { "Astronomy_000": { "score": 9.0, "run_id": "Astronomy_000_20260414_195806", "duration_seconds": 1877, "cost_usd": 1.345183, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Astronomy_001": { "score": 37.0, "run_id": "Astronomy_001_20260414_202922", "duration_seconds": 1769, "cost_usd": 1.267783, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Astronomy_002": { "score": 29.9, "run_id": "Astronomy_002_20260414_205851", "duration_seconds": 2457, "cost_usd": 1.76085, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Astronomy_003": { "score": 43.1, "run_id": "Astronomy_003_20260414_213949", "duration_seconds": 444, "cost_usd": 0.3182, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Chemistry_000": { "score": 18.95, "run_id": "Chemistry_000_20260416_104049", "duration_seconds": 5568, "cost_usd": 3.9904, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Chemistry_001": { "score": 2.75, "run_id": "Chemistry_001_20260415_001714", "duration_seconds": 1983, "cost_usd": 1.42115, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Chemistry_002": { "score": 1.4, "run_id": "Chemistry_002_20260415_005017", "duration_seconds": 779, "cost_usd": 0.558283, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Chemistry_003": { "score": 22.5, "run_id": "Chemistry_003_20260415_010316", "duration_seconds": 3233, "cost_usd": 2.316983, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Earth_000": { "score": 14.8, "run_id": "Earth_000_20260415_015710", "duration_seconds": 683, "cost_usd": 0.489483, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Earth_001": { "score": 33.04, "run_id": "Earth_001_20260415_020834", "duration_seconds": 567, "cost_usd": 0.40635, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Earth_002": { "score": 30.4, "run_id": "Earth_002_20260415_021801", "duration_seconds": 997, "cost_usd": 0.714517, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Earth_003": { "score": 4.0, "run_id": "Earth_003_20260415_023438", "duration_seconds": 1728, "cost_usd": 1.2384, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Energy_000": { "score": 16.0, "run_id": "Energy_000_20260415_030326", "duration_seconds": 2435, "cost_usd": 1.745083, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Energy_001": { "score": 18.2, "run_id": "Energy_001_20260415_034401", "duration_seconds": 966, "cost_usd": 0.6923, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Energy_002": { "score": 39.85, "run_id": "Energy_002_20260415_040007", "duration_seconds": 719, "cost_usd": 0.515283, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Energy_003": { "score": 7.5, "run_id": "Energy_003_20260415_041206", "duration_seconds": 866, "cost_usd": 0.620633, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Information_000": { "score": 19.5, "run_id": "Information_000_20260415_211636", "duration_seconds": 1428, "cost_usd": 1.0234, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Information_001": { "score": 0.6, "run_id": "Information_001_20260415_214024", "duration_seconds": 620, "cost_usd": 0.444333, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Information_002": { "score": 36.2, "run_id": "Information_002_20260415_215045", "duration_seconds": 613, "cost_usd": 0.439317, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Information_003": { "score": 8.65, "run_id": "Information_003_20260415_220058", "duration_seconds": 1710, "cost_usd": 1.2255, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Life_000": { "score": 9.95, "run_id": "Life_000_20260415_094513", "duration_seconds": 1788, "cost_usd": 1.2814, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Life_001": { "score": 8.75, "run_id": "Life_001_20260415_101501", "duration_seconds": 779, "cost_usd": 0.558283, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Life_002": { "score": 6.3, "run_id": "Life_002_20260415_102800", "duration_seconds": 1197, "cost_usd": 0.85785, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Life_003": { "score": 23.8, "run_id": "Life_003_20260415_110858", "duration_seconds": 1480, "cost_usd": 1.060667, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Material_000": { "score": 21.7, "run_id": "Material_000_20260415_113339", "duration_seconds": 6139, "cost_usd": 4.399617, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Material_001": { "score": 4.15, "run_id": "Material_001_20260415_131558", "duration_seconds": 1513, "cost_usd": 1.084317, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Material_002": { "score": 28.4, "run_id": "Material_002_20260415_134111", "duration_seconds": 2247, "cost_usd": 1.61035, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Material_003": { "score": 20.3, "run_id": "Material_003_20260415_141838", "duration_seconds": 5450, "cost_usd": 3.905833, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Math_000": { "score": 23.95, "run_id": "Math_000_20260415_094450", "duration_seconds": 2454, "cost_usd": 1.7587, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Math_001": { "score": 27.0, "run_id": "Math_001_20260415_160410", "duration_seconds": 3433, "cost_usd": 2.460317, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Math_002": { "score": 10.9, "run_id": "Math_002_20260416_105258", "duration_seconds": 2889, "cost_usd": 2.07045, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260415_180131", "duration_seconds": 1515, "cost_usd": 1.08575, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Neuroscience_000": { "score": 10.4, "run_id": "Neuroscience_000_20260415_182645", "duration_seconds": 980, "cost_usd": 0.702333, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Neuroscience_001": { "score": 2.55, "run_id": "Neuroscience_001_20260415_184305", "duration_seconds": 1076, "cost_usd": 0.771133, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260415_190101", "duration_seconds": 1386, "cost_usd": 0.9933, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Neuroscience_003": { "score": 9.45, "run_id": "Neuroscience_003_20260415_192407", "duration_seconds": 2021, "cost_usd": 1.448383, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Physics_000": { "score": 23.8, "run_id": "Physics_000_20260415_195749", "duration_seconds": 995, "cost_usd": 0.713083, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Physics_001": { "score": 31.9, "run_id": "Physics_001_20260415_201423", "duration_seconds": 1346, "cost_usd": 0.964633, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Physics_002": { "score": 32.9, "run_id": "Physics_002_20260415_203649", "duration_seconds": 1375, "cost_usd": 0.985417, "model": "GLM-5.1", "model_display": "GLM-5.1" }, "Physics_003": { "score": 27.1, "run_id": "Physics_003_20260415_205947", "duration_seconds": 1009, "cost_usd": 0.723117, "model": "GLM-5.1", "model_display": "GLM-5.1" } }, "ResearchHarness (GPT-5.4)": { "Astronomy_000": { "score": 24.9, "run_id": "Astronomy_000_20260414_113238", "duration_seconds": 1174, "cost_usd": 2.20125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Astronomy_001": { "score": 4.0, "run_id": "Astronomy_001_20260414_170852", "duration_seconds": 355, "cost_usd": 0.665625, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Astronomy_002": { "score": 14.6, "run_id": "Astronomy_002_20260414_170852", "duration_seconds": 487, "cost_usd": 0.913125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Astronomy_003": { "score": 44.8, "run_id": "Astronomy_003_20260414_170852", "duration_seconds": 368, "cost_usd": 0.69, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Chemistry_000": { "score": 17.05, "run_id": "Chemistry_000_20260414_195840", "duration_seconds": 2897, "cost_usd": 5.431875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Chemistry_001": { "score": 0.5, "run_id": "Chemistry_001_20260414_170852", "duration_seconds": 378, "cost_usd": 0.70875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260414_170852", "duration_seconds": 412, "cost_usd": 0.7725, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Chemistry_003": { "score": 9.0, "run_id": "Chemistry_003_20260414_170852", "duration_seconds": 483, "cost_usd": 0.905625, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Earth_000": { "score": 12.1, "run_id": "Earth_000_20260414_170852", "duration_seconds": 433, "cost_usd": 0.811875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Earth_001": { "score": 36.71, "run_id": "Earth_001_20260414_171447", "duration_seconds": 369, "cost_usd": 0.691875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Earth_002": { "score": 25.0, "run_id": "Earth_002_20260414_171501", "duration_seconds": 521, "cost_usd": 0.976875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Earth_003": { "score": 1.5, "run_id": "Earth_003_20260414_171511", "duration_seconds": 471, "cost_usd": 0.883125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Energy_000": { "score": 11.8, "run_id": "Energy_000_20260414_195840", "duration_seconds": 1041, "cost_usd": 1.951875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Energy_001": { "score": 8.5, "run_id": "Energy_001_20260414_201619", "duration_seconds": 639, "cost_usd": 1.198125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Energy_002": { "score": 28.75, "run_id": "Energy_002_20260414_171655", "duration_seconds": 701, "cost_usd": 1.314375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Energy_003": { "score": 19.5, "run_id": "Energy_003_20260414_171659", "duration_seconds": 408, "cost_usd": 0.765, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Information_000": { "score": 29.0, "run_id": "Information_000_20260414_172057", "duration_seconds": 377, "cost_usd": 0.706875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Information_001": { "score": 0.0, "run_id": "Information_001_20260414_202717", "duration_seconds": 669, "cost_usd": 1.254375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Information_002": { "score": 22.9, "run_id": "Information_002_20260414_203902", "duration_seconds": 656, "cost_usd": 1.23, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Information_003": { "score": 11.0, "run_id": "Information_003_20260414_204710", "duration_seconds": 1113, "cost_usd": 2.086875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Life_000": { "score": 8.85, "run_id": "Life_000_20260414_205006", "duration_seconds": 986, "cost_usd": 1.84875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Life_001": { "score": 7.45, "run_id": "Life_001_20260414_210600", "duration_seconds": 777, "cost_usd": 1.456875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Life_002": { "score": 8.8, "run_id": "Life_002_20260414_210650", "duration_seconds": 797, "cost_usd": 1.494375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Life_003": { "score": 31.8, "run_id": "Life_003_20260414_211912", "duration_seconds": 814, "cost_usd": 1.52625, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Material_000": { "score": 5.0, "run_id": "Material_000_20260414_212015", "duration_seconds": 1085, "cost_usd": 2.034375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Material_001": { "score": 3.5, "run_id": "Material_001_20260414_213300", "duration_seconds": 761, "cost_usd": 1.426875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Material_002": { "score": 3.3, "run_id": "Material_002_20260414_213833", "duration_seconds": 565, "cost_usd": 1.059375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Material_003": { "score": 18.8, "run_id": "Material_003_20260414_214555", "duration_seconds": 1147, "cost_usd": 2.150625, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Math_000": { "score": 11.0, "run_id": "Math_000_20260414_214810", "duration_seconds": 849, "cost_usd": 1.591875, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Math_001": { "score": 42.0, "run_id": "Math_001_20260414_220232", "duration_seconds": 555, "cost_usd": 1.040625, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Math_002": { "score": 10.2, "run_id": "Math_002_20260414_220537", "duration_seconds": 645, "cost_usd": 1.209375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260414_221200", "duration_seconds": 701, "cost_usd": 1.314375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Neuroscience_000": { "score": 13.0, "run_id": "Neuroscience_000_20260414_221650", "duration_seconds": 2278, "cost_usd": 4.27125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Neuroscience_001": { "score": 2.2, "run_id": "Neuroscience_001_20260414_222526", "duration_seconds": 1501, "cost_usd": 2.814375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260414_225053", "duration_seconds": 7570, "cost_usd": 14.19375, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Neuroscience_003": { "score": 4.35, "run_id": "Neuroscience_003_20260414_225711", "duration_seconds": 7167, "cost_usd": 13.438125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Physics_000": { "score": 19.2, "run_id": "Physics_000_20260415_005716", "duration_seconds": 726, "cost_usd": 1.36125, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Physics_001": { "score": 28.5, "run_id": "Physics_001_20260415_005728", "duration_seconds": 1068, "cost_usd": 2.0025, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Physics_002": { "score": 18.4, "run_id": "Physics_002_20260415_010948", "duration_seconds": 628, "cost_usd": 1.1775, "model": "GPT-5.4", "model_display": "GPT-5.4" }, "Physics_003": { "score": 42.1, "run_id": "Physics_003_20260415_011540", "duration_seconds": 751, "cost_usd": 1.408125, "model": "GPT-5.4", "model_display": "GPT-5.4" } }, "ResearchHarness (GPT-5.5)": { "Astronomy_000": { "score": 26.0, "run_id": "Astronomy_000_20260429_160234", "duration_seconds": 476, "cost_usd": 1.785, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Astronomy_001": { "score": 2.0, "run_id": "Astronomy_001_20260429_160234", "duration_seconds": 331, "cost_usd": 1.24125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Astronomy_002": { "score": 24.5, "run_id": "Astronomy_002_20260429_160819", "duration_seconds": 372, "cost_usd": 1.395, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Astronomy_003": { "score": 43.4, "run_id": "Astronomy_003_20260429_161044", "duration_seconds": 276, "cost_usd": 1.035, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Chemistry_000": { "score": 19.1, "run_id": "Chemistry_000_20260429_161444", "duration_seconds": 613, "cost_usd": 2.29875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Chemistry_001": { "score": 2.0, "run_id": "Chemistry_001_20260429_161544", "duration_seconds": 377, "cost_usd": 1.41375, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Chemistry_002": { "score": 5.0, "run_id": "Chemistry_002_20260429_162220", "duration_seconds": 258, "cost_usd": 0.9675, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Chemistry_003": { "score": 15.5, "run_id": "Chemistry_003_20260429_162510", "duration_seconds": 489, "cost_usd": 1.83375, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Earth_000": { "score": 21.8, "run_id": "Earth_000_20260429_162650", "duration_seconds": 274, "cost_usd": 1.0275, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Earth_001": { "score": 39.24, "run_id": "Earth_001_20260429_163146", "duration_seconds": 183, "cost_usd": 0.68625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Earth_002": { "score": 11.8, "run_id": "Earth_002_20260429_163331", "duration_seconds": 341, "cost_usd": 1.27875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Earth_003": { "score": 0.6, "run_id": "Earth_003_20260429_163506", "duration_seconds": 243, "cost_usd": 0.91125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Energy_000": { "score": 16.0, "run_id": "Energy_000_20260429_163926", "duration_seconds": 505, "cost_usd": 1.89375, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Energy_001": { "score": 20.1, "run_id": "Energy_001_20260429_163931", "duration_seconds": 2058, "cost_usd": 7.7175, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Energy_002": { "score": 37.0, "run_id": "Energy_002_20260429_164806", "duration_seconds": 266, "cost_usd": 0.9975, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Energy_003": { "score": 17.5, "run_id": "Energy_003_20260429_165242", "duration_seconds": 213, "cost_usd": 0.79875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Information_000": { "score": 22.7, "run_id": "Information_000_20260429_165637", "duration_seconds": 235, "cost_usd": 0.88125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Information_001": { "score": 6.0, "run_id": "Information_001_20260429_170047", "duration_seconds": 293, "cost_usd": 1.09875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Information_002": { "score": 33.1, "run_id": "Information_002_20260429_170607", "duration_seconds": 279, "cost_usd": 1.04625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Information_003": { "score": 9.55, "run_id": "Information_003_20260429_171107", "duration_seconds": 2079, "cost_usd": 7.79625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Life_000": { "score": 2.25, "run_id": "Life_000_20260429_171403", "duration_seconds": 1349, "cost_usd": 5.05875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Life_001": { "score": 7.2, "run_id": "Life_001_20260429_173722", "duration_seconds": 286, "cost_usd": 1.0725, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Life_002": { "score": 8.5, "run_id": "Life_002_20260429_174238", "duration_seconds": 439, "cost_usd": 1.64625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Life_003": { "score": 29.4, "run_id": "Life_003_20260429_174558", "duration_seconds": 246, "cost_usd": 0.9225, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Material_000": { "score": 5.6, "run_id": "Material_000_20260429_175003", "duration_seconds": 1479, "cost_usd": 5.54625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Material_001": { "score": 8.0, "run_id": "Material_001_20260429_175018", "duration_seconds": 224, "cost_usd": 0.84, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Material_002": { "score": 26.09, "run_id": "Material_002_20260429_175413", "duration_seconds": 291, "cost_usd": 1.09125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Material_003": { "score": 18.95, "run_id": "Material_003_20260429_175919", "duration_seconds": 750, "cost_usd": 2.8125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Math_000": { "score": 8.25, "run_id": "Math_000_20260429_181207", "duration_seconds": 243, "cost_usd": 0.91125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Math_001": { "score": 23.0, "run_id": "Math_001_20260429_181452", "duration_seconds": 510, "cost_usd": 1.9125, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Math_002": { "score": 11.0, "run_id": "Math_002_20260429_181622", "duration_seconds": 501, "cost_usd": 1.87875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260429_182334", "duration_seconds": 310, "cost_usd": 1.1625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Neuroscience_000": { "score": 11.0, "run_id": "Neuroscience_000_20260429_182509", "duration_seconds": 238, "cost_usd": 0.8925, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Neuroscience_001": { "score": 3.75, "run_id": "Neuroscience_001_20260429_182849", "duration_seconds": 517, "cost_usd": 1.93875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260429_182919", "duration_seconds": 215, "cost_usd": 0.80625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Neuroscience_003": { "score": 9.6, "run_id": "Neuroscience_003_20260429_183309", "duration_seconds": 701, "cost_usd": 2.62875, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Physics_000": { "score": 18.3, "run_id": "Physics_000_20260429_183745", "duration_seconds": 202, "cost_usd": 0.7575, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Physics_001": { "score": 29.75, "run_id": "Physics_001_20260429_184120", "duration_seconds": 266, "cost_usd": 0.9975, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Physics_002": { "score": 26.4, "run_id": "Physics_002_20260429_184505", "duration_seconds": 239, "cost_usd": 0.89625, "model": "GPT-5.5", "model_display": "GPT-5.5" }, "Physics_003": { "score": 49.0, "run_id": "Physics_003_20260429_184601", "duration_seconds": 264, "cost_usd": 0.99, "model": "GPT-5.5", "model_display": "GPT-5.5" } }, "ResearchHarness (Gemini-3.1-Pro)": { "Astronomy_000": { "score": 7.5, "run_id": "Astronomy_000_20260415_162619", "duration_seconds": 2068, "cost_usd": 3.102, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Astronomy_001": { "score": 4.0, "run_id": "Astronomy_001_20260415_162323", "duration_seconds": 142, "cost_usd": 0.213, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Astronomy_002": { "score": 20.0, "run_id": "Astronomy_002_20260416_132754", "duration_seconds": 914, "cost_usd": 1.371, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Astronomy_003": { "score": 45.8, "run_id": "Astronomy_003_20260415_170228", "duration_seconds": 163, "cost_usd": 0.2445, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Chemistry_000": { "score": 19.6, "run_id": "Chemistry_000_20260416_180812", "duration_seconds": 2919, "cost_usd": 4.3785, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Chemistry_001": { "score": 4.25, "run_id": "Chemistry_001_20260415_190626", "duration_seconds": 535, "cost_usd": 0.8025, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260415_193549", "duration_seconds": 176, "cost_usd": 0.264, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Earth_000": { "score": 14.8, "run_id": "Earth_000_20260415_200139", "duration_seconds": 196, "cost_usd": 0.294, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Earth_001": { "score": 28.05, "run_id": "Earth_001_20260415_200511", "duration_seconds": 244, "cost_usd": 0.366, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Earth_002": { "score": 11.0, "run_id": "Earth_002_20260415_200932", "duration_seconds": 300, "cost_usd": 0.45, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Earth_003": { "score": 1.5, "run_id": "Earth_003_20260415_201446", "duration_seconds": 575, "cost_usd": 0.8625, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Energy_000": { "score": 3.5, "run_id": "Energy_000_20260416_135241", "duration_seconds": 529, "cost_usd": 0.7935, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Energy_001": { "score": 3.0, "run_id": "Energy_001_20260415_232419", "duration_seconds": 937, "cost_usd": 1.4055, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Energy_002": { "score": 21.25, "run_id": "Energy_002_20260415_234011", "duration_seconds": 416, "cost_usd": 0.624, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Energy_003": { "score": 20.3, "run_id": "Energy_003_20260415_234723", "duration_seconds": 144, "cost_usd": 0.216, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Information_000": { "score": 10.5, "run_id": "Information_000_20260415_234958", "duration_seconds": 606, "cost_usd": 0.909, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Information_001": { "score": 1.0, "run_id": "Information_001_20260416_135421", "duration_seconds": 1023, "cost_usd": 1.5345, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Information_002": { "score": 8.8, "run_id": "Information_002_20260416_010053", "duration_seconds": 428, "cost_usd": 0.642, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Life_000": { "score": 3.55, "run_id": "Life_000_20260416_104333", "duration_seconds": 277, "cost_usd": 0.4155, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Life_001": { "score": 8.5, "run_id": "Life_001_20260416_104333", "duration_seconds": 187, "cost_usd": 0.2805, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Life_002": { "score": 7.5, "run_id": "Life_002_20260416_141143", "duration_seconds": 4135, "cost_usd": 6.2025, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Life_003": { "score": 18.0, "run_id": "Life_003_20260416_104825", "duration_seconds": 106, "cost_usd": 0.159, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Material_000": { "score": 20.0, "run_id": "Material_000_20260416_141659", "duration_seconds": 7433, "cost_usd": 11.1495, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Material_001": { "score": 5.25, "run_id": "Material_001_20260416_111314", "duration_seconds": 272, "cost_usd": 0.408, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Material_003": { "score": 8.7, "run_id": "Material_003_20260416_114219", "duration_seconds": 2961, "cost_usd": 4.4415, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Math_000": { "score": 21.1, "run_id": "Math_000_20260416_114950", "duration_seconds": 1794, "cost_usd": 2.691, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Math_001": { "score": 13.5, "run_id": "Math_001_20260416_121957", "duration_seconds": 962, "cost_usd": 1.443, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Math_003": { "score": 10.35, "run_id": "Math_003_20260416_162104", "duration_seconds": 198, "cost_usd": 0.297, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Neuroscience_000": { "score": 2.0, "run_id": "Neuroscience_000_20260416_123952", "duration_seconds": 97, "cost_usd": 0.1455, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260416_130633", "duration_seconds": 196, "cost_usd": 0.294, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Neuroscience_003": { "score": 2.7, "run_id": "Neuroscience_003_20260416_131003", "duration_seconds": 343, "cost_usd": 0.5145, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Physics_000": { "score": 22.0, "run_id": "Physics_000_20260416_162550", "duration_seconds": 1977, "cost_usd": 2.9655, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Physics_001": { "score": 22.55, "run_id": "Physics_001_20260416_131118", "duration_seconds": 96, "cost_usd": 0.144, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Physics_002": { "score": 27.55, "run_id": "Physics_002_20260416_165907", "duration_seconds": 3650, "cost_usd": 5.475, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" }, "Physics_003": { "score": 46.8, "run_id": "Physics_003_20260416_131439", "duration_seconds": 333, "cost_usd": 0.4995, "model": "Gemini-3.1-Pro", "model_display": "Gemini-3.1-Pro" } }, "ResearchHarness (Gemini-3.5-Flash)": { "Astronomy_000": { "score": 24.0, "run_id": "Astronomy_000_20260527_101521", "duration_seconds": 812, "cost_usd": 0.9135, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Astronomy_001": { "score": 13.2, "run_id": "Astronomy_001_20260527_165228", "duration_seconds": 3343, "cost_usd": 3.760875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Astronomy_003": { "score": 47.0, "run_id": "Astronomy_003_20260526_204227", "duration_seconds": 575, "cost_usd": 0.646875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Chemistry_000": { "score": 9.0, "run_id": "Chemistry_000_20260526_205216", "duration_seconds": 764, "cost_usd": 0.8595, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Chemistry_001": { "score": 6.5, "run_id": "Chemistry_001_20260526_210513", "duration_seconds": 804, "cost_usd": 0.9045, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260526_211856", "duration_seconds": 799, "cost_usd": 0.898875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Earth_000": { "score": 25.5, "run_id": "Earth_000_20260526_220638", "duration_seconds": 707, "cost_usd": 0.795375, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Earth_001": { "score": 33.19, "run_id": "Earth_001_20260526_221848", "duration_seconds": 800, "cost_usd": 0.9, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Earth_002": { "score": 27.0, "run_id": "Earth_002_20260527_183331", "duration_seconds": 795, "cost_usd": 0.894375, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Earth_003": { "score": 10.5, "run_id": "Earth_003_20260526_224841", "duration_seconds": 1238, "cost_usd": 1.39275, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Energy_000": { "score": 4.4, "run_id": "Energy_000_20260527_184722", "duration_seconds": 737, "cost_usd": 0.829125, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Energy_001": { "score": 21.0, "run_id": "Energy_001_20260526_232608", "duration_seconds": 894, "cost_usd": 1.00575, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Energy_002": { "score": 31.7, "run_id": "Energy_002_20260526_234126", "duration_seconds": 1249, "cost_usd": 1.405125, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Energy_003": { "score": 16.3, "run_id": "Energy_003_20260527_120825", "duration_seconds": 1967, "cost_usd": 2.212875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Information_000": { "score": 49.4, "run_id": "Information_000_20260527_002336", "duration_seconds": 399, "cost_usd": 0.448875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Information_001": { "score": 9.0, "run_id": "Information_001_20260527_124145", "duration_seconds": 3374, "cost_usd": 3.79575, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Information_002": { "score": 27.2, "run_id": "Information_002_20260527_010834", "duration_seconds": 443, "cost_usd": 0.498375, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Information_003": { "score": 10.8, "run_id": "Information_003_20260527_011602", "duration_seconds": 1487, "cost_usd": 1.672875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Life_000": { "score": 7.7, "run_id": "Life_000_20260527_185953", "duration_seconds": 491, "cost_usd": 0.552375, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Life_001": { "score": 12.95, "run_id": "Life_001_20260527_015452", "duration_seconds": 725, "cost_usd": 0.815625, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Life_002": { "score": 6.3, "run_id": "Life_002_20260527_190829", "duration_seconds": 858, "cost_usd": 0.96525, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Life_003": { "score": 26.5, "run_id": "Life_003_20260527_022742", "duration_seconds": 347, "cost_usd": 0.390375, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Material_000": { "score": 22.6, "run_id": "Material_000_20260527_023350", "duration_seconds": 1436, "cost_usd": 1.6155, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Material_001": { "score": 0.0, "run_id": "Material_001_20260527_025759", "duration_seconds": 516, "cost_usd": 0.5805, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Material_002": { "score": 40.71, "run_id": "Material_002_20260527_030652", "duration_seconds": 2584, "cost_usd": 2.907, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Material_003": { "score": 15.6, "run_id": "Material_003_20260527_035012", "duration_seconds": 940, "cost_usd": 1.0575, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Math_001": { "score": 22.0, "run_id": "Math_001_20260527_042119", "duration_seconds": 793, "cost_usd": 0.892125, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Neuroscience_000": { "score": 8.0, "run_id": "Neuroscience_000_20260527_051146", "duration_seconds": 877, "cost_usd": 0.986625, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Neuroscience_001": { "score": 0.0, "run_id": "Neuroscience_001_20260527_170712", "duration_seconds": 2120, "cost_usd": 2.385, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260527_054624", "duration_seconds": 715, "cost_usd": 0.804375, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Neuroscience_003": { "score": 0.75, "run_id": "Neuroscience_003_20260527_055839", "duration_seconds": 839, "cost_usd": 0.943875, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Physics_000": { "score": 14.5, "run_id": "Physics_000_20260527_061312", "duration_seconds": 260, "cost_usd": 0.2925, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Physics_001": { "score": 31.65, "run_id": "Physics_001_20260527_205419", "duration_seconds": 954, "cost_usd": 1.07325, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false }, "Physics_003": { "score": 34.3, "run_id": "Physics_003_20260527_064416", "duration_seconds": 744, "cost_usd": 0.837, "model": "gemini-3.5-flash", "model_display": "Gemini-3.5-Flash", "details_exported": false } }, "ResearchHarness (Grok-4.1)": { "Astronomy_000": { "score": 20.0, "run_id": "Astronomy_000_20260414_175140", "duration_seconds": 434, "cost_usd": 0.039783, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Astronomy_001": { "score": 29.4, "run_id": "Astronomy_001_20260414_175938", "duration_seconds": 294, "cost_usd": 0.02695, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Astronomy_002": { "score": 20.4, "run_id": "Astronomy_002_20260414_175938", "duration_seconds": 538, "cost_usd": 0.049317, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Astronomy_003": { "score": 44.7, "run_id": "Astronomy_003_20260414_180447", "duration_seconds": 167, "cost_usd": 0.015308, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Chemistry_000": { "score": 3.8, "run_id": "Chemistry_000_20260414_180759", "duration_seconds": 326, "cost_usd": 0.029883, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Chemistry_001": { "score": 1.5, "run_id": "Chemistry_001_20260414_180852", "duration_seconds": 252, "cost_usd": 0.0231, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Chemistry_002": { "score": 0.4, "run_id": "Chemistry_002_20260414_181322", "duration_seconds": 214, "cost_usd": 0.019617, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Chemistry_003": { "score": 6.0, "run_id": "Chemistry_003_20260414_181341", "duration_seconds": 609, "cost_usd": 0.055825, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Earth_000": { "score": 11.9, "run_id": "Earth_000_20260414_205103", "duration_seconds": 158, "cost_usd": 0.014483, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Earth_001": { "score": 30.78, "run_id": "Earth_001_20260414_205105", "duration_seconds": 236, "cost_usd": 0.021633, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Earth_002": { "score": 13.6, "run_id": "Earth_002_20260414_205407", "duration_seconds": 415, "cost_usd": 0.038042, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Earth_003": { "score": 3.5, "run_id": "Earth_003_20260415_112001", "duration_seconds": 210, "cost_usd": 0.01925, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Energy_000": { "score": 2.3, "run_id": "Energy_000_20260414_205947", "duration_seconds": 419, "cost_usd": 0.038408, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Energy_001": { "score": 8.0, "run_id": "Energy_001_20260414_210132", "duration_seconds": 407, "cost_usd": 0.037308, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Energy_002": { "score": 28.2, "run_id": "Energy_002_20260414_210702", "duration_seconds": 361, "cost_usd": 0.033092, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Energy_003": { "score": 6.0, "run_id": "Energy_003_20260414_210838", "duration_seconds": 582, "cost_usd": 0.05335, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Information_000": { "score": 25.5, "run_id": "Information_000_20260414_211319", "duration_seconds": 341, "cost_usd": 0.031258, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Information_001": { "score": 5.0, "run_id": "Information_001_20260414_211837", "duration_seconds": 361, "cost_usd": 0.033092, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Information_002": { "score": 10.8, "run_id": "Information_002_20260414_211914", "duration_seconds": 225, "cost_usd": 0.020625, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Information_003": { "score": 6.2, "run_id": "Information_003_20260414_212308", "duration_seconds": 360, "cost_usd": 0.033, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Life_000": { "score": 0.5, "run_id": "Life_000_20260414_212517", "duration_seconds": 275, "cost_usd": 0.025208, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Life_001": { "score": 13.7, "run_id": "Life_001_20260414_212921", "duration_seconds": 248, "cost_usd": 0.022733, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Life_002": { "score": 2.0, "run_id": "Life_002_20260414_213014", "duration_seconds": 396, "cost_usd": 0.0363, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Life_003": { "score": 36.5, "run_id": "Life_003_20260414_213353", "duration_seconds": 257, "cost_usd": 0.023558, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Material_000": { "score": 16.4, "run_id": "Material_000_20260414_213659", "duration_seconds": 625, "cost_usd": 0.057292, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Material_001": { "score": 11.55, "run_id": "Material_001_20260415_112001", "duration_seconds": 360, "cost_usd": 0.033, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Material_002": { "score": 0.66, "run_id": "Material_002_20260414_213934", "duration_seconds": 374, "cost_usd": 0.034283, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Material_003": { "score": 9.2, "run_id": "Material_003_20260414_214604", "duration_seconds": 376, "cost_usd": 0.034467, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Math_000": { "score": 7.7, "run_id": "Math_000_20260415_112001", "duration_seconds": 360, "cost_usd": 0.033, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Math_001": { "score": 26.4, "run_id": "Math_001_20260414_214828", "duration_seconds": 270, "cost_usd": 0.02475, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Math_002": { "score": 2.6, "run_id": "Math_002_20260414_215242", "duration_seconds": 450, "cost_usd": 0.04125, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Neuroscience_000": { "score": 8.4, "run_id": "Neuroscience_000_20260414_215537", "duration_seconds": 871, "cost_usd": 0.079842, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Neuroscience_001": { "score": 4.0, "run_id": "Neuroscience_001_20260414_220041", "duration_seconds": 324, "cost_usd": 0.0297, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260415_112044", "duration_seconds": 308, "cost_usd": 0.028233, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Neuroscience_003": { "score": 6.75, "run_id": "Neuroscience_003_20260414_221036", "duration_seconds": 815, "cost_usd": 0.074708, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Physics_000": { "score": 15.4, "run_id": "Physics_000_20260414_222311", "duration_seconds": 202, "cost_usd": 0.018517, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Physics_001": { "score": 34.7, "run_id": "Physics_001_20260414_222500", "duration_seconds": 297, "cost_usd": 0.027225, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Physics_002": { "score": 11.25, "run_id": "Physics_002_20260414_222658", "duration_seconds": 1118, "cost_usd": 0.102483, "model": "Grok-4.1", "model_display": "Grok-4.1" }, "Physics_003": { "score": 40.9, "run_id": "Physics_003_20260414_223016", "duration_seconds": 370, "cost_usd": 0.033917, "model": "Grok-4.1", "model_display": "Grok-4.1" } }, "ResearchHarness (Grok-4.3)": { "Astronomy_001": { "score": 7.2, "run_id": "Astronomy_001_20260515_164154", "duration_seconds": 271, "cost_usd": 0.141146, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Astronomy_002": { "score": 20.6, "run_id": "Astronomy_002_20260515_164632", "duration_seconds": 1271, "cost_usd": 0.661979, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Astronomy_003": { "score": 46.4, "run_id": "Astronomy_003_20260515_170759", "duration_seconds": 207, "cost_usd": 0.107813, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Chemistry_000": { "score": 13.3, "run_id": "Chemistry_000_20260515_171134", "duration_seconds": 1540, "cost_usd": 0.802083, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Chemistry_001": { "score": 0.5, "run_id": "Chemistry_001_20260518_003544", "duration_seconds": 1632, "cost_usd": 0.85, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260515_181127", "duration_seconds": 648, "cost_usd": 0.3375, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Chemistry_003": { "score": 0.3, "run_id": "Chemistry_003_20260515_182224", "duration_seconds": 573, "cost_usd": 0.298438, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Earth_000": { "score": 11.9, "run_id": "Earth_000_20260515_183205", "duration_seconds": 331, "cost_usd": 0.172396, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Earth_001": { "score": 35.56, "run_id": "Earth_001_20260515_183747", "duration_seconds": 525, "cost_usd": 0.273438, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Earth_002": { "score": 13.0, "run_id": "Earth_002_20260515_184648", "duration_seconds": 343, "cost_usd": 0.178646, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Earth_003": { "score": 0.0, "run_id": "Earth_003_20260515_185243", "duration_seconds": 1380, "cost_usd": 0.71875, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Energy_000": { "score": 14.5, "run_id": "Energy_000_20260515_191600", "duration_seconds": 1256, "cost_usd": 0.654167, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Energy_002": { "score": 33.35, "run_id": "Energy_002_20260518_013609", "duration_seconds": 208, "cost_usd": 0.108333, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Energy_003": { "score": 8.7, "run_id": "Energy_003_20260515_211305", "duration_seconds": 275, "cost_usd": 0.143229, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Information_000": { "score": 2.0, "run_id": "Information_000_20260515_211758", "duration_seconds": 332, "cost_usd": 0.172917, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Information_001": { "score": 3.6, "run_id": "Information_001_20260515_212339", "duration_seconds": 655, "cost_usd": 0.341146, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Information_002": { "score": 1.8, "run_id": "Information_002_20260515_213501", "duration_seconds": 247, "cost_usd": 0.128646, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Information_003": { "score": 2.65, "run_id": "Information_003_20260515_213914", "duration_seconds": 856, "cost_usd": 0.445833, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Life_000": { "score": 4.35, "run_id": "Life_000_20260515_215342", "duration_seconds": 561, "cost_usd": 0.292187, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Life_001": { "score": 11.05, "run_id": "Life_001_20260515_220315", "duration_seconds": 889, "cost_usd": 0.463021, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Life_002": { "score": 5.5, "run_id": "Life_002_20260515_221819", "duration_seconds": 386, "cost_usd": 0.201042, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Life_003": { "score": 29.0, "run_id": "Life_003_20260515_222450", "duration_seconds": 606, "cost_usd": 0.315625, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Material_000": { "score": 20.9, "run_id": "Material_000_20260515_223508", "duration_seconds": 578, "cost_usd": 0.301042, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Material_001": { "score": 6.85, "run_id": "Material_001_20260515_224500", "duration_seconds": 294, "cost_usd": 0.153125, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Material_002": { "score": 16.98, "run_id": "Material_002_20260515_225003", "duration_seconds": 3248, "cost_usd": 1.691667, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Material_003": { "score": 4.65, "run_id": "Material_003_20260518_013948", "duration_seconds": 487, "cost_usd": 0.253646, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Math_000": { "score": 6.2, "run_id": "Math_000_20260516_021645", "duration_seconds": 237, "cost_usd": 0.123438, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Math_001": { "score": 13.8, "run_id": "Math_001_20260516_022047", "duration_seconds": 1801, "cost_usd": 0.938021, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Math_002": { "score": 2.0, "run_id": "Math_002_20260516_025058", "duration_seconds": 627, "cost_usd": 0.326562, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Math_003": { "score": 12.0, "run_id": "Math_003_20260516_030137", "duration_seconds": 196, "cost_usd": 0.102083, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Neuroscience_000": { "score": 6.0, "run_id": "Neuroscience_000_20260516_030457", "duration_seconds": 184, "cost_usd": 0.095833, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Neuroscience_001": { "score": 1.2, "run_id": "Neuroscience_001_20260516_030812", "duration_seconds": 8576, "cost_usd": 4.466667, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260516_053119", "duration_seconds": 155, "cost_usd": 0.080729, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Neuroscience_003": { "score": 3.2, "run_id": "Neuroscience_003_20260516_053408", "duration_seconds": 73, "cost_usd": 0.038021, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Physics_000": { "score": 35.7, "run_id": "Physics_000_20260516_053530", "duration_seconds": 156, "cost_usd": 0.08125, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Physics_001": { "score": 17.95, "run_id": "Physics_001_20260516_053817", "duration_seconds": 206, "cost_usd": 0.107292, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Physics_002": { "score": 19.0, "run_id": "Physics_002_20260516_054156", "duration_seconds": 96, "cost_usd": 0.05, "model": "Grok-4.3", "model_display": "Grok-4.3" }, "Physics_003": { "score": 40.0, "run_id": "Physics_003_20260516_054345", "duration_seconds": 69, "cost_usd": 0.035937, "model": "Grok-4.3", "model_display": "Grok-4.3" } }, "ResearchHarness (Kimi-K2.5)": { "Astronomy_000": { "score": 14.4, "run_id": "Astronomy_000_20260415_115142", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Astronomy_001": { "score": 21.2, "run_id": "Astronomy_001_20260415_115142", "duration_seconds": 649, "cost_usd": 0.2596, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Astronomy_002": { "score": 11.0, "run_id": "Astronomy_002_20260415_115310", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Astronomy_003": { "score": 46.8, "run_id": "Astronomy_003_20260415_120217", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Chemistry_000": { "score": 18.9, "run_id": "Chemistry_000_20260415_120315", "duration_seconds": 2612, "cost_usd": 1.0448, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Chemistry_001": { "score": 3.55, "run_id": "Chemistry_001_20260415_120942", "duration_seconds": 1772, "cost_usd": 0.7088, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Chemistry_002": { "score": 1.2, "run_id": "Chemistry_002_20260415_123938", "duration_seconds": 593, "cost_usd": 0.2372, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Chemistry_003": { "score": 6.9, "run_id": "Chemistry_003_20260415_124720", "duration_seconds": 513, "cost_usd": 0.2052, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Earth_000": { "score": 15.8, "run_id": "Earth_000_20260415_125003", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Earth_001": { "score": 21.06, "run_id": "Earth_001_20260415_125648", "duration_seconds": 580, "cost_usd": 0.232, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Earth_002": { "score": 25.2, "run_id": "Earth_002_20260415_130601", "duration_seconds": 835, "cost_usd": 0.334, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Earth_003": { "score": 3.6, "run_id": "Earth_003_20260415_130710", "duration_seconds": 855, "cost_usd": 0.342, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Energy_000": { "score": 5.0, "run_id": "Energy_000_20260415_132037", "duration_seconds": 1069, "cost_usd": 0.4276, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Energy_001": { "score": 3.4, "run_id": "Energy_001_20260415_132153", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Energy_002": { "score": 25.3, "run_id": "Energy_002_20260415_132706", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Energy_003": { "score": 19.5, "run_id": "Energy_003_20260415_133243", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Information_000": { "score": 16.4, "run_id": "Information_000_20260415_140033", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Information_001": { "score": 7.6, "run_id": "Information_001_20260415_140033", "duration_seconds": 1095, "cost_usd": 0.438, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Information_002": { "score": 10.3, "run_id": "Information_002_20260415_140621", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Information_003": { "score": 3.25, "run_id": "Information_003_20260415_141853", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Life_000": { "score": 7.45, "run_id": "Life_000_20260415_141919", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Life_001": { "score": 7.75, "run_id": "Life_001_20260415_143226", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Life_002": { "score": 5.0, "run_id": "Life_002_20260415_143514", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Life_003": { "score": 25.4, "run_id": "Life_003_20260415_144005", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Material_000": { "score": 4.0, "run_id": "Material_000_20260415_144516", "duration_seconds": 3600, "cost_usd": 1.44, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Material_001": { "score": 6.3, "run_id": "Material_001_20260415_150331", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Material_002": { "score": 31.15, "run_id": "Material_002_20260415_150736", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Material_003": { "score": 10.95, "run_id": "Material_003_20260415_153600", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Math_000": { "score": 18.3, "run_id": "Math_000_20260415_154537", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Math_001": { "score": 15.2, "run_id": "Math_001_20260415_155046", "duration_seconds": 3600, "cost_usd": 1.44, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Math_002": { "score": 14.7, "run_id": "Math_002_20260415_160729", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Neuroscience_000": { "score": 7.4, "run_id": "Neuroscience_000_20260415_165658", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Neuroscience_001": { "score": 0.75, "run_id": "Neuroscience_001_20260415_170436", "duration_seconds": 1206, "cost_usd": 0.4824, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260415_170839", "duration_seconds": 2612, "cost_usd": 1.0448, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Neuroscience_003": { "score": 3.7, "run_id": "Neuroscience_003_20260415_172530", "duration_seconds": 1087, "cost_usd": 0.4348, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Physics_000": { "score": 13.2, "run_id": "Physics_000_20260415_174439", "duration_seconds": 763, "cost_usd": 0.3052, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Physics_001": { "score": 32.05, "run_id": "Physics_001_20260415_175239", "duration_seconds": 1544, "cost_usd": 0.6176, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Physics_002": { "score": 20.1, "run_id": "Physics_002_20260415_175755", "duration_seconds": 2306, "cost_usd": 0.9224, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" }, "Physics_003": { "score": 40.5, "run_id": "Physics_003_20260415_181856", "duration_seconds": 887, "cost_usd": 0.3548, "model": "Kimi-K2.5", "model_display": "Kimi-K2.5" } }, "ResearchHarness (Kimi-K2.6)": { "Astronomy_000": { "score": 3.1, "run_id": "Astronomy_000_20260518_114619", "duration_seconds": 3099, "cost_usd": 1.769013, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Astronomy_001": { "score": 36.0, "run_id": "Astronomy_001_20260518_123809", "duration_seconds": 2440, "cost_usd": 1.392833, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Astronomy_002": { "score": 28.1, "run_id": "Astronomy_002_20260518_131859", "duration_seconds": 5001, "cost_usd": 2.854738, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Astronomy_003": { "score": 44.0, "run_id": "Astronomy_003_20260518_144228", "duration_seconds": 739, "cost_usd": 0.421846, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Chemistry_001": { "score": 5.75, "run_id": "Chemistry_001_20260518_172502", "duration_seconds": 3702, "cost_usd": 2.113225, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Chemistry_002": { "score": 0.4, "run_id": "Chemistry_002_20260518_182701", "duration_seconds": 684, "cost_usd": 0.39045, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Earth_000": { "score": 21.6, "run_id": "Earth_000_20260518_202022", "duration_seconds": 2049, "cost_usd": 1.169638, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Earth_001": { "score": 41.65, "run_id": "Earth_001_20260518_205500", "duration_seconds": 589, "cost_usd": 0.336221, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Earth_002": { "score": 24.8, "run_id": "Earth_002_20260518_210503", "duration_seconds": 773, "cost_usd": 0.441254, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Earth_003": { "score": 3.4, "run_id": "Earth_003_20260518_211840", "duration_seconds": 5307, "cost_usd": 3.029413, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Energy_000": { "score": 9.5, "run_id": "Energy_000_20260518_224729", "duration_seconds": 1794, "cost_usd": 1.024075, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Energy_001": { "score": 10.4, "run_id": "Energy_001_20260518_231734", "duration_seconds": 1753, "cost_usd": 1.000671, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Energy_002": { "score": 31.4, "run_id": "Energy_002_20260518_234701", "duration_seconds": 2050, "cost_usd": 1.170208, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Energy_003": { "score": 21.0, "run_id": "Energy_003_20260519_002123", "duration_seconds": 351, "cost_usd": 0.200362, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Information_000": { "score": 0.0, "run_id": "Information_000_20260519_002724", "duration_seconds": 3051, "cost_usd": 1.741613, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Information_001": { "score": 18.0, "run_id": "Information_001_20260519_011821", "duration_seconds": 1432, "cost_usd": 0.817433, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Information_002": { "score": 27.8, "run_id": "Information_002_20260519_014235", "duration_seconds": 283, "cost_usd": 0.161546, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Information_003": { "score": 5.25, "run_id": "Information_003_20260519_094352", "duration_seconds": 4119, "cost_usd": 2.351263, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Life_000": { "score": 6.8, "run_id": "Life_000_20260519_105356", "duration_seconds": 2016, "cost_usd": 1.1508, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Life_001": { "score": 11.55, "run_id": "Life_001_20260519_112751", "duration_seconds": 634, "cost_usd": 0.361908, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Life_002": { "score": 2.8, "run_id": "Life_002_20260519_113839", "duration_seconds": 751, "cost_usd": 0.428696, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Life_003": { "score": 34.1, "run_id": "Life_003_20260519_115113", "duration_seconds": 725, "cost_usd": 0.413854, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Material_000": { "score": 15.5, "run_id": "Material_000_20260519_120332", "duration_seconds": 4276, "cost_usd": 2.440883, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Material_001": { "score": 10.1, "run_id": "Material_001_20260519_131456", "duration_seconds": 634, "cost_usd": 0.361908, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Material_002": { "score": 39.08, "run_id": "Material_002_20260519_132542", "duration_seconds": 2495, "cost_usd": 1.424229, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Material_003": { "score": 24.65, "run_id": "Material_003_20260519_140746", "duration_seconds": 1787, "cost_usd": 1.020079, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Math_000": { "score": 23.55, "run_id": "Math_000_20260519_143753", "duration_seconds": 4408, "cost_usd": 2.516233, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Math_001": { "score": 21.6, "run_id": "Math_001_20260519_155141", "duration_seconds": 2290, "cost_usd": 1.307208, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Math_002": { "score": 14.0, "run_id": "Math_002_20260519_163350", "duration_seconds": 3375, "cost_usd": 1.926563, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Math_003": { "score": 18.75, "run_id": "Math_003_20260519_173023", "duration_seconds": 3529, "cost_usd": 2.014471, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Neuroscience_000": { "score": 9.0, "run_id": "Neuroscience_000_20260519_182920", "duration_seconds": 3328, "cost_usd": 1.899733, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Neuroscience_001": { "score": 3.5, "run_id": "Neuroscience_001_20260519_192503", "duration_seconds": 2171, "cost_usd": 1.239279, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Neuroscience_002": { "score": 0.5, "run_id": "Neuroscience_002_20260519_200133", "duration_seconds": 1944, "cost_usd": 1.1097, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Neuroscience_003": { "score": 20.4, "run_id": "Neuroscience_003_20260519_203414", "duration_seconds": 1360, "cost_usd": 0.776333, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Physics_000": { "score": 25.6, "run_id": "Physics_000_20260519_205720", "duration_seconds": 873, "cost_usd": 0.498338, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Physics_001": { "score": 17.0, "run_id": "Physics_001_20260519_211209", "duration_seconds": 1423, "cost_usd": 0.812296, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Physics_002": { "score": 21.1, "run_id": "Physics_002_20260519_213610", "duration_seconds": 625, "cost_usd": 0.356771, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" }, "Physics_003": { "score": 32.1, "run_id": "Physics_003_20260519_214659", "duration_seconds": 1204, "cost_usd": 0.687283, "model": "Kimi-K2.6", "model_display": "Kimi-K2.6" } }, "ResearchHarness (MiMo-V2-Pro)": { "Astronomy_000": { "score": 11.2, "run_id": "Astronomy_000_20260414_152814", "duration_seconds": 7017, "cost_usd": 3.5085, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Astronomy_001": { "score": 10.0, "run_id": "Astronomy_001_20260414_114811", "duration_seconds": 406, "cost_usd": 0.203, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Astronomy_002": { "score": 19.1, "run_id": "Astronomy_002_20260414_154725", "duration_seconds": 4094, "cost_usd": 2.047, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Astronomy_003": { "score": 46.7, "run_id": "Astronomy_003_20260414_120359", "duration_seconds": 264, "cost_usd": 0.132, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Chemistry_000": { "score": 14.0, "run_id": "Chemistry_000_20260414_120823", "duration_seconds": 4588, "cost_usd": 2.294, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Chemistry_001": { "score": 0.75, "run_id": "Chemistry_001_20260414_132451", "duration_seconds": 384, "cost_usd": 0.192, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260414_133115", "duration_seconds": 505, "cost_usd": 0.2525, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Chemistry_003": { "score": 9.5, "run_id": "Chemistry_003_20260414_165658", "duration_seconds": 3989, "cost_usd": 1.9945, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Earth_000": { "score": 14.7, "run_id": "Earth_000_20260414_150832", "duration_seconds": 772, "cost_usd": 0.386, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Earth_001": { "score": 35.57, "run_id": "Earth_001_20260414_152124", "duration_seconds": 516, "cost_usd": 0.258, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Earth_002": { "score": 28.1, "run_id": "Earth_002_20260414_180327", "duration_seconds": 690, "cost_usd": 0.345, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Earth_003": { "score": 1.0, "run_id": "Earth_003_20260414_181457", "duration_seconds": 673, "cost_usd": 0.3365, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Energy_000": { "score": 12.5, "run_id": "Energy_000_20260414_182610", "duration_seconds": 1499, "cost_usd": 0.7495, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Energy_001": { "score": 9.1, "run_id": "Energy_001_20260414_185109", "duration_seconds": 2118, "cost_usd": 1.059, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Energy_002": { "score": 21.9, "run_id": "Energy_002_20260414_192627", "duration_seconds": 1779, "cost_usd": 0.8895, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Energy_003": { "score": 7.5, "run_id": "Energy_003_20260414_195607", "duration_seconds": 587, "cost_usd": 0.2935, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Information_002": { "score": 13.2, "run_id": "Information_002_20260414_213609", "duration_seconds": 321, "cost_usd": 0.1605, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Information_003": { "score": 5.5, "run_id": "Information_003_20260414_214130", "duration_seconds": 1529, "cost_usd": 0.7645, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Life_000": { "score": 4.9, "run_id": "Life_000_20260414_220659", "duration_seconds": 646, "cost_usd": 0.323, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Life_001": { "score": 7.05, "run_id": "Life_001_20260414_221745", "duration_seconds": 338, "cost_usd": 0.169, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Life_002": { "score": 6.6, "run_id": "Life_002_20260414_222323", "duration_seconds": 472, "cost_usd": 0.236, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Life_003": { "score": 32.6, "run_id": "Life_003_20260414_223115", "duration_seconds": 396, "cost_usd": 0.198, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Material_000": { "score": 13.2, "run_id": "Material_000_20260414_223751", "duration_seconds": 1286, "cost_usd": 0.643, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Material_001": { "score": 18.0, "run_id": "Material_001_20260414_225917", "duration_seconds": 523, "cost_usd": 0.2615, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Material_002": { "score": 37.31, "run_id": "Material_002_20260414_230800", "duration_seconds": 674, "cost_usd": 0.337, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Material_003": { "score": 17.0, "run_id": "Material_003_20260414_231914", "duration_seconds": 916, "cost_usd": 0.458, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Math_000": { "score": 7.2, "run_id": "Math_000_20260414_091314", "duration_seconds": 789, "cost_usd": 0.3945, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Math_001": { "score": 33.6, "run_id": "Math_001_20260414_233431", "duration_seconds": 333, "cost_usd": 0.1665, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Math_002": { "score": 3.5, "run_id": "Math_002_20260414_234004", "duration_seconds": 2411, "cost_usd": 1.2055, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260415_002019", "duration_seconds": 621, "cost_usd": 0.3105, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Neuroscience_000": { "score": 9.2, "run_id": "Neuroscience_000_20260415_003040", "duration_seconds": 408, "cost_usd": 0.204, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Neuroscience_001": { "score": 2.7, "run_id": "Neuroscience_001_20260415_003728", "duration_seconds": 491, "cost_usd": 0.2455, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260415_004540", "duration_seconds": 1514, "cost_usd": 0.757, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Neuroscience_003": { "score": 13.05, "run_id": "Neuroscience_003_20260415_011054", "duration_seconds": 1610, "cost_usd": 0.805, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Physics_000": { "score": 17.5, "run_id": "Physics_000_20260415_013744", "duration_seconds": 461, "cost_usd": 0.2305, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Physics_001": { "score": 22.55, "run_id": "Physics_001_20260415_100406", "duration_seconds": 709, "cost_usd": 0.3545, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Physics_002": { "score": 22.0, "run_id": "Physics_002_20260415_145205", "duration_seconds": 423, "cost_usd": 0.2115, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" }, "Physics_003": { "score": 43.5, "run_id": "Physics_003_20260415_022350", "duration_seconds": 337, "cost_usd": 0.1685, "model": "MiMo-V2-Pro", "model_display": "MiMo-V2-Pro" } }, "ResearchHarness (MiMo-V2.5)": { "Astronomy_000": { "score": 19.0, "run_id": "Astronomy_000_20260515_161340", "duration_seconds": 915, "cost_usd": 0.2562, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Astronomy_001": { "score": 8.0, "run_id": "Astronomy_001_20260515_162909", "duration_seconds": 321, "cost_usd": 0.08988, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Astronomy_002": { "score": 27.5, "run_id": "Astronomy_002_20260515_163452", "duration_seconds": 943, "cost_usd": 0.26404, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Astronomy_003": { "score": 43.1, "run_id": "Astronomy_003_20260515_165041", "duration_seconds": 221, "cost_usd": 0.06188, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Chemistry_001": { "score": 5.25, "run_id": "Chemistry_001_20260515_192436", "duration_seconds": 352, "cost_usd": 0.09856, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260515_193045", "duration_seconds": 307, "cost_usd": 0.08596, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Chemistry_003": { "score": 8.0, "run_id": "Chemistry_003_20260515_193607", "duration_seconds": 941, "cost_usd": 0.26348, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Earth_000": { "score": 15.2, "run_id": "Earth_000_20260515_195158", "duration_seconds": 439, "cost_usd": 0.12292, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Earth_001": { "score": 22.54, "run_id": "Earth_001_20260515_195931", "duration_seconds": 305, "cost_usd": 0.0854, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Earth_002": { "score": 25.9, "run_id": "Earth_002_20260515_200447", "duration_seconds": 426, "cost_usd": 0.11928, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Earth_003": { "score": 0.0, "run_id": "Earth_003_20260515_201206", "duration_seconds": 2145, "cost_usd": 0.6006, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Energy_000": { "score": 14.7, "run_id": "Energy_000_20260518_000259", "duration_seconds": 1148, "cost_usd": 0.32144, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Energy_001": { "score": 23.4, "run_id": "Energy_001_20260518_002218", "duration_seconds": 464, "cost_usd": 0.12992, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Energy_002": { "score": 23.7, "run_id": "Energy_002_20260518_003015", "duration_seconds": 599, "cost_usd": 0.16772, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Energy_003": { "score": 16.3, "run_id": "Energy_003_20260518_004025", "duration_seconds": 320, "cost_usd": 0.0896, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Information_000": { "score": 16.7, "run_id": "Information_000_20260518_004600", "duration_seconds": 1598, "cost_usd": 0.44744, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Information_001": { "score": 1.0, "run_id": "Information_001_20260518_011248", "duration_seconds": 1575, "cost_usd": 0.441, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Information_002": { "score": 26.9, "run_id": "Information_002_20260518_013922", "duration_seconds": 200, "cost_usd": 0.056, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Information_003": { "score": 4.6, "run_id": "Information_003_20260518_014248", "duration_seconds": 6544, "cost_usd": 1.83232, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Life_000": { "score": 4.75, "run_id": "Life_000_20260518_033206", "duration_seconds": 409, "cost_usd": 0.11452, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Life_001": { "score": 11.95, "run_id": "Life_001_20260518_033906", "duration_seconds": 200, "cost_usd": 0.056, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Life_002": { "score": 3.5, "run_id": "Life_002_20260518_034243", "duration_seconds": 496, "cost_usd": 0.13888, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Life_003": { "score": 30.9, "run_id": "Life_003_20260518_035105", "duration_seconds": 233, "cost_usd": 0.06524, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Material_000": { "score": 23.5, "run_id": "Material_000_20260518_035508", "duration_seconds": 1609, "cost_usd": 0.45052, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Material_001": { "score": 7.0, "run_id": "Material_001_20260518_042207", "duration_seconds": 223, "cost_usd": 0.06244, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Material_002": { "score": 28.5, "run_id": "Material_002_20260518_042559", "duration_seconds": 219, "cost_usd": 0.06132, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Material_003": { "score": 17.7, "run_id": "Material_003_20260518_042948", "duration_seconds": 469, "cost_usd": 0.13132, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Math_000": { "score": 18.5, "run_id": "Math_000_20260518_043746", "duration_seconds": 788, "cost_usd": 0.22064, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Math_001": { "score": 30.2, "run_id": "Math_001_20260518_045107", "duration_seconds": 750, "cost_usd": 0.21, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Math_002": { "score": 14.1, "run_id": "Math_002_20260518_050356", "duration_seconds": 1345, "cost_usd": 0.3766, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260518_052635", "duration_seconds": 397, "cost_usd": 0.11116, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Neuroscience_000": { "score": 3.6, "run_id": "Neuroscience_000_20260518_053316", "duration_seconds": 665, "cost_usd": 0.1862, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Neuroscience_001": { "score": 1.25, "run_id": "Neuroscience_001_20260518_054434", "duration_seconds": 455, "cost_usd": 0.1274, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Neuroscience_002": { "score": 1.0, "run_id": "Neuroscience_002_20260518_055222", "duration_seconds": 1222, "cost_usd": 0.34216, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Neuroscience_003": { "score": 12.75, "run_id": "Neuroscience_003_20260518_061256", "duration_seconds": 426, "cost_usd": 0.11928, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Physics_000": { "score": 27.4, "run_id": "Physics_000_20260518_062014", "duration_seconds": 287, "cost_usd": 0.08036, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Physics_001": { "score": 46.25, "run_id": "Physics_001_20260518_062515", "duration_seconds": 1462, "cost_usd": 0.40936, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Physics_002": { "score": 29.0, "run_id": "Physics_002_20260518_064947", "duration_seconds": 273, "cost_usd": 0.07644, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" }, "Physics_003": { "score": 36.0, "run_id": "Physics_003_20260518_065440", "duration_seconds": 268, "cost_usd": 0.07504, "model": "MiMo-V2.5", "model_display": "MiMo-V2.5" } }, "ResearchHarness (Qwen3.5-397B-A17B)": { "Astronomy_000": { "score": 11.1, "run_id": "Astronomy_000_20260416_152811", "duration_seconds": 946, "cost_usd": 0.4257, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Astronomy_001": { "score": 8.0, "run_id": "Astronomy_001_20260416_152811", "duration_seconds": 463, "cost_usd": 0.20835, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Astronomy_002": { "score": 3.1, "run_id": "Astronomy_002_20260416_153612", "duration_seconds": 1505, "cost_usd": 0.67725, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Astronomy_003": { "score": 46.4, "run_id": "Astronomy_003_20260416_154412", "duration_seconds": 764, "cost_usd": 0.3438, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Chemistry_000": { "score": 20.4, "run_id": "Chemistry_000_20260416_155726", "duration_seconds": 2870, "cost_usd": 1.2915, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Chemistry_001": { "score": 0.5, "run_id": "Chemistry_001_20260416_160135", "duration_seconds": 1467, "cost_usd": 0.66015, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Chemistry_002": { "score": 3.6, "run_id": "Chemistry_002_20260416_162621", "duration_seconds": 419, "cost_usd": 0.18855, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Chemistry_003": { "score": 6.9, "run_id": "Chemistry_003_20260416_163347", "duration_seconds": 1435, "cost_usd": 0.64575, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Earth_000": { "score": 15.6, "run_id": "Earth_000_20260416_164538", "duration_seconds": 970, "cost_usd": 0.4365, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Earth_001": { "score": 35.58, "run_id": "Earth_001_20260416_165809", "duration_seconds": 781, "cost_usd": 0.35145, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Earth_002": { "score": 18.6, "run_id": "Earth_002_20260416_170232", "duration_seconds": 829, "cost_usd": 0.37305, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Earth_003": { "score": 3.5, "run_id": "Earth_003_20260416_171139", "duration_seconds": 1117, "cost_usd": 0.50265, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Energy_000": { "score": 16.9, "run_id": "Energy_000_20260416_171718", "duration_seconds": 1841, "cost_usd": 0.82845, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Energy_001": { "score": 10.6, "run_id": "Energy_001_20260416_173052", "duration_seconds": 1066, "cost_usd": 0.4797, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Energy_002": { "score": 31.55, "run_id": "Energy_002_20260416_174825", "duration_seconds": 690, "cost_usd": 0.3105, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Energy_003": { "score": 15.5, "run_id": "Energy_003_20260416_174903", "duration_seconds": 1066, "cost_usd": 0.4797, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Information_000": { "score": 9.5, "run_id": "Information_000_20260416_174938", "duration_seconds": 478, "cost_usd": 0.2151, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Information_001": { "score": 6.4, "run_id": "Information_001_20260416_175758", "duration_seconds": 249, "cost_usd": 0.11205, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Information_002": { "score": 7.2, "run_id": "Information_002_20260416_180020", "duration_seconds": 426, "cost_usd": 0.1917, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Information_003": { "score": 0.75, "run_id": "Information_003_20260416_180247", "duration_seconds": 2679, "cost_usd": 1.20555, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Life_000": { "score": 4.8, "run_id": "Life_000_20260416_180742", "duration_seconds": 521, "cost_usd": 0.23445, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Life_001": { "score": 10.75, "run_id": "Life_001_20260416_181657", "duration_seconds": 553, "cost_usd": 0.24885, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Life_002": { "score": 6.5, "run_id": "Life_002_20260416_182642", "duration_seconds": 879, "cost_usd": 0.39555, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Life_003": { "score": 23.8, "run_id": "Life_003_20260416_184145", "duration_seconds": 1304, "cost_usd": 0.5868, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Material_000": { "score": 14.0, "run_id": "Material_000_20260416_184755", "duration_seconds": 3600, "cost_usd": 1.62, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Material_001": { "score": 17.1, "run_id": "Material_001_20260416_190409", "duration_seconds": 1024, "cost_usd": 0.4608, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Material_002": { "score": 33.5, "run_id": "Material_002_20260416_192151", "duration_seconds": 474, "cost_usd": 0.2133, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Material_003": { "score": 8.9, "run_id": "Material_003_20260416_193039", "duration_seconds": 3273, "cost_usd": 1.47285, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Math_000": { "score": 23.1, "run_id": "Math_000_20260416_194756", "duration_seconds": 1030, "cost_usd": 0.4635, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Math_001": { "score": 30.4, "run_id": "Math_001_20260416_200558", "duration_seconds": 3600, "cost_usd": 1.62, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Math_002": { "score": 3.5, "run_id": "Math_002_20260416_202559", "duration_seconds": 1762, "cost_usd": 0.7929, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Math_003": { "score": 0.0, "run_id": "Math_003_20260416_205639", "duration_seconds": 880, "cost_usd": 0.396, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Neuroscience_000": { "score": 11.4, "run_id": "Neuroscience_000_20260416_210559", "duration_seconds": 758, "cost_usd": 0.3411, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Neuroscience_001": { "score": 1.25, "run_id": "Neuroscience_001_20260416_211156", "duration_seconds": 1102, "cost_usd": 0.4959, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260416_211933", "duration_seconds": 1949, "cost_usd": 0.87705, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Neuroscience_003": { "score": 4.05, "run_id": "Neuroscience_003_20260416_213123", "duration_seconds": 2255, "cost_usd": 1.01475, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Physics_000": { "score": 20.9, "run_id": "Physics_000_20260416_215341", "duration_seconds": 1093, "cost_usd": 0.49185, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Physics_001": { "score": 25.8, "run_id": "Physics_001_20260416_221052", "duration_seconds": 1166, "cost_usd": 0.5247, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Physics_002": { "score": 19.55, "run_id": "Physics_002_20260416_221313", "duration_seconds": 991, "cost_usd": 0.44595, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" }, "Physics_003": { "score": 38.3, "run_id": "Physics_003_20260416_223115", "duration_seconds": 985, "cost_usd": 0.44325, "model": "Qwen3.5-397B-A17B", "model_display": "Qwen3.5-397B-A17B" } }, "ResearchHarness (Qwen3.6-Plus)": { "Astronomy_000": { "score": 21.0, "run_id": "Astronomy_000_20260414_215221", "duration_seconds": 1639, "cost_usd": 0.614625, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Astronomy_001": { "score": 35.2, "run_id": "Astronomy_001_20260414_221941", "duration_seconds": 1788, "cost_usd": 0.6705, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Astronomy_002": { "score": 20.0, "run_id": "Astronomy_002_20260414_224929", "duration_seconds": 2483, "cost_usd": 0.931125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Astronomy_003": { "score": 46.9, "run_id": "Astronomy_003_20260414_233052", "duration_seconds": 395, "cost_usd": 0.148125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Chemistry_000": { "score": 17.9, "run_id": "Chemistry_000_20260414_233727", "duration_seconds": 8019, "cost_usd": 3.007125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Chemistry_001": { "score": 3.95, "run_id": "Chemistry_001_20260415_015105", "duration_seconds": 1406, "cost_usd": 0.52725, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Chemistry_002": { "score": 0.0, "run_id": "Chemistry_002_20260415_021431", "duration_seconds": 387, "cost_usd": 0.145125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Chemistry_003": { "score": 9.0, "run_id": "Chemistry_003_20260415_022059", "duration_seconds": 1552, "cost_usd": 0.582, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Earth_000": { "score": 14.2, "run_id": "Earth_000_20260415_024651", "duration_seconds": 370, "cost_usd": 0.13875, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Earth_001": { "score": 26.63, "run_id": "Earth_001_20260415_025301", "duration_seconds": 459, "cost_usd": 0.172125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Earth_002": { "score": 24.4, "run_id": "Earth_002_20260415_030040", "duration_seconds": 827, "cost_usd": 0.310125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Earth_003": { "score": 1.2, "run_id": "Earth_003_20260415_031428", "duration_seconds": 1063, "cost_usd": 0.398625, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Energy_000": { "score": 22.0, "run_id": "Energy_000_20260415_130453", "duration_seconds": 1615, "cost_usd": 0.605625, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Energy_001": { "score": 24.7, "run_id": "Energy_001_20260415_034329", "duration_seconds": 1136, "cost_usd": 0.426, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Energy_002": { "score": 30.55, "run_id": "Energy_002_20260415_040225", "duration_seconds": 468, "cost_usd": 0.1755, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Energy_003": { "score": 7.5, "run_id": "Energy_003_20260415_133148", "duration_seconds": 1004, "cost_usd": 0.3765, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Information_000": { "score": 47.1, "run_id": "Information_000_20260415_134832", "duration_seconds": 3370, "cost_usd": 1.26375, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Information_001": { "score": 5.6, "run_id": "Information_001_20260415_144442", "duration_seconds": 721, "cost_usd": 0.270375, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Information_002": { "score": 12.0, "run_id": "Information_002_20260415_145643", "duration_seconds": 460, "cost_usd": 0.1725, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Information_003": { "score": 5.95, "run_id": "Information_003_20260415_171427", "duration_seconds": 1325, "cost_usd": 0.496875, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Life_000": { "score": 7.0, "run_id": "Life_000_20260415_094527", "duration_seconds": 2376, "cost_usd": 0.891, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Life_001": { "score": 5.7, "run_id": "Life_001_20260415_102503", "duration_seconds": 425, "cost_usd": 0.159375, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Life_002": { "score": 7.3, "run_id": "Life_002_20260415_103208", "duration_seconds": 637, "cost_usd": 0.238875, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Life_003": { "score": 28.4, "run_id": "Life_003_20260415_104245", "duration_seconds": 368, "cost_usd": 0.138, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Material_000": { "score": 20.8, "run_id": "Material_000_20260415_104854", "duration_seconds": 2619, "cost_usd": 0.982125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Material_001": { "score": 15.95, "run_id": "Material_001_20260415_113232", "duration_seconds": 864, "cost_usd": 0.324, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Material_002": { "score": 27.05, "run_id": "Material_002_20260415_114656", "duration_seconds": 805, "cost_usd": 0.301875, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Material_003": { "score": 14.45, "run_id": "Material_003_20260415_120022", "duration_seconds": 2323, "cost_usd": 0.871125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Math_000": { "score": 23.5, "run_id": "Math_000_20260415_191952", "duration_seconds": 1825, "cost_usd": 0.684375, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Math_001": { "score": 33.1, "run_id": "Math_001_20260415_131344", "duration_seconds": 928, "cost_usd": 0.348, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Math_002": { "score": 9.6, "run_id": "Math_002_20260415_132912", "duration_seconds": 3192, "cost_usd": 1.197, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260415_142226", "duration_seconds": 1467, "cost_usd": 0.550125, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Neuroscience_000": { "score": 8.6, "run_id": "Neuroscience_000_20260415_101555", "duration_seconds": 421, "cost_usd": 0.157875, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Neuroscience_001": { "score": 2.0, "run_id": "Neuroscience_001_20260415_102256", "duration_seconds": 942, "cost_usd": 0.35325, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Neuroscience_002": { "score": 0.75, "run_id": "Neuroscience_002_20260415_103839", "duration_seconds": 1868, "cost_usd": 0.7005, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Neuroscience_003": { "score": 7.2, "run_id": "Neuroscience_003_20260415_110948", "duration_seconds": 1180, "cost_usd": 0.4425, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Physics_000": { "score": 24.8, "run_id": "Physics_000_20260415_164039", "duration_seconds": 679, "cost_usd": 0.254625, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Physics_001": { "score": 27.55, "run_id": "Physics_001_20260415_113755", "duration_seconds": 761, "cost_usd": 0.285375, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Physics_002": { "score": 28.5, "run_id": "Physics_002_20260415_115036", "duration_seconds": 566, "cost_usd": 0.21225, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" }, "Physics_003": { "score": 41.8, "run_id": "Physics_003_20260415_122845", "duration_seconds": 546, "cost_usd": 0.20475, "model": "Qwen3.6-Plus", "model_display": "Qwen3.6-Plus" } }, "ResearchHarness (Qwen3.7-Max)": { "Astronomy_000": { "score": 15.9, "run_id": "Astronomy_000_20260527_123627", "duration_seconds": 807, "cost_usd": 0.504375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Astronomy_001": { "score": 11.0, "run_id": "Astronomy_001_20260526_201220", "duration_seconds": 321, "cost_usd": 0.200625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Astronomy_002": { "score": 28.0, "run_id": "Astronomy_002_20260526_201800", "duration_seconds": 891, "cost_usd": 0.556875, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Astronomy_003": { "score": 40.2, "run_id": "Astronomy_003_20260526_203301", "duration_seconds": 330, "cost_usd": 0.20625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Chemistry_000": { "score": 13.25, "run_id": "Chemistry_000_20260526_203850", "duration_seconds": 2213, "cost_usd": 1.383125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Chemistry_001": { "score": 0.5, "run_id": "Chemistry_001_20260526_211600", "duration_seconds": 694, "cost_usd": 0.43375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Chemistry_002": { "score": 1.0, "run_id": "Chemistry_002_20260527_102132", "duration_seconds": 559, "cost_usd": 0.349375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Chemistry_003": { "score": 12.9, "run_id": "Chemistry_003_20260527_125008", "duration_seconds": 854, "cost_usd": 0.53375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Earth_000": { "score": 12.5, "run_id": "Earth_000_20260526_214031", "duration_seconds": 339, "cost_usd": 0.211875, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Earth_001": { "score": 36.65, "run_id": "Earth_001_20260526_214648", "duration_seconds": 354, "cost_usd": 0.22125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Earth_002": { "score": 9.6, "run_id": "Earth_002_20260526_215300", "duration_seconds": 562, "cost_usd": 0.35125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Earth_003": { "score": 0.6, "run_id": "Earth_003_20260526_220242", "duration_seconds": 568, "cost_usd": 0.355, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Energy_001": { "score": 27.0, "run_id": "Energy_001_20260526_222002", "duration_seconds": 369, "cost_usd": 0.230625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Energy_002": { "score": 32.35, "run_id": "Energy_002_20260526_222628", "duration_seconds": 815, "cost_usd": 0.509375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Energy_003": { "score": 16.5, "run_id": "Energy_003_20260527_104651", "duration_seconds": 486, "cost_usd": 0.30375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Information_002": { "score": 38.4, "run_id": "Information_002_20260526_225326", "duration_seconds": 273, "cost_usd": 0.170625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Information_003": { "score": 5.25, "run_id": "Information_003_20260526_225818", "duration_seconds": 715, "cost_usd": 0.446875, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Life_001": { "score": 0.0, "run_id": "Life_001_20260526_231703", "duration_seconds": 342, "cost_usd": 0.21375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Life_002": { "score": 6.1, "run_id": "Life_002_20260526_232305", "duration_seconds": 306, "cost_usd": 0.19125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Life_003": { "score": 24.8, "run_id": "Life_003_20260526_232817", "duration_seconds": 305, "cost_usd": 0.190625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Material_001": { "score": 15.9, "run_id": "Material_001_20260527_111743", "duration_seconds": 512, "cost_usd": 0.32, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Material_002": { "score": 30.49, "run_id": "Material_002_20260527_001300", "duration_seconds": 1430, "cost_usd": 0.89375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Material_003": { "score": 25.25, "run_id": "Material_003_20260527_145708", "duration_seconds": 1299, "cost_usd": 0.811875, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Math_000": { "score": 29.05, "run_id": "Math_000_20260527_004252", "duration_seconds": 476, "cost_usd": 0.2975, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Math_001": { "score": 31.2, "run_id": "Math_001_20260527_005133", "duration_seconds": 472, "cost_usd": 0.295, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Math_002": { "score": 19.1, "run_id": "Math_002_20260527_005940", "duration_seconds": 505, "cost_usd": 0.315625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Math_003": { "score": 10.0, "run_id": "Math_003_20260527_133920", "duration_seconds": 450, "cost_usd": 0.28125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Neuroscience_000": { "score": 11.0, "run_id": "Neuroscience_000_20260527_011357", "duration_seconds": 409, "cost_usd": 0.255625, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Neuroscience_001": { "score": 3.75, "run_id": "Neuroscience_001_20260527_012118", "duration_seconds": 470, "cost_usd": 0.29375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Neuroscience_002": { "score": 0.0, "run_id": "Neuroscience_002_20260527_013027", "duration_seconds": 1548, "cost_usd": 0.9675, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Neuroscience_003": { "score": 13.15, "run_id": "Neuroscience_003_20260527_151915", "duration_seconds": 1922, "cost_usd": 1.20125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Physics_001": { "score": 29.45, "run_id": "Physics_001_20260527_021219", "duration_seconds": 349, "cost_usd": 0.218125, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Physics_002": { "score": 40.05, "run_id": "Physics_002_20260527_021848", "duration_seconds": 390, "cost_usd": 0.24375, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false }, "Physics_003": { "score": 45.4, "run_id": "Physics_003_20260527_022559", "duration_seconds": 372, "cost_usd": 0.2325, "model": "qwen3.7-max", "model_display": "Qwen3.7-Max", "details_exported": false } } }, "frontier": { "Astronomy_000": 33.1, "Astronomy_001": 37.0, "Astronomy_002": 32.0, "Astronomy_003": 47.6, "Chemistry_000": 20.4, "Chemistry_001": 9.0, "Chemistry_002": 5.0, "Chemistry_003": 22.5, "Earth_000": 25.5, "Earth_001": 41.65, "Earth_002": 34.0, "Earth_003": 10.5, "Energy_000": 22.0, "Energy_001": 27.0, "Energy_002": 42.45, "Energy_003": 26.3, "Information_000": 49.4, "Information_001": 18.0, "Information_002": 39.8, "Information_003": 17.75, "Life_000": 10.15, "Life_001": 19.55, "Life_002": 9.7, "Life_003": 40.5, "Material_000": 25.1, "Material_001": 23.6, "Material_002": 40.71, "Material_003": 28.8, "Math_000": 29.05, "Math_001": 44.1, "Math_002": 20.5, "Math_003": 29.6, "Neuroscience_000": 14.0, "Neuroscience_001": 7.05, "Neuroscience_002": 3.75, "Neuroscience_003": 20.4, "Physics_000": 35.7, "Physics_001": 46.25, "Physics_002": 40.05, "Physics_003": 49.0 } }