{ "schema_version": 1, "hardware": { "name": "RTX PRO 6000 Blackwell Workstation", "sm": "sm_120a", "vram_gb": 96, "peak_bandwidth_gb_s": 1800 }, "problems": [ "01_fp8_gemm", "02_kda_cutlass", "03_paged_attention", "04_kahan_softmax", "05_topk_bitonic", "06_sonic_moe_swiglu", "07_w4a16_gemm", "09_fmha_preattn_mrope", "10_patch_embed_conv3d_gemm" ], "models": [ { "label": "codex/gpt-5.5 [xhigh]", "harness": "codex", "model": "gpt-5.5", "effort": "xhigh", "results": { "01_fp8_gemm": { "run_id": "20260427_230711_codex_gpt-5.5_01_fp8_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.4234, "elapsed_seconds": 950 }, "02_kda_cutlass": { "run_id": "20260427_232335_codex_gpt-5.5_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": 0.032, "elapsed_seconds": 1534 }, "03_paged_attention": { "run_id": "20260427_235415_codex_gpt-5.5_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.4975, "elapsed_seconds": 585 }, "04_kahan_softmax": { "run_id": "20260428_000416_codex_gpt-5.5_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.3634, "elapsed_seconds": 595 }, "05_topk_bitonic": { "run_id": "20260428_001416_codex_gpt-5.5_05_topk_bitonic", "correct": true, "has_solution": true, "peak_fraction": 0.0423, "elapsed_seconds": 1485 }, "06_sonic_moe_swiglu": { "run_id": "20260428_003928_codex_gpt-5.5_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.2514, "elapsed_seconds": 1578 }, "07_w4a16_gemm": { "run_id": "20260428_015550_codex_gpt-5.5_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1588, "elapsed_seconds": 855 }, "09_fmha_preattn_mrope": { "run_id": "20260507_012649_codex_gpt-5.5_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.6659, "elapsed_seconds": 876 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_042802_codex_gpt-5.5_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.2484, "elapsed_seconds": 627 } }, "pass_count": 9, "total_runs": 9 }, { "label": "claude/claude-opus-4-7 [max]", "harness": "claude", "model": "claude-opus-4-7", "effort": "max", "results": { "01_fp8_gemm": { "run_id": "20260428_040539_claude_claude-opus-4-7_01_fp8_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.5339, "elapsed_seconds": 2700 }, "02_kda_cutlass": { "run_id": "20260428_045050_claude_claude-opus-4-7_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 887 }, "03_paged_attention": { "run_id": "20260428_053541_claude_claude-opus-4-7_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.6016, "elapsed_seconds": 2701 }, "04_kahan_softmax": { "run_id": "20260428_062104_claude_claude-opus-4-7_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.3166, "elapsed_seconds": 2700 }, "05_topk_bitonic": { "run_id": "20260428_070610_claude_claude-opus-4-7_05_topk_bitonic", "correct": true, "has_solution": true, "peak_fraction": 0.0197, "elapsed_seconds": 2700 }, "06_sonic_moe_swiglu": { "run_id": "20260428_075117_claude_claude-opus-4-7_06_sonic_moe_swiglu", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260428_083917_claude_claude-opus-4-7_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1837, "elapsed_seconds": 2700 }, "09_fmha_preattn_mrope": { "run_id": "20260507_004128_claude_claude-opus-4-7_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.7221, "elapsed_seconds": 2700 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_034256_claude_claude-opus-4-7_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.2119, "elapsed_seconds": 2701 } }, "pass_count": 8, "total_runs": 9 }, { "label": "kimi/kimi-k2.6", "harness": "kimi", "model": "kimi-k2.6", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260428_092426_kimi_kimi-k2.6_01_fp8_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2041 }, "02_kda_cutlass": { "run_id": "20260428_095829_kimi_kimi-k2.6_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": 0.0218, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260428_110049_kimi_kimi-k2.6_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.4316, "elapsed_seconds": 2656 }, "04_kahan_softmax": { "run_id": "20260428_114524_kimi_kimi-k2.6_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.1178, "elapsed_seconds": 1573 }, "05_topk_bitonic": { "run_id": "20260428_121144_kimi_kimi-k2.6_05_topk_bitonic", "correct": true, "has_solution": true, "peak_fraction": 0.0136, "elapsed_seconds": 2700 }, "06_sonic_moe_swiglu": { "run_id": "20260428_125718_kimi_kimi-k2.6_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.1612, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260428_134617_kimi_kimi-k2.6_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.2203, "elapsed_seconds": 2700 }, "09_fmha_preattn_mrope": { "run_id": "20260507_014201_kimi_kimi-k2.6_09_fmha_preattn_mrope", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 5 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_043834_kimi_kimi-k2.6_10_patch_embed_conv3d_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 4 } }, "pass_count": 6, "total_runs": 9 }, { "label": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "harness": "opencode", "model": "openrouter-pinned/xiaomi/mimo-v2.5-pro", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260428_184159_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_01_fp8_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.4343, "elapsed_seconds": 2476 }, "02_kda_cutlass": { "run_id": "20260428_192328_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_02_kda_cutlass", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260428_200832_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_03_paged_attention", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 586 }, "04_kahan_softmax": { "run_id": "20260428_201818_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.121, "elapsed_seconds": 2508 }, "05_topk_bitonic": { "run_id": "20260428_210011_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_05_topk_bitonic", "correct": true, "has_solution": true, "peak_fraction": 0.0168, "elapsed_seconds": 2076 }, "06_sonic_moe_swiglu": { "run_id": "20260428_213514_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.211, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260428_222337_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1366, "elapsed_seconds": 2112 }, "09_fmha_preattn_mrope": { "run_id": "20260507_133507_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.5164, "elapsed_seconds": 2700 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_161536_opencode_openrouter-pinned_xiaomi_mimo-v2.5-pro_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.19, "elapsed_seconds": 2261 } }, "pass_count": 7, "total_runs": 9 }, { "label": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "harness": "opencode", "model": "openrouter-pinned/qwen/qwen3.6-max-preview", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260428_225859_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_01_fp8_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.4288, "elapsed_seconds": 2332 }, "02_kda_cutlass": { "run_id": "20260428_233804_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": 0.0106, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260429_002816_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_03_paged_attention", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 48 }, "04_kahan_softmax": { "run_id": "20260429_002904_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.0775, "elapsed_seconds": 2101 }, "05_topk_bitonic": { "run_id": "20260429_035235_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_05_topk_bitonic", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "06_sonic_moe_swiglu": { "run_id": "20260429_043758_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.0037, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260429_052810_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1097, "elapsed_seconds": 2700 }, "09_fmha_preattn_mrope": { "run_id": "20260507_113709_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.3953, "elapsed_seconds": 2700 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_145657_opencode_openrouter-pinned_qwen_qwen3.6-max-preview_10_patch_embed_conv3d_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 336 } }, "pass_count": 6, "total_runs": 9 }, { "label": "opencode/deepseek/deepseek-v4-flash", "harness": "opencode", "model": "deepseek/deepseek-v4-flash", "effort": "", "results": { "05_topk_bitonic": { "run_id": "20260427_104347_opencode_deepseek_deepseek-v4-flash_05_topk_bitonic", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 510 }, "01_fp8_gemm": { "run_id": "20260427_044258_opencode_deepseek_deepseek-v4-flash_01_fp8_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 1875 }, "02_kda_cutlass": { "run_id": "20260427_060330_opencode_deepseek_deepseek-v4-flash_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": 0.009, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260427_082406_opencode_deepseek_deepseek-v4-flash_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.1673, "elapsed_seconds": 1676 }, "04_kahan_softmax": { "run_id": "20260427_094004_opencode_deepseek_deepseek-v4-flash_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.1378, "elapsed_seconds": 943 }, "06_sonic_moe_swiglu": { "run_id": "20260427_113525_opencode_deepseek_deepseek-v4-flash_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.0832, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260427_140540_opencode_deepseek_deepseek-v4-flash_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1344, "elapsed_seconds": 1065 }, "09_fmha_preattn_mrope": { "run_id": "20260507_030639_opencode_deepseek_deepseek-v4-flash_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.5769, "elapsed_seconds": 2160 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_060857_opencode_deepseek_deepseek-v4-flash_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.196, "elapsed_seconds": 2700 } }, "pass_count": 7, "total_runs": 9 }, { "label": "opencode/deepseek/deepseek-v4-pro", "harness": "opencode", "model": "deepseek/deepseek-v4-pro", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260427_051426_opencode_deepseek_deepseek-v4-pro_01_fp8_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "02_kda_cutlass": { "run_id": "20260427_065343_opencode_deepseek_deepseek-v4-pro_02_kda_cutlass", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260427_085225_opencode_deepseek_deepseek-v4-pro_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.0268, "elapsed_seconds": 1933 }, "04_kahan_softmax": { "run_id": "20260427_095552_opencode_deepseek_deepseek-v4-pro_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.1011, "elapsed_seconds": 1847 }, "05_topk_bitonic": { "run_id": "20260427_105224_opencode_deepseek_deepseek-v4-pro_05_topk_bitonic", "correct": true, "has_solution": true, "peak_fraction": 0.0111, "elapsed_seconds": 1674 }, "06_sonic_moe_swiglu": { "run_id": "20260427_122352_opencode_deepseek_deepseek-v4-pro_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.1077, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260427_142403_opencode_deepseek_deepseek-v4-pro_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1251, "elapsed_seconds": 2673 }, "09_fmha_preattn_mrope": { "run_id": "20260507_022718_opencode_deepseek_deepseek-v4-pro_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.6672, "elapsed_seconds": 2349 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_052351_opencode_deepseek_deepseek-v4-pro_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.2319, "elapsed_seconds": 2700 } }, "pass_count": 7, "total_runs": 9 }, { "label": "opencode/openrouter-pinned/qwen/qwen3.6-plus", "harness": "opencode", "model": "openrouter-pinned/qwen/qwen3.6-plus", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260429_061331_opencode_openrouter-pinned_qwen_qwen3.6-plus_01_fp8_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.431, "elapsed_seconds": 2700 }, "02_kda_cutlass": { "run_id": "20260429_065845_opencode_openrouter-pinned_qwen_qwen3.6-plus_02_kda_cutlass", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 717 }, "03_paged_attention": { "run_id": "20260429_071042_opencode_openrouter-pinned_qwen_qwen3.6-plus_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.0218, "elapsed_seconds": 2700 }, "04_kahan_softmax": { "run_id": "20260429_075600_opencode_openrouter-pinned_qwen_qwen3.6-plus_04_kahan_softmax", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 283 }, "05_topk_bitonic": { "run_id": "20260429_080043_opencode_openrouter-pinned_qwen_qwen3.6-plus_05_topk_bitonic", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 392 }, "06_sonic_moe_swiglu": { "run_id": "20260429_080716_opencode_openrouter-pinned_qwen_qwen3.6-plus_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.0402, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260429_085704_opencode_openrouter-pinned_qwen_qwen3.6-plus_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1253, "elapsed_seconds": 1440 }, "09_fmha_preattn_mrope": { "run_id": "20260507_122230_opencode_openrouter-pinned_qwen_qwen3.6-plus_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.5692, "elapsed_seconds": 2700 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_150233_opencode_openrouter-pinned_qwen_qwen3.6-plus_10_patch_embed_conv3d_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 1657 } }, "pass_count": 5, "total_runs": 9 }, { "label": "opencode/zai/glm-5.1", "harness": "opencode", "model": "zai/glm-5.1", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260428_143210_opencode_zai_glm-5.1_01_fp8_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "02_kda_cutlass": { "run_id": "20260428_151712_opencode_zai_glm-5.1_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": 0.0051, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260428_160715_opencode_zai_glm-5.1_03_paged_attention", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 918 }, "04_kahan_softmax": { "run_id": "20260428_162234_opencode_zai_glm-5.1_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.1251, "elapsed_seconds": 1964 }, "05_topk_bitonic": { "run_id": "20260428_165523_opencode_zai_glm-5.1_05_topk_bitonic", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 754 }, "06_sonic_moe_swiglu": { "run_id": "20260428_170757_opencode_zai_glm-5.1_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.2379, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260428_175639_opencode_zai_glm-5.1_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.18, "elapsed_seconds": 2700 }, "09_fmha_preattn_mrope": { "run_id": "20260507_014206_opencode_zai_glm-5.1_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.589, "elapsed_seconds": 2700 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_043838_opencode_zai_glm-5.1_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1817, "elapsed_seconds": 2700 } }, "pass_count": 6, "total_runs": 9 }, { "label": "opencode/zai/glm-5.1 [2026-05-08]", "harness": "opencode", "model": "zai/glm-5.1", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260508_121020_opencode_zai_glm-5.1_01_fp8_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "02_kda_cutlass": { "run_id": "20260508_125527_opencode_zai_glm-5.1_02_kda_cutlass", "correct": true, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260508_141032_opencode_zai_glm-5.1_03_paged_attention", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 638 }, "04_kahan_softmax": { "run_id": "20260508_142110_opencode_zai_glm-5.1_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.0561, "elapsed_seconds": 1908 }, "05_topk_bitonic": { "run_id": "20260508_145305_opencode_zai_glm-5.1_05_topk_bitonic", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 646 }, "06_sonic_moe_swiglu": { "run_id": "20260508_150351_opencode_zai_glm-5.1_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.2154, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260508_154851_opencode_zai_glm-5.1_07_w4a16_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 875 }, "09_fmha_preattn_mrope": { "run_id": "20260508_160326_opencode_zai_glm-5.1_09_fmha_preattn_mrope", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 794 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260508_161641_opencode_zai_glm-5.1_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1742, "elapsed_seconds": 2700 } }, "pass_count": 4, "total_runs": 9 }, { "label": "droid/zai/glm-5.1 [2026-05-08]", "harness": "droid", "model": "custom:GLM-5.1-[Z.AI-Coding-Plan]-0", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260508_150740_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_01_fp8_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.414, "elapsed_seconds": 2700 }, "02_kda_cutlass": { "run_id": "20260508_155315_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_02_kda_cutlass", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260508_163816_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.2523, "elapsed_seconds": 2700 }, "04_kahan_softmax": { "run_id": "20260508_172333_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.2339, "elapsed_seconds": 1747 }, "05_topk_bitonic": { "run_id": "20260508_175246_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_05_topk_bitonic", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 2700 }, "06_sonic_moe_swiglu": { "run_id": "20260508_183746_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.149, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260508_192626_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.0863, "elapsed_seconds": 2700 }, "09_fmha_preattn_mrope": { "run_id": "20260508_201159_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_09_fmha_preattn_mrope", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 2701 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260508_205700_droid_custom_GLM-5.1-_Z.AI-Coding-Plan_-0_10_patch_embed_conv3d_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 2700 } }, "pass_count": 5, "total_runs": 9 }, { "label": "opencode/openrouter-pinned/minimax/minimax-m2.7", "harness": "opencode", "model": "openrouter-pinned/minimax/minimax-m2.7", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260427_055936_opencode_openrouter-pinned_minimax_minimax-m2.7_01_fp8_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 160 }, "02_kda_cutlass": { "run_id": "20260427_073847_opencode_openrouter-pinned_minimax_minimax-m2.7_02_kda_cutlass", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260427_092504_opencode_openrouter-pinned_minimax_minimax-m2.7_03_paged_attention", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 506 }, "04_kahan_softmax": { "run_id": "20260427_102645_opencode_openrouter-pinned_minimax_minimax-m2.7_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.0339, "elapsed_seconds": 995 }, "05_topk_bitonic": { "run_id": "20260427_112034_opencode_openrouter-pinned_minimax_minimax-m2.7_05_topk_bitonic", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 232 }, "06_sonic_moe_swiglu": { "run_id": "20260427_131223_opencode_openrouter-pinned_minimax_minimax-m2.7_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.076, "elapsed_seconds": 2700 }, "07_w4a16_gemm": { "run_id": "20260427_150902_opencode_openrouter-pinned_minimax_minimax-m2.7_07_w4a16_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.0305, "elapsed_seconds": 1260 }, "09_fmha_preattn_mrope": { "run_id": "20260507_105254_opencode_openrouter-pinned_minimax_minimax-m2.7_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.1134, "elapsed_seconds": 2634 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_142049_opencode_openrouter-pinned_minimax_minimax-m2.7_10_patch_embed_conv3d_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2166 } }, "pass_count": 4, "total_runs": 9 }, { "label": "opencode/openrouter-pinned/qwen/qwen3.6-27b", "harness": "opencode", "model": "openrouter-pinned/qwen/qwen3.6-27b", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260427_175551_opencode_openrouter-pinned_qwen_qwen3.6-27b_01_fp8_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 417 }, "02_kda_cutlass": { "run_id": "20260427_180248_opencode_openrouter-pinned_qwen_qwen3.6-27b_02_kda_cutlass", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "03_paged_attention": { "run_id": "20260427_184750_opencode_openrouter-pinned_qwen_qwen3.6-27b_03_paged_attention", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "04_kahan_softmax": { "run_id": "20260427_193251_opencode_openrouter-pinned_qwen_qwen3.6-27b_04_kahan_softmax", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 698 }, "05_topk_bitonic": { "run_id": "20260427_194429_opencode_openrouter-pinned_qwen_qwen3.6-27b_05_topk_bitonic", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700 }, "06_sonic_moe_swiglu": { "run_id": "20260427_202932_opencode_openrouter-pinned_qwen_qwen3.6-27b_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.0822, "elapsed_seconds": 1106 }, "07_w4a16_gemm": { "run_id": "20260427_205128_opencode_openrouter-pinned_qwen_qwen3.6-27b_07_w4a16_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 20 }, "09_fmha_preattn_mrope": { "run_id": "20260507_130754_opencode_openrouter-pinned_qwen_qwen3.6-27b_09_fmha_preattn_mrope", "correct": true, "has_solution": true, "peak_fraction": 0.4356, "elapsed_seconds": 1610 }, "10_patch_embed_conv3d_gemm": { "run_id": "20260507_153012_opencode_openrouter-pinned_qwen_qwen3.6-27b_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1046, "elapsed_seconds": 2700 } }, "pass_count": 3, "total_runs": 9 }, { "label": "zai-claude/glm-5.1 [2026-05-13]", "harness": "zai-claude", "model": "glm-5.1", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260513_153015_zai-claude_glm-5.1_01_fp8_gemm", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700, "session_complete": false, "invalid_reason": "modified_problem_file" }, "02_kda_cutlass": { "run_id": "20260513_161534_zai-claude_glm-5.1_02_kda_cutlass", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 1011, "session_complete": true }, "03_paged_attention": { "run_id": "20260513_163226_zai-claude_glm-5.1_03_paged_attention", "correct": true, "has_solution": true, "peak_fraction": 0.222, "elapsed_seconds": 2700, "session_complete": false }, "04_kahan_softmax": { "run_id": "20260513_171742_zai-claude_glm-5.1_04_kahan_softmax", "correct": true, "has_solution": true, "peak_fraction": 0.3367, "elapsed_seconds": 1910, "session_complete": true }, "05_topk_bitonic": { "run_id": "20260513_174937_zai-claude_glm-5.1_05_topk_bitonic", "correct": true, "has_solution": true, "peak_fraction": 0.0029, "elapsed_seconds": 2700, "session_complete": false }, "06_sonic_moe_swiglu": { "run_id": "20260513_183442_zai-claude_glm-5.1_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "peak_fraction": 0.1111, "elapsed_seconds": 2700, "session_complete": false }, "07_w4a16_gemm": { "run_id": "20260513_192326_zai-claude_glm-5.1_07_w4a16_gemm", "correct": false, "has_solution": false, "peak_fraction": null, "elapsed_seconds": 2701, "session_complete": false }, "09_fmha_preattn_mrope": { "run_id": "20260513_200827_zai-claude_glm-5.1_09_fmha_preattn_mrope", "correct": false, "has_solution": true, "peak_fraction": null, "elapsed_seconds": 2700, "session_complete": false }, "10_patch_embed_conv3d_gemm": { "run_id": "20260513_205337_zai-claude_glm-5.1_10_patch_embed_conv3d_gemm", "correct": true, "has_solution": true, "peak_fraction": 0.1471, "elapsed_seconds": 2701, "session_complete": false } }, "pass_count": 5, "total_runs": 9 }, { "label": "claude/claude-opus-4-7 [2026-05-28 finish max]", "harness": "claude", "model": "claude-opus-4-7", "effort": "max", "results": { "01_fp8_gemm": { "run_id": "20260523_162348_claude_claude-opus-4-7_01_fp8_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.5241, "elapsed_seconds": 2700, "total_elapsed_seconds": 2816, "check_elapsed_seconds": 91, "benchmark_elapsed_seconds": 25, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 2.668148148148148, "usage": { "input_tokens": 298, "output_tokens": 7204, "cache_read_tokens": 27573027, "cache_creation_tokens": 1679350, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": false, "harness_exit_code": 124, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 43, "gpu_lock_active_seconds_total": 73 }, "02_kda_cutlass": { "run_id": "20260528_040335_claude_claude-opus-4-7_02_kda_cutlass", "correct": true, "has_solution": true, "failure_reason": "benchmark_timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 1184, "total_elapsed_seconds": 5646, "check_elapsed_seconds": 766, "benchmark_elapsed_seconds": 3696, "check_exit_code": 0, "benchmark_exit_code": 124, "output_tokens_per_second": 72.74493243243244, "usage": { "input_tokens": 65, "output_tokens": 86130, "cache_read_tokens": 5441546, "cache_creation_tokens": 209203, "reasoning_tokens": null, "total_cost_usd": 6.18333475 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 2652, "gpu_lock_active_seconds_total": 1810 }, "03_paged_attention": { "run_id": "20260523_170018_claude_claude-opus-4-7_03_paged_attention", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0256, "elapsed_seconds": 2036, "total_elapsed_seconds": 2053, "check_elapsed_seconds": 10, "benchmark_elapsed_seconds": 7, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 84.4499017681729, "usage": { "input_tokens": 68, "output_tokens": 171940, "cache_read_tokens": 5399702, "cache_creation_tokens": 268095, "reasoning_tokens": null, "total_cost_usd": 8.675728750000001 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 17 }, "04_kahan_softmax": { "run_id": "20260523_184945_claude_claude-opus-4-7_04_kahan_softmax", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 603, "total_elapsed_seconds": 606, "check_elapsed_seconds": 3, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": 82.8424543946932, "usage": { "input_tokens": 7307, "output_tokens": 49954, "cache_read_tokens": 817936, "cache_creation_tokens": 93565, "reasoning_tokens": null, "total_cost_usd": 2.28043925 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 3 }, "05_topk_bitonic": { "run_id": "20260528_040335_claude_claude-opus-4-7_05_topk_bitonic", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0452, "elapsed_seconds": 1266, "total_elapsed_seconds": 5654, "check_elapsed_seconds": 706, "benchmark_elapsed_seconds": 3682, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 65.92575039494471, "usage": { "input_tokens": 89, "output_tokens": 83462, "cache_read_tokens": 8546399, "cache_creation_tokens": 165943, "reasoning_tokens": null, "total_cost_usd": 7.398716249999998 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 4361, "gpu_lock_active_seconds_total": 27 }, "06_sonic_moe_swiglu": { "run_id": "20260523_173433_claude_claude-opus-4-7_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.2469, "elapsed_seconds": 1368, "total_elapsed_seconds": 1582, "check_elapsed_seconds": 122, "benchmark_elapsed_seconds": 92, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 83.35672514619883, "usage": { "input_tokens": 7611, "output_tokens": 114032, "cache_read_tokens": 3499008, "cache_creation_tokens": 171742, "reasoning_tokens": null, "total_cost_usd": 5.7131615 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 5, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 214 }, "07_w4a16_gemm": { "run_id": "20260523_180103_claude_claude-opus-4-7_07_w4a16_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0999, "elapsed_seconds": 2130, "total_elapsed_seconds": 2141, "check_elapsed_seconds": 4, "benchmark_elapsed_seconds": 7, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 85.65305164319248, "usage": { "input_tokens": 126, "output_tokens": 182441, "cache_read_tokens": 9364203, "cache_creation_tokens": 346902, "reasoning_tokens": null, "total_cost_usd": 11.413319999999999 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 3, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 12 } }, "pass_count": 6, "total_runs": 7 }, { "label": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "harness": "codex", "model": "gpt-5.5", "effort": "xhigh", "results": { "01_fp8_gemm": { "run_id": "20260528_040335_codex_gpt-5.5_01_fp8_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.5375, "elapsed_seconds": 301, "total_elapsed_seconds": 2045, "check_elapsed_seconds": 1630, "benchmark_elapsed_seconds": 114, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 63.33887043189369, "usage": { "input_tokens": 1441774, "output_tokens": 19065, "cache_read_tokens": 1323520, "cache_creation_tokens": null, "reasoning_tokens": 8018, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1700, "gpu_lock_active_seconds_total": 44 }, "02_kda_cutlass": { "run_id": "20260528_040335_codex_gpt-5.5_02_kda_cutlass", "correct": true, "has_solution": true, "failure_reason": "benchmark_timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 556, "total_elapsed_seconds": 3846, "check_elapsed_seconds": 1379, "benchmark_elapsed_seconds": 1911, "check_exit_code": 0, "benchmark_exit_code": 124, "output_tokens_per_second": 57.01978417266187, "usage": { "input_tokens": 1295950, "output_tokens": 31703, "cache_read_tokens": 1223552, "cache_creation_tokens": null, "reasoning_tokens": 21057, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1485, "gpu_lock_active_seconds_total": 1805 }, "03_paged_attention": { "run_id": "20260528_043740_codex_gpt-5.5_03_paged_attention", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.6637, "elapsed_seconds": 352, "total_elapsed_seconds": 3647, "check_elapsed_seconds": 3276, "benchmark_elapsed_seconds": 19, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 55.94602272727273, "usage": { "input_tokens": 1933052, "output_tokens": 19693, "cache_read_tokens": 1830144, "cache_creation_tokens": null, "reasoning_tokens": 7532, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 3276, "gpu_lock_active_seconds_total": 19 }, "04_kahan_softmax": { "run_id": "20260528_050741_codex_gpt-5.5_04_kahan_softmax", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.1126, "elapsed_seconds": 359, "total_elapsed_seconds": 1852, "check_elapsed_seconds": 1472, "benchmark_elapsed_seconds": 21, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 48.93314763231198, "usage": { "input_tokens": 526981, "output_tokens": 17567, "cache_read_tokens": 483968, "cache_creation_tokens": null, "reasoning_tokens": 12380, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1483, "gpu_lock_active_seconds_total": 10 }, "05_topk_bitonic": { "run_id": "20260528_053827_codex_gpt-5.5_05_topk_bitonic", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 713, "total_elapsed_seconds": 722, "check_elapsed_seconds": 9, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": 55.34081346423562, "usage": { "input_tokens": 1607660, "output_tokens": 39458, "cache_read_tokens": 1502976, "cache_creation_tokens": null, "reasoning_tokens": 27275, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 9 }, "06_sonic_moe_swiglu": { "run_id": "20260528_053833_codex_gpt-5.5_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.2536, "elapsed_seconds": 211, "total_elapsed_seconds": 419, "check_elapsed_seconds": 118, "benchmark_elapsed_seconds": 90, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 57.22274881516588, "usage": { "input_tokens": 1209063, "output_tokens": 12074, "cache_read_tokens": 1116288, "cache_creation_tokens": null, "reasoning_tokens": 3841, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 208 }, "07_w4a16_gemm": { "run_id": "20260523_164703_codex_gpt-5.5_07_w4a16_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0953, "elapsed_seconds": 333, "total_elapsed_seconds": 506, "check_elapsed_seconds": 166, "benchmark_elapsed_seconds": 7, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 60.37537537537538, "usage": { "input_tokens": 641875, "output_tokens": 20105, "cache_read_tokens": 585600, "cache_creation_tokens": null, "reasoning_tokens": 13191, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 137, "gpu_lock_active_seconds_total": 36 } }, "pass_count": 6, "total_runs": 7 }, { "label": "cursor/composer-2.5-fast [2026-05-28 finish]", "harness": "cursor", "model": "composer-2.5-fast", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260528_040335_cursor_composer-2.5-fast_01_fp8_gemm", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 82, "total_elapsed_seconds": 1892, "check_elapsed_seconds": 1810, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": 173.0731707317073, "usage": { "input_tokens": 121998, "output_tokens": 14192, "cache_read_tokens": 883168, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 1786, "gpu_lock_active_seconds_total": 24 }, "02_kda_cutlass": { "run_id": "20260528_040335_cursor_composer-2.5-fast_02_kda_cutlass", "correct": true, "has_solution": true, "failure_reason": "benchmark_timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 57, "total_elapsed_seconds": 1868, "check_elapsed_seconds": 10, "benchmark_elapsed_seconds": 1801, "check_exit_code": 0, "benchmark_exit_code": 124, "output_tokens_per_second": 176.56140350877192, "usage": { "input_tokens": 47899, "output_tokens": 10064, "cache_read_tokens": 324132, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 1811 }, "03_paged_attention": { "run_id": "20260528_043443_cursor_composer-2.5-fast_03_paged_attention", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.6249, "elapsed_seconds": 48, "total_elapsed_seconds": 3794, "check_elapsed_seconds": 119, "benchmark_elapsed_seconds": 3627, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 176.85416666666666, "usage": { "input_tokens": 43924, "output_tokens": 8489, "cache_read_tokens": 320809, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 3675, "gpu_lock_active_seconds_total": 71 }, "04_kahan_softmax": { "run_id": "20260528_043508_cursor_composer-2.5-fast_04_kahan_softmax", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.3154, "elapsed_seconds": 70, "total_elapsed_seconds": 3791, "check_elapsed_seconds": 3686, "benchmark_elapsed_seconds": 35, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 159.07142857142858, "usage": { "input_tokens": 100254, "output_tokens": 11135, "cache_read_tokens": 670144, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 3711, "gpu_lock_active_seconds_total": 10 }, "05_topk_bitonic": { "run_id": "20260528_053757_cursor_composer-2.5-fast_05_topk_bitonic", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0315, "elapsed_seconds": 89, "total_elapsed_seconds": 169, "check_elapsed_seconds": 23, "benchmark_elapsed_seconds": 57, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 179.61797752808988, "usage": { "input_tokens": 56867, "output_tokens": 15986, "cache_read_tokens": 572320, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 51, "gpu_lock_active_seconds_total": 29 }, "06_sonic_moe_swiglu": { "run_id": "20260528_053819_cursor_composer-2.5-fast_06_sonic_moe_swiglu", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 84, "total_elapsed_seconds": 141, "check_elapsed_seconds": 57, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": 153.28571428571428, "usage": { "input_tokens": 56422, "output_tokens": 12876, "cache_read_tokens": 726462, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 6, "gpu_lock_active_seconds_total": 51 }, "07_w4a16_gemm": { "run_id": "20260528_054040_cursor_composer-2.5-fast_07_w4a16_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.1189, "elapsed_seconds": 38, "total_elapsed_seconds": 74, "check_elapsed_seconds": 21, "benchmark_elapsed_seconds": 15, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 199.47368421052633, "usage": { "input_tokens": 46107, "output_tokens": 7580, "cache_read_tokens": 326112, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 36 } }, "pass_count": 5, "total_runs": 7 }, { "label": "gemini/gemini-3.5-flash [2026-05-28 finish]", "harness": "gemini", "model": "gemini-3.5-flash", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260528_040335_gemini_gemini-3.5-flash_01_fp8_gemm", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 839, "total_elapsed_seconds": 1940, "check_elapsed_seconds": 1101, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": 6.499404052443385, "usage": { "input_tokens": 460173, "output_tokens": 5453, "cache_read_tokens": 285566, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1096, "gpu_lock_active_seconds_total": 5 }, "02_kda_cutlass": { "run_id": "20260528_040335_gemini_gemini-3.5-flash_02_kda_cutlass", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 168, "total_elapsed_seconds": 1897, "check_elapsed_seconds": 1729, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": 24.476190476190474, "usage": { "input_tokens": 334668, "output_tokens": 4112, "cache_read_tokens": 163030, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1724, "gpu_lock_active_seconds_total": 5 }, "03_paged_attention": { "run_id": "20260523_162918_gemini_gemini-3.5-flash_03_paged_attention", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.2245, "elapsed_seconds": 2289, "total_elapsed_seconds": 2413, "check_elapsed_seconds": 23, "benchmark_elapsed_seconds": 101, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 2.241590214067278, "usage": { "input_tokens": 544634, "output_tokens": 5131, "cache_read_tokens": 398608, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 3, "gpu_lock_wait_seconds_total": 1638, "gpu_lock_active_seconds_total": 19 }, "04_kahan_softmax": { "run_id": "20260523_164033_gemini_gemini-3.5-flash_04_kahan_softmax", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0809, "elapsed_seconds": 1013, "total_elapsed_seconds": 1023, "check_elapsed_seconds": 3, "benchmark_elapsed_seconds": 7, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 3.510365251727542, "usage": { "input_tokens": 305813, "output_tokens": 3556, "cache_read_tokens": 179254, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 7, "gpu_lock_wait_seconds_total": 866, "gpu_lock_active_seconds_total": 11 }, "05_topk_bitonic": { "run_id": "20260523_165748_gemini_gemini-3.5-flash_05_topk_bitonic", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0101, "elapsed_seconds": 212, "total_elapsed_seconds": 245, "check_elapsed_seconds": 27, "benchmark_elapsed_seconds": 6, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 29.37264150943396, "usage": { "input_tokens": 297470, "output_tokens": 6227, "cache_read_tokens": 146998, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 3, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 33 }, "06_sonic_moe_swiglu": { "run_id": "20260523_170203_gemini_gemini-3.5-flash_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.1835, "elapsed_seconds": 210, "total_elapsed_seconds": 441, "check_elapsed_seconds": 125, "benchmark_elapsed_seconds": 106, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 21.52857142857143, "usage": { "input_tokens": 268328, "output_tokens": 4521, "cache_read_tokens": 134653, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 3, "gpu_lock_wait_seconds_total": 12, "gpu_lock_active_seconds_total": 219 }, "07_w4a16_gemm": { "run_id": "20260523_170933_gemini_gemini-3.5-flash_07_w4a16_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0775, "elapsed_seconds": 229, "total_elapsed_seconds": 259, "check_elapsed_seconds": 21, "benchmark_elapsed_seconds": 9, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 37.60698689956332, "usage": { "input_tokens": 413116, "output_tokens": 8612, "cache_read_tokens": 252831, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 5, "gpu_lock_wait_seconds_total": 44, "gpu_lock_active_seconds_total": 30 } }, "pass_count": 5, "total_runs": 7 }, { "label": "opencode/zai/glm-5.1 [2026-05-28 finish]", "harness": "opencode", "model": "zai/glm-5.1", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260528_040335_opencode_zai_glm-5.1_01_fp8_gemm", "correct": false, "has_solution": false, "failure_reason": "provider_early_stop", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 787, "total_elapsed_seconds": 787, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0.7522236340533672, "usage": { "input_tokens": 25511, "output_tokens": 592, "cache_read_tokens": 58624, "cache_creation_tokens": 0, "reasoning_tokens": 32089, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "02_kda_cutlass": { "run_id": "20260528_040335_opencode_zai_glm-5.1_02_kda_cutlass", "correct": false, "has_solution": false, "failure_reason": "provider_early_stop", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 720, "total_elapsed_seconds": 720, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0.7097222222222223, "usage": { "input_tokens": 27857, "output_tokens": 511, "cache_read_tokens": 38464, "cache_creation_tokens": 0, "reasoning_tokens": 32187, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "03_paged_attention": { "run_id": "20260528_041535_opencode_zai_glm-5.1_03_paged_attention", "correct": false, "has_solution": false, "failure_reason": "provider_early_stop", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 738, "total_elapsed_seconds": 738, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0.5853658536585366, "usage": { "input_tokens": 26441, "output_tokens": 432, "cache_read_tokens": 38464, "cache_creation_tokens": 0, "reasoning_tokens": 32043, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "04_kahan_softmax": { "run_id": "20260523_191015_opencode_zai_glm-5.1_04_kahan_softmax", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0393, "elapsed_seconds": 357, "total_elapsed_seconds": 366, "check_elapsed_seconds": 3, "benchmark_elapsed_seconds": 6, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 3.9719887955182074, "usage": { "input_tokens": 42383, "output_tokens": 1418, "cache_read_tokens": 108032, "cache_creation_tokens": 0, "reasoning_tokens": 16849, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 9 }, "05_topk_bitonic": { "run_id": "20260528_041642_opencode_zai_glm-5.1_05_topk_bitonic", "correct": false, "has_solution": false, "failure_reason": "provider_early_stop", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 691, "total_elapsed_seconds": 691, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0.6468885672937771, "usage": { "input_tokens": 25204, "output_tokens": 447, "cache_read_tokens": 42240, "cache_creation_tokens": 0, "reasoning_tokens": 32238, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "06_sonic_moe_swiglu": { "run_id": "20260528_042753_opencode_zai_glm-5.1_06_sonic_moe_swiglu", "correct": false, "has_solution": false, "failure_reason": "provider_early_stop", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 741, "total_elapsed_seconds": 741, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0.9149797570850202, "usage": { "input_tokens": 26781, "output_tokens": 678, "cache_read_tokens": 58688, "cache_creation_tokens": 0, "reasoning_tokens": 32194, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "07_w4a16_gemm": { "run_id": "20260528_042813_opencode_zai_glm-5.1_07_w4a16_gemm", "correct": false, "has_solution": false, "failure_reason": "provider_early_stop", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 705, "total_elapsed_seconds": 705, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0.851063829787234, "usage": { "input_tokens": 29001, "output_tokens": 600, "cache_read_tokens": 43904, "cache_creation_tokens": 0, "reasoning_tokens": 32450, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 } }, "pass_count": 1, "total_runs": 7 }, { "label": "zai-claude/glm-5.1 [2026-05-28 finish]", "harness": "zai-claude", "model": "glm-5.1", "effort": "", "results": { "01_fp8_gemm": { "run_id": "20260523_162348_zai-claude_glm-5.1_01_fp8_gemm", "correct": false, "has_solution": false, "failure_reason": "no_solution", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 1772, "total_elapsed_seconds": 1772, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 42.31489841986456, "usage": { "input_tokens": 94575, "output_tokens": 74982, "cache_read_tokens": 1663360, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": 3.182673 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "02_kda_cutlass": { "run_id": "20260523_192901_zai-claude_glm-5.1_02_kda_cutlass", "correct": false, "has_solution": false, "failure_reason": "timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 2700, "total_elapsed_seconds": 2700, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0, "usage": { "input_tokens": 0, "output_tokens": 0, "cache_read_tokens": 0, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": false, "harness_exit_code": 124, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "03_paged_attention": { "run_id": "20260523_165333_zai-claude_glm-5.1_03_paged_attention", "correct": false, "has_solution": false, "failure_reason": "no_solution", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 1703, "total_elapsed_seconds": 1703, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 52.2442748091603, "usage": { "input_tokens": 117821, "output_tokens": 88972, "cache_read_tokens": 1912768, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": 3.7741170000000004 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 7, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 3 }, "04_kahan_softmax": { "run_id": "20260523_170903_zai-claude_glm-5.1_04_kahan_softmax", "correct": false, "has_solution": false, "failure_reason": "no_solution", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 839, "total_elapsed_seconds": 839, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 61.786650774731825, "usage": { "input_tokens": 89690, "output_tokens": 51839, "cache_read_tokens": 170688, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": 1.9770259999999997 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "05_topk_bitonic": { "run_id": "20260523_192901_zai-claude_glm-5.1_05_topk_bitonic", "correct": false, "has_solution": false, "failure_reason": "timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 2700, "total_elapsed_seconds": 2700, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 0, "usage": { "input_tokens": 0, "output_tokens": 0, "cache_read_tokens": 0, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": false, "harness_exit_code": 124, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 0, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 0 }, "06_sonic_moe_swiglu": { "run_id": "20260523_172303_zai-claude_glm-5.1_06_sonic_moe_swiglu", "correct": false, "has_solution": false, "failure_reason": "no_solution", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 1399, "total_elapsed_seconds": 1399, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 52.269478198713365, "usage": { "input_tokens": 100918, "output_tokens": 73125, "cache_read_tokens": 1498880, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": 3.086523 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 12, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 69 }, "07_w4a16_gemm": { "run_id": "20260523_174633_zai-claude_glm-5.1_07_w4a16_gemm", "correct": false, "has_solution": false, "failure_reason": "no_solution", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 1386, "total_elapsed_seconds": 1386, "check_elapsed_seconds": null, "benchmark_elapsed_seconds": null, "check_exit_code": null, "benchmark_exit_code": null, "output_tokens_per_second": 50.896103896103895, "usage": { "input_tokens": 95490, "output_tokens": 70542, "cache_read_tokens": 1452672, "cache_creation_tokens": 0, "reasoning_tokens": null, "total_cost_usd": 2.9717839999999995 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 9, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 7 } }, "pass_count": 0, "total_runs": 7 }, { "label": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "harness": "claude", "model": "claude-opus-4-8", "effort": "max", "results": { "01_fp8_gemm": { "run_id": "20260528_125852_claude_claude-opus-4-8_01_fp8_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.5332, "elapsed_seconds": 1511, "total_elapsed_seconds": 3873, "check_elapsed_seconds": 551, "benchmark_elapsed_seconds": 1811, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 73.22303110522833, "usage": { "input_tokens": 58, "output_tokens": 110640, "cache_read_tokens": 2337540, "cache_creation_tokens": 328363, "reasoning_tokens": null, "total_cost_usd": 6.389639499999999 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 2295, "gpu_lock_active_seconds_total": 67 }, "02_kda_cutlass": { "run_id": "20260528_125852_claude_claude-opus-4-8_02_kda_cutlass", "correct": true, "has_solution": true, "failure_reason": "benchmark_timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 826, "total_elapsed_seconds": 3863, "check_elapsed_seconds": 1179, "benchmark_elapsed_seconds": 1858, "check_exit_code": 0, "benchmark_exit_code": 124, "output_tokens_per_second": 76.28329297820824, "usage": { "input_tokens": 68, "output_tokens": 63010, "cache_read_tokens": 3266761, "cache_creation_tokens": 202825, "reasoning_tokens": null, "total_cost_usd": 4.478109749999999 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1226, "gpu_lock_active_seconds_total": 1811 }, "03_paged_attention": { "run_id": "20260528_140323_claude_claude-opus-4-8_03_paged_attention", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.6517, "elapsed_seconds": 1886, "total_elapsed_seconds": 1905, "check_elapsed_seconds": 12, "benchmark_elapsed_seconds": 7, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 73.15853658536585, "usage": { "input_tokens": 88, "output_tokens": 137977, "cache_read_tokens": 3943680, "cache_creation_tokens": 277847, "reasoning_tokens": null, "total_cost_usd": 7.57057575 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 19 }, "04_kahan_softmax": { "run_id": "20260528_140338_claude_claude-opus-4-8_04_kahan_softmax", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.3517, "elapsed_seconds": 1060, "total_elapsed_seconds": 1069, "check_elapsed_seconds": 3, "benchmark_elapsed_seconds": 6, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 76.79056603773584, "usage": { "input_tokens": 54, "output_tokens": 81398, "cache_read_tokens": 2659039, "cache_creation_tokens": 138815, "reasoning_tokens": null, "total_cost_usd": 4.23375825 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 6, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 9 }, "05_topk_bitonic": { "run_id": "20260528_142138_claude_claude-opus-4-8_05_topk_bitonic", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0462, "elapsed_seconds": 2030, "total_elapsed_seconds": 2058, "check_elapsed_seconds": 22, "benchmark_elapsed_seconds": 6, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 71.08374384236453, "usage": { "input_tokens": 96, "output_tokens": 144300, "cache_read_tokens": 4227355, "cache_creation_tokens": 372082, "reasoning_tokens": null, "total_cost_usd": 8.314763 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 46, "gpu_lock_wait_seconds_total": 54, "gpu_lock_active_seconds_total": 28 }, "06_sonic_moe_swiglu": { "run_id": "20260528_143523_claude_claude-opus-4-8_06_sonic_moe_swiglu", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.2507, "elapsed_seconds": 835, "total_elapsed_seconds": 1048, "check_elapsed_seconds": 121, "benchmark_elapsed_seconds": 92, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 77.34251497005988, "usage": { "input_tokens": 42, "output_tokens": 64581, "cache_read_tokens": 2022292, "cache_creation_tokens": 110251, "reasoning_tokens": null, "total_cost_usd": 3.31652875 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 4, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 213 }, "07_w4a16_gemm": { "run_id": "20260528_145253_claude_claude-opus-4-8_07_w4a16_gemm", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.1127, "elapsed_seconds": 1473, "total_elapsed_seconds": 1484, "check_elapsed_seconds": 4, "benchmark_elapsed_seconds": 7, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": 72.36388323150034, "usage": { "input_tokens": 68, "output_tokens": 106592, "cache_read_tokens": 3357435, "cache_creation_tokens": 298177, "reasoning_tokens": null, "total_cost_usd": 6.542916000000001 }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 11 } }, "pass_count": 7, "total_runs": 7 }, { "label": "grok/grok-build [2026-05-28 opus48-grok max]", "harness": "grok", "model": "grok-build", "effort": "max", "results": { "01_fp8_gemm": { "run_id": "20260528_125852_grok_grok-build_01_fp8_gemm", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 473, "total_elapsed_seconds": 1995, "check_elapsed_seconds": 1522, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 1519, "gpu_lock_active_seconds_total": 3 }, "02_kda_cutlass": { "run_id": "20260528_125852_grok_grok-build_02_kda_cutlass", "correct": true, "has_solution": true, "failure_reason": "benchmark_timeout", "retryable_infra_failure": true, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 183, "total_elapsed_seconds": 1992, "check_elapsed_seconds": 8, "benchmark_elapsed_seconds": 1801, "check_exit_code": 0, "benchmark_exit_code": 124, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 1809 }, "03_paged_attention": { "run_id": "20260528_133207_grok_grok-build_03_paged_attention", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 473, "total_elapsed_seconds": 1885, "check_elapsed_seconds": 1412, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 1410, "gpu_lock_active_seconds_total": 2 }, "04_kahan_softmax": { "run_id": "20260528_133207_grok_grok-build_04_kahan_softmax", "correct": true, "has_solution": true, "failure_reason": "pass", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": 0.0373, "elapsed_seconds": 138, "total_elapsed_seconds": 1894, "check_elapsed_seconds": 1745, "benchmark_elapsed_seconds": 11, "check_exit_code": 0, "benchmark_exit_code": 0, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 2, "gpu_lock_wait_seconds_total": 1742, "gpu_lock_active_seconds_total": 14 }, "05_topk_bitonic": { "run_id": "20260528_140338_grok_grok-build_05_topk_bitonic", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 283, "total_elapsed_seconds": 296, "check_elapsed_seconds": 13, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 13 }, "06_sonic_moe_swiglu": { "run_id": "20260528_140353_grok_grok-build_06_sonic_moe_swiglu", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 415, "total_elapsed_seconds": 466, "check_elapsed_seconds": 51, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 51 }, "07_w4a16_gemm": { "run_id": "20260528_140838_grok_grok-build_07_w4a16_gemm", "correct": false, "has_solution": true, "failure_reason": "check_failed", "retryable_infra_failure": false, "minimum_useful_output_tokens": 5000, "peak_fraction": null, "elapsed_seconds": 256, "total_elapsed_seconds": 258, "check_elapsed_seconds": 2, "benchmark_elapsed_seconds": null, "check_exit_code": 1, "benchmark_exit_code": null, "output_tokens_per_second": null, "usage": { "input_tokens": null, "output_tokens": null, "cache_read_tokens": null, "cache_creation_tokens": null, "reasoning_tokens": null, "total_cost_usd": null }, "session_complete": true, "harness_exit_code": 0, "agent_cuda_disabled": true, "gpu_queue_mode": "agent_phase_cuda_guard_harness_gpu_lock", "gpu_lock_calls": 1, "gpu_lock_wait_seconds_total": 0, "gpu_lock_active_seconds_total": 2 } }, "pass_count": 2, "total_runs": 7 } ], "per_problem": { "01_fp8_gemm": { "n_attempted": 22, "n_passed": 9, "best_peak_fraction": 0.5375, "best_model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "ranked_passes": [ { "model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "peak_fraction": 0.5375 }, { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.5339 }, { "model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "peak_fraction": 0.5332 }, { "model": "claude/claude-opus-4-7 [2026-05-28 finish max]", "peak_fraction": 0.5241 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.4343 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-plus", "peak_fraction": 0.431 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "peak_fraction": 0.4288 }, { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.4234 }, { "model": "droid/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.414 } ] }, "02_kda_cutlass": { "n_attempted": 22, "n_passed": 12, "best_peak_fraction": 0.032, "best_model": "codex/gpt-5.5 [xhigh]", "ranked_passes": [ { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.032 }, { "model": "kimi/kimi-k2.6", "peak_fraction": 0.0218 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "peak_fraction": 0.0106 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.009 }, { "model": "opencode/zai/glm-5.1", "peak_fraction": 0.0051 } ] }, "03_paged_attention": { "n_attempted": 22, "n_passed": 13, "best_peak_fraction": 0.6637, "best_model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "ranked_passes": [ { "model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "peak_fraction": 0.6637 }, { "model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "peak_fraction": 0.6517 }, { "model": "cursor/composer-2.5-fast [2026-05-28 finish]", "peak_fraction": 0.6249 }, { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.6016 }, { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.4975 }, { "model": "kimi/kimi-k2.6", "peak_fraction": 0.4316 }, { "model": "droid/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.2523 }, { "model": "gemini/gemini-3.5-flash [2026-05-28 finish]", "peak_fraction": 0.2245 }, { "model": "zai-claude/glm-5.1 [2026-05-13]", "peak_fraction": 0.222 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.1673 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.0268 }, { "model": "claude/claude-opus-4-7 [2026-05-28 finish max]", "peak_fraction": 0.0256 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-plus", "peak_fraction": 0.0218 } ] }, "04_kahan_softmax": { "n_attempted": 22, "n_passed": 18, "best_peak_fraction": 0.3634, "best_model": "codex/gpt-5.5 [xhigh]", "ranked_passes": [ { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.3634 }, { "model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "peak_fraction": 0.3517 }, { "model": "zai-claude/glm-5.1 [2026-05-13]", "peak_fraction": 0.3367 }, { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.3166 }, { "model": "cursor/composer-2.5-fast [2026-05-28 finish]", "peak_fraction": 0.3154 }, { "model": "droid/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.2339 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.1378 }, { "model": "opencode/zai/glm-5.1", "peak_fraction": 0.1251 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.121 }, { "model": "kimi/kimi-k2.6", "peak_fraction": 0.1178 }, { "model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "peak_fraction": 0.1126 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.1011 }, { "model": "gemini/gemini-3.5-flash [2026-05-28 finish]", "peak_fraction": 0.0809 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "peak_fraction": 0.0775 }, { "model": "opencode/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.0561 }, { "model": "opencode/zai/glm-5.1 [2026-05-28 finish]", "peak_fraction": 0.0393 }, { "model": "grok/grok-build [2026-05-28 opus48-grok max]", "peak_fraction": 0.0373 }, { "model": "opencode/openrouter-pinned/minimax/minimax-m2.7", "peak_fraction": 0.0339 } ] }, "05_topk_bitonic": { "n_attempted": 22, "n_passed": 10, "best_peak_fraction": 0.0462, "best_model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "ranked_passes": [ { "model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "peak_fraction": 0.0462 }, { "model": "claude/claude-opus-4-7 [2026-05-28 finish max]", "peak_fraction": 0.0452 }, { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.0423 }, { "model": "cursor/composer-2.5-fast [2026-05-28 finish]", "peak_fraction": 0.0315 }, { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.0197 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.0168 }, { "model": "kimi/kimi-k2.6", "peak_fraction": 0.0136 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.0111 }, { "model": "gemini/gemini-3.5-flash [2026-05-28 finish]", "peak_fraction": 0.0101 }, { "model": "zai-claude/glm-5.1 [2026-05-13]", "peak_fraction": 0.0029 } ] }, "06_sonic_moe_swiglu": { "n_attempted": 22, "n_passed": 17, "best_peak_fraction": 0.2536, "best_model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "ranked_passes": [ { "model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "peak_fraction": 0.2536 }, { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.2514 }, { "model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "peak_fraction": 0.2507 }, { "model": "claude/claude-opus-4-7 [2026-05-28 finish max]", "peak_fraction": 0.2469 }, { "model": "opencode/zai/glm-5.1", "peak_fraction": 0.2379 }, { "model": "opencode/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.2154 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.211 }, { "model": "gemini/gemini-3.5-flash [2026-05-28 finish]", "peak_fraction": 0.1835 }, { "model": "kimi/kimi-k2.6", "peak_fraction": 0.1612 }, { "model": "droid/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.149 }, { "model": "zai-claude/glm-5.1 [2026-05-13]", "peak_fraction": 0.1111 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.1077 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.0832 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-27b", "peak_fraction": 0.0822 }, { "model": "opencode/openrouter-pinned/minimax/minimax-m2.7", "peak_fraction": 0.076 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-plus", "peak_fraction": 0.0402 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "peak_fraction": 0.0037 } ] }, "07_w4a16_gemm": { "n_attempted": 22, "n_passed": 16, "best_peak_fraction": 0.2203, "best_model": "kimi/kimi-k2.6", "ranked_passes": [ { "model": "kimi/kimi-k2.6", "peak_fraction": 0.2203 }, { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.1837 }, { "model": "opencode/zai/glm-5.1", "peak_fraction": 0.18 }, { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.1588 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.1366 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.1344 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-plus", "peak_fraction": 0.1253 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.1251 }, { "model": "cursor/composer-2.5-fast [2026-05-28 finish]", "peak_fraction": 0.1189 }, { "model": "claude/claude-opus-4-8 [2026-05-28 opus48-grok max]", "peak_fraction": 0.1127 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "peak_fraction": 0.1097 }, { "model": "claude/claude-opus-4-7 [2026-05-28 finish max]", "peak_fraction": 0.0999 }, { "model": "codex/gpt-5.5 [2026-05-28 finish xhigh]", "peak_fraction": 0.0953 }, { "model": "droid/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.0863 }, { "model": "gemini/gemini-3.5-flash [2026-05-28 finish]", "peak_fraction": 0.0775 }, { "model": "opencode/openrouter-pinned/minimax/minimax-m2.7", "peak_fraction": 0.0305 } ] }, "09_fmha_preattn_mrope": { "n_attempted": 14, "n_passed": 10, "best_peak_fraction": 0.7221, "best_model": "claude/claude-opus-4-7 [max]", "ranked_passes": [ { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.7221 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.6672 }, { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.6659 }, { "model": "opencode/zai/glm-5.1", "peak_fraction": 0.589 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.5769 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-plus", "peak_fraction": 0.5692 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.5164 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-27b", "peak_fraction": 0.4356 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-max-preview", "peak_fraction": 0.3953 }, { "model": "opencode/openrouter-pinned/minimax/minimax-m2.7", "peak_fraction": 0.1134 } ] }, "10_patch_embed_conv3d_gemm": { "n_attempted": 14, "n_passed": 9, "best_peak_fraction": 0.2484, "best_model": "codex/gpt-5.5 [xhigh]", "ranked_passes": [ { "model": "codex/gpt-5.5 [xhigh]", "peak_fraction": 0.2484 }, { "model": "opencode/deepseek/deepseek-v4-pro", "peak_fraction": 0.2319 }, { "model": "claude/claude-opus-4-7 [max]", "peak_fraction": 0.2119 }, { "model": "opencode/deepseek/deepseek-v4-flash", "peak_fraction": 0.196 }, { "model": "opencode/openrouter-pinned/xiaomi/mimo-v2.5-pro", "peak_fraction": 0.19 }, { "model": "opencode/zai/glm-5.1", "peak_fraction": 0.1817 }, { "model": "opencode/zai/glm-5.1 [2026-05-08]", "peak_fraction": 0.1742 }, { "model": "zai-claude/glm-5.1 [2026-05-13]", "peak_fraction": 0.1471 }, { "model": "opencode/openrouter-pinned/qwen/qwen3.6-27b", "peak_fraction": 0.1046 } ] } }, "generated_from_summary": { "input": "benchmarks/hard/results/fresh/kbh_opus48_grok_full_20260528_125852.summary.raw.json", "tag": "2026-05-28 opus48-grok", "imported_rows": 14, "generated_at": "2026-05-28T21:20:25.323Z" } }