{ "model_id": "openai-gpt-oss-120b", "provider_id": "groq", "quality": { "accuracy": 84.7, "correct": 205, "total": 242, "model_id": "openai-gpt-oss-120b", "provider_id": "groq", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:48:18.728844+00:00", "model_id": "openai-gpt-oss-120b", "model_name": "openai/gpt-oss-120b", "provider_id": "groq", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 105.51983213424683, "summary": { "success": 50, "total": 50, "wall_s": 105.52, "avg_latency_s": 8.061, "throughput_rps": 0.474, "completion_toks_s": 1603.7, "total_toks_s": 3532.4, "out_in_ratio": 0.831, "tokens_per_fact": 204.1 }, "tests": [ { "test_index": 1, "latency_s": 10.703886985778809, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4012, "completion_tokens": 4081, "error": "" }, { "test_index": 2, "latency_s": 7.256205081939697, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 3278, "error": "" }, { "test_index": 3, "latency_s": 8.45443081855774, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3977, "completion_tokens": 3768, "error": "" }, { "test_index": 4, "latency_s": 7.600319862365723, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4187, "completion_tokens": 3277, "error": "" }, { "test_index": 5, "latency_s": 6.063141822814941, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4055, "completion_tokens": 2743, "error": "" }, { "test_index": 6, "latency_s": 7.788000106811523, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4102, "completion_tokens": 3579, "error": "" }, { "test_index": 7, "latency_s": 6.487876892089844, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4012, "completion_tokens": 2857, "error": "" }, { "test_index": 8, "latency_s": 8.38117003440857, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4178, "completion_tokens": 3495, "error": "" }, { "test_index": 9, "latency_s": 6.575287103652954, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3893, "completion_tokens": 2360, "error": "" }, { "test_index": 10, "latency_s": 10.159324169158936, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3869, "completion_tokens": 4688, "error": "" }, { "test_index": 11, "latency_s": 21.404969215393066, "num_facts": 29, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4015, "completion_tokens": 7921, "error": "" }, { "test_index": 12, "latency_s": 5.797878980636597, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4144, "completion_tokens": 2409, "error": "" }, { "test_index": 13, "latency_s": 7.769459009170532, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4036, "completion_tokens": 3216, "error": "" }, { "test_index": 14, "latency_s": 4.435054063796997, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 1737, "error": "" }, { "test_index": 15, "latency_s": 8.718156814575195, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4189, "completion_tokens": 4032, "error": "" }, { "test_index": 16, "latency_s": 5.634125709533691, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 2381, "error": "" }, { "test_index": 17, "latency_s": 5.328796863555908, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4144, "completion_tokens": 2430, "error": "" }, { "test_index": 18, "latency_s": 6.2759108543396, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 2411, "error": "" }, { "test_index": 19, "latency_s": 6.2071311473846436, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 2652, "error": "" }, { "test_index": 20, "latency_s": 9.330627679824829, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4150, "completion_tokens": 4120, "error": "" }, { "test_index": 21, "latency_s": 5.731391906738281, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4144, "completion_tokens": 2532, "error": "" }, { "test_index": 22, "latency_s": 4.358479976654053, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 1900, "error": "" }, { "test_index": 23, "latency_s": 9.945259094238281, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 4004, "error": "" }, { "test_index": 24, "latency_s": 7.10275411605835, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4102, "completion_tokens": 3229, "error": "" }, { "test_index": 25, "latency_s": 8.650690078735352, "num_facts": 28, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4150, "completion_tokens": 3616, "error": "" }, { "test_index": 26, "latency_s": 6.407299995422363, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 2411, "error": "" }, { "test_index": 27, "latency_s": 8.790443658828735, "num_facts": 28, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3869, "completion_tokens": 4051, "error": "" }, { "test_index": 28, "latency_s": 10.052036046981812, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 4366, "error": "" }, { "test_index": 29, "latency_s": 10.9812331199646, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4187, "completion_tokens": 4524, "error": "" }, { "test_index": 30, "latency_s": 7.351511001586914, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3987, "completion_tokens": 3179, "error": "" }, { "test_index": 31, "latency_s": 5.525190830230713, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4055, "completion_tokens": 2306, "error": "" }, { "test_index": 32, "latency_s": 7.86234188079834, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 3415, "error": "" }, { "test_index": 33, "latency_s": 12.07485318183899, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4173, "completion_tokens": 4045, "error": "" }, { "test_index": 34, "latency_s": 9.071007013320923, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4015, "completion_tokens": 3979, "error": "" }, { "test_index": 35, "latency_s": 11.65303111076355, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4189, "completion_tokens": 4523, "error": "" }, { "test_index": 36, "latency_s": 4.627575159072876, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 1943, "error": "" }, { "test_index": 37, "latency_s": 6.658849000930786, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3987, "completion_tokens": 2934, "error": "" }, { "test_index": 38, "latency_s": 8.078442811965942, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3893, "completion_tokens": 3484, "error": "" }, { "test_index": 39, "latency_s": 7.639266014099121, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4036, "completion_tokens": 3289, "error": "" }, { "test_index": 40, "latency_s": 6.7630908489227295, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 2929, "error": "" }, { "test_index": 41, "latency_s": 6.594578266143799, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4036, "completion_tokens": 3004, "error": "" }, { "test_index": 42, "latency_s": 6.0806639194488525, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3869, "completion_tokens": 2821, "error": "" }, { "test_index": 43, "latency_s": 11.589098930358887, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4187, "completion_tokens": 5432, "error": "" }, { "test_index": 44, "latency_s": 6.776654958724976, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4178, "completion_tokens": 3149, "error": "" }, { "test_index": 45, "latency_s": 8.311676025390625, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3987, "completion_tokens": 3499, "error": "" }, { "test_index": 46, "latency_s": 6.197706699371338, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4055, "completion_tokens": 2850, "error": "" }, { "test_index": 47, "latency_s": 6.635499715805054, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4178, "completion_tokens": 2634, "error": "" }, { "test_index": 48, "latency_s": 13.59626579284668, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4173, "completion_tokens": 4575, "error": "" }, { "test_index": 49, "latency_s": 10.679135799407959, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4150, "completion_tokens": 4502, "error": "" }, { "test_index": 50, "latency_s": 6.9108099937438965, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3977, "completion_tokens": 2659, "error": "" } ] }, "reflect": { "accuracy": 94.2, "correct": 228, "total": 242, "avg_latency_s": 2.208, "model_id": "openai-gpt-oss-120b", "provider_id": "groq", "sample_id": "conv-43" } }