{ "model_id": "gpt-5-nano", "provider_id": "openai", "quality": { "accuracy": 83.9, "correct": 203, "total": 242, "model_id": "gpt-5-nano", "provider_id": "openai", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T14:33:32.863085+00:00", "model_id": "gpt-5-nano", "model_name": "gpt-5-nano", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 1613.3413410186768, "summary": { "success": 50, "total": 50, "wall_s": 1613.341, "avg_latency_s": 117.251, "throughput_rps": 0.031, "completion_toks_s": 341.7, "total_toks_s": 465.8, "out_in_ratio": 2.752, "tokens_per_fact": 880.6 }, "tests": [ { "test_index": 1, "latency_s": 99.33797311782837, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 11112, "error": "" }, { "test_index": 2, "latency_s": 87.2441189289093, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 10201, "error": "" }, { "test_index": 3, "latency_s": 93.2771680355072, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 12368, "error": "" }, { "test_index": 4, "latency_s": 313.39211225509644, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 8474, "error": "" }, { "test_index": 5, "latency_s": 89.09124684333801, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 9706, "error": "" }, { "test_index": 6, "latency_s": 68.44614601135254, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 7453, "error": "" }, { "test_index": 7, "latency_s": 103.361576795578, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 11705, "error": "" }, { "test_index": 8, "latency_s": 107.56028413772583, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 13748, "error": "" }, { "test_index": 9, "latency_s": 103.72367000579834, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 10519, "error": "" }, { "test_index": 10, "latency_s": 104.28453993797302, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 14021, "error": "" }, { "test_index": 11, "latency_s": 93.53310894966125, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 10825, "error": "" }, { "test_index": 12, "latency_s": 116.77906918525696, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 12757, "error": "" }, { "test_index": 13, "latency_s": 91.56516885757446, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 10152, "error": "" }, { "test_index": 14, "latency_s": 183.69347524642944, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 6665, "error": "" }, { "test_index": 15, "latency_s": 80.35701179504395, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 9748, "error": "" }, { "test_index": 16, "latency_s": 73.73928999900818, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 8894, "error": "" }, { "test_index": 17, "latency_s": 109.82009792327881, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 12770, "error": "" }, { "test_index": 18, "latency_s": 113.67025399208069, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 11909, "error": "" }, { "test_index": 19, "latency_s": 113.97578501701355, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 11399, "error": "" }, { "test_index": 20, "latency_s": 110.4770860671997, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 13096, "error": "" }, { "test_index": 21, "latency_s": 206.50294399261475, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 11830, "error": "" }, { "test_index": 22, "latency_s": 88.09804916381836, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 9750, "error": "" }, { "test_index": 23, "latency_s": 87.91020798683167, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 9002, "error": "" }, { "test_index": 24, "latency_s": 117.60908794403076, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 13890, "error": "" }, { "test_index": 25, "latency_s": 330.222158908844, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 11801, "error": "" }, { "test_index": 26, "latency_s": 84.58954787254333, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 11749, "error": "" }, { "test_index": 27, "latency_s": 68.03389024734497, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 9571, "error": "" }, { "test_index": 28, "latency_s": 86.93390107154846, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 11529, "error": "" }, { "test_index": 29, "latency_s": 208.92341589927673, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 11910, "error": "" }, { "test_index": 30, "latency_s": 114.2025990486145, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 12145, "error": "" }, { "test_index": 31, "latency_s": 70.23858404159546, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 8162, "error": "" }, { "test_index": 32, "latency_s": 84.73999691009521, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 9411, "error": "" }, { "test_index": 33, "latency_s": 70.90422415733337, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 9640, "error": "" }, { "test_index": 34, "latency_s": 206.5544707775116, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 10783, "error": "" }, { "test_index": 35, "latency_s": 94.88146305084229, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 11422, "error": "" }, { "test_index": 36, "latency_s": 91.0378360748291, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 11709, "error": "" }, { "test_index": 37, "latency_s": 86.14653372764587, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 10280, "error": "" }, { "test_index": 38, "latency_s": 62.92929983139038, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 8034, "error": "" }, { "test_index": 39, "latency_s": 100.12940502166748, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 12025, "error": "" }, { "test_index": 40, "latency_s": 82.66286516189575, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 10541, "error": "" }, { "test_index": 41, "latency_s": 88.37270927429199, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 12271, "error": "" }, { "test_index": 42, "latency_s": 84.64227414131165, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 11403, "error": "" }, { "test_index": 43, "latency_s": 94.28329706192017, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 12675, "error": "" }, { "test_index": 44, "latency_s": 119.71824479103088, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 16032, "error": "" }, { "test_index": 45, "latency_s": 58.16398906707764, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 7863, "error": "" }, { "test_index": 46, "latency_s": 64.69804120063782, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 8104, "error": "" }, { "test_index": 47, "latency_s": 114.84225583076477, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 13657, "error": "" }, { "test_index": 48, "latency_s": 230.87452793121338, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 13089, "error": "" }, { "test_index": 49, "latency_s": 320.9459309577942, "num_facts": 20, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4086, "completion_tokens": 13269, "error": "" }, { "test_index": 50, "latency_s": 85.45319867134094, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 10178, "error": "" } ] }, "reflect": { "accuracy": 91.7, "correct": 222, "total": 242, "avg_latency_s": 22.463, "model_id": "gpt-5-nano", "provider_id": "openai", "sample_id": "conv-43" } }