{ "model_id": "gpt-5.2", "provider_id": "openai", "quality": { "accuracy": 83.5, "correct": 202, "total": 242, "model_id": "gpt-5.2", "provider_id": "openai", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:20:42.605575+00:00", "model_id": "gpt-5.2", "model_name": "gpt-5.2", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 428.8963179588318, "summary": { "success": 50, "total": 50, "wall_s": 428.896, "avg_latency_s": 33.367, "throughput_rps": 0.117, "completion_toks_s": 239.4, "total_toks_s": 706.5, "out_in_ratio": 0.513, "tokens_per_fact": 99.6 }, "tests": [ { "test_index": 1, "latency_s": 36.91886901855469, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 2438, "error": "" }, { "test_index": 2, "latency_s": 28.307222843170166, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 1800, "error": "" }, { "test_index": 3, "latency_s": 27.597256898880005, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 1779, "error": "" }, { "test_index": 4, "latency_s": 39.366369009017944, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 2644, "error": "" }, { "test_index": 5, "latency_s": 24.557897090911865, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 1590, "error": "" }, { "test_index": 6, "latency_s": 28.856066942214966, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 1966, "error": "" }, { "test_index": 7, "latency_s": 34.93845009803772, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 2138, "error": "" }, { "test_index": 8, "latency_s": 25.3975727558136, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1699, "error": "" }, { "test_index": 9, "latency_s": 31.82369875907898, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 1808, "error": "" }, { "test_index": 10, "latency_s": 30.068869829177856, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 1839, "error": "" }, { "test_index": 11, "latency_s": 45.05664587020874, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 2524, "error": "" }, { "test_index": 12, "latency_s": 34.68178391456604, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 2168, "error": "" }, { "test_index": 13, "latency_s": 25.023245096206665, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 1462, "error": "" }, { "test_index": 14, "latency_s": 34.99050998687744, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 2123, "error": "" }, { "test_index": 15, "latency_s": 24.313291788101196, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 1608, "error": "" }, { "test_index": 16, "latency_s": 49.024142026901245, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 2782, "error": "" }, { "test_index": 17, "latency_s": 28.359426975250244, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 2050, "error": "" }, { "test_index": 18, "latency_s": 23.840433835983276, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 1900, "error": "" }, { "test_index": 19, "latency_s": 39.61686873435974, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 2648, "error": "" }, { "test_index": 20, "latency_s": 59.9250910282135, "num_facts": 28, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 3672, "error": "" }, { "test_index": 21, "latency_s": 33.947978019714355, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 2009, "error": "" }, { "test_index": 22, "latency_s": 58.79788398742676, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 2171, "error": "" }, { "test_index": 23, "latency_s": 30.551977157592773, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 1816, "error": "" }, { "test_index": 24, "latency_s": 27.88133692741394, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 1619, "error": "" }, { "test_index": 25, "latency_s": 42.00712704658508, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 2535, "error": "" }, { "test_index": 26, "latency_s": 26.68713092803955, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 1695, "error": "" }, { "test_index": 27, "latency_s": 32.607380867004395, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 1995, "error": "" }, { "test_index": 28, "latency_s": 41.372549057006836, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 2602, "error": "" }, { "test_index": 29, "latency_s": 36.57978296279907, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 2067, "error": "" }, { "test_index": 30, "latency_s": 43.132107973098755, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 2554, "error": "" }, { "test_index": 31, "latency_s": 21.19453525543213, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 1369, "error": "" }, { "test_index": 32, "latency_s": 30.737770557403564, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 1985, "error": "" }, { "test_index": 33, "latency_s": 24.47699213027954, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 1551, "error": "" }, { "test_index": 34, "latency_s": 40.586503982543945, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 2308, "error": "" }, { "test_index": 35, "latency_s": 18.107745885849, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 1121, "error": "" }, { "test_index": 36, "latency_s": 30.494719982147217, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 1727, "error": "" }, { "test_index": 37, "latency_s": 40.38123631477356, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 2415, "error": "" }, { "test_index": 38, "latency_s": 29.119977951049805, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 1885, "error": "" }, { "test_index": 39, "latency_s": 26.887585163116455, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 1525, "error": "" }, { "test_index": 40, "latency_s": 40.050414085388184, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 2705, "error": "" }, { "test_index": 41, "latency_s": 24.824501991271973, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 1476, "error": "" }, { "test_index": 42, "latency_s": 24.06221604347229, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 1759, "error": "" }, { "test_index": 43, "latency_s": 46.023432970047, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 2755, "error": "" }, { "test_index": 44, "latency_s": 26.953015089035034, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1499, "error": "" }, { "test_index": 45, "latency_s": 42.36101698875427, "num_facts": 28, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 2536, "error": "" }, { "test_index": 46, "latency_s": 30.64927101135254, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 1969, "error": "" }, { "test_index": 47, "latency_s": 23.258939266204834, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1580, "error": "" }, { "test_index": 48, "latency_s": 28.57912278175354, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 1942, "error": "" }, { "test_index": 49, "latency_s": 41.51776170730591, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 2656, "error": "" }, { "test_index": 50, "latency_s": 31.83618187904358, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 2219, "error": "" } ] }, "reflect": { "accuracy": 71.9, "correct": 174, "total": 242, "avg_latency_s": 9.659, "model_id": "gpt-5.2", "provider_id": "openai", "sample_id": "conv-43" } }