{ "model_id": "gpt-5-mini", "provider_id": "openai", "quality": { "accuracy": 89.7, "correct": 217, "total": 242, "model_id": "gpt-5-mini", "provider_id": "openai", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-23T10:38:34.653977+00:00", "model_id": "gpt-5-mini", "model_name": "gpt-5-mini", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 5, "wall_s": 728.2944321632385, "summary": { "success": 50, "total": 50, "wall_s": 728.294, "avg_latency_s": 68.482, "throughput_rps": 0.069, "completion_toks_s": 372.6, "total_toks_s": 647.7, "out_in_ratio": 1.355, "tokens_per_fact": 475.3 }, "tests": [ { "test_index": 1, "latency_s": 70.65611124038696, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 4929, "error": "" }, { "test_index": 2, "latency_s": 76.17569279670715, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 5443, "error": "" }, { "test_index": 3, "latency_s": 90.13853120803833, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 6350, "error": "" }, { "test_index": 4, "latency_s": 76.9803819656372, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 5553, "error": "" }, { "test_index": 5, "latency_s": 71.089919090271, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 4833, "error": "" }, { "test_index": 6, "latency_s": 60.02982997894287, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 4937, "error": "" }, { "test_index": 7, "latency_s": 96.19124603271484, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 6824, "error": "" }, { "test_index": 8, "latency_s": 55.98761820793152, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 4614, "error": "" }, { "test_index": 9, "latency_s": 59.80140805244446, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 4995, "error": "" }, { "test_index": 10, "latency_s": 74.00846910476685, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 5242, "error": "" }, { "test_index": 11, "latency_s": 86.70702791213989, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 5087, "error": "" }, { "test_index": 12, "latency_s": 59.71712684631348, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 5340, "error": "" }, { "test_index": 13, "latency_s": 53.856579065322876, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 4788, "error": "" }, { "test_index": 14, "latency_s": 71.56555199623108, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 4437, "error": "" }, { "test_index": 15, "latency_s": 67.05123686790466, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 6053, "error": "" }, { "test_index": 16, "latency_s": 69.59452223777771, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 6375, "error": "" }, { "test_index": 17, "latency_s": 55.52915835380554, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 5019, "error": "" }, { "test_index": 18, "latency_s": 65.23474907875061, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 4872, "error": "" }, { "test_index": 19, "latency_s": 72.19697213172913, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 6087, "error": "" }, { "test_index": 20, "latency_s": 66.02004599571228, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 5804, "error": "" }, { "test_index": 21, "latency_s": 67.15297818183899, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 5745, "error": "" }, { "test_index": 22, "latency_s": 75.84364199638367, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 6213, "error": "" }, { "test_index": 23, "latency_s": 42.76110792160034, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 3722, "error": "" }, { "test_index": 24, "latency_s": 71.75279188156128, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 5879, "error": "" }, { "test_index": 25, "latency_s": 69.178946018219, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 5622, "error": "" }, { "test_index": 26, "latency_s": 54.79685616493225, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 4621, "error": "" }, { "test_index": 27, "latency_s": 64.14428210258484, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 5325, "error": "" }, { "test_index": 28, "latency_s": 61.04027700424194, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 4932, "error": "" }, { "test_index": 29, "latency_s": 70.74163508415222, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 5257, "error": "" }, { "test_index": 30, "latency_s": 78.69463920593262, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 6019, "error": "" }, { "test_index": 31, "latency_s": 69.28982496261597, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 5244, "error": "" }, { "test_index": 32, "latency_s": 70.26716995239258, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 5418, "error": "" }, { "test_index": 33, "latency_s": 63.06647515296936, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 4949, "error": "" }, { "test_index": 34, "latency_s": 90.75518703460693, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 7790, "error": "" }, { "test_index": 35, "latency_s": 59.43641185760498, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 5513, "error": "" }, { "test_index": 36, "latency_s": 44.94361209869385, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 3788, "error": "" }, { "test_index": 37, "latency_s": 58.082329750061035, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 4915, "error": "" }, { "test_index": 38, "latency_s": 50.94555687904358, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 4892, "error": "" }, { "test_index": 39, "latency_s": 57.39473104476929, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 5217, "error": "" }, { "test_index": 40, "latency_s": 66.40707492828369, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 5521, "error": "" }, { "test_index": 41, "latency_s": 56.74499797821045, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 5047, "error": "" }, { "test_index": 42, "latency_s": 54.08118510246277, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 4464, "error": "" }, { "test_index": 43, "latency_s": 104.71720385551453, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 8301, "error": "" }, { "test_index": 44, "latency_s": 72.97875618934631, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 5376, "error": "" }, { "test_index": 45, "latency_s": 68.16306591033936, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 5142, "error": "" }, { "test_index": 46, "latency_s": 69.8156681060791, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 5204, "error": "" }, { "test_index": 47, "latency_s": 79.48013305664062, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 5405, "error": "" }, { "test_index": 48, "latency_s": 63.990400075912476, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 5187, "error": "" }, { "test_index": 49, "latency_s": 76.74648594856262, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 5677, "error": "" }, { "test_index": 50, "latency_s": 92.14215397834778, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 7413, "error": "" } ] }, "reflect": { "accuracy": 70.7, "correct": 171, "total": 242, "avg_latency_s": 22.758, "model_id": "gpt-5-mini", "provider_id": "openai", "sample_id": "conv-43" } }