{ "model_id": "gpt-5.4-nano", "provider_id": "openai", "retain": { "timestamp": "2026-03-19T16:52:40.166285+00:00", "model_id": "gpt-5.4-nano", "model_name": "gpt-5.4-nano", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 84.10241413116455, "summary": { "success": 50, "total": 50, "wall_s": 84.102, "avg_latency_s": 6.433, "throughput_rps": 0.595, "completion_toks_s": 685.5, "total_toks_s": 3067.4, "out_in_ratio": 0.288, "tokens_per_fact": 88.6 }, "tests": [ { "test_index": 1, "latency_s": 8.818637132644653, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 1348, "error": "" }, { "test_index": 2, "latency_s": 7.529800891876221, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 920, "error": "" }, { "test_index": 3, "latency_s": 5.800513029098511, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 891, "error": "" }, { "test_index": 4, "latency_s": 8.169946908950806, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 1235, "error": "" }, { "test_index": 5, "latency_s": 4.702268838882446, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 878, "error": "" }, { "test_index": 6, "latency_s": 5.442898750305176, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 1025, "error": "" }, { "test_index": 7, "latency_s": 8.561651945114136, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 1511, "error": "" }, { "test_index": 8, "latency_s": 9.00028109550476, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1542, "error": "" }, { "test_index": 9, "latency_s": 7.610112905502319, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 1486, "error": "" }, { "test_index": 10, "latency_s": 5.109586238861084, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 949, "error": "" }, { "test_index": 11, "latency_s": 6.36152720451355, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 1033, "error": "" }, { "test_index": 12, "latency_s": 3.770098924636841, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 683, "error": "" }, { "test_index": 13, "latency_s": 4.979625940322876, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 987, "error": "" }, { "test_index": 14, "latency_s": 5.2081520557403564, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 1020, "error": "" }, { "test_index": 15, "latency_s": 11.32240915298462, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 1902, "error": "" }, { "test_index": 16, "latency_s": 8.21775507926941, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 1524, "error": "" }, { "test_index": 17, "latency_s": 5.107132911682129, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 1016, "error": "" }, { "test_index": 18, "latency_s": 3.3764610290527344, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 622, "error": "" }, { "test_index": 19, "latency_s": 6.28628396987915, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 1084, "error": "" }, { "test_index": 20, "latency_s": 8.847455978393555, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 1520, "error": "" }, { "test_index": 21, "latency_s": 6.053600072860718, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 1052, "error": "" }, { "test_index": 22, "latency_s": 5.1749467849731445, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 851, "error": "" }, { "test_index": 23, "latency_s": 5.801233768463135, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 941, "error": "" }, { "test_index": 24, "latency_s": 6.629810810089111, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 1249, "error": "" }, { "test_index": 25, "latency_s": 7.361502408981323, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 1176, "error": "" }, { "test_index": 26, "latency_s": 5.972562313079834, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 1025, "error": "" }, { "test_index": 27, "latency_s": 5.661386013031006, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 1016, "error": "" }, { "test_index": 28, "latency_s": 6.045496940612793, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 1164, "error": "" }, { "test_index": 29, "latency_s": 5.377661943435669, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 1092, "error": "" }, { "test_index": 30, "latency_s": 5.150969743728638, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 986, "error": "" }, { "test_index": 31, "latency_s": 4.146904945373535, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 755, "error": "" }, { "test_index": 32, "latency_s": 4.517411231994629, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 897, "error": "" }, { "test_index": 33, "latency_s": 5.4721839427948, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 1038, "error": "" }, { "test_index": 34, "latency_s": 10.064404964447021, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 1919, "error": "" }, { "test_index": 35, "latency_s": 6.194079875946045, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 1155, "error": "" }, { "test_index": 36, "latency_s": 4.685106039047241, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 937, "error": "" }, { "test_index": 37, "latency_s": 5.43241286277771, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 995, "error": "" }, { "test_index": 38, "latency_s": 5.675719976425171, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 1087, "error": "" }, { "test_index": 39, "latency_s": 6.695685863494873, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 1397, "error": "" }, { "test_index": 40, "latency_s": 7.7273850440979, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 1527, "error": "" }, { "test_index": 41, "latency_s": 7.022140026092529, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 1484, "error": "" }, { "test_index": 42, "latency_s": 3.91442608833313, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 736, "error": "" }, { "test_index": 43, "latency_s": 5.577815055847168, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 1107, "error": "" }, { "test_index": 44, "latency_s": 7.429023027420044, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1149, "error": "" }, { "test_index": 45, "latency_s": 7.60917592048645, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 1463, "error": "" }, { "test_index": 46, "latency_s": 6.443664073944092, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 1177, "error": "" }, { "test_index": 47, "latency_s": 6.903820991516113, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1299, "error": "" }, { "test_index": 48, "latency_s": 6.1944260597229, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 1117, "error": "" }, { "test_index": 49, "latency_s": 7.382529020309448, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 1247, "error": "" }, { "test_index": 50, "latency_s": 9.103618860244751, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 1440, "error": "" } ] }, "quality": { "accuracy": 83.9, "correct": 203, "total": 242, "model_id": "gpt-5.4-nano", "provider_id": "openai", "sample_id": "conv-43" }, "reflect": { "accuracy": 79.3, "correct": 192, "total": 242, "avg_latency_s": 5.155, "model_id": "gpt-5.4-nano", "provider_id": "openai", "sample_id": "conv-43" } }