{ "model_id": "gpt-5.4-mini", "provider_id": "openai", "retain": { "timestamp": "2026-03-19T16:52:38.505881+00:00", "model_id": "gpt-5.4-mini", "model_name": "gpt-5.4-mini", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 70.49308705329895, "summary": { "success": 50, "total": 50, "wall_s": 70.493, "avg_latency_s": 5.398, "throughput_rps": 0.709, "completion_toks_s": 704.7, "total_toks_s": 3546.5, "out_in_ratio": 0.248, "tokens_per_fact": 66.7 }, "tests": [ { "test_index": 1, "latency_s": 7.0363311767578125, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 1269, "error": "" }, { "test_index": 2, "latency_s": 4.592425107955933, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 485, "error": "" }, { "test_index": 3, "latency_s": 4.666959762573242, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 792, "error": "" }, { "test_index": 4, "latency_s": 7.274669170379639, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 1384, "error": "" }, { "test_index": 5, "latency_s": 4.000578880310059, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 616, "error": "" }, { "test_index": 6, "latency_s": 3.9234492778778076, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 543, "error": "" }, { "test_index": 7, "latency_s": 6.000988245010376, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3948, "completion_tokens": 1127, "error": "" }, { "test_index": 8, "latency_s": 7.489457845687866, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1666, "error": "" }, { "test_index": 9, "latency_s": 4.789580821990967, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 893, "error": "" }, { "test_index": 10, "latency_s": 7.644698858261108, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 1408, "error": "" }, { "test_index": 11, "latency_s": 6.112342119216919, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 1271, "error": "" }, { "test_index": 12, "latency_s": 3.3609330654144287, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 665, "error": "" }, { "test_index": 13, "latency_s": 4.643766164779663, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 968, "error": "" }, { "test_index": 14, "latency_s": 5.153470993041992, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 713, "error": "" }, { "test_index": 15, "latency_s": 5.2355711460113525, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 1012, "error": "" }, { "test_index": 16, "latency_s": 5.235713005065918, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 1039, "error": "" }, { "test_index": 17, "latency_s": 6.4158408641815186, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 985, "error": "" }, { "test_index": 18, "latency_s": 4.910176992416382, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 860, "error": "" }, { "test_index": 19, "latency_s": 5.769942998886108, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 1094, "error": "" }, { "test_index": 20, "latency_s": 7.850183010101318, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 1579, "error": "" }, { "test_index": 21, "latency_s": 4.789521932601929, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4080, "completion_tokens": 841, "error": "" }, { "test_index": 22, "latency_s": 6.695142030715942, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 1378, "error": "" }, { "test_index": 23, "latency_s": 6.103028059005737, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 962, "error": "" }, { "test_index": 24, "latency_s": 2.96653413772583, "num_facts": 5, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4038, "completion_tokens": 440, "error": "" }, { "test_index": 25, "latency_s": 8.6826171875, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 2113, "error": "" }, { "test_index": 26, "latency_s": 2.9780068397521973, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4062, "completion_tokens": 525, "error": "" }, { "test_index": 27, "latency_s": 4.064404010772705, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 808, "error": "" }, { "test_index": 28, "latency_s": 4.196819067001343, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4000, "completion_tokens": 838, "error": "" }, { "test_index": 29, "latency_s": 4.82550573348999, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 976, "error": "" }, { "test_index": 30, "latency_s": 4.352637052536011, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 786, "error": "" }, { "test_index": 31, "latency_s": 3.567363739013672, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 555, "error": "" }, { "test_index": 32, "latency_s": 4.16477108001709, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 629, "error": "" }, { "test_index": 33, "latency_s": 4.365840911865234, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 840, "error": "" }, { "test_index": 34, "latency_s": 19.8822660446167, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3951, "completion_tokens": 1137, "error": "" }, { "test_index": 35, "latency_s": 5.609043836593628, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4125, "completion_tokens": 1192, "error": "" }, { "test_index": 36, "latency_s": 3.1503400802612305, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 675, "error": "" }, { "test_index": 37, "latency_s": 3.857560873031616, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 760, "error": "" }, { "test_index": 38, "latency_s": 3.7969679832458496, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3829, "completion_tokens": 836, "error": "" }, { "test_index": 39, "latency_s": 3.645642042160034, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 836, "error": "" }, { "test_index": 40, "latency_s": 5.441574811935425, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3888, "completion_tokens": 1284, "error": "" }, { "test_index": 41, "latency_s": 4.9717490673065186, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3972, "completion_tokens": 1216, "error": "" }, { "test_index": 42, "latency_s": 5.733656883239746, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3805, "completion_tokens": 1457, "error": "" }, { "test_index": 43, "latency_s": 3.4433021545410156, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4123, "completion_tokens": 730, "error": "" }, { "test_index": 44, "latency_s": 5.024696111679077, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1005, "error": "" }, { "test_index": 45, "latency_s": 4.194928169250488, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3923, "completion_tokens": 978, "error": "" }, { "test_index": 46, "latency_s": 3.114682912826538, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3991, "completion_tokens": 692, "error": "" }, { "test_index": 47, "latency_s": 6.222630023956299, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4114, "completion_tokens": 1230, "error": "" }, { "test_index": 48, "latency_s": 4.7448272705078125, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4109, "completion_tokens": 1019, "error": "" }, { "test_index": 49, "latency_s": 8.83645510673523, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4086, "completion_tokens": 1689, "error": "" }, { "test_index": 50, "latency_s": 4.364808082580566, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3913, "completion_tokens": 881, "error": "" } ] }, "quality": { "accuracy": 86.4, "correct": 209, "total": 242, "model_id": "gpt-5.4-mini", "provider_id": "openai", "sample_id": "conv-43" }, "reflect": { "accuracy": 83.5, "correct": 202, "total": 242, "avg_latency_s": 7.456, "model_id": "gpt-5.4-mini", "provider_id": "openai", "sample_id": "conv-43" } }