{ "model_id": "gpt-4.1-mini", "provider_id": "openai", "quality": { "accuracy": 86.4, "correct": 209, "total": 242, "model_id": "gpt-4.1-mini", "provider_id": "openai", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:15:29.709910+00:00", "model_id": "gpt-4.1-mini", "model_name": "gpt-4.1-mini", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 199.94987392425537, "summary": { "success": 50, "total": 50, "wall_s": 199.95, "avg_latency_s": 15.416, "throughput_rps": 0.25, "completion_toks_s": 228.6, "total_toks_s": 1230.7, "out_in_ratio": 0.228, "tokens_per_fact": 55.5 }, "tests": [ { "test_index": 1, "latency_s": 19.301893711090088, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3949, "completion_tokens": 871, "error": "" }, { "test_index": 2, "latency_s": 18.652348041534424, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 834, "error": "" }, { "test_index": 3, "latency_s": 14.473172187805176, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3914, "completion_tokens": 716, "error": "" }, { "test_index": 4, "latency_s": 18.970566034317017, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 1023, "error": "" }, { "test_index": 5, "latency_s": 10.57516884803772, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 647, "error": "" }, { "test_index": 6, "latency_s": 9.569795846939087, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4039, "completion_tokens": 680, "error": "" }, { "test_index": 7, "latency_s": 14.34142279624939, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3949, "completion_tokens": 998, "error": "" }, { "test_index": 8, "latency_s": 11.446866035461426, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 830, "error": "" }, { "test_index": 9, "latency_s": 16.0763840675354, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3830, "completion_tokens": 1054, "error": "" }, { "test_index": 10, "latency_s": 20.608627796173096, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 1079, "error": "" }, { "test_index": 11, "latency_s": 24.568825006484985, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 1399, "error": "" }, { "test_index": 12, "latency_s": 12.16731595993042, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 678, "error": "" }, { "test_index": 13, "latency_s": 14.61956787109375, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 718, "error": "" }, { "test_index": 14, "latency_s": 12.388746976852417, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 726, "error": "" }, { "test_index": 15, "latency_s": 12.526453971862793, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 784, "error": "" }, { "test_index": 16, "latency_s": 19.488906145095825, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 1097, "error": "" }, { "test_index": 17, "latency_s": 11.132081985473633, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 661, "error": "" }, { "test_index": 18, "latency_s": 9.09581184387207, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 619, "error": "" }, { "test_index": 19, "latency_s": 11.592493057250977, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3889, "completion_tokens": 741, "error": "" }, { "test_index": 20, "latency_s": 21.06968402862549, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 1418, "error": "" }, { "test_index": 21, "latency_s": 12.608012914657593, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 818, "error": "" }, { "test_index": 22, "latency_s": 21.778927087783813, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 1053, "error": "" }, { "test_index": 23, "latency_s": 9.809706926345825, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 715, "error": "" }, { "test_index": 24, "latency_s": 11.339006900787354, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4039, "completion_tokens": 742, "error": "" }, { "test_index": 25, "latency_s": 21.977086782455444, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 1505, "error": "" }, { "test_index": 26, "latency_s": 9.389520168304443, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 439, "error": "" }, { "test_index": 27, "latency_s": 16.026299238204956, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 1108, "error": "" }, { "test_index": 28, "latency_s": 14.348052978515625, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 933, "error": "" }, { "test_index": 29, "latency_s": 23.75454592704773, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 1212, "error": "" }, { "test_index": 30, "latency_s": 19.471304893493652, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 826, "error": "" }, { "test_index": 31, "latency_s": 8.814897298812866, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 544, "error": "" }, { "test_index": 32, "latency_s": 11.384836196899414, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 667, "error": "" }, { "test_index": 33, "latency_s": 18.53458285331726, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4110, "completion_tokens": 1082, "error": "" }, { "test_index": 34, "latency_s": 16.76546812057495, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 1083, "error": "" }, { "test_index": 35, "latency_s": 14.29448390007019, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 911, "error": "" }, { "test_index": 36, "latency_s": 14.454350233078003, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 608, "error": "" }, { "test_index": 37, "latency_s": 15.509953022003174, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 892, "error": "" }, { "test_index": 38, "latency_s": 14.724937915802002, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3830, "completion_tokens": 1116, "error": "" }, { "test_index": 39, "latency_s": 15.157469034194946, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 1157, "error": "" }, { "test_index": 40, "latency_s": 14.057775020599365, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3889, "completion_tokens": 1149, "error": "" }, { "test_index": 41, "latency_s": 13.388677835464478, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 954, "error": "" }, { "test_index": 42, "latency_s": 20.330511808395386, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 960, "error": "" }, { "test_index": 43, "latency_s": 18.473856925964355, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 817, "error": "" }, { "test_index": 44, "latency_s": 9.33489203453064, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 680, "error": "" }, { "test_index": 45, "latency_s": 11.764449119567871, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 711, "error": "" }, { "test_index": 46, "latency_s": 16.33203411102295, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 746, "error": "" }, { "test_index": 47, "latency_s": 10.113329887390137, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 709, "error": "" }, { "test_index": 48, "latency_s": 23.259809017181396, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4110, "completion_tokens": 1449, "error": "" }, { "test_index": 49, "latency_s": 24.98543906211853, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 1516, "error": "" }, { "test_index": 50, "latency_s": 15.933000087738037, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3914, "completion_tokens": 1025, "error": "" } ] }, "reflect": { "accuracy": 88.0, "correct": 213, "total": 242, "avg_latency_s": 5.604, "model_id": "gpt-4.1-mini", "provider_id": "openai", "sample_id": "conv-43" } }