{ "model_id": "gpt-4.1-nano", "provider_id": "openai", "quality": { "accuracy": 87.2, "correct": 211, "total": 242, "model_id": "gpt-4.1-nano", "provider_id": "openai", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:18:49.666056+00:00", "model_id": "gpt-4.1-nano", "model_name": "gpt-4.1-nano", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 112.9343900680542, "summary": { "success": 50, "total": 50, "wall_s": 112.934, "avg_latency_s": 8.525, "throughput_rps": 0.443, "completion_toks_s": 263.0, "total_toks_s": 2037.2, "out_in_ratio": 0.148, "tokens_per_fact": 56.5 }, "tests": [ { "test_index": 1, "latency_s": 8.70841097831726, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3949, "completion_tokens": 762, "error": "" }, { "test_index": 2, "latency_s": 7.7724809646606445, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 660, "error": "" }, { "test_index": 3, "latency_s": 7.018157958984375, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3914, "completion_tokens": 587, "error": "" }, { "test_index": 4, "latency_s": 6.039341688156128, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 501, "error": "" }, { "test_index": 5, "latency_s": 6.780200004577637, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 593, "error": "" }, { "test_index": 6, "latency_s": 4.876862049102783, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4039, "completion_tokens": 406, "error": "" }, { "test_index": 7, "latency_s": 11.806228876113892, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3949, "completion_tokens": 933, "error": "" }, { "test_index": 8, "latency_s": 4.778717994689941, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 369, "error": "" }, { "test_index": 9, "latency_s": 7.27567720413208, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3830, "completion_tokens": 566, "error": "" }, { "test_index": 10, "latency_s": 8.036480903625488, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 615, "error": "" }, { "test_index": 11, "latency_s": 8.551651000976562, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 663, "error": "" }, { "test_index": 12, "latency_s": 7.825654029846191, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 557, "error": "" }, { "test_index": 13, "latency_s": 7.587748050689697, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 532, "error": "" }, { "test_index": 14, "latency_s": 6.2311999797821045, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 443, "error": "" }, { "test_index": 15, "latency_s": 8.27198076248169, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 568, "error": "" }, { "test_index": 16, "latency_s": 5.879573106765747, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 405, "error": "" }, { "test_index": 17, "latency_s": 7.761192083358765, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 480, "error": "" }, { "test_index": 18, "latency_s": 9.94628095626831, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 675, "error": "" }, { "test_index": 19, "latency_s": 8.246530055999756, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3889, "completion_tokens": 579, "error": "" }, { "test_index": 20, "latency_s": 7.396991968154907, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 542, "error": "" }, { "test_index": 21, "latency_s": 5.405543088912964, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 480, "error": "" }, { "test_index": 22, "latency_s": 7.959005832672119, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 711, "error": "" }, { "test_index": 23, "latency_s": 9.123106002807617, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 859, "error": "" }, { "test_index": 24, "latency_s": 8.913774251937866, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4039, "completion_tokens": 774, "error": "" }, { "test_index": 25, "latency_s": 23.12553882598877, "num_facts": 10, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4087, "completion_tokens": 635, "error": "" }, { "test_index": 26, "latency_s": 8.036060333251953, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 543, "error": "" }, { "test_index": 27, "latency_s": 11.066758871078491, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 859, "error": "" }, { "test_index": 28, "latency_s": 9.372154235839844, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 627, "error": "" }, { "test_index": 29, "latency_s": 5.045485019683838, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 332, "error": "" }, { "test_index": 30, "latency_s": 9.813133001327515, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 674, "error": "" }, { "test_index": 31, "latency_s": 6.14360499382019, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 453, "error": "" }, { "test_index": 32, "latency_s": 7.723020076751709, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 565, "error": "" }, { "test_index": 33, "latency_s": 5.689085006713867, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4110, "completion_tokens": 339, "error": "" }, { "test_index": 34, "latency_s": 8.44208312034607, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 544, "error": "" }, { "test_index": 35, "latency_s": 8.11308217048645, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 581, "error": "" }, { "test_index": 36, "latency_s": 6.9827330112457275, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 479, "error": "" }, { "test_index": 37, "latency_s": 15.939194202423096, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 1095, "error": "" }, { "test_index": 38, "latency_s": 26.400827884674072, "num_facts": 16, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 3830, "completion_tokens": 755, "error": "" }, { "test_index": 39, "latency_s": 12.171085834503174, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 836, "error": "" }, { "test_index": 40, "latency_s": 13.27104902267456, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3889, "completion_tokens": 599, "error": "" }, { "test_index": 41, "latency_s": 11.947882890701294, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 940, "error": "" }, { "test_index": 42, "latency_s": 6.3082287311553955, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 448, "error": "" }, { "test_index": 43, "latency_s": 5.209590196609497, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 412, "error": "" }, { "test_index": 44, "latency_s": 4.035406112670898, "num_facts": 5, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 282, "error": "" }, { "test_index": 45, "latency_s": 8.272960662841797, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 691, "error": "" }, { "test_index": 46, "latency_s": 3.4155237674713135, "num_facts": 4, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 255, "error": "" }, { "test_index": 47, "latency_s": 3.530700206756592, "num_facts": 5, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 276, "error": "" }, { "test_index": 48, "latency_s": 3.2459909915924072, "num_facts": 5, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4110, "completion_tokens": 312, "error": "" }, { "test_index": 49, "latency_s": 12.530314922332764, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 1079, "error": "" }, { "test_index": 50, "latency_s": 8.230755090713501, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3914, "completion_tokens": 825, "error": "" } ] }, "reflect": { "accuracy": 80.2, "correct": 194, "total": 242, "avg_latency_s": 3.407, "model_id": "gpt-4.1-nano", "provider_id": "openai", "sample_id": "conv-43" } }