{ "model_id": "gpt-4o-mini", "provider_id": "openai", "quality": { "accuracy": 81.0, "correct": 196, "total": 242, "model_id": "gpt-4o-mini", "provider_id": "openai", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:32:59.418453+00:00", "model_id": "gpt-4o-mini", "model_name": "gpt-4o-mini", "provider_id": "openai", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 118.57616376876831, "summary": { "success": 50, "total": 50, "wall_s": 118.576, "avg_latency_s": 9.042, "throughput_rps": 0.422, "completion_toks_s": 183.1, "total_toks_s": 1872.9, "out_in_ratio": 0.108, "tokens_per_fact": 42.1 }, "tests": [ { "test_index": 1, "latency_s": 6.793259143829346, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3949, "completion_tokens": 327, "error": "" }, { "test_index": 2, "latency_s": 9.283790111541748, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 454, "error": "" }, { "test_index": 3, "latency_s": 9.664158821105957, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3914, "completion_tokens": 457, "error": "" }, { "test_index": 4, "latency_s": 7.906769037246704, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 387, "error": "" }, { "test_index": 5, "latency_s": 7.804099082946777, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 353, "error": "" }, { "test_index": 6, "latency_s": 12.71466588973999, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4039, "completion_tokens": 514, "error": "" }, { "test_index": 7, "latency_s": 9.274500131607056, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3949, "completion_tokens": 388, "error": "" }, { "test_index": 8, "latency_s": 10.489143133163452, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 425, "error": "" }, { "test_index": 9, "latency_s": 9.524769067764282, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3830, "completion_tokens": 351, "error": "" }, { "test_index": 10, "latency_s": 8.337597846984863, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 329, "error": "" }, { "test_index": 11, "latency_s": 13.559634923934937, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 626, "error": "" }, { "test_index": 12, "latency_s": 6.162470817565918, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 264, "error": "" }, { "test_index": 13, "latency_s": 14.396564245223999, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 672, "error": "" }, { "test_index": 14, "latency_s": 8.02343201637268, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 379, "error": "" }, { "test_index": 15, "latency_s": 7.997811794281006, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 380, "error": "" }, { "test_index": 16, "latency_s": 8.824660778045654, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 417, "error": "" }, { "test_index": 17, "latency_s": 7.973604679107666, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 376, "error": "" }, { "test_index": 18, "latency_s": 10.281862020492554, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 479, "error": "" }, { "test_index": 19, "latency_s": 10.760391235351562, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3889, "completion_tokens": 549, "error": "" }, { "test_index": 20, "latency_s": 11.145496129989624, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 547, "error": "" }, { "test_index": 21, "latency_s": 6.807541131973267, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4081, "completion_tokens": 343, "error": "" }, { "test_index": 22, "latency_s": 8.057214975357056, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 405, "error": "" }, { "test_index": 23, "latency_s": 6.654329061508179, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 338, "error": "" }, { "test_index": 24, "latency_s": 5.9727349281311035, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4039, "completion_tokens": 298, "error": "" }, { "test_index": 25, "latency_s": 13.38800597190857, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 642, "error": "" }, { "test_index": 26, "latency_s": 8.429743766784668, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4063, "completion_tokens": 419, "error": "" }, { "test_index": 27, "latency_s": 6.976932764053345, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 332, "error": "" }, { "test_index": 28, "latency_s": 8.760913848876953, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4001, "completion_tokens": 393, "error": "" }, { "test_index": 29, "latency_s": 7.795588254928589, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 358, "error": "" }, { "test_index": 30, "latency_s": 12.262765169143677, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 583, "error": "" }, { "test_index": 31, "latency_s": 7.819066047668457, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 393, "error": "" }, { "test_index": 32, "latency_s": 7.445061206817627, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 373, "error": "" }, { "test_index": 33, "latency_s": 7.5399861335754395, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4110, "completion_tokens": 368, "error": "" }, { "test_index": 34, "latency_s": 12.767820119857788, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 648, "error": "" }, { "test_index": 35, "latency_s": 7.116754770278931, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 366, "error": "" }, { "test_index": 36, "latency_s": 6.208675861358643, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 325, "error": "" }, { "test_index": 37, "latency_s": 10.55941104888916, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 541, "error": "" }, { "test_index": 38, "latency_s": 7.460961103439331, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3830, "completion_tokens": 376, "error": "" }, { "test_index": 39, "latency_s": 9.582482099533081, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 475, "error": "" }, { "test_index": 40, "latency_s": 11.611489057540894, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3889, "completion_tokens": 599, "error": "" }, { "test_index": 41, "latency_s": 7.529881954193115, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3973, "completion_tokens": 385, "error": "" }, { "test_index": 42, "latency_s": 6.469527959823608, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3806, "completion_tokens": 328, "error": "" }, { "test_index": 43, "latency_s": 6.868930101394653, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4124, "completion_tokens": 347, "error": "" }, { "test_index": 44, "latency_s": 8.490588903427124, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 447, "error": "" }, { "test_index": 45, "latency_s": 10.162550926208496, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3924, "completion_tokens": 549, "error": "" }, { "test_index": 46, "latency_s": 7.584892988204956, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3992, "completion_tokens": 403, "error": "" }, { "test_index": 47, "latency_s": 8.280133724212646, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4115, "completion_tokens": 438, "error": "" }, { "test_index": 48, "latency_s": 7.761176824569702, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4110, "completion_tokens": 390, "error": "" }, { "test_index": 49, "latency_s": 14.062762975692749, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4087, "completion_tokens": 666, "error": "" }, { "test_index": 50, "latency_s": 10.729506015777588, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3914, "completion_tokens": 506, "error": "" } ] }, "reflect": { "accuracy": 93.8, "correct": 227, "total": 242, "avg_latency_s": 6.992, "model_id": "gpt-4o-mini", "provider_id": "openai", "sample_id": "conv-43" } }