{ "model_id": "gemini-3-flash-preview", "provider_id": "gemini", "quality": { "accuracy": 83.5, "correct": 202, "total": 242, "model_id": "gemini-3-flash-preview", "provider_id": "gemini", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T14:06:48.043289+00:00", "model_id": "gemini-3-flash-preview", "model_name": "gemini-3-flash-preview", "provider_id": "gemini", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 1603.9191689491272, "summary": { "success": 48, "total": 50, "wall_s": 1603.919, "avg_latency_s": 109.261, "throughput_rps": 0.03, "completion_toks_s": 41.1, "total_toks_s": 175.3, "out_in_ratio": 0.306, "tokens_per_fact": 145.7 }, "tests": [ { "test_index": 1, "latency_s": 494.8810501098633, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "Expecting property name enclosed in double quotes: line 129 column 52 (char 3525)" }, { "test_index": 2, "latency_s": 333.086678981781, "num_facts": 9, "valid_json": true, "success": true, "retries": 2, "prompt_tokens": 4516, "completion_tokens": 1430, "error": "" }, { "test_index": 3, "latency_s": 48.396066665649414, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4353, "completion_tokens": 1445, "error": "" }, { "test_index": 4, "latency_s": 233.0535340309143, "num_facts": 7, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4625, "completion_tokens": 1177, "error": "" }, { "test_index": 5, "latency_s": 156.5632619857788, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4467, "completion_tokens": 1195, "error": "" }, { "test_index": 6, "latency_s": 156.92006301879883, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4537, "completion_tokens": 938, "error": "" }, { "test_index": 7, "latency_s": 189.46649026870728, "num_facts": 15, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4444, "completion_tokens": 2317, "error": "" }, { "test_index": 8, "latency_s": 19.663038969039917, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4621, "completion_tokens": 1272, "error": "" }, { "test_index": 9, "latency_s": 240.14998865127563, "num_facts": 7, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4300, "completion_tokens": 1106, "error": "" }, { "test_index": 10, "latency_s": 401.8747019767761, "num_facts": 8, "valid_json": true, "success": true, "retries": 2, "prompt_tokens": 4298, "completion_tokens": 1127, "error": "" }, { "test_index": 11, "latency_s": 27.580506086349487, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4464, "completion_tokens": 1490, "error": "" }, { "test_index": 12, "latency_s": 16.66841983795166, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4605, "completion_tokens": 1152, "error": "" }, { "test_index": 13, "latency_s": 22.27617573738098, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4378, "completion_tokens": 1829, "error": "" }, { "test_index": 14, "latency_s": 160.94938206672668, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4556, "completion_tokens": 1059, "error": "" }, { "test_index": 15, "latency_s": 25.880698204040527, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4590, "completion_tokens": 1360, "error": "" }, { "test_index": 16, "latency_s": 69.299320936203, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4501, "completion_tokens": 1531, "error": "" }, { "test_index": 17, "latency_s": 228.2582492828369, "num_facts": 11, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4605, "completion_tokens": 1369, "error": "" }, { "test_index": 18, "latency_s": 184.1486940383911, "num_facts": 11, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4570, "completion_tokens": 1809, "error": "" }, { "test_index": 19, "latency_s": 15.666108846664429, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4360, "completion_tokens": 1258, "error": "" }, { "test_index": 20, "latency_s": 320.4760510921478, "num_facts": 8, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4495, "completion_tokens": 1307, "error": "" }, { "test_index": 21, "latency_s": 174.93927717208862, "num_facts": 12, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4605, "completion_tokens": 1399, "error": "" }, { "test_index": 22, "latency_s": 18.441706895828247, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4501, "completion_tokens": 1343, "error": "" }, { "test_index": 23, "latency_s": 181.19468426704407, "num_facts": 10, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4516, "completion_tokens": 1476, "error": "" }, { "test_index": 24, "latency_s": 19.701930046081543, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4537, "completion_tokens": 880, "error": "" }, { "test_index": 25, "latency_s": 27.676724910736084, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4495, "completion_tokens": 2009, "error": "" }, { "test_index": 26, "latency_s": 477.47695994377136, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "Expecting value: line 117 column 13 (char 3658)" }, { "test_index": 27, "latency_s": 21.472869873046875, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4298, "completion_tokens": 1264, "error": "" }, { "test_index": 28, "latency_s": 22.97124409675598, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4501, "completion_tokens": 1259, "error": "" }, { "test_index": 29, "latency_s": 157.74026703834534, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4625, "completion_tokens": 1295, "error": "" }, { "test_index": 30, "latency_s": 35.63132905960083, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4409, "completion_tokens": 1613, "error": "" }, { "test_index": 31, "latency_s": 17.178550004959106, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4467, "completion_tokens": 1323, "error": "" }, { "test_index": 32, "latency_s": 21.993601083755493, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4516, "completion_tokens": 1326, "error": "" }, { "test_index": 33, "latency_s": 30.49292278289795, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4598, "completion_tokens": 1738, "error": "" }, { "test_index": 34, "latency_s": 158.57465386390686, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4464, "completion_tokens": 926, "error": "" }, { "test_index": 35, "latency_s": 23.086374759674072, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4590, "completion_tokens": 1600, "error": "" }, { "test_index": 36, "latency_s": 70.22229099273682, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4556, "completion_tokens": 1066, "error": "" }, { "test_index": 37, "latency_s": 28.16380500793457, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4409, "completion_tokens": 1317, "error": "" }, { "test_index": 38, "latency_s": 157.86343574523926, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4300, "completion_tokens": 1297, "error": "" }, { "test_index": 39, "latency_s": 20.30461025238037, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4378, "completion_tokens": 1148, "error": "" }, { "test_index": 40, "latency_s": 19.172024726867676, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4360, "completion_tokens": 1267, "error": "" }, { "test_index": 41, "latency_s": 185.73001408576965, "num_facts": 9, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4378, "completion_tokens": 1428, "error": "" }, { "test_index": 42, "latency_s": 178.37917494773865, "num_facts": 11, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4298, "completion_tokens": 1750, "error": "" }, { "test_index": 43, "latency_s": 196.44439792633057, "num_facts": 10, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4625, "completion_tokens": 1502, "error": "" }, { "test_index": 44, "latency_s": 18.695759057998657, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4621, "completion_tokens": 1204, "error": "" }, { "test_index": 45, "latency_s": 53.41467595100403, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4409, "completion_tokens": 878, "error": "" }, { "test_index": 46, "latency_s": 163.80215501785278, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4467, "completion_tokens": 1263, "error": "" }, { "test_index": 47, "latency_s": 157.6338129043579, "num_facts": 7, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4621, "completion_tokens": 1078, "error": "" }, { "test_index": 48, "latency_s": 188.63230800628662, "num_facts": 12, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4598, "completion_tokens": 1771, "error": "" }, { "test_index": 49, "latency_s": 21.333661317825317, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4495, "completion_tokens": 1478, "error": "" }, { "test_index": 50, "latency_s": 23.276240825653076, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4353, "completion_tokens": 1794, "error": "" } ] }, "reflect": { "accuracy": 93.0, "correct": 225, "total": 242, "avg_latency_s": 13.398, "model_id": "gemini-3-flash-preview", "provider_id": "gemini", "sample_id": "conv-43" } }