{ "model_id": "gemini-2.5-flash-lite", "provider_id": "gemini", "quality": { "accuracy": 84.7, "correct": 205, "total": 242, "model_id": "gemini-2.5-flash-lite", "provider_id": "gemini", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:50:05.610804+00:00", "model_id": "gemini-2.5-flash-lite", "model_name": "gemini-2.5-flash-lite", "provider_id": "gemini", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 240.30693101882935, "summary": { "success": 48, "total": 50, "wall_s": 240.307, "avg_latency_s": 15.813, "throughput_rps": 0.2, "completion_toks_s": 621.1, "total_toks_s": 1516.7, "out_in_ratio": 0.693, "tokens_per_fact": 114.0 }, "tests": [ { "test_index": 1, "latency_s": 6.08125376701355, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "503 UNAVAILABLE. {'error': {'code': 503, 'message': 'This model is currently experiencing high demand. Spikes in demand are usually temporary. Please try again later.', 'status': 'UNAVAILABLE'}}" }, { "test_index": 2, "latency_s": 6.860608100891113, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4516, "completion_tokens": 2425, "error": "" }, { "test_index": 3, "latency_s": 7.489064931869507, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4353, "completion_tokens": 1966, "error": "" }, { "test_index": 4, "latency_s": 14.060073137283325, "num_facts": 34, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4625, "completion_tokens": 3732, "error": "" }, { "test_index": 5, "latency_s": 3.7020721435546875, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4467, "completion_tokens": 1120, "error": "" }, { "test_index": 6, "latency_s": 11.095499038696289, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4537, "completion_tokens": 3393, "error": "" }, { "test_index": 7, "latency_s": 12.078857898712158, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4444, "completion_tokens": 4196, "error": "" }, { "test_index": 8, "latency_s": 5.074668645858765, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "503 UNAVAILABLE. {'error': {'code': 503, 'message': 'This model is currently experiencing high demand. Spikes in demand are usually temporary. Please try again later.', 'status': 'UNAVAILABLE'}}" }, { "test_index": 9, "latency_s": 100.45269870758057, "num_facts": 26, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4300, "completion_tokens": 3841, "error": "" }, { "test_index": 10, "latency_s": 9.153260707855225, "num_facts": 28, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4298, "completion_tokens": 3166, "error": "" }, { "test_index": 11, "latency_s": 15.246495962142944, "num_facts": 43, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4464, "completion_tokens": 4616, "error": "" }, { "test_index": 12, "latency_s": 8.425334930419922, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4605, "completion_tokens": 2603, "error": "" }, { "test_index": 13, "latency_s": 8.571813821792603, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4378, "completion_tokens": 2719, "error": "" }, { "test_index": 14, "latency_s": 8.691036939620972, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4556, "completion_tokens": 1910, "error": "" }, { "test_index": 15, "latency_s": 8.023442029953003, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4590, "completion_tokens": 2200, "error": "" }, { "test_index": 16, "latency_s": 7.559647083282471, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4501, "completion_tokens": 2202, "error": "" }, { "test_index": 17, "latency_s": 9.893832921981812, "num_facts": 29, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4605, "completion_tokens": 2937, "error": "" }, { "test_index": 18, "latency_s": 10.556596994400024, "num_facts": 24, "valid_json": true, "success": true, "retries": 2, "prompt_tokens": 4570, "completion_tokens": 2492, "error": "" }, { "test_index": 19, "latency_s": 13.84652304649353, "num_facts": 31, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4360, "completion_tokens": 4102, "error": "" }, { "test_index": 20, "latency_s": 15.100675106048584, "num_facts": 36, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4495, "completion_tokens": 4029, "error": "" }, { "test_index": 21, "latency_s": 9.0363609790802, "num_facts": 36, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4605, "completion_tokens": 3626, "error": "" }, { "test_index": 22, "latency_s": 9.337828159332275, "num_facts": 17, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4501, "completion_tokens": 2071, "error": "" }, { "test_index": 23, "latency_s": 6.952221870422363, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4516, "completion_tokens": 2521, "error": "" }, { "test_index": 24, "latency_s": 12.155405044555664, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4537, "completion_tokens": 3056, "error": "" }, { "test_index": 25, "latency_s": 16.177682876586914, "num_facts": 41, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4495, "completion_tokens": 4480, "error": "" }, { "test_index": 26, "latency_s": 7.036257028579712, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4570, "completion_tokens": 2489, "error": "" }, { "test_index": 27, "latency_s": 12.430346012115479, "num_facts": 28, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4298, "completion_tokens": 3263, "error": "" }, { "test_index": 28, "latency_s": 9.017959833145142, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4501, "completion_tokens": 2623, "error": "" }, { "test_index": 29, "latency_s": 13.840017795562744, "num_facts": 32, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4625, "completion_tokens": 3846, "error": "" }, { "test_index": 30, "latency_s": 9.015188932418823, "num_facts": 35, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4409, "completion_tokens": 3823, "error": "" }, { "test_index": 31, "latency_s": 7.482144117355347, "num_facts": 26, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4467, "completion_tokens": 2616, "error": "" }, { "test_index": 32, "latency_s": 6.140012979507446, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4516, "completion_tokens": 2225, "error": "" }, { "test_index": 33, "latency_s": 7.589418888092041, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4598, "completion_tokens": 2549, "error": "" }, { "test_index": 34, "latency_s": 14.801798105239868, "num_facts": 48, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4464, "completion_tokens": 5055, "error": "" }, { "test_index": 35, "latency_s": 8.173665761947632, "num_facts": 23, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4590, "completion_tokens": 2813, "error": "" }, { "test_index": 36, "latency_s": 6.828280210494995, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4556, "completion_tokens": 1939, "error": "" }, { "test_index": 37, "latency_s": 8.558558940887451, "num_facts": 28, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4409, "completion_tokens": 3034, "error": "" }, { "test_index": 38, "latency_s": 102.069256067276, "num_facts": 19, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4300, "completion_tokens": 2890, "error": "" }, { "test_index": 39, "latency_s": 11.35008716583252, "num_facts": 27, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4378, "completion_tokens": 2825, "error": "" }, { "test_index": 40, "latency_s": 10.347617864608765, "num_facts": 30, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4360, "completion_tokens": 3507, "error": "" }, { "test_index": 41, "latency_s": 9.935868263244629, "num_facts": 39, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4378, "completion_tokens": 4098, "error": "" }, { "test_index": 42, "latency_s": 7.733833074569702, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4298, "completion_tokens": 3058, "error": "" }, { "test_index": 43, "latency_s": 13.979326248168945, "num_facts": 37, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4625, "completion_tokens": 4001, "error": "" }, { "test_index": 44, "latency_s": 7.576695919036865, "num_facts": 24, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4621, "completion_tokens": 2522, "error": "" }, { "test_index": 45, "latency_s": 10.60389494895935, "num_facts": 27, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4409, "completion_tokens": 2944, "error": "" }, { "test_index": 46, "latency_s": 107.6986129283905, "num_facts": 67, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4467, "completion_tokens": 7323, "error": "" }, { "test_index": 47, "latency_s": 8.337063074111938, "num_facts": 25, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4621, "completion_tokens": 2613, "error": "" }, { "test_index": 48, "latency_s": 11.642383813858032, "num_facts": 21, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4598, "completion_tokens": 2043, "error": "" }, { "test_index": 49, "latency_s": 13.999919176101685, "num_facts": 38, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4495, "completion_tokens": 3793, "error": "" }, { "test_index": 50, "latency_s": 8.38817310333252, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4353, "completion_tokens": 1953, "error": "" } ] }, "reflect": { "accuracy": 88.4, "correct": 214, "total": 242, "avg_latency_s": 2.387, "model_id": "gemini-2.5-flash-lite", "provider_id": "gemini", "sample_id": "conv-43" } }