{ "model_id": "openai-gpt-oss-20b", "provider_id": "groq", "quality": { "accuracy": 83.9, "correct": 203, "total": 242, "model_id": "openai-gpt-oss-20b", "provider_id": "groq", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:46:29.591306+00:00", "model_id": "openai-gpt-oss-20b", "model_name": "openai/gpt-oss-20b", "provider_id": "groq", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 108.41618013381958, "summary": { "success": 50, "total": 50, "wall_s": 108.416, "avg_latency_s": 7.539, "throughput_rps": 0.461, "completion_toks_s": 2433.8, "total_toks_s": 4311.1, "out_in_ratio": 1.296, "tokens_per_fact": 361.0 }, "tests": [ { "test_index": 1, "latency_s": 5.452346086502075, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4012, "completion_tokens": 4298, "error": "" }, { "test_index": 2, "latency_s": 7.2091920375823975, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 6227, "error": "" }, { "test_index": 3, "latency_s": 7.800779819488525, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3977, "completion_tokens": 6808, "error": "" }, { "test_index": 4, "latency_s": 6.532285213470459, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4187, "completion_tokens": 5094, "error": "" }, { "test_index": 5, "latency_s": 4.470461130142212, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4055, "completion_tokens": 3868, "error": "" }, { "test_index": 6, "latency_s": 5.3224921226501465, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4102, "completion_tokens": 4297, "error": "" }, { "test_index": 7, "latency_s": 6.014250040054321, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4012, "completion_tokens": 5294, "error": "" }, { "test_index": 8, "latency_s": 3.9205479621887207, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4178, "completion_tokens": 3383, "error": "" }, { "test_index": 9, "latency_s": 5.328213930130005, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3893, "completion_tokens": 4654, "error": "" }, { "test_index": 10, "latency_s": 3.730180025100708, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3869, "completion_tokens": 3126, "error": "" }, { "test_index": 11, "latency_s": 5.53375506401062, "num_facts": 19, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4015, "completion_tokens": 4490, "error": "" }, { "test_index": 12, "latency_s": 6.223914861679077, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4144, "completion_tokens": 5353, "error": "" }, { "test_index": 13, "latency_s": 7.082575082778931, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4036, "completion_tokens": 6199, "error": "" }, { "test_index": 14, "latency_s": 4.480597019195557, "num_facts": 17, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 3394, "error": "" }, { "test_index": 15, "latency_s": 8.05655288696289, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4189, "completion_tokens": 7171, "error": "" }, { "test_index": 16, "latency_s": 4.50150728225708, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 3550, "error": "" }, { "test_index": 17, "latency_s": 3.6245858669281006, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4144, "completion_tokens": 3126, "error": "" }, { "test_index": 18, "latency_s": 5.900245189666748, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 5316, "error": "" }, { "test_index": 19, "latency_s": 4.506105899810791, "num_facts": 20, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 3952, "error": "" }, { "test_index": 20, "latency_s": 5.334199666976929, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4150, "completion_tokens": 4736, "error": "" }, { "test_index": 21, "latency_s": 4.144942045211792, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4144, "completion_tokens": 3705, "error": "" }, { "test_index": 22, "latency_s": 8.113770008087158, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4064, "completion_tokens": 7332, "error": "" }, { "test_index": 23, "latency_s": 7.979068994522095, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 7193, "error": "" }, { "test_index": 24, "latency_s": 4.216696739196777, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4102, "completion_tokens": 3587, "error": "" }, { "test_index": 25, "latency_s": 4.196286916732788, "num_facts": 28, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4150, "completion_tokens": 3704, "error": "" }, { "test_index": 26, "latency_s": 9.183225154876709, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4126, "completion_tokens": 8241, "error": "" }, { "test_index": 27, "latency_s": 3.3910558223724365, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3869, "completion_tokens": 2913, "error": "" }, { "test_index": 28, "latency_s": 22.537015199661255, "num_facts": 8, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4064, "completion_tokens": 4088, "error": "" }, { "test_index": 29, "latency_s": 4.442286729812622, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4187, "completion_tokens": 3875, "error": "" }, { "test_index": 30, "latency_s": 8.8919358253479, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3987, "completion_tokens": 8080, "error": "" }, { "test_index": 31, "latency_s": 2.8853681087493896, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4055, "completion_tokens": 2434, "error": "" }, { "test_index": 32, "latency_s": 9.074826002120972, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 8097, "error": "" }, { "test_index": 33, "latency_s": 8.290356159210205, "num_facts": 6, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4173, "completion_tokens": 6574, "error": "" }, { "test_index": 34, "latency_s": 7.869858980178833, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4015, "completion_tokens": 6727, "error": "" }, { "test_index": 35, "latency_s": 6.835728168487549, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4189, "completion_tokens": 6054, "error": "" }, { "test_index": 36, "latency_s": 4.748181104660034, "num_facts": 16, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4127, "completion_tokens": 4277, "error": "" }, { "test_index": 37, "latency_s": 4.607952117919922, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3987, "completion_tokens": 4037, "error": "" }, { "test_index": 38, "latency_s": 7.261283874511719, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3893, "completion_tokens": 6566, "error": "" }, { "test_index": 39, "latency_s": 3.844590187072754, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4036, "completion_tokens": 3308, "error": "" }, { "test_index": 40, "latency_s": 6.580095052719116, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3952, "completion_tokens": 5757, "error": "" }, { "test_index": 41, "latency_s": 3.8639581203460693, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4036, "completion_tokens": 3488, "error": "" }, { "test_index": 42, "latency_s": 23.61368203163147, "num_facts": 16, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 3869, "completion_tokens": 5400, "error": "" }, { "test_index": 43, "latency_s": 7.070716857910156, "num_facts": 21, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4187, "completion_tokens": 6341, "error": "" }, { "test_index": 44, "latency_s": 8.33295488357544, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4178, "completion_tokens": 7503, "error": "" }, { "test_index": 45, "latency_s": 23.87314200401306, "num_facts": 9, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 3987, "completion_tokens": 5632, "error": "" }, { "test_index": 46, "latency_s": 18.63341999053955, "num_facts": 12, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4055, "completion_tokens": 12982, "error": "" }, { "test_index": 47, "latency_s": 4.4618401527404785, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4178, "completion_tokens": 3876, "error": "" }, { "test_index": 48, "latency_s": 9.846404790878296, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4173, "completion_tokens": 9068, "error": "" }, { "test_index": 49, "latency_s": 22.264797925949097, "num_facts": 12, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4150, "completion_tokens": 4330, "error": "" }, { "test_index": 50, "latency_s": 4.8821001052856445, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 3977, "completion_tokens": 4361, "error": "" } ] }, "reflect": { "accuracy": 94.2, "correct": 228, "total": 242, "avg_latency_s": 2.781, "model_id": "openai-gpt-oss-20b", "provider_id": "groq", "sample_id": "conv-43" } }