{ "model_id": "llama-3.3-70b-versatile", "provider_id": "groq", "quality": { "accuracy": 85.5, "correct": 207, "total": 242, "model_id": "llama-3.3-70b-versatile", "provider_id": "groq", "sample_id": "conv-43" }, "retain": { "timestamp": "2026-02-20T13:45:00.773816+00:00", "model_id": "llama-3.3-70b-versatile", "model_name": "llama-3.3-70b-versatile", "provider_id": "groq", "size_gb": 0.0, "dataset": "locomo_3k_50", "concurrency": 4, "wall_s": 88.36780405044556, "summary": { "success": 42, "total": 50, "wall_s": 88.368, "avg_latency_s": 4.845, "throughput_rps": 0.475, "completion_toks_s": 306.3, "total_toks_s": 2474.0, "out_in_ratio": 0.141, "tokens_per_fact": 52.4 }, "tests": [ { "test_index": 1, "latency_s": 3.3499231338500977, "num_facts": 22, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4511, "completion_tokens": 1015, "error": "" }, { "test_index": 2, "latency_s": 2.1673691272735596, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4630, "completion_tokens": 412, "error": "" }, { "test_index": 3, "latency_s": 2.7230610847473145, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4478, "completion_tokens": 515, "error": "" }, { "test_index": 4, "latency_s": 3.0520498752593994, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4679, "completion_tokens": 691, "error": "" }, { "test_index": 5, "latency_s": 3.4882102012634277, "num_facts": 9, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4568, "completion_tokens": 556, "error": "" }, { "test_index": 6, "latency_s": 1.557898998260498, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4605, "completion_tokens": 556, "error": "" }, { "test_index": 7, "latency_s": 2.085783004760742, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4511, "completion_tokens": 655, "error": "" }, { "test_index": 8, "latency_s": 36.45011878013611, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 99690, Requested 4684. Please try again in 2.6244s. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 9, "latency_s": 2.3061561584472656, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4395, "completion_tokens": 784, "error": "" }, { "test_index": 10, "latency_s": 1.986107349395752, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4370, "completion_tokens": 742, "error": "" }, { "test_index": 11, "latency_s": 2.1826720237731934, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4524, "completion_tokens": 773, "error": "" }, { "test_index": 12, "latency_s": 1.5446391105651855, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4684, "completion_tokens": 488, "error": "" }, { "test_index": 13, "latency_s": 4.654382944107056, "num_facts": 19, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4520, "completion_tokens": 1019, "error": "" }, { "test_index": 14, "latency_s": 1.5796828269958496, "num_facts": 9, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4635, "completion_tokens": 451, "error": "" }, { "test_index": 15, "latency_s": 1.699681043624878, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4700, "completion_tokens": 446, "error": "" }, { "test_index": 16, "latency_s": 1.9406039714813232, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4584, "completion_tokens": 722, "error": "" }, { "test_index": 17, "latency_s": 1.6683881282806396, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4684, "completion_tokens": 468, "error": "" }, { "test_index": 18, "latency_s": 1.6154577732086182, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4640, "completion_tokens": 514, "error": "" }, { "test_index": 19, "latency_s": 1.7669298648834229, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4452, "completion_tokens": 553, "error": "" }, { "test_index": 20, "latency_s": 2.164731979370117, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4633, "completion_tokens": 773, "error": "" }, { "test_index": 21, "latency_s": 1.6445770263671875, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4684, "completion_tokens": 491, "error": "" }, { "test_index": 22, "latency_s": 3.101768970489502, "num_facts": 13, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4584, "completion_tokens": 725, "error": "" }, { "test_index": 23, "latency_s": 5.701862812042236, "num_facts": 8, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4630, "completion_tokens": 408, "error": "" }, { "test_index": 24, "latency_s": 7.741659879684448, "num_facts": 10, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4605, "completion_tokens": 528, "error": "" }, { "test_index": 25, "latency_s": 12.513216018676758, "num_facts": 14, "valid_json": true, "success": true, "retries": 2, "prompt_tokens": 4633, "completion_tokens": 744, "error": "" }, { "test_index": 26, "latency_s": 13.684968948364258, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 95552, Requested 4641. Please try again in 115.8ms. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 27, "latency_s": 3.4779279232025146, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4370, "completion_tokens": 484, "error": "" }, { "test_index": 28, "latency_s": 6.716690301895142, "num_facts": 17, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4584, "completion_tokens": 951, "error": "" }, { "test_index": 29, "latency_s": 10.700183153152466, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 99875, Requested 4680. Please try again in 2.733s. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 30, "latency_s": 8.811289072036743, "num_facts": 12, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4485, "completion_tokens": 620, "error": "" }, { "test_index": 31, "latency_s": 5.043259859085083, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4568, "completion_tokens": 635, "error": "" }, { "test_index": 32, "latency_s": 5.7830491065979, "num_facts": 8, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4630, "completion_tokens": 408, "error": "" }, { "test_index": 33, "latency_s": 11.457019805908203, "num_facts": 13, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4695, "completion_tokens": 662, "error": "" }, { "test_index": 34, "latency_s": 8.163962125778198, "num_facts": 14, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4524, "completion_tokens": 798, "error": "" }, { "test_index": 35, "latency_s": 10.565417051315308, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 95634, Requested 4701. Please try again in 201ms. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 36, "latency_s": 17.835405111312866, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 98632, Requested 4636. Please try again in 1.9608s. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 37, "latency_s": 5.958096981048584, "num_facts": 12, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4485, "completion_tokens": 617, "error": "" }, { "test_index": 38, "latency_s": 8.866497993469238, "num_facts": 10, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4395, "completion_tokens": 500, "error": "" }, { "test_index": 39, "latency_s": 5.701374053955078, "num_facts": 15, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4520, "completion_tokens": 794, "error": "" }, { "test_index": 40, "latency_s": 9.090084075927734, "num_facts": 12, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4452, "completion_tokens": 553, "error": "" }, { "test_index": 41, "latency_s": 12.520002126693726, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 95899, Requested 4521. Please try again in 252ms. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 42, "latency_s": 6.3448240756988525, "num_facts": 14, "valid_json": true, "success": true, "retries": 1, "prompt_tokens": 4370, "completion_tokens": 742, "error": "" }, { "test_index": 43, "latency_s": 14.73279094696045, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 95454, Requested 4680. Please try again in 80.4ms. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 44, "latency_s": 10.723397016525269, "num_facts": 0, "valid_json": false, "success": false, "retries": 3, "prompt_tokens": 0, "completion_tokens": 0, "error": "attempt 3: exception: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01hqv4z7ajf4yt2a7bdbdew16n` service tier `on_demand` on tokens per minute (TPM): Limit 100000, Used 99977, Requested 4684. Please try again in 2.796599999s. Need more tokens? Visit https://groq.com/self-serve-support/ to request higher limits.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}" }, { "test_index": 45, "latency_s": 3.0316009521484375, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4485, "completion_tokens": 717, "error": "" }, { "test_index": 46, "latency_s": 6.128647327423096, "num_facts": 10, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4568, "completion_tokens": 635, "error": "" }, { "test_index": 47, "latency_s": 3.4611830711364746, "num_facts": 18, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4683, "completion_tokens": 890, "error": "" }, { "test_index": 48, "latency_s": 12.769873857498169, "num_facts": 15, "valid_json": true, "success": true, "retries": 2, "prompt_tokens": 4695, "completion_tokens": 758, "error": "" }, { "test_index": 49, "latency_s": 14.5935218334198, "num_facts": 15, "valid_json": true, "success": true, "retries": 2, "prompt_tokens": 4633, "completion_tokens": 777, "error": "" }, { "test_index": 50, "latency_s": 3.860891819000244, "num_facts": 11, "valid_json": true, "success": true, "retries": 0, "prompt_tokens": 4478, "completion_tokens": 493, "error": "" } ] }, "reflect": { "accuracy": 73.1, "correct": 177, "total": 242, "avg_latency_s": 76.005, "model_id": "llama-3.3-70b-versatile", "provider_id": "groq", "sample_id": "conv-43" } }