{ "id": "c3d4e5f6-a7b8-9012-cdef-123456789012", "dataset_name": "qa-evaluation-dataset", "dataset_id": "d4e5f6a7-b8c9-0123-defa-234567890123", "project_name": "my-rag-chatbot", "project_id": "b2c3d4e5-f6a7-8901-bcde-f12345678901", "name": "gpt-4o-vs-claude-3-5-baseline", "metadata": { "model": "gpt-4o", "temperature": 0.0, "retrieval_top_k": 5, "run_by": "team-evaluation" }, "tags": ["baseline", "gpt-4o", "rag", "q2-2026"], "type": "regular", "evaluation_method": "dataset", "feedback_scores": [ { "name": "correctness", "value": 0.87, "count": 100 }, { "name": "relevance", "value": 0.91, "count": 100 }, { "name": "hallucination", "value": 0.06, "count": 100 } ], "created_at": "2026-06-13T09:00:00.000Z", "last_updated_at": "2026-06-13T09:45:23.000Z", "created_by": "kin@apievangelist.com", "last_updated_by": "kin@apievangelist.com", "trace_count": 100, "duration": 2723000, "total_estimated_cost": 0.1845 }