{ "id": "scorer_faithfulness_v2", "name": "faithfulness", "description": "Measures whether the answer is grounded in the retrieved context (RAG faithfulness). Implemented as an LLM-as-a-judge call against a strict rubric.", "type": "llm_judge", "implementation": "judge:judge_groundedness_v1", "judge_model": "gpt-5", "judge_prompt": "Score how well the answer is grounded in the retrieved context on a 0.0-1.0 scale. Penalize unsupported claims.", "scale": { "min": 0, "max": 1, "step": 0.01, "kind": "continuous" }, "threshold": 0.8, "rubric": "1.0 = every claim supported. 0.5 = mostly supported with minor invented additions. 0.0 = contradicts or invents content.", "tags": ["rag", "groundedness", "faithfulness"] }