{ "model_name": "GLM-5", "model_organization": "Zhipu AI", "submitting_organization": "Sierra", "submission_date": "2026-03-02", "contact_info": { "email": "victor@sierra.ai, ben.s@sierra.ai", "name": "Sierra Research Team" }, "is_new": false, "submission_type": "standard", "trajectories_available": true, "trajectory_files": { "airline": "glm-5_enabled_airline_gpt-5.2_4trials.json", "retail": "glm-5_enabled_retail_gpt-5.2_4trials.json", "telecom": "glm-5_enabled_telecom_gpt-5.2_4trials.json", "banking_knowledge": "glm-5_enabled_banking_knowledge_gpt-5.2_4trials.json" }, "references": [], "results": { "airline": { "pass_1": 82.5, "pass_2": 76.0, "pass_3": 72.0, "pass_4": 70.0, "cost": null }, "retail": { "pass_1": 73.68, "pass_2": 60.38, "pass_3": 51.1, "pass_4": 43.86, "cost": null }, "telecom": { "pass_1": 86.84, "pass_2": 76.32, "pass_3": 68.2, "pass_4": 62.28, "cost": null }, "banking_knowledge": { "pass_1": 9.79, "pass_2": 7.22, "pass_3": 5.67, "pass_4": 3.09, "cost": null, "retrieval_config": "text-emb-3-large" } }, "reasoning_effort": "enabled", "methodology": { "evaluation_date": "2026-02-27", "tau2_bench_version": "0.2.1-dev", "user_simulator": "gpt-5.2", "notes": "Evaluated using GLM-5 with thinking enabled. User simulator: gpt-5.2 with reasoning_effort: low. 4 trials. Seed: 300. Temperature: 1.0, top_p: 0.95. Infrastructure errors treated as failures.", "verification": { "modified_prompts": false, "omitted_questions": false, "details": "Standard tau-bench scaffold." } }, "model_release": { "release_date": "2026-02-11", "announcement_url": "https://docs.z.ai/guides/llm/glm-5", "announcement_title": "GLM-5 - Overview - Z.AI Developer Document" } }