version: v0.3 listeners: - address: 0.0.0.0 name: http-8899 port: 8899 timeout: 1200s providers: defaults: default_model: qwen/qwen3.5-rocm default_reasoning_effort: low reasoning_families: qwen3: parameter: enable_thinking type: chat_template_kwargs models: - backend_refs: - endpoint: vllm:8000 name: vllm_primary protocol: http weight: 1 name: qwen/qwen3.5-rocm pricing: completion_per_1m: 0 currency: USD prompt_per_1m: 0 provider_model_id: qwen/qwen3.5-rocm reasoning_family: qwen3 - backend_refs: - endpoint: vllm:8000 name: vllm_primary protocol: http weight: 1 name: google/gemini-2.5-flash-lite pricing: completion_per_1m: 0.04 currency: USD prompt_per_1m: 0.01 provider_model_id: google/gemini-2.5-flash-lite reasoning_family: qwen3 - backend_refs: - endpoint: vllm:8000 name: vllm_primary protocol: http weight: 1 name: google/gemini-3.1-pro pricing: completion_per_1m: 1.92 currency: USD prompt_per_1m: 0.48 provider_model_id: google/gemini-3.1-pro reasoning_family: qwen3 - backend_refs: - endpoint: vllm:8000 name: vllm_primary protocol: http weight: 1 name: openai/gpt5.4 pricing: completion_per_1m: 4.8 currency: USD prompt_per_1m: 1.2 provider_model_id: openai/gpt5.4 reasoning_family: qwen3 - backend_refs: - endpoint: vllm:8000 name: vllm_primary protocol: http weight: 1 name: anthropic/claude-opus-4.6 pricing: completion_per_1m: 7.2 currency: USD prompt_per_1m: 1.8 provider_model_id: anthropic/claude-opus-4.6 reasoning_family: qwen3 routing: decisions: - description: Premium-only route for high-value legal and compliance analysis. 
modelRefs: - model: anthropic/claude-opus-4.6 reasoning_effort: high use_reasoning: true name: premium_legal plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 260 rules: conditions: - conditions: - name: law type: domain - name: legal_risk_markers type: keyword - name: premium_legal_analysis type: embedding operator: OR - conditions: - name: premium_legal_analysis type: embedding - name: verification_required type: projection - name: legal_risk:medium type: complexity - name: legal_risk:hard type: complexity operator: OR operator: AND tier: 1 - description: Narrow premium reasoning lane for formal math proofs and derivations. modelRefs: - model: openai/gpt5.4 reasoning_effort: high use_reasoning: true name: formal_math_proof plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 252 rules: conditions: - name: math type: domain - name: reasoning_request_markers type: keyword - conditions: - conditions: - name: verification_required type: projection - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: needs_fact_check type: fact_check - name: architecture_markers type: keyword - name: agentic_request_markers type: keyword - name: code_request_markers type: keyword - name: implementation_markers type: keyword - name: architecture_design type: embedding - name: complex_stem type: embedding - name: agentic_workflows type: embedding - name: premium_legal_analysis type: embedding operator: OR operator: NOT operator: AND tier: 2 - description: Deep philosophy and first-principles reasoning outside the narrow formal-math overlay. 
modelRefs: - model: google/gemini-3.1-pro reasoning_effort: high use_reasoning: true name: reasoning_deep plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 250 rules: conditions: - conditions: - conditions: - name: math type: domain - conditions: - name: reasoning_request_markers type: keyword operator: NOT - conditions: - name: balance_reasoning type: projection - name: math_task:medium type: complexity operator: OR operator: AND - conditions: - name: philosophy type: domain - conditions: - name: reasoning_general_en type: embedding - name: reasoning_general_zh type: embedding - name: research_synthesis type: embedding operator: OR operator: AND - conditions: - conditions: - name: reasoning_general_en type: embedding - name: reasoning_general_zh type: embedding - name: research_synthesis type: embedding - name: research_request_markers type: keyword operator: OR - conditions: - name: balance_medium type: projection - name: balance_complex type: projection - name: balance_reasoning type: projection operator: OR operator: AND operator: OR - conditions: - conditions: - name: law type: domain - name: health type: domain - name: verification_required type: projection - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: needs_fact_check type: fact_check - name: architecture_markers type: keyword - name: agentic_request_markers type: keyword - name: code_request_markers type: keyword - name: implementation_markers type: keyword - name: architecture_design type: embedding - name: complex_stem type: embedding - name: agentic_workflows type: embedding - name: premium_legal_analysis type: embedding operator: OR operator: NOT operator: AND tier: 3 - description: High-structure execution 
plans, systems design, and specialist STEM synthesis. modelRefs: - model: google/gemini-3.1-pro reasoning_effort: high use_reasoning: true name: complex_specialist plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 242 rules: conditions: - conditions: - conditions: - conditions: - name: agentic_workflows type: embedding - name: agentic_request_markers type: keyword operator: OR - conditions: - name: multi_step_markers type: keyword - name: ordered_workflow type: structure - name: numbered_steps type: structure - name: first_then_flow type: structure - name: constraint_dense type: structure - name: format_directive_dense type: structure operator: OR operator: AND - conditions: - name: computer science type: domain - conditions: - name: architecture_design type: embedding - name: architecture_markers type: keyword operator: OR operator: AND - conditions: - conditions: - name: physics type: domain - name: chemistry type: domain - name: biology type: domain - name: engineering type: domain - name: computer science type: domain operator: OR - name: complex_stem type: embedding operator: AND operator: OR - conditions: - name: balance_medium type: projection - name: balance_complex type: projection - name: balance_reasoning type: projection operator: OR - conditions: - conditions: - name: fast_qa_en type: embedding - name: fast_qa_zh type: embedding - name: simple_request_markers type: keyword - name: creative_request_markers type: keyword - name: creative_tasks type: embedding operator: OR operator: NOT operator: AND tier: 4 - description: Explicit correction requests on evidence-sensitive follow-ups. 
modelRefs: - model: google/gemini-2.5-flash-lite use_reasoning: false name: feedback_wrong_answer_verified plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 232 rules: conditions: - name: feedback_correction_verified type: projection - conditions: - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask operator: OR - conditions: - name: code_request_markers type: keyword operator: NOT operator: AND tier: 5 - description: Low-medium cost coding, debugging, refactoring, and technical Q&A. modelRefs: - model: qwen/qwen3.5-rocm reasoning_effort: medium use_reasoning: true name: medium_code_general plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 220 rules: conditions: - conditions: - name: code_request_markers type: keyword - name: implementation_markers type: keyword - name: code_general type: embedding operator: OR - conditions: - name: balance_medium type: projection - name: balance_complex type: projection - conditions: - name: balance_simple type: projection - conditions: - name: urgency_elevated type: projection - name: exclamation_emphasis type: structure operator: OR operator: AND operator: OR - conditions: - conditions: - name: agentic_request_markers type: keyword - name: architecture_markers type: keyword - name: creative_request_markers type: keyword - name: creative_tasks type: embedding - name: architecture_design type: embedding - name: agentic_workflows type: embedding operator: OR operator: NOT operator: AND tier: 6 - description: Conservative route for evidence-sensitive health and medical guidance. 
modelRefs: - model: google/gemini-3.1-pro reasoning_effort: medium use_reasoning: true name: verified_health plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 218 rules: conditions: - name: health type: domain - conditions: - name: verification_required type: projection - name: verification_markers type: keyword - name: needs_fact_check type: fact_check - name: reference_heavy_markers type: keyword - name: evidence_synthesis:hard type: complexity operator: OR - conditions: - name: health_guidance type: embedding - name: balance_medium type: projection - name: balance_complex type: projection - name: balance_reasoning type: projection operator: OR - conditions: - conditions: - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask operator: OR operator: NOT operator: AND tier: 7 - description: Evidence-sensitive business, economics, history, and psychology explanation. 
modelRefs: - model: google/gemini-2.5-flash-lite use_reasoning: false name: verified_explainer plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 214 rules: conditions: - conditions: - name: business type: domain - name: economics type: domain - name: history type: domain - name: psychology type: domain - name: business_analysis type: embedding - name: history_explainer type: embedding - name: psychology_support type: embedding - name: history_topic_markers type: keyword operator: OR - conditions: - name: verification_required type: projection - name: evidence_synthesis:hard type: complexity - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: needs_fact_check type: fact_check operator: OR - conditions: - conditions: - name: fast_qa_en type: embedding - name: fast_qa_zh type: embedding - name: simple_request_markers type: keyword - name: health type: domain - name: law type: domain - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask operator: OR operator: NOT operator: AND tier: 8 - description: Cheap clarification lane for explicit restatements and single-turn re-asks. 
modelRefs: - model: qwen/qwen3.5-rocm use_reasoning: false name: feedback_need_clarification plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 212 rules: conditions: - name: feedback_clarification_overlay type: projection - conditions: - conditions: - name: feedback_correction_verified type: projection - name: verification_required type: projection - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: needs_fact_check type: fact_check - name: code_request_markers type: keyword operator: OR operator: NOT operator: AND tier: 9 - description: Low-cost business, history, and psychology explanation when verification pressure is absent. modelRefs: - model: qwen/qwen3.5-rocm reasoning_effort: medium use_reasoning: true name: medium_explainer plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 208 rules: conditions: - conditions: - name: business type: domain - name: economics type: domain - name: history type: domain - name: psychology type: domain - name: business_analysis type: embedding - name: history_explainer type: embedding - name: psychology_support type: embedding - name: history_topic_markers type: keyword operator: OR - conditions: - name: balance_medium type: projection - name: balance_complex type: projection - conditions: - name: balance_simple type: projection - conditions: - name: medium_context type: context - name: history_topic_markers type: keyword - name: evidence_synthesis:medium type: complexity operator: OR operator: AND operator: OR - conditions: - conditions: - name: verification_required type: projection - name: needs_fact_check type: fact_check - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: evidence_synthesis:hard type: 
complexity - name: health type: domain - name: law type: domain - name: fast_qa_en type: embedding - name: fast_qa_zh type: embedding - name: simple_request_markers type: keyword - name: reasoning_request_markers type: keyword - name: research_request_markers type: keyword - name: creative_request_markers type: keyword - name: reasoning_general_en type: embedding - name: reasoning_general_zh type: embedding - name: research_synthesis type: embedding - name: creative_tasks type: embedding operator: OR operator: NOT operator: AND tier: 10 - description: Low-cost creative writing, copywriting, and interpersonal drafting. modelRefs: - model: qwen/qwen3.5-rocm use_reasoning: false name: medium_creative plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 200 rules: conditions: - conditions: - name: creative_request_markers type: keyword - name: creative_tasks type: embedding operator: OR - conditions: - name: balance_simple type: projection - name: balance_medium type: projection operator: OR - conditions: - conditions: - name: fast_qa_en type: embedding - name: fast_qa_zh type: embedding - name: health_guidance type: embedding - name: code_general type: embedding - name: architecture_design type: embedding - name: agentic_workflows type: embedding - name: premium_legal_analysis type: embedding - name: verification_required type: projection - name: needs_fact_check type: fact_check - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword operator: OR operator: NOT operator: AND tier: 11 - description: Short English or Chinese factual questions, including explicit verification asks, that should stay on the cheap lane. 
modelRefs: - model: qwen/qwen3.5-rocm use_reasoning: false name: fast_qa plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 184 rules: conditions: - conditions: - conditions: - name: fast_qa_en type: embedding - name: en type: language operator: AND - conditions: - name: fast_qa_zh type: embedding - name: zh type: language operator: AND - name: simple_request_markers type: keyword operator: OR - name: short_context type: context - conditions: - conditions: - conditions: - name: balance_simple type: projection - name: balance_medium type: projection operator: OR - conditions: - name: verification_required type: projection - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword operator: OR - conditions: - conditions: - name: health type: domain - name: law type: domain - name: code_request_markers type: keyword - name: implementation_markers type: keyword - name: urgency_elevated type: projection - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask operator: OR operator: NOT operator: AND - conditions: - name: balance_simple type: projection - conditions: - conditions: - name: verification_required type: projection - name: needs_fact_check type: fact_check - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: code_request_markers type: keyword - name: implementation_markers type: keyword - name: urgency_elevated type: projection - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask - name: feedback_clarification_overlay type: projection operator: OR operator: NOT operator: AND operator: OR operator: AND tier: 12 - description: Lowest-cost fallback for everyday traffic and non-specialized requests. 
modelRefs: - model: qwen/qwen3.5-rocm use_reasoning: false name: simple_general plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 170 rules: conditions: - conditions: - name: short_context type: context - conditions: - name: balance_simple type: projection - name: balance_medium type: projection operator: OR - conditions: - name: general_chat_fallback type: embedding - name: low_question_density type: structure operator: OR - conditions: - conditions: - name: simple_request_markers type: keyword - name: verification_required type: projection - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask - name: feedback_clarification_overlay type: projection - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword - name: code_request_markers type: keyword - name: architecture_markers type: keyword - name: agentic_request_markers type: keyword - name: creative_request_markers type: keyword operator: OR operator: NOT operator: AND - conditions: - name: medium_context type: context - name: other type: domain - conditions: - name: balance_simple type: projection - name: balance_medium type: projection operator: OR - conditions: - conditions: - name: verification_required type: projection - name: wrong_answer type: user_feedback - name: correction_feedback_markers type: keyword - name: likely_dissatisfied type: reask - name: feedback_clarification_overlay type: projection - name: verification_markers type: keyword - name: reference_heavy_markers type: keyword operator: OR operator: NOT operator: AND operator: OR tier: 13 - description: Absolute final fallback that guarantees a routing decision when no earlier balance lane matches. 
modelRefs: - model: qwen/qwen3.5-rocm use_reasoning: false name: casual_chat plugins: - configuration: capture_request_body: true capture_response_body: true enabled: true max_body_bytes: 4096 max_records: 100000 type: router_replay priority: 10 rules: operator: AND tier: 14 modelCards: - capabilities: - legal_analysis - policy_review - high_risk_review context_window_size: 262144 description: PREMIUM tier alias reserved for legal and high-risk analysis. modality: text name: anthropic/claude-opus-4.6 quality_score: 0.94 tags: - tier:premium - cost:highest - specialty:legal - capabilities: - verified_explanation - source_backed_correction - nuanced_explanation context_window_size: 262144 description: MEDIUM tier alias for low-cost verified explanation and correction tasks. modality: text name: google/gemini-2.5-flash-lite quality_score: 0.68 tags: - tier:medium - cost:low - specialty:verified - capabilities: - architecture - stem_analysis - long_context - general_reasoning context_window_size: 262144 description: COMPLEX tier alias for systems design, hard STEM, health guidance, and deep general reasoning. modality: text name: google/gemini-3.1-pro quality_score: 0.82 tags: - tier:complex - cost:upper_mid - specialty:complex_generalist - capabilities: - reasoning - proofs - formal_derivation context_window_size: 262144 description: REASONING tier alias for narrow formal math proofs and derivations. modality: text name: openai/gpt5.4 quality_score: 0.9 tags: - tier:reasoning - cost:high - specialty:formal_proof - capabilities: - fast_qa - self_hosted - concise_answers - general_chat - creative_drafting context_window_size: 262144 description: SIMPLE tier alias and free self-hosted default for fast QA, broad fallback, creative drafting, and most low-cost traffic. 
modality: text name: qwen/qwen3.5-rocm quality_score: 0.58 tags: - tier:simple - cost:free - deployment:self_hosted - traffic:default projections: mappings: - calibration: method: sigmoid_distance slope: 10 method: threshold_bands name: difficulty_band outputs: - lt: 0.18 name: balance_simple - gte: 0.18 lt: 0.48 name: balance_medium - gte: 0.48 lt: 0.82 name: balance_complex - gte: 0.82 name: balance_reasoning source: difficulty_score - calibration: method: sigmoid_distance slope: 12 method: threshold_bands name: verification_band outputs: - lt: 0.35 name: verification_standard - gte: 0.35 name: verification_required source: verification_pressure - calibration: method: sigmoid_distance slope: 12 method: threshold_bands name: feedback_correction_band outputs: - lt: 0.34 name: feedback_correction_standard - gte: 0.34 name: feedback_correction_verified source: feedback_correction_pressure - calibration: method: sigmoid_distance slope: 12 method: threshold_bands name: feedback_clarification_band outputs: - lt: 0.26 name: feedback_clarification_standard - gte: 0.26 name: feedback_clarification_overlay source: feedback_clarification_pressure - calibration: method: sigmoid_distance slope: 12 method: threshold_bands name: urgency_band outputs: - lt: 0.24 name: urgency_standard - gte: 0.24 name: urgency_elevated source: urgency_pressure partitions: - default: other members: - biology - business - chemistry - computer science - economics - engineering - health - history - law - math - other - philosophy - physics - psychology name: balance_domain_partition semantics: softmax_exclusive temperature: 0.1 - default: general_chat_fallback members: - agentic_workflows - architecture_design - business_analysis - code_general - complex_stem - creative_tasks - fast_qa_en - fast_qa_zh - general_chat_fallback - health_guidance - history_explainer - premium_legal_analysis - psychology_support - reasoning_general_en - reasoning_general_zh - research_synthesis name: 
balance_intent_partition semantics: softmax_exclusive temperature: 0.18 scores: - inputs: - name: simple_request_markers type: keyword weight: -0.26 - name: fast_qa_en type: embedding value_source: confidence weight: -0.18 - name: fast_qa_zh type: embedding value_source: confidence weight: -0.18 - name: short_context type: context weight: -0.1 - name: medium_context type: context weight: 0.03 - name: long_context type: context weight: 0.18 - name: ordered_workflow type: structure weight: 0.12 - name: numbered_steps type: structure weight: 0.08 - name: first_then_flow type: structure weight: 0.1 - name: constraint_dense type: structure weight: 0.06 - name: format_directive_dense type: structure weight: 0.04 - name: low_question_density type: structure weight: -0.05 - name: reasoning_request_markers type: keyword value_source: confidence weight: 0.2 - name: multi_step_markers type: keyword value_source: confidence weight: 0.14 - name: code_request_markers type: keyword value_source: confidence weight: 0.12 - name: architecture_markers type: keyword value_source: confidence weight: 0.12 - name: research_request_markers type: keyword value_source: confidence weight: 0.11 - name: agentic_request_markers type: keyword value_source: confidence weight: 0.16 - name: implementation_markers type: keyword value_source: confidence weight: 0.08 - name: reasoning_general_en type: embedding value_source: confidence weight: 0.18 - name: reasoning_general_zh type: embedding value_source: confidence weight: 0.18 - name: agentic_workflows type: embedding value_source: confidence weight: 0.2 - name: architecture_design type: embedding value_source: confidence weight: 0.16 - name: complex_stem type: embedding value_source: confidence weight: 0.14 - name: research_synthesis type: embedding value_source: confidence weight: 0.14 - name: premium_legal_analysis type: embedding value_source: confidence weight: 0.16 - name: business_analysis type: embedding value_source: confidence weight: 
0.08 - name: history_explainer type: embedding value_source: confidence weight: 0.05 - name: psychology_support type: embedding value_source: confidence weight: 0.05 - name: general_reasoning:medium type: complexity weight: 0.08 - name: general_reasoning:hard type: complexity weight: 0.2 - name: code_task:medium type: complexity weight: 0.08 - name: code_task:hard type: complexity weight: 0.18 - name: math_task:medium type: complexity weight: 0.1 - name: math_task:hard type: complexity weight: 0.22 - name: legal_risk:hard type: complexity weight: 0.18 - name: agentic_delivery:medium type: complexity weight: 0.1 - name: agentic_delivery:hard type: complexity weight: 0.22 - name: evidence_synthesis:medium type: complexity weight: 0.08 - name: evidence_synthesis:hard type: complexity weight: 0.18 method: weighted_sum name: difficulty_score - inputs: - name: needs_fact_check type: fact_check weight: 0.28 - name: verification_markers type: keyword value_source: confidence weight: 0.22 - name: reference_heavy_markers type: keyword value_source: confidence weight: 0.18 - name: research_request_markers type: keyword value_source: confidence weight: 0.1 - name: legal_risk_markers type: keyword value_source: confidence weight: 0.08 - name: health type: domain weight: 0.12 - name: law type: domain weight: 0.14 - name: business type: domain weight: 0.05 - name: history type: domain weight: 0.05 - name: wrong_answer type: user_feedback weight: 0.1 - name: correction_feedback_markers type: keyword value_source: confidence weight: 0.06 - name: long_context type: context weight: 0.04 method: weighted_sum name: verification_pressure - inputs: - name: wrong_answer type: user_feedback weight: 0.14 - name: correction_feedback_markers type: keyword value_source: confidence weight: 0.34 - name: likely_dissatisfied type: reask value_source: confidence weight: 0.1 - name: needs_fact_check type: fact_check weight: 0.12 - name: verification_markers type: keyword value_source: confidence 
weight: 0.18 - name: reference_heavy_markers type: keyword value_source: confidence weight: 0.16 - name: evidence_synthesis:medium type: complexity weight: 0.08 - name: evidence_synthesis:hard type: complexity weight: 0.16 - name: short_context type: context weight: 0.04 - name: medium_context type: context weight: 0.02 - name: code_request_markers type: keyword value_source: confidence weight: -0.22 - name: implementation_markers type: keyword value_source: confidence weight: -0.16 method: weighted_sum name: feedback_correction_pressure - inputs: - name: need_clarification type: user_feedback weight: 0.14 - name: clarification_feedback_markers type: keyword value_source: confidence weight: 0.34 - name: likely_dissatisfied type: reask value_source: confidence weight: 0.24 - name: short_context type: context weight: 0.08 - name: medium_context type: context weight: 0.04 - name: wrong_answer type: user_feedback weight: -0.14 - name: correction_feedback_markers type: keyword value_source: confidence weight: -0.18 - name: verification_markers type: keyword value_source: confidence weight: -0.16 - name: reference_heavy_markers type: keyword value_source: confidence weight: -0.14 - name: needs_fact_check type: fact_check weight: -0.14 - name: code_request_markers type: keyword value_source: confidence weight: -0.2 - name: implementation_markers type: keyword value_source: confidence weight: -0.14 - name: simple_request_markers type: keyword value_source: confidence weight: -0.16 - name: fast_qa_en type: embedding value_source: confidence weight: -0.18 - name: fast_qa_zh type: embedding value_source: confidence weight: -0.18 method: weighted_sum name: feedback_clarification_pressure - inputs: - name: urgency_markers type: keyword value_source: confidence weight: 0.24 - name: exclamation_emphasis type: structure value_source: confidence weight: 0.16 method: weighted_sum name: urgency_pressure signals: complexity: - description: General difficulty boundary for simple 
answers versus synthesis-heavy reasoning. easy: candidates: - brief definition - quick summary - simple explanation - short direct answer - rewrite this sentence - 简单解释一下 hard: candidates: - compare several approaches and justify the trade-offs - build a rigorous step-by-step argument - synthesize constraints into a plan - root-cause analysis for a complex failure - derive the answer from first principles - 严格论证并比较取舍 name: general_reasoning threshold: 0.14 - composer: conditions: - name: computer science type: domain operator: AND description: Coding and software-engineering difficulty for cheap code help versus hard systems work. easy: candidates: - explain what this function does - fix a small bug in this code snippet - write a helper function - convert this loop to a list comprehension - explain the stack trace briefly hard: candidates: - design a distributed system with failure handling - debug a race condition in production - optimize a database query plan at scale - refactor a large service boundary safely - migrate a monolith to microservices name: code_task threshold: 0.12 - composer: conditions: - name: math type: domain operator: AND description: Math difficulty for simple calculations versus formal proofs and derivations. easy: candidates: - what is 2 plus 2 - solve this simple linear equation - calculate a percentage - basic geometry area question - simple arithmetic word problem hard: candidates: - prove the theorem rigorously - derive the equation step by step - analyze asymptotic behavior formally - solve a differential equation with proof - prove this by contradiction name: math_task threshold: 0.12 - composer: conditions: - name: law type: domain operator: AND description: Legal risk boundary for informational law questions versus premium-risk analysis. 
easy: candidates: - what does NDA mean - define arbitration clause - explain what compliance means - brief overview of a privacy policy hard: candidates: - analyze indemnity, liability, and jurisdiction risk - compare two compliance strategies and regulatory exposure - draft a legal-risk memo for cross-border operations - interpret contract clauses with risk trade-offs name: legal_risk threshold: 0.12 - description: Workflow and execution difficulty boundary for agentic requests. easy: candidates: - give me a short checklist - provide a simple one-step setup guide - write a small implementation plan - suggest the next step only - give me a brief task list hard: candidates: - create a migration plan with checkpoints, rollback, and validation - troubleshoot this production issue until the root cause is fixed - design an execution workflow with milestones, owners, and guardrails - automate the process and include verification after each phase - break this project into tasks, dependencies, and acceptance checks name: agentic_delivery threshold: 0.12 - composer: conditions: - name: business type: domain - name: economics type: domain - name: health type: domain - name: history type: domain - name: law type: domain - name: philosophy type: domain - name: psychology type: domain operator: OR description: Evidence-heavy research boundary for source-backed synthesis versus lighter overview tasks. 
easy: candidates: - give a quick overview of the topic - summarize one article briefly - explain the term in simple language - list the main idea without detailed sourcing - provide a short summary only hard: candidates: - compare several sources and recommend the strongest evidence-backed position - write a source-backed memo that synthesizes multiple references - survey the literature and explain conflicting evidence with citations - compare policy or historical interpretations and justify the conclusion - synthesize several studies into one decision recommendation name: evidence_synthesis threshold: 0.12 context: - max_tokens: "999" min_tokens: "0" name: short_context - max_tokens: "7999" min_tokens: 1K name: medium_context - max_tokens: 256K min_tokens: 8K name: long_context domains: - description: Programming, software systems, debugging, APIs, and infrastructure. name: computer science - description: Mathematics, statistics, and quantitative reasoning. name: math - description: Physics and physical sciences. name: physics - description: Chemistry and chemical sciences. name: chemistry - description: Biology and life sciences. name: biology - description: Engineering and technical problem solving. name: engineering - description: Health, medicine, clinical guidance, and patient-facing information. name: health - description: Business, product, operations, and management topics. name: business - description: Economics, pricing, incentives, and market dynamics. name: economics - description: Legal, compliance, policy, and regulatory topics. name: law - description: Psychology, behavior, and mental models. name: psychology - description: Philosophy, ethics, and abstract argumentation. name: philosophy - description: Historical explanation, comparison, and context. name: history - description: General knowledge and miscellaneous topics. name: other embeddings: - aggregation_method: max candidates: - Who are you? - What does CPU stand for? 
- What is the capital of France? - Briefly explain what an API is. - What is the boiling point of water? - What is 2 + 2? - Does light travel faster than sound? name: fast_qa_en threshold: 0.72 - aggregation_method: max candidates: - 你是谁? - CPU 是什么意思? - 法国的首都是哪里? - 什么是 API?请简单解释。 - 水的沸点是多少? - 一年有几个月? - 太阳系最大的行星是什么? name: fast_qa_zh threshold: 0.72 - aggregation_method: max candidates: - Brainstorm a launch campaign for a new tea brand. - Write a short poem about late-night coding. - Rewrite this paragraph to sound more cinematic. - Create three slogan options for a climate startup. - 帮我想一个更有画面感的品牌故事。 name: creative_tasks threshold: 0.74 - aggregation_method: max candidates: - Compare two pricing strategies for a B2B SaaS product. - Analyze CAC, LTV, and retention trade-offs for a subscription business. - Draft a market-entry strategy for a new AI tooling startup. - Evaluate org design options for a fast-growing product team. - Compare enterprise SaaS churn benchmarks and explain the trade-offs. name: business_analysis threshold: 0.75 - aggregation_method: max candidates: - Explain why the Roman Republic collapsed. - Explain why the Ming dynasty fell. - Compare the causes of the Roman Empire's decline with later empires. - Explain how industrialization changed political power in Europe. - Analyze the historical consequences of the Treaty of Versailles. - 总结明治维新对日本国家能力的长期影响。 name: history_explainer threshold: 0.75 - aggregation_method: max candidates: - Explain why people procrastinate and what interventions usually help. - Explain confirmation bias and what strategies help reduce it. - Explain cognitive biases that affect negotiation outcomes. - Compare attachment styles and how they affect adult relationships. - Analyze burnout patterns in high-pressure knowledge work. - 帮我解释拖延背后的常见心理机制。 name: psychology_support threshold: 0.75 - aggregation_method: max candidates: - Explain common causes of chest pain and when someone should seek urgent care. 
- Compare evidence-backed treatment options for type 2 diabetes management. - Summarize safe, clinically grounded steps for lowering high blood pressure. - 解释常见呼吸道感染症状的区别,以及何时应尽快就医。 name: health_guidance threshold: 0.77 - aggregation_method: max candidates: - Debug this Python stack trace and explain the likely bug. - Refactor this TypeScript function to reduce duplication. - Write a SQL query to aggregate weekly active users. - Explain the difference between sync and async execution. name: code_general threshold: 0.75 - aggregation_method: max candidates: - Compare modeling approaches for turbulent fluid simulation. - Explain the trade-offs between battery chemistries for grid storage. - Analyze a protein-folding pipeline and its computational bottlenecks. - Design an anomaly-detection method for sensor networks. name: complex_stem threshold: 0.77 - aggregation_method: max candidates: - Design a distributed rate limiter and explain failure modes. - Design a multi-region feature-flag service with storage boundaries, cache strategy, and consistency trade-offs. - Plan a migration from a monolith to event-driven microservices. - Propose a consistency strategy for a global payments platform. - Design a multi-tenant observability architecture with low latency. name: architecture_design threshold: 0.78 - aggregation_method: max candidates: - Create a migration plan with rollback, validation, and checkpoints. - Plan a zero-downtime migration with checkpoints, owners, rollback steps, and validation after each phase. - Troubleshoot the production issue and iterate until it is fixed. - Break this system redesign into phases, owners, and verification steps. - Design an execution workflow with concrete milestones and guardrails. - 给我一个分阶段实施方案,并包含校验与回滚步骤。 name: agentic_workflows threshold: 0.78 - aggregation_method: max candidates: - Compare several studies, cite the evidence, and explain the trade-offs. - Write a source-backed memo that synthesizes multiple references. 
- Survey the literature and recommend a position with supporting evidence. - Compare conflicting historical or policy interpretations with references. - 请综合多份资料并给出带依据的分析结论。 name: research_synthesis threshold: 0.77 - aggregation_method: max candidates: - Explain this in plain language for a general audience. - Give me a practical answer without assuming special expertise. - Help me understand the basics before we go deeper. - 用通俗的话解释清楚这个问题。 - 先给我一个适合普通用户的直接回答。 name: general_chat_fallback threshold: 0.72 - aggregation_method: max candidates: - Compare multiple approaches and recommend the best one with trade-offs. - Analyze the root cause step by step and justify the conclusion. - Build a rigorous argument from first principles. - Synthesize several constraints into a single recommendation. name: reasoning_general_en threshold: 0.78 - aggregation_method: max candidates: - 请从多个角度严格分析并给出结论。 - 请逐步推理,比较几种方案的取舍。 - 请从第一性原理出发建立完整论证。 - 请综合多个约束给出系统性建议。 name: reasoning_general_zh threshold: 0.78 - aggregation_method: max candidates: - Analyze indemnity, limitation of liability, and governing-law risks in this contract. - Assess the legal risk in this agreement by analyzing indemnification, limitation of liability, and compliance duties. - Draft a legal-risk memo for a cross-border data transfer policy. - Compare regulatory exposure under two compliance strategies. - 评估该合同条款中的责任分配、赔偿和合规风险。 name: premium_legal_analysis threshold: 0.8 fact_check: - description: Narrow factual-verification route for requests that explicitly ask for evidence or source checking. 
name: needs_fact_check keywords: - case_sensitive: false keywords: - that's wrong - this is wrong - that's incorrect - that is incorrect - incorrect - wrong answer - please correct - correct the explanation - correct this answer - try again - answer again - re-answer - fix your answer - you got this wrong - you got this wrong earlier - 错了 - 不对 - 回答错了 - 重新回答 - 再答一次 name: correction_feedback_markers operator: OR - case_sensitive: false keywords: - explain that more clearly - clarify your answer - give one simple example - restate that more simply - that was confusing - restate it more simply - walk me through that again - use one simple example - 讲清楚一点 - 说得更清楚 - 简单一点 - 举个例子 - 解释得更明白 name: clarification_feedback_markers operator: OR - case_sensitive: false keywords: - verify this - verify the claim - verify with a source - verify with sources - verify with a source whether - cite the source - cite a reliable source - cite sources - with sources - with a source - with citations - answer with citations - with reliable sources - with reputable sources - cite reliable historical sources - reliable historical sources - reputable historical sources - reliable medical sources - is this true - fact check this - verify with evidence - 核实一下 - 给出处 - 请给出处 - 这是真的吗 - 请核验 - 请给来源 - 请核实并给出处 - 请核实并给来源 - 请核验并给来源 name: verification_markers operator: OR - case_sensitive: false keywords: - cite sources - according to - reference the paper - compare the literature - use case law - relevant regulation - cite the rfc - support the answer with sources - support the answer with reputable sources - support the correction with sources - historical sources - reliable historical sources - reputable historical sources - medical sources - verify with a source whether - 请核实并给来源 - 请核实并给出处 - 引用资料 - 根据文献 - 参考法规 name: reference_heavy_markers operator: OR - bm25_threshold: 0.12 case_sensitive: false keywords: - contract clause - liability analysis - compliance memo - regulatory risk - legal exposure - 
indemnity - jurisdiction - lawsuit - 合同条款 - 合规分析 - 法律风险 - 责任认定 method: bm25 name: legal_risk_markers operator: OR - case_sensitive: false keywords: - quick answer - answer briefly - keep it short - one sentence - simple explanation - tl;dr - briefly explain - concise answer - 简短回答 - 简单解释 - 用一句话 - 直接回答 name: simple_request_markers operator: OR - case_sensitive: false keywords: - brainstorm - creative writing - write a story - write a poem - imagine - slogan - tagline - campaign idea - make it more vivid - rewrite creatively - 想点子 - 头脑风暴 - 写一个故事 - 写一首诗 name: creative_request_markers operator: OR - case_sensitive: false keywords: - step by step - phased plan - implementation plan - migration plan - checklist - roadmap - runbook - rollback plan - rollback steps - rollback criteria - checkpoints - owners - dependencies - verification gates - validation steps - validation after each phase - 分步骤 - 实施计划 - 路线图 - 检查清单 name: multi_step_markers operator: OR - case_sensitive: false keywords: - create a migration plan - propose an execution plan - refactor this system - troubleshoot the issue - implement the workflow - automate the process - break this into tasks - create a runbook - define rollback criteria - include verification gates - phase the rollout - 制定迁移计划 - 排查这个问题 - 实施这个方案 - 自动化这个流程 name: agentic_request_markers operator: OR - bm25_threshold: 0.12 case_sensitive: false keywords: - distributed system - microservices - rate limiter - high availability - consistency model - sharding strategy - event driven architecture - reliability architecture - system design - fault tolerance - observability architecture - service boundaries - storage boundaries - cache strategy - multi-region - control plane - data plane - tradeoffs - trade-offs method: bm25 name: architecture_markers operator: OR - bm25_threshold: 0.12 case_sensitive: false keywords: - build the system - create the service - implement the solution - design the workflow - develop the pipeline - deploy the stack - 
configure the service - write the implementation - 构建这个系统 - 创建这个服务 - 实现这个方案 - 设计这个流程 - 搭建这个管道 - 部署这个系统 method: bm25 name: implementation_markers operator: OR - bm25_threshold: 0.12 case_sensitive: false keywords: - code - function - class - stack trace - api - sql - python - typescript - debug - refactor - bug - endpoint - algorithm - recursion method: bm25 name: code_request_markers operator: OR - bm25_threshold: 0.08 case_sensitive: false keywords: - roman republic - roman republic collapse - ming dynasty - ming dynasty fell - meiji restoration - empire - dynasty - revolution - treaty - monarchy - republic - historical collapse - 历史 - 王朝 - 帝国 - 革命 method: bm25 name: history_topic_markers operator: OR - bm25_threshold: 0.12 case_sensitive: false keywords: - literature review - compare the evidence - survey the field - synthesize the sources - summarize the research - source-backed analysis - policy memo - 文献综述 - 对比证据 - 综合资料 - 总结研究现状 - 基于来源分析 method: bm25 name: research_request_markers operator: OR - bm25_threshold: 0.18 case_sensitive: false keywords: - think step by step - reason carefully - prove rigorously - derive the formula - compare trade-offs - analyze the root cause - evaluate multiple approaches - formal proof - 逐步推理 - 严格证明 - 推导公式 - 权衡利弊 - 深入分析 method: bm25 name: reasoning_request_markers operator: OR - case_sensitive: false keywords: - urgent - urgently - asap - right now - immediately - as soon as possible - 马上 - 立刻 - 立即 - 尽快 - 赶紧 - 现在就 name: urgency_markers operator: OR language: - description: English language queries name: en - description: Chinese language queries name: zh reasks: - description: Current user turn closely repeats the immediately previous user turn. lookback_turns: 1 name: likely_dissatisfied threshold: 0.8 structure: - description: Prompts with ordered workflow markers that imply phased execution. 
feature: source: sequences: - - first - then - - first - next - finally - - 先 - 再 - - 首先 - 然后 type: sequence type: sequence name: ordered_workflow - description: Prompts that contain numbered list items such as "1. ..." feature: source: pattern: (?m)^\s*\d+\.\s+ type: regex type: exists name: numbered_steps - description: Prompts that express an ordered workflow. feature: source: sequences: - - first - then - - first - next - finally - - 首先 - 然后 - - 先 - 再 type: sequence type: sequence name: first_then_flow - description: Constraint language is dense relative to multilingual text units. feature: source: keywords: - under - at most - at least - within - no more than - 不超过 - 至少 - 最多 type: keyword_set type: density name: constraint_dense predicate: gt: 0.08 - description: Output-format directives are dense relative to multilingual text units. feature: source: keywords: - table - bullet - json - markdown - 表格 - 列表 - JSON type: keyword_set type: density name: format_directive_dense predicate: gt: 0.08 - description: Prompts with very low question density relative to multilingual text units. feature: source: pattern: '[??]' type: regex type: density name: low_question_density predicate: lt: 0.05 - description: Repeated exclamation marks that usually indicate elevated urgency. feature: source: pattern: '[!!]' type: regex type: count name: exclamation_emphasis predicate: gte: 2 user_feedbacks: - description: Explicit correction or dissatisfaction with the previous answer. name: wrong_answer - description: Explicit request to restate the answer more clearly. name: need_clarification