---
# Semantic Router Configuration for AI Gateway
config:
  # Response API Configuration
  # Enables OpenAI Response API support with conversation chaining
  response_api:
    enabled: true
    store_backend: "memory"  # Options: "memory", "milvus", "redis"
    ttl_seconds: 86400  # 24 hours
    max_responses: 1000

  # Router Replay Configuration (System-Level)
  # Provides storage backend configuration for router_replay plugin
  # Per-decision settings (max_records, capture settings) are configured via router_replay plugin
  router_replay:
    store_backend: "memory"  # Options: "memory", "redis", "postgres", "milvus"
    ttl_seconds: 2592000  # 30 days retention (for persistent backends)
    async_writes: false  # Enable async writes for better performance

  model_config:
    "base-model":
      reasoning_family: "qwen3"  # This model uses Qwen-3 reasoning syntax
      # Define available LoRA adapters for this base model
      # These names must match the LoRA modules registered with vLLM at startup
      loras:
        - name: "science-expert"
          description: "Specialized for science domains: biology, chemistry, physics, health, engineering"
        - name: "social-expert"
          description: "Optimized for social sciences: business, economics"
        - name: "math-expert"
          description: "Fine-tuned for mathematics and quantitative reasoning"
        - name: "law-expert"
          description: "Specialized for legal questions and law-related topics"
        - name: "humanities-expert"
          description: "Optimized for humanities: psychology, history, philosophy"
        - name: "general-expert"
          description: "General-purpose adapter for diverse topics"

  # Categories for domain classification (used by domain rules)
  # Category names are MMLU category names used by the classifier
  categories:
    - name: business
      description: "Business, corporate strategy, management, finance, marketing"
    - name: law
      description: "Legal principles, case law, statutory interpretation, legal procedures"
    - name: psychology
      description: "Cognitive processes, behavioral patterns, mental health, developmental psychology"
    - name: biology
      description: "Molecular biology, genetics, cell biology, ecology, evolution, anatomy"
    - name: chemistry
      description: "Chemical reactions, molecular structures, laboratory techniques"
    - name: history
      description: "Historical events, time periods, cultures, civilizations"
    - name: health
      description: "Anatomy, physiology, diseases, treatments, preventive care, nutrition"
    - name: economics
      description: "Microeconomics, macroeconomics, financial markets, monetary policy, trade"
    - name: math
      description: "Mathematics, algebra, calculus, geometry, statistics"
    - name: physics
      description: "Physical laws, mechanics, thermodynamics, electromagnetism, quantum physics"
    - name: computer science
      description: "Algorithms, data structures, programming, software engineering"
    - name: philosophy
      description: "Philosophical traditions, ethics, logic, metaphysics, epistemology"
    - name: engineering
      description: "Engineering disciplines, design, problem-solving, systems"
    - name: other
      description: "General knowledge and miscellaneous topics"

  # Decisions with rule-based routing and plugins
  decisions:
    - name: business_decision
      description: "Business and management related queries"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "business"
      modelRefs:
        - model: base-model
          lora_name: social-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
            mode: "replace"
    - name: law_decision
      description: "Legal questions and law-related topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "law"
      modelRefs:
        - model: base-model
          lora_name: law-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
            mode: "replace"
    - name: psychology_decision
      description: "Psychology and mental health topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "psychology"
      modelRefs:
        - model: base-model
          lora_name: humanities-expert
          use_reasoning: false
      plugins:
        - type: "semantic-cache"
          configuration:
            enabled: true
            similarity_threshold: 0.92
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
            mode: "replace"
    - name: biology_decision
      description: "Biology and life sciences questions"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "biology"
      modelRefs:
        - model: base-model
          lora_name: science-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
            mode: "replace"
    - name: chemistry_decision
      description: "Chemistry and chemical sciences questions"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "chemistry"
      modelRefs:
        - model: base-model
          lora_name: science-expert
          use_reasoning: true
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
            mode: "replace"
    - name: history_decision
      description: "Historical questions and cultural topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "history"
      modelRefs:
        - model: base-model
          lora_name: humanities-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
            mode: "replace"
    - name: health_decision
      description: "Health and medical information queries"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "health"
      modelRefs:
        - model: base-model
          lora_name: science-expert
          use_reasoning: false
      plugins:
        - type: "semantic-cache"
          configuration:
            enabled: true
            similarity_threshold: 0.95
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
            mode: "replace"
    - name: economics_decision
      description: "Economics and financial topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "economics"
      modelRefs:
        - model: base-model
          lora_name: social-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
            mode: "replace"
    - name: math_decision
      description: "Mathematics and quantitative reasoning"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "math"
      modelRefs:
        - model: base-model
          lora_name: math-expert
          use_reasoning: true
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
            mode: "replace"
    - name: physics_decision
      description: "Physics and physical sciences"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "physics"
      modelRefs:
        - model: base-model
          lora_name: science-expert
          use_reasoning: true
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
            mode: "replace"
    - name: computer_science_decision
      description: "Computer science and programming"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "computer science"
      modelRefs:
        - model: base-model
          lora_name: science-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
            mode: "replace"
    - name: philosophy_decision
      description: "Philosophy and ethical questions"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "philosophy"
      modelRefs:
        - model: base-model
          lora_name: humanities-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
            mode: "replace"
    - name: engineering_decision
      description: "Engineering and technical problem-solving"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "engineering"
      modelRefs:
        - model: base-model
          lora_name: science-expert
          use_reasoning: false
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
            mode: "replace"
    - name: thinking_decision
      description: "Complex reasoning and multi-step thinking"
      priority: 20
      rules:
        operator: "OR"
        conditions:
          - type: "keyword"
            rule_name: "thinking"
      modelRefs:
        - model: base-model
          lora_name: general-expert
          use_reasoning: true
      plugins:
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
            mode: "replace"
    - name: general_decision
      description: "General knowledge and miscellaneous topics"
      priority: 1
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "other"
      modelRefs:
        - model: base-model
          lora_name: general-expert
          use_reasoning: false
      plugins:
        - type: "semantic-cache"
          configuration:
            enabled: true
            similarity_threshold: 0.75
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
            mode: "replace"

  # Strategy for selecting between multiple matching decisions
  # Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence)
  strategy: "priority"

  # NOTE(review): this names the "general-expert" LoRA adapter rather than the
  # "base-model" entry from model_config — confirm the router accepts adapter
  # names here (vLLM serves registered LoRA modules under their own model names).
  default_model: general-expert

  semantic_cache:
    enabled: true
    backend_type: "memory"  # Options: "memory", "milvus", or "hybrid"
    similarity_threshold: 0.8
    max_entries: 1000  # Only applies to memory backend
    ttl_seconds: 3600
    eviction_policy: "fifo"
    # HNSW index configuration (for memory backend only)
    use_hnsw: true  # Enable HNSW index for faster similarity search
    hnsw_m: 16  # Number of bi-directional links (higher = better recall, more memory)
    hnsw_ef_construction: 200  # Construction parameter (higher = better quality, slower build)
    # Hybrid cache configuration (when backend_type: "hybrid")
    # Combines in-memory HNSW for fast search with Milvus for scalable storage
    # max_memory_entries: 100000  # Max entries in HNSW index (default: 100,000)
    # backend_config_path: "config/milvus.yaml"  # Path to Milvus config
    # Embedding model for semantic similarity matching
    # If not specified, automatically uses the model configured in embedding_models section
    # Options: "mmbert" (multilingual, 768-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
    # embedding_model: "mmbert"  # Optional: explicitly set if you want to override auto-detection

  tools:
    enabled: true
    top_k: 3
    similarity_threshold: 0.2
    tools_db_path: "config/tools_db.json"
    fallback_to_empty: true

  prompt_guard:
    enabled: true  # Global default - can be overridden per category with jailbreak_enabled
    use_mmbert_32k: true
    model_id: "models/mmbert32k-jailbreak-detector-merged"
    jailbreak_mapping_path: "models/mmbert32k-jailbreak-detector-merged/jailbreak_type_mapping.json"
    threshold: 0.7
    use_cpu: true

  # Classifier configuration
  classifier:
    category_model:
      model_id: "models/mmbert32k-intent-classifier-merged"
      use_mmbert_32k: true
      threshold: 0.5
      use_cpu: true
      category_mapping_path: "models/mmbert32k-intent-classifier-merged/category_mapping.json"
    pii_model:
      model_id: "models/mmbert32k-pii-detector-merged"
      use_mmbert_32k: true
      threshold: 0.9
      use_cpu: true
      pii_mapping_path: "models/mmbert32k-pii-detector-merged/pii_type_mapping.json"

  # Hallucination mitigation configuration
  # Disabled by default - enable in decisions via hallucination plugin
  hallucination_mitigation:
    enabled: false
    # Fact-check classifier: determines if a prompt needs fact verification
    fact_check_model:
      model_id: "models/mmbert32k-factcheck-classifier-merged"
      threshold: 0.6
      use_cpu: true
      use_mmbert_32k: true
    # Hallucination detector: verifies if LLM response is grounded in context
    hallucination_model:
      model_id: "models/mom-halugate-detector"
      threshold: 0.8
      use_cpu: true
      # False positive reduction settings
      min_span_length: 2  # Minimum tokens in a span to report (filters single-token false positives)
      min_span_confidence: 0.6  # Minimum confidence for a span to be reported
      context_window_size: 50  # Characters of context around flagged spans
      enable_nli_filtering: true  # Use NLI to filter false positives
      nli_entailment_threshold: 0.75  # Filter spans with high entailment scores
    # NLI model: provides explanations for hallucinated spans
    nli_model:
      model_id: "models/mom-halugate-explainer"
      threshold: 0.9
      use_cpu: true

  # Feedback detector configuration
  # Classifies user feedback into 4 types: satisfied, need_clarification, wrong_answer, want_different
  feedback_detector:
    enabled: true
    model_id: "models/mmbert32k-feedback-detector-merged"
    threshold: 0.7
    use_cpu: true
    use_mmbert_32k: true

  keyword_rules:
    - category: "thinking"
      operator: "OR"
      keywords: ["urgent", "immediate", "asap", "think", "careful"]
      case_sensitive: false

  # Reasoning family configurations
  reasoning_families:
    deepseek:
      type: "chat_template_kwargs"
      parameter: "thinking"
    qwen3:
      type: "chat_template_kwargs"
      parameter: "enable_thinking"
    gpt-oss:
      type: "reasoning_effort"
      parameter: "reasoning_effort"
    gpt:
      type: "reasoning_effort"
      parameter: "reasoning_effort"

  # Global default reasoning effort level
  default_reasoning_effort: high

  # API Configuration
  api:
    batch_classification:
      max_batch_size: 100
      concurrency_threshold: 5
      max_concurrency: 8
      metrics:
        enabled: true
        detailed_goroutine_tracking: true
        high_resolution_timing: false
        sample_rate: 1.0
        duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
        size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

  # Embedding Models Configuration
  # This is the UNIFIED configuration for all embedding-related features:
  # - Semantic Cache: Automatically uses the configured model
  # - Tool Selection: Uses the configured model for tool matching
  # - Embedding Signal: Uses the model specified in hnsw_config.model_type
  # - Complexity Signal: Uses the model specified in hnsw_config.model_type
  #
  # Available models:
  # - Qwen3-Embedding-0.6B (Pro): Up to 32K context, high quality, 1024-dim
  # - EmbeddingGemma-300M (Flash): Up to 8K context, fast inference, Matryoshka support (768/512/256/128)
  # - mmBERT-Embed-32K-2D-Matryoshka (Ultra): Up to 32K context, 1800+ languages, 2D Matryoshka (layer early exit + dimension reduction)
  embedding_models:
    # qwen3_model_path: "models/mom-embedding-pro"
    # gemma_model_path: "models/mom-embedding-flash"
    mmbert_model_path: "models/mom-embedding-ultra"
    use_cpu: true  # Set to false for GPU acceleration (requires CUDA)

  # HNSW Configuration
  # Improves performance by preloading candidate embeddings at startup
  # and using HNSW index for O(log n) similarity search
  hnsw_config:
    model_type: "mmbert"  # Which model to use: "qwen3" (high quality), "gemma" (fast), or "mmbert" (multilingual)
    preload_embeddings: true  # Precompute candidate embeddings at startup
    target_dimension: 768  # Embedding dimension (1024 for qwen3, 768 for gemma/mmbert)
    # For mmbert only: target_layer (3/6/11/22) for layer early exit
    enable_soft_matching: true
    min_score_threshold: 0.5

  # Observability Configuration
  observability:
    tracing:
      enabled: false  # Enable distributed tracing
      provider: "opentelemetry"  # Provider: opentelemetry, openinference, openllmetry
      exporter:
        type: "otlp"  # Export spans to Jaeger (via OTLP gRPC)
        endpoint: "jaeger:4317"  # Jaeger collector inside compose network
        insecure: true  # Use insecure connection (no TLS)
      sampling:
        type: "always_on"  # Sampling: always_on, always_off, probabilistic
        rate: 1.0  # Sampling rate for probabilistic (0.0-1.0)
      resource:
        service_name: "vllm-semantic-router"
        service_version: "v0.1.0"
        deployment_environment: "development"