# Semantic Router Configuration for Aibrix
config:
  model_config:
    "vllm-llama3-8b-instruct":
      reasoning_family: "qwen3"  # This model uses Qwen-3 reasoning syntax
      # Define available LoRA adapters for this base model
      # These names must match the LoRA modules registered with vLLM at startup
      loras:
        - name: "science-expert"
          description: "Specialized for science domains: biology, chemistry, physics, health, engineering"
        - name: "social-expert"
          description: "Optimized for social sciences: business, economics"
        - name: "math-expert"
          description: "Fine-tuned for mathematics and quantitative reasoning"
        - name: "law-expert"
          description: "Specialized for legal questions and law-related topics"
        - name: "humanities-expert"
          description: "Optimized for humanities: psychology, history, philosophy"
        - name: "general-expert"
          description: "General-purpose adapter for diverse topics"

  # Categories - now only contain metadata for domain classification
  categories:
    - name: business
    - name: law
    - name: psychology
    - name: biology
    - name: chemistry
    - name: history
    - name: other
    - name: health
    - name: economics
    - name: math
    - name: physics
    - name: computer science
    - name: philosophy
    - name: engineering

  # Decisions - define routing logic with rules, model selection, and plugins
  decisions:
    - name: business_decision
      description: "Business and management related queries"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "business"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
            mode: "replace"

    - name: law_decision
      description: "Legal questions and law-related topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "law"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
            mode: "replace"

    - name: psychology_decision
      description: "Psychology and mental health topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "psychology"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "semantic-cache"
          configuration:
            enabled: true
            similarity_threshold: 0.92
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
            mode: "replace"

    - name: biology_decision
      description: "Biology and life sciences questions"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "biology"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
            mode: "replace"

    - name: chemistry_decision
      description: "Chemistry and chemical sciences questions"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "chemistry"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: true
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
            mode: "replace"

    - name: history_decision
      description: "Historical questions and cultural topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "history"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
            mode: "replace"

    - name: other_decision
      description: "General knowledge and miscellaneous topics"
      priority: 5
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "other"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "semantic-cache"
          configuration:
            enabled: true
            similarity_threshold: 0.75
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
            mode: "replace"

    - name: health_decision
      description: "Health and medical information queries"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "health"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "semantic-cache"
          configuration:
            enabled: true
            similarity_threshold: 0.95
        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
            mode: "replace"

    - name: economics_decision
      description: "Economics and financial topics"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "economics"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
            mode: "replace"

    - name: math_decision
      description: "Mathematics and quantitative reasoning"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "math"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: true
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
            mode: "replace"

    - name: physics_decision
      description: "Physics and physical sciences"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "physics"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: true
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
            mode: "replace"

    - name: computer_science_decision
      description: "Computer science and programming"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "computer science"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
            mode: "replace"

    - name: philosophy_decision
      description: "Philosophy and ethical questions"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "philosophy"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
            mode: "replace"

    - name: engineering_decision
      description: "Engineering and technical problem-solving"
      priority: 10
      rules:
        operator: "OR"
        conditions:
          - type: "domain"
            name: "engineering"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: false
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
            mode: "replace"

    - name: thinking_decision
      description: "Complex reasoning and multi-step thinking"
      priority: 15
      rules:
        operator: "OR"
        conditions:
          - type: "keyword"
            name: "thinking"
      modelRefs:
        - model: vllm-llama3-8b-instruct
          use_reasoning: true
      plugins:

        - type: "system_prompt"
          configuration:
            enabled: true
            system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
            mode: "replace"

  # Strategy for selecting between multiple matching decisions
  # Options: "priority" (use decision with highest priority) or "confidence" (use decision with highest confidence)
  strategy: "priority"

  default_model: vllm-llama3-8b-instruct

  bert_model:
    model_id: models/mom-embedding-light
    threshold: 0.6
    use_cpu: true

  semantic_cache:
    enabled: true
    backend_type: "memory"  # Options: "memory", "milvus", or "hybrid"
    similarity_threshold: 0.8
    max_entries: 1000  # Only applies to memory backend
    ttl_seconds: 3600
    eviction_policy: "fifo"
    # HNSW index configuration (for memory backend only)
    use_hnsw: true  # Enable HNSW index for faster similarity search
    hnsw_m: 16  # Number of bi-directional links (higher = better recall, more memory)
    hnsw_ef_construction: 200  # Construction parameter (higher = better quality, slower build)

    # Hybrid cache configuration (when backend_type: "hybrid")
    # Combines in-memory HNSW for fast search with Milvus for scalable storage
    # max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000)
    # backend_config_path: "config/milvus.yaml" # Path to Milvus config

    # Embedding model for semantic similarity matching
    # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
    # Default: "bert" (fastest, lowest memory)
    embedding_model: "bert"

  tools:
    enabled: true
    top_k: 3
    similarity_threshold: 0.2
    tools_db_path: "config/tools_db.json"
    fallback_to_empty: true

  prompt_guard:
    enabled: true  # Global default - can be overridden per category with jailbreak_enabled
    use_modernbert: false
    model_id: "models/mom-jailbreak-classifier"
    threshold: 0.7
    use_cpu: true
    jailbreak_mapping_path: "models/mom-jailbreak-classifier/jailbreak_type_mapping.json"

  # Classifier configuration
  classifier:
    category_model:
      model_id: "models/mom-domain-classifier"
      use_modernbert: true
      threshold: 0.6
      use_cpu: true
      category_mapping_path: "models/mom-domain-classifier/category_mapping.json"
    pii_model:
      # Support both traditional (modernbert) and LoRA-based PII detection
      # When model_type is "auto", the system will auto-detect LoRA configuration
      model_id: "models/mom-pii-classifier"
      use_modernbert: false  # Use LoRA PII model with auto-detection
      threshold: 0.7
      use_cpu: true
      pii_mapping_path: "models/mom-pii-classifier/pii_type_mapping.json"

  keyword_rules:
    - name: "thinking"
      operator: "OR"
      keywords: ["urgent", "immediate", "asap", "think", "careful"]
      case_sensitive: false

  # Reasoning family configurations
  reasoning_families:
    deepseek:
      type: "chat_template_kwargs"
      parameter: "thinking"

    qwen3:
      type: "chat_template_kwargs"
      parameter: "enable_thinking"

    gpt-oss:
      type: "reasoning_effort"
      parameter: "reasoning_effort"
    gpt:
      type: "reasoning_effort"
      parameter: "reasoning_effort"

  # Global default reasoning effort level
  default_reasoning_effort: high

  # API Configuration
  api:
    batch_classification:
      max_batch_size: 100
      concurrency_threshold: 5
      max_concurrency: 8
      metrics:
        enabled: true
        detailed_goroutine_tracking: true
        high_resolution_timing: false
        sample_rate: 1.0
        duration_buckets:
          [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
        size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

  # Embedding Models Configuration
  # These models provide intelligent embedding generation with automatic routing:
  # - Qwen3-Embedding-0.6B: Up to 32K context, high quality,
  # - EmbeddingGemma-300M: Up to 8K context, fast inference, Matryoshka support (768/512/256/128)
  embedding_models:
    qwen3_model_path: "models/mom-embedding-pro"
    gemma_model_path: ""
    use_cpu: true  # Set to false for GPU acceleration (requires CUDA)

  # Observability Configuration
  observability:
    tracing:
      enabled: false  # Enable distributed tracing
      provider: "opentelemetry"  # Provider: opentelemetry, openinference, openllmetry
      exporter:
        type: "otlp"  # Export spans to Jaeger (via OTLP gRPC)
        endpoint: "jaeger:4317"  # Jaeger collector inside compose network
        insecure: true  # Use insecure connection (no TLS)
      sampling:
        type: "always_on"  # Sampling: always_on, always_off, probabilistic
        rate: 1.0  # Sampling rate for probabilistic (0.0-1.0)
      resource:
        service_name: "vllm-semantic-router"
        service_version: "v0.1.0"
        deployment_environment: "development"