---
# SemanticRouter with Redis Cache Backend
#
# This example demonstrates how to configure the semantic router to use Redis
# for caching. Redis provides high-performance, distributed caching with
# vector search capabilities.
#
# This file contains two documents:
#   1. The SemanticRouter custom resource.
#   2. The Secret that holds the Redis password referenced by the router.
apiVersion: vllm.ai/v1alpha1
kind: SemanticRouter
metadata:
  name: semantic-router-redis
  namespace: default
spec:
  replicas: 1

  # Configure vLLM backend endpoints
  vllmEndpoints:
    - name: qwen-model
      model: qwen3-8b
      reasoningFamily: qwen3
      backend:
        type: service
        service:
          name: vllm-qwen
          port: 8000

  # Persistence for model cache
  persistence:
    enabled: true
    size: 10Gi

  # Basic resource limits
  resources:
    requests:
      cpu: 1000m
      memory: 2Gi
    limits:
      cpu: 2000m
      memory: 4Gi

  # Semantic router configuration
  config:
    # Embedding models configuration
    # Using Qwen3 for high-quality embeddings with 32K context support
    embedding_models:
      qwen3_model_path: "models/mom-embedding-pro"
      use_cpu: true

    # Legacy BERT model configuration
    # (can be removed if using embedding_models)
    bert_model:
      model_id: models/mom-embedding-light
      threshold: "0.6"
      use_cpu: true

    # Semantic cache with Redis backend
    semantic_cache:
      enabled: true
      backend_type: redis
      similarity_threshold: "0.85"
      ttl_seconds: 3600
      # Use Qwen3 embeddings for better semantic understanding
      embedding_model: qwen3

      # Redis configuration
      # NOTE: Update host to match your Redis deployment namespace
      redis:
        connection:
          host: redis.cache-backends.svc.cluster.local
          port: 6379
          database: 0
          # Option 1: Direct password (not recommended for production)
          # password: "mypassword"
          # Option 2: Reference to Kubernetes Secret (recommended)
          # The name/key below match the Secret defined in the second
          # document of this file (metadata.name and the stringData key).
          password_secret_ref:
            name: redis-credentials
            key: password
          timeout: 30
          tls:
            enabled: false
        index:
          name: semantic_cache_idx
          prefix: "cache:"
          vector_field:
            name: embedding
            # For Qwen3 embeddings (384 for BERT, 768 for Gemma)
            dimension: 1024
            metric_type: COSINE
          index_type: HNSW
          params:
            M: 16
            efConstruction: 64
        search:
          topk: 1
        development:
          drop_index_on_startup: false
          auto_create_index: true
          verbose_errors: true

    # Disable features that require additional models for this example
    # Enable these features in production with proper model configuration
    prompt_guard:
      enabled: false

    tools:
      enabled: false
---
# Secret for Redis password (create separately)
# In production, use a secret management system like External Secrets Operator
#
# The password value below is a placeholder: replace it before applying, and
# do not commit a real credential to version control.
apiVersion: v1
kind: Secret
metadata:
  name: redis-credentials
  namespace: default
type: Opaque
stringData:
  password: "your-redis-password-here"