# =============================================================================
# ContextForge Configuration Example
# =============================================================================

# NOTE ON SOURCES
# Values below match Pydantic Settings defaults unless noted.
# Most env vars below are Pydantic Settings (mcpgateway/config.py).
# Some are runtime/launcher envs (shell scripts, entrypoint) or direct env reads
# in modules (not part of Settings). Those sections are labeled explicitly.

# =============================================================================
# Required: change before use
# =============================================================================

# Admin UI HTTP Basic Auth credentials
# PRODUCTION: Change these values
BASIC_AUTH_USER=admin
BASIC_AUTH_PASSWORD=changeme

# JWT secret used to sign tokens
# PRODUCTION: Use a strong, unique value
JWT_SECRET_KEY=my-test-key

# Passphrase used to encrypt stored auth secrets
# PRODUCTION: Use a strong, unique value
AUTH_ENCRYPTION_SECRET=my-test-salt

# Bootstrap admin credentials (email auth)
# PRODUCTION: Change these values
PLATFORM_ADMIN_EMAIL=admin@example.com
PLATFORM_ADMIN_PASSWORD=changeme
DEFAULT_USER_PASSWORD=changeme

# =============================================================================
# Security Defaults (secure by default)
# =============================================================================

# These settings are enabled by default for security. Only disable for backward
# compatibility with legacy tokens that lack these claims.

# Require JTI (JWT ID) claim in all tokens for revocation support
# Tokens without JTI cannot be revoked before expiration
REQUIRE_JTI=true

# Require expiration (exp) claim in all tokens
# Tokens without expiration never expire and pose a security risk
REQUIRE_TOKEN_EXPIRATION=true

# Disable public user self-registration (admin must create accounts)
PUBLIC_REGISTRATION_ENABLED=false

# Allow entities to have public visibility (set false to block in team scope)
ALLOW_PUBLIC_VISIBILITY=true

# Basic Auth is DISABLED by default for security - use JWT tokens instead
# Only enable for backwards compatibility with legacy clients
# API_ALLOW_BASIC_AUTH=false
# DOCS_ALLOW_BASIC_AUTH=false

# -----------------------------------------------------------------------------
# SSRF Protection (Server-Side Request Forgery)
# -----------------------------------------------------------------------------
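# Prevents the gateway from being used to access internal resources or cloud
# metadata services. Enabled by default with strict settings (localhost and
# private networks are blocked); the "Project defaults" block further down
# relaxes them for local development.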

# Master switch for SSRF protection (default: true)
# SSRF_PROTECTION_ENABLED=true

# Allow localhost/loopback addresses (127.0.0.0/8, ::1)
# Default: false (strict)
# SSRF_ALLOW_LOCALHOST=false

# Allow RFC 1918 private network addresses (10.x, 172.16-31.x, 192.168.x)
# Default: false (strict). Use SSRF_ALLOWED_NETWORKS for explicit exceptions.
# SSRF_ALLOW_PRIVATE_NETWORKS=false

# Optional CIDR allowlist when SSRF_ALLOW_PRIVATE_NETWORKS=false
# Only private IPs in these ranges are allowed.
# SSRF_ALLOWED_NETWORKS=["10.20.0.0/16","192.168.50.0/24"]

# Fail closed on DNS resolution errors (default: true)
# URLs that cannot be resolved are rejected
# SSRF_DNS_FAIL_CLOSED=true

# Networks to block (JSON array of CIDR ranges) - ALWAYS blocked regardless of above
# Default blocks cloud metadata endpoints. Add more for stricter security.
# SSRF_BLOCKED_NETWORKS=["169.254.169.254/32","169.254.169.123/32","fd00::1/128","169.254.0.0/16","fe80::/10"]

# Hostnames to block (JSON array) - case-insensitive matching
# SSRF_BLOCKED_HOSTS=["metadata.google.internal","metadata.internal"]

# Example: STRICT mode (external endpoints only, no internal access)
# SSRF_PROTECTION_ENABLED=true
# SSRF_ALLOW_LOCALHOST=false
# SSRF_ALLOW_PRIVATE_NETWORKS=false
# SSRF_ALLOWED_NETWORKS=[]
# SSRF_BLOCKED_NETWORKS=["169.254.169.254/32","169.254.169.123/32","fd00::1/128","169.254.0.0/16","fe80::/10","100.64.0.0/10"]
# The 100.64.0.0/10 range is Carrier-Grade NAT (CGNAT) which some cloud providers use

# =============================================================================
# Project defaults (batteries-included overrides)
# =============================================================================

# These values intentionally differ from config.py defaults to provide a working
# local/dev setup out of the box. Comment out anything you do not want to override.

# Bind to all interfaces for local containers and remote access
HOST=0.0.0.0

# Local origin used for CORS/cookies in development examples
APP_DOMAIN=http://localhost

# Enable Admin UI and Admin API for local development
MCPGATEWAY_UI_ENABLED=true
MCPGATEWAY_ADMIN_API_ENABLED=true

# Local/dev SSRF compatibility: allow local services and fail open on DNS lookups.
# Keep strict values (false/false/true) in production environments.
SSRF_ALLOW_LOCALHOST=true
SSRF_ALLOW_PRIVATE_NETWORKS=true
SSRF_DNS_FAIL_CLOSED=false

# Disable WebSocket relay and reverse-proxy transports by default.
# Enable only when those transports are explicitly required.
MCPGATEWAY_WS_RELAY_ENABLED=false
MCPGATEWAY_REVERSE_PROXY_ENABLED=false

# Relax cookie security for local HTTP development
SECURE_COOKIES=false

# Enable validation middleware and experimental IO validation for visibility
EXPERIMENTAL_VALIDATE_IO=true
VALIDATION_MIDDLEWARE_ENABLED=true

# Permission audit logging (RBAC checks) - disabled by default for performance
PERMISSION_AUDIT_ENABLED=false

# Add SQL injection pattern on top of the default dangerous patterns
DANGEROUS_PATTERNS=["[;&|`$(){}\\[\\]<>]", "\\.\\.[\\\\/]", "[\\x00-\\x1f\\x7f-\\x9f]", "(?i)(drop|delete|insert|update|select)\\s+(table|from|into|where)"]

# Loosen password complexity for local bootstrap (production should re-enable)
PASSWORD_REQUIRE_UPPERCASE=false
PASSWORD_REQUIRE_LOWERCASE=false
PASSWORD_REQUIRE_SPECIAL=false

# Longer UI tool test timeout for slower dev environments
MCPGATEWAY_UI_TOOL_TEST_TIMEOUT=120000

# Slow down health polling and extend config cache for local dev
HEALTH_CHECK_INTERVAL=300
GLOBAL_CONFIG_CACHE_TTL=300

# Log to file by default for local debugging
LOG_FILE=mcpgateway.log
LOG_FOLDER=logs

# Local OTEL collector endpoint
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317

# =============================================================================
# Hot toggles (commented quick switches)
# =============================================================================

# These are frequently changed flags. If a key is already set in the
# Project defaults block above, change it there instead of uncommenting here.

# Feature flags / UX
# MCPGATEWAY_UI_ENABLED=false
# MCPGATEWAY_ADMIN_API_ENABLED=false
# PLUGINS_ENABLED=false
# MCPGATEWAY_CATALOG_ENABLED=true
# LLMCHAT_ENABLED=true
# MCPGATEWAY_STDIO_TRANSPORT_ENABLED=false
# PLUGINS_CAN_OVERRIDE_RBAC=false

# Direct proxy mode
# MCPGATEWAY_DIRECT_PROXY_ENABLED=false
# MCPGATEWAY_DIRECT_PROXY_TIMEOUT=30

# Observability / metrics
# OBSERVABILITY_ENABLED=false
# OTEL_ENABLE_OBSERVABILITY=false
# ENABLE_METRICS=true
# DB_METRICS_RECORDING_ENABLED=true
# METRICS_AGGREGATION_AUTO_START=false

# Logging / audit
# STRUCTURED_LOGGING_DATABASE_ENABLED=false
# AUDIT_TRAIL_ENABLED=false
# PERMISSION_AUDIT_ENABLED=false
# SECURITY_LOGGING_ENABLED=false
# TOKEN_USAGE_LOGGING_ENABLED=true
# TOKEN_LAST_USED_UPDATE_INTERVAL_MINUTES=5

# Security / auth (see also "Security Defaults" section above)
# AUTH_REQUIRED=true
# MCP_CLIENT_AUTH_ENABLED=true
# TRUST_PROXY_AUTH=false
# TRUST_PROXY_AUTH_DANGEROUSLY=false  # DANGER: Only set true behind a strictly trusted auth proxy
# ALLOW_UNAUTHENTICATED_ADMIN=false   # DANGER: Only for local dev — grants admin to unauthenticated requests
# SECURITY_HEADERS_ENABLED=true
# CORS_ALLOW_CREDENTIALS=true
# SECURE_COOKIES=true
# REQUIRE_USER_IN_DB=false

# Performance / reliability
# COMPRESSION_ENABLED=true
# VALIDATION_MIDDLEWARE_ENABLED=false
# CORRELATION_ID_ENABLED=false
# TEMPLATES_AUTO_RELOAD=false
# MCP_SESSION_POOL_ENABLED=false
# ANYIO_CANCEL_DELIVERY_PATCH_ENABLED=false

# =============================================================================
# Performance Tuning (quick reference)
# =============================================================================

# Use this section to tune throughput, latency, and cache behavior.
# If a key is already set in the Project defaults block, change it there instead.
# Detailed explanations for each setting appear later in this file.

# -----------------------------------------------------------------------------
# Cache TTLs (seconds)
# -----------------------------------------------------------------------------

# AUTH / registry / admin caches
# AUTH_CACHE_USER_TTL=60
# AUTH_CACHE_REVOCATION_TTL=30
# AUTH_CACHE_TEAM_TTL=60
# AUTH_CACHE_ROLE_TTL=60
# AUTH_CACHE_TEAMS_TTL=60
# REGISTRY_CACHE_TOOLS_TTL=20
# REGISTRY_CACHE_PROMPTS_TTL=15
# REGISTRY_CACHE_RESOURCES_TTL=15
# REGISTRY_CACHE_AGENTS_TTL=20
# REGISTRY_CACHE_SERVERS_TTL=20
# REGISTRY_CACHE_GATEWAYS_TTL=20
# REGISTRY_CACHE_CATALOG_TTL=300
# ADMIN_STATS_CACHE_SYSTEM_TTL=60
# ADMIN_STATS_CACHE_OBSERVABILITY_TTL=30
# ADMIN_STATS_CACHE_TAGS_TTL=120
# ADMIN_STATS_CACHE_PLUGINS_TTL=120
# ADMIN_STATS_CACHE_PERFORMANCE_TTL=60
# TEAM_MEMBER_COUNT_CACHE_TTL=300
# METRICS_CACHE_TTL_SECONDS=60

# Tool + pagination caches
# TOOL_LOOKUP_CACHE_TTL_SECONDS=60
# TOOL_LOOKUP_CACHE_NEGATIVE_TTL_SECONDS=10
# PAGINATION_COUNT_CACHE_TTL=300

# Session / message / global caches
# SESSION_TTL=3600
# MESSAGE_TTL=600
# GLOBAL_CONFIG_CACHE_TTL=60
# A2A_STATS_CACHE_TTL=30
# MCP_SESSION_POOL_TTL=300.0

# LLM chat caches
# LLMCHAT_SESSION_TTL=300
# LLMCHAT_SESSION_LOCK_TTL=30
# LLMCHAT_CHAT_HISTORY_TTL=3600

# Catalog / DCR / performance caches
# MCPGATEWAY_CATALOG_CACHE_TTL=3600
# DCR_METADATA_CACHE_TTL=3600
# MCPGATEWAY_PERFORMANCE_NET_CONNECTIONS_CACHE_TTL=15

# Static resource caching
# RESOURCE_CACHE_TTL=3600

# Redis leader election
# REDIS_LEADER_TTL=15
# REDIS_LEADER_HEARTBEAT_INTERVAL=5

# -----------------------------------------------------------------------------
# Pooling, concurrency, and limits
# -----------------------------------------------------------------------------

# Database connection pool (SQLAlchemy)
# DB_POOL_CLASS=auto
# DB_POOL_PRE_PING=auto
# DB_POOL_SIZE=200
# DB_MAX_OVERFLOW=10
# DB_POOL_TIMEOUT=30
# DB_POOL_RECYCLE=3600

# Redis connection pool
# REDIS_MAX_CONNECTIONS=50
# REDIS_SOCKET_TIMEOUT=2.0
# REDIS_SOCKET_CONNECT_TIMEOUT=2.0
# REDIS_HEALTH_CHECK_INTERVAL=30

# HTTPX shared client pool
# HTTPX_MAX_CONNECTIONS=200
# HTTPX_MAX_KEEPALIVE_CONNECTIONS=100
# HTTPX_KEEPALIVE_EXPIRY=30.0
# HTTPX_POOL_TIMEOUT=10.0

# Tool and federation limits
# TOOL_TIMEOUT=60
# MAX_TOOL_RETRIES=3
# TOOL_RATE_LIMIT=100
# TOOL_CONCURRENT_LIMIT=10
# FEDERATION_TIMEOUT=120

# Health checks
# HEALTH_CHECK_INTERVAL=60
# HEALTH_CHECK_TIMEOUT=5
# UNHEALTHY_THRESHOLD=3
# GATEWAY_VALIDATION_TIMEOUT=5
# MAX_CONCURRENT_HEALTH_CHECKS=10

# MCP session pool (client sessions)
# MCP_SESSION_POOL_ENABLED=false
# MCP_SESSION_POOL_MAX_PER_KEY=10
# MCP_SESSION_POOL_HEALTH_CHECK_INTERVAL=60.0
# MCP_SESSION_POOL_ACQUIRE_TIMEOUT=30.0
# MCP_SESSION_POOL_CREATE_TIMEOUT=30.0
# MCP_SESSION_POOL_CIRCUIT_BREAKER_THRESHOLD=5
# MCP_SESSION_POOL_CIRCUIT_BREAKER_RESET=60.0
# MCP_SESSION_POOL_IDLE_EVICTION=600.0
# MCP_SESSION_POOL_TRANSPORT_TIMEOUT=30.0
# MCP_SESSION_POOL_EXPLICIT_HEALTH_RPC=false
# MCP_SESSION_POOL_HEALTH_CHECK_METHODS=["ping", "skip"]
# MCP_SESSION_POOL_HEALTH_CHECK_TIMEOUT=5.0

# -----------------------------------------------------------------------------
# Timeouts, polling, and backoff
# -----------------------------------------------------------------------------

# Session registry polling (cache_type=database)
# POLL_INTERVAL=1.0
# MAX_INTERVAL=5.0
# BACKOFF_FACTOR=1.5

# DB startup resilience
# DB_MAX_RETRIES=30
# DB_RETRY_INTERVAL_MS=2000
# DB_MAX_BACKOFF_SECONDS=30

# Redis startup resilience
# REDIS_MAX_RETRIES=30
# REDIS_RETRY_INTERVAL_MS=2000
# REDIS_MAX_BACKOFF_SECONDS=30

# -----------------------------------------------------------------------------
# Retention and cleanup windows
# -----------------------------------------------------------------------------

# METRICS_RETENTION_DAYS=7
# METRICS_CLEANUP_INTERVAL_HOURS=1
# METRICS_ROLLUP_ENABLED=true
# METRICS_ROLLUP_INTERVAL_HOURS=1
# METRICS_ROLLUP_RETENTION_DAYS=365
# METRICS_ROLLUP_LATE_DATA_HOURS=1
# METRICS_DELETE_RAW_AFTER_ROLLUP=true
# METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS=1
# MCPGATEWAY_PERFORMANCE_RETENTION_HOURS=24
# MCPGATEWAY_PERFORMANCE_RETENTION_DAYS=90
# OBSERVABILITY_TRACE_RETENTION_DAYS=7
# LOG_RETENTION_DAYS=30

# HTTPX timeouts
# HTTPX_CONNECT_TIMEOUT=5.0
# HTTPX_READ_TIMEOUT=120.0
# HTTPX_WRITE_TIMEOUT=30.0
# HTTPX_ADMIN_READ_TIMEOUT=30.0

# SSE / cancellation protection
# SSE_SEND_TIMEOUT=30.0
# SSE_RAPID_YIELD_WINDOW_MS=1000
# SSE_RAPID_YIELD_MAX=50
# MCP_SESSION_POOL_CLEANUP_TIMEOUT=5.0
# SSE_TASK_GROUP_CLEANUP_TIMEOUT=5.0
# ANYIO_CANCEL_DELIVERY_PATCH_ENABLED=false
# ANYIO_CANCEL_DELIVERY_MAX_ITERATIONS=100

# -----------------------------------------------------------------------------
# Middleware overhead and compression
# -----------------------------------------------------------------------------

# COMPRESSION_ENABLED=true
# COMPRESSION_GZIP_LEVEL=6
# COMPRESSION_BROTLI_QUALITY=4
# COMPRESSION_ZSTD_LEVEL=3
# COMPRESSION_MINIMUM_SIZE=500
# VALIDATION_MIDDLEWARE_ENABLED=false
# CORRELATION_ID_ENABLED=true
# TEMPLATES_AUTO_RELOAD=false
# STRUCTURED_LOGGING_DATABASE_ENABLED=false
# AUDIT_TRAIL_ENABLED=false
# SECURITY_LOGGING_ENABLED=false

# =============================================================================
# Basic Server Configuration
# =============================================================================

# Application name displayed in UI and logs
# APP_NAME=ContextForge

# Host interface to bind to (127.0.0.1 = localhost only)
# Project defaults block sets HOST=0.0.0.0 for local containers
# HOST=127.0.0.1

# Port number for the HTTP server
# PORT=4444

# Runtime environment - affects CORS, cookies, and security defaults
# Options: development, staging, production
# - development: Relaxed CORS (localhost:3000/8080), debug info, insecure cookies
# - staging: Production-like CORS and cookie defaults, but use staging domains
# - production: Strict CORS (APP_DOMAIN only), secure cookies, no debug info
# ENVIRONMENT=development

# Domain name for CORS origins and cookie settings (use your actual domain in production)
# Project defaults block sets APP_DOMAIN=http://localhost for local dev
# APP_DOMAIN=http://localhost:4444

# FastAPI root_path for reverse proxy deployments (empty = serve from root "/")
# Used when gateway is behind a proxy with path prefix (e.g., "/api/v1")
# See FastAPI docs: https://fastapi.tiangolo.com/advanced/behind-a-proxy/
# APP_ROOT_PATH=

# Client mode for gateway-as-client usage
# Options: true, false (default)
# CLIENT_MODE=false

# Override templates/static directories (absolute paths)
# Leave unset to use package defaults
# TEMPLATES_DIR=/absolute/path/to/templates
# STATIC_DIR=/absolute/path/to/static

# Enable HTTP Basic Auth for OpenAPI docs endpoints (/docs, /redoc)
# Options: true, false (default: false)
# When true: Allows accessing docs with BASIC_AUTH_USER/BASIC_AUTH_PASSWORD
# When false: Only JWT Bearer token authentication is accepted
# DOCS_ALLOW_BASIC_AUTH=false

# Database Configuration
# SQLite (default) - good for development and small deployments
# macOS note: If you see "sqlite3.OperationalError: disk I/O error" on macOS when running
# `make serve`, move the DB to a safe APFS path (avoid iCloud/Dropbox/OneDrive/Google Drive,
# network shares, or external exFAT) and use an absolute path, for example:
# DATABASE_URL=sqlite:////Users/$USER/Library/Application Support/mcpgateway/mcp.db
# DATABASE_URL=sqlite:///./mcp.db

# PostgreSQL - recommended for production deployments
# Uses psycopg3 driver (psycopg[binary])
# IMPORTANT: Use postgresql+psycopg:// (not postgresql://) for psycopg3
# DATABASE_URL=postgresql+psycopg://postgres:mysecretpassword@localhost:5432/mcp

# MariaDB/MySQL - fully supported for production
# For container deployment: mysql+pymysql://mysql:changeme@mariadb:3306/mcp
# For localhost: mysql+pymysql://mysql:changeme@localhost:3306/mcp
# DATABASE_URL=mysql+pymysql://mysql:changeme@localhost:3306/mcp

# Database Connection Pool Configuration
# ============================================================================
# IMPORTANT: Pool size depends on your database connection strategy:
#
# WITH PgBouncer (recommended for PostgreSQL, default in docker-compose):
#   - Use SMALL pool (10-20) since PgBouncer handles connection pooling
#   - docker-compose.yml sets DB_POOL_SIZE=15 by default
#   - Do NOT override here unless you know what you're doing
#   - Formula: (replicas × workers × pool) should be < PgBouncer MAX_CLIENT_CONN
#
# WITHOUT PgBouncer (direct PostgreSQL or SQLite):
#   - Use LARGER pool based on: (replicas × workers × pool) < max_connections
#   - Uncomment and set DB_POOL_SIZE=50-200 depending on workload
#
# Uncomment for SQLite or direct PostgreSQL without PgBouncer
# DB_POOL_SIZE=200
# ============================================================================

# Additional connections beyond pool_size for burst handling (default: 10)
# DB_MAX_OVERFLOW=10
# Seconds to wait for connection before timeout (default: 30)
# DB_POOL_TIMEOUT=30
# Seconds before recreating connection to prevent stale connections (default: 3600)
# DB_POOL_RECYCLE=3600
# Database driver identifier (advanced; used by SQLAlchemy engine selection)
# DB_DRIVER=mariadb+mariadbconnector

# Connection pool class selection
# Options: auto (default), null, queue
# DB_POOL_CLASS=auto

# Connection pool pre-ping behavior
# Options: auto (default), true, false
# DB_POOL_PRE_PING=auto

# Database Startup Resilience (exponential backoff with jitter)
# Retry progression: 2s → 4s → 8s → 16s → 30s (capped), ±25% jitter
# 30 retries ≈ 13 minutes worst-case total wait (2+4+8+16s, then 30s per
# remaining attempt) before worker gives up
# DB_MAX_RETRIES=30
# Base retry interval in milliseconds (doubles each attempt)
# DB_RETRY_INTERVAL_MS=2000
# Maximum backoff cap in seconds (jitter ±25% applied after cap)
# DB_MAX_BACKOFF_SECONDS=30
# psycopg3: Number of query executions before auto-preparing server-side (default: 5)
# Set to 0 to disable, 1 to prepare immediately. Higher values reduce memory usage.
# DB_PREPARE_THRESHOLD=5

# SQLite Configuration
# SQLite busy timeout (milliseconds) - maximum time SQLite will block while waiting
# to acquire a database lock before returning SQLITE_BUSY. Limits lock-wait latency
# and prevents prolonged thread blocking under write contention (default: 5000ms)
# DB_SQLITE_BUSY_TIMEOUT=5000

# Database Performance Optimization
# Use database-native percentile functions for observability performance metrics
# When true: PostgreSQL uses native percentile_cont (5-10x faster for large datasets)
# When false: Falls back to Python-based percentile calculations (works with all databases)
# Recommended: true for PostgreSQL production deployments, auto-detected for SQLite
# USE_POSTGRESDB_PERCENTILES=true

# The number of rows fetched from the database at a time when streaming results,
# to limit memory usage and avoid loading all rows into RAM at once.
# YIELD_BATCH_SIZE=1000

# Cache Backend Configuration
# Options: database (default), memory (in-process), redis (distributed)
# - database: Uses SQLite/PostgreSQL for persistence (good for single-node)
# - memory: Fast in-process caching (lost on restart, not shared between workers)
# - redis: Distributed caching for multi-node deployments
#   The redis backend is REQUIRED for multi-worker session affinity
#   (MCPGATEWAY_SESSION_AFFINITY_ENABLED=true)
# CACHE_TYPE=database

# Session Registry Database Polling (Adaptive Backoff)
# When CACHE_TYPE=database, sessions use polling to check for messages.
# Adaptive backoff reduces database load by ~90% during idle periods while
# maintaining responsiveness when messages arrive.
#
# How it works:
# - Starts polling at POLL_INTERVAL (1.0s default)
# - When no messages found, interval increases by BACKOFF_FACTOR (1.5x)
# - Backs off until reaching MAX_INTERVAL (5.0s cap)
# - Immediately resets to POLL_INTERVAL when a message arrives
#
# Example progression: 1.0s → 1.5s → 2.25s → 3.375s → 5.0s (capped)
#
# Tuning guide:
# - Lower POLL_INTERVAL (0.1-0.5s) for real-time applications needing <1s latency
# - Higher MAX_INTERVAL (10-30s) for batch workloads to minimize DB queries
# - Higher BACKOFF_FACTOR (2.0) for faster backoff, lower (1.2) for gradual
# POLL_INTERVAL=1.0
# MAX_INTERVAL=5.0
# BACKOFF_FACTOR=1.5

# Redis connection URL (only used when CACHE_TYPE=redis)
# Format: redis://[username:password@]host:port/database
# Example: redis://localhost:6379/0 (local), redis://redis:6379/0 (container)
# REDIS_URL=redis://localhost:6379/0

# Cache key prefix for Redis (used to namespace keys in shared Redis instances)
# Default: "mcpgw:"
# CACHE_PREFIX=mcpgw:

# Session time-to-live in seconds (how long sessions remain valid)
# Default: 3600 (1 hour)
# SESSION_TTL=3600

# Message time-to-live in seconds (how long messages are retained)
# Default: 600 (10 minutes)
# MESSAGE_TTL=600

# Redis Startup Resilience (exponential backoff with jitter)
# Same behavior as DB retries: 2s → 4s → 8s → 16s → 30s (capped), ±25% jitter
# 30 retries ≈ 13 minutes worst-case total wait (2+4+8+16s, then 30s per
# remaining attempt) before worker gives up
# REDIS_MAX_RETRIES=30
# Base retry interval in milliseconds (doubles each attempt)
# REDIS_RETRY_INTERVAL_MS=2000
# Maximum backoff cap in seconds (jitter ±25% applied after cap)
# REDIS_MAX_BACKOFF_SECONDS=30

# =============================================================================
# Redis Connection Pool - Performance Tuned
# =============================================================================

# Connection pool size per worker process
# Formula: (concurrent_requests / workers) * 1.5
# Default 50 handles ~330 concurrent requests with 10 workers (50 / 1.5 * 10)
# REDIS_MAX_CONNECTIONS=50

# Socket read/write timeout (seconds)
# Keep low for fast failure detection; Redis ops typically <100ms
# REDIS_SOCKET_TIMEOUT=2.0

# Connection establishment timeout (seconds)
# Keep low to avoid blocking event loop on network issues
# REDIS_SOCKET_CONNECT_TIMEOUT=2.0

# Retry commands that time out (recommended: true)
# REDIS_RETRY_ON_TIMEOUT=true

# Connection health check interval (seconds, 0=disabled)
# Prevents stale connections in pool
# REDIS_HEALTH_CHECK_INTERVAL=30

# Return strings instead of bytes (recommended: true)
# REDIS_DECODE_RESPONSES=true

# =============================================================================
# Redis Parser Configuration (Performance - ADR-026)
# =============================================================================

# Redis protocol parser selection
# Options:
#   - auto (default): Use hiredis C parser if available, fallback to pure-Python
#   - hiredis: Require hiredis C parser (fails if not installed)
#   - python: Force pure-Python parser (useful for debugging)
#
# Performance benchmarks (hiredis vs pure-Python):
#   - Simple SET/GET: ~1.1x faster
#   - LRANGE (10 items): ~2.7x faster
#   - LRANGE (100 items): ~10x faster
#   - LRANGE (999 items): ~83x faster
#
# Recommendation: Leave as "auto" - hiredis is installed by default with redis[hiredis]
# REDIS_PARSER=auto

# =============================================================================
# Redis Leader Election - Multi-Node Deployments
# =============================================================================

# Leader TTL in seconds (time before failover if leader dies)
# Lower = faster failover, but more sensitive to network blips
# Recommended: 15s for production, 5s for development
# REDIS_LEADER_TTL=15

# Leader heartbeat interval (seconds)
# Hard limit: must be < leader_ttl / 2 to prevent false failovers
# Recommended: heartbeat_interval <= leader_ttl / 3 (e.g. 5s for a 15s TTL)
# REDIS_LEADER_HEARTBEAT_INTERVAL=5

# Leader key name in Redis
# REDIS_LEADER_KEY=gateway_service_leader

# =============================================================================
# Protocol Settings
# =============================================================================

# MCP protocol version supported by this gateway
# PROTOCOL_VERSION=2025-06-18

# =============================================================================
# Authentication
# =============================================================================

# Admin UI HTTP Basic Auth credentials
# Used for: Admin UI login, /docs endpoint (if DOCS_ALLOW_BASIC_AUTH=true)
# PRODUCTION: Change these to strong, unique values!
# BASIC_AUTH_USER=admin
# BASIC_AUTH_PASSWORD=changeme

# Global authentication requirement
# Options: true (default), false
# When true: All endpoints require authentication (Basic or JWT)
# When false: Endpoints are publicly accessible (NOT RECOMMENDED)
# AUTH_REQUIRED=true

# MCP endpoint authentication requirement
# Options: true, false
# Default when unset: follows AUTH_REQUIRED
# - AUTH_REQUIRED=true  -> MCP auth required
# - AUTH_REQUIRED=false -> public-only /mcp access allowed
# Set to false explicitly to allow unauthenticated public-only MCP access.
# MCP_REQUIRE_AUTH=true

# JWT Algorithm Selection
# Supported algorithms:
#   HMAC (Symmetric): HS256, HS384, HS512 - Simple deployments, shared secret
#   RSA (Asymmetric): RS256, RS384, RS512 - Enterprise, distributed systems
#   ECDSA (Asymmetric): ES256, ES384, ES512 - High performance, modern crypto
# JWT_ALGORITHM=HS256

# === HMAC (Symmetric) Configuration - Default for Development ===
# Secret used to sign JWTs (required for HMAC algorithms: HS256, HS384, HS512)
# PRODUCTION: Use a strong, random secret (minimum 32 characters)
# Generate with: openssl rand -base64 32
# JWT_SECRET_KEY=my-test-key

# === RSA/ECDSA (Asymmetric) Configuration - Recommended for Production ===
# Public and private key paths (required for asymmetric algorithms: RS*, ES*)
# Generate RSA keys with: make certs-jwt
#   (creates certs/jwt/private.pem and certs/jwt/public.pem with proper permissions)
# Generate ECDSA keys with: make certs-jwt-ecdsa
#   (creates certs/jwt/ec_private.pem and certs/jwt/ec_public.pem with proper permissions)
# Generate both SSL and JWT keys: make certs-all
# JWT_PUBLIC_KEY_PATH=certs/jwt/public.pem
# JWT_PRIVATE_KEY_PATH=certs/jwt/private.pem

# JWT Claims Configuration
# PRODUCTION: Set these to your service-specific values
# JWT_AUDIENCE=mcpgateway-api
# JWT_ISSUER=mcpgateway

# JWT Validation Options
# Set to false for Dynamic Client Registration (DCR) scenarios where audience varies
# JWT_AUDIENCE_VERIFICATION=true
# Set to false for custom auth flows where issuer varies or is not present
# JWT_ISSUER_VERIFICATION=true

# Expiry time for generated JWTs, in minutes (10080 = 7 days)
# TOKEN_EXPIRY=10080

# SECURITY: Require expiration claim in all tokens (default: true)
# Set to false only for backward compatibility with legacy tokens
# REQUIRE_TOKEN_EXPIRATION=true

# SECURITY: Require JTI (JWT ID) claim for token revocation support (default: true)
# Set to false only for backward compatibility with legacy tokens
# REQUIRE_JTI=true

# Require all authenticated users to exist in the database
# When true, disables the platform admin bootstrap mechanism
# WARNING: Enabling this on a fresh deployment will lock you out!
# REQUIRE_USER_IN_DB=false

# Embed environment claim in gateway-issued JWTs
# EMBED_ENVIRONMENT_IN_TOKENS=false

# Reject tokens with mismatched environment claim (tokens without env are allowed)
# VALIDATE_TOKEN_ENVIRONMENT=false

# =============================================================================
# Security Validation & Sanitization
# =============================================================================

# Enable experimental input validation and output sanitization
# This implements gateway-level security controls to protect against:
# - Path traversal attacks (../../../etc/passwd)
# - Command injection (file.jpg; rm -rf /)
# - SQL injection ('; DROP TABLE users; --)
# - XSS attacks (<script>alert(1)</script>)
# - Control character injection (\x1b[31m)
#
# Roll-out phases:
# Phase 0: EXPERIMENTAL_VALIDATE_IO=false (disabled, default)
# Phase 1: EXPERIMENTAL_VALIDATE_IO=true, VALIDATION_STRICT=false (log-only)
# Phase 2: EXPERIMENTAL_VALIDATE_IO=true, VALIDATION_STRICT=true (enforce in staging)
# Phase 3: Production deployment with all features enabled
# Project defaults block enables EXPERIMENTAL_VALIDATE_IO for local dev
# EXPERIMENTAL_VALIDATE_IO=false

# Enable validation middleware for all requests
# When enabled, validates all incoming request parameters and paths
# Options: true, false (default)
# Project defaults block enables VALIDATION_MIDDLEWARE_ENABLED for local dev
# VALIDATION_MIDDLEWARE_ENABLED=false

# Strict validation mode
# Options:
# - true: Reject requests with validation failures (422 status)
# - false: Log warnings but allow requests (log-only mode)
# Recommended: false during initial roll-out (log-only), true once validated in staging and for production
# VALIDATION_STRICT=true

# Strict JSON Schema validation for tools and prompts
# Options:
# - true: Reject invalid JSON schemas during registration (strict spec compliance)
# - false: Log warnings only (backward compatibility for legacy tools)
# JSON_SCHEMA_VALIDATION_STRICT=true

# Sanitize output to remove control characters
# Removes ANSI escape sequences and C0/C1 control characters from responses
# Preserves newlines (\n) and tabs (\t)
# Options: true (default), false
# SANITIZE_OUTPUT=true

# Allowed root paths for resource access
# Restricts file system access to specific directories
# Format: JSON array or comma-separated list
# Examples:
# - JSON: ["/srv/data", "/var/app/uploads"]
# - CSV: /srv/data,/var/app/uploads
# - Empty: [] (no restrictions, not recommended)
# PRODUCTION: Always configure this to limit resource access
# ALLOWED_ROOTS=[]

# Maximum allowed path depth
# Prevents deeply nested path attacks
# Default: 10 levels
# MAX_PATH_DEPTH=10

# Maximum parameter length (characters)
# Prevents buffer overflow and DoS attacks
# Default: 10000 characters
# MAX_PARAM_LENGTH=10000

# Regex patterns for dangerous input (JSON array)
# Used to detect and block malicious input patterns
# Default patterns:
# 1. Shell metacharacters: [;&|`$(){}\[\]<>]
# 2. Path traversal: \.\.[/\\]
# 3. Control characters: [\x00-\x1f\x7f-\x9f]
# Format: JSON array of regex patterns
# Project defaults block adds an SQL injection pattern on top of defaults
# DANGEROUS_PATTERNS=["[;&|`$(){}\\[\\]<>]", "\\.\\.[\\\\/]", "[\\x00-\\x1f\\x7f-\\x9f]"]

# =============================================================================
# Email-Based Authentication
# =============================================================================

# Enable email-based authentication system
# EMAIL_AUTH_ENABLED=true

# Public registration control
# When false (default), only admins can create user accounts via /admin/users
# When true, anyone can self-register via /auth/email/register
# SECURITY: Keep this false in production unless you explicitly need public sign-up
# PUBLIC_REGISTRATION_ENABLED=false

# Admin protection mode
# When true (default), no admin can be demoted, deactivated, or locked out via API/UI
# When false, only the last remaining active admin is protected
# PROTECT_ALL_ADMINS=true

# Platform admin user (bootstrap from environment)
# PRODUCTION: Change these to your actual admin credentials!
# PLATFORM_ADMIN_EMAIL=admin@example.com
# PLATFORM_ADMIN_PASSWORD=changeme
# PLATFORM_ADMIN_FULL_NAME=Platform Administrator

# Default password for newly created users (bootstrap only)
# DEFAULT_USER_PASSWORD=changeme

# Argon2id Password Hashing Configuration
# Time cost (iterations) - higher = more secure but slower
# ARGON2ID_TIME_COST=3
# Memory cost (KB) - higher = more secure but uses more RAM
# ARGON2ID_MEMORY_COST=65536
# Parallelism (threads) - typically 1 for web apps
# ARGON2ID_PARALLELISM=1

# Password Policy Configuration
# PASSWORD_MIN_LENGTH=8
# Project defaults block relaxes these for local bootstrap
# PASSWORD_REQUIRE_UPPERCASE=true
# PASSWORD_REQUIRE_LOWERCASE=true
# PASSWORD_REQUIRE_NUMBERS=false
# PASSWORD_REQUIRE_SPECIAL=true

# Password Change Enforcement
# Master switch for all password change enforcement checks
# PASSWORD_CHANGE_ENFORCEMENT_ENABLED=true
# Force admin to change password after bootstrap
# ADMIN_REQUIRE_PASSWORD_CHANGE_ON_BOOTSTRAP=true
# Detect default password during login and mark user for change
# DETECT_DEFAULT_PASSWORD_ON_LOGIN=true
# Require password change when using default password
# REQUIRE_PASSWORD_CHANGE_FOR_DEFAULT_PASSWORD=true
# Enable password complexity validation for new/changed passwords
# PASSWORD_POLICY_ENABLED=true
# Prevent reusing the current password when changing
# PASSWORD_PREVENT_REUSE=true
# Password maximum age in days before expiry forces a change
# PASSWORD_MAX_AGE_DAYS=90

# Account Security Configuration
# Maximum failed login attempts before account lockout
# MAX_FAILED_LOGIN_ATTEMPTS=10
# Account lockout duration in minutes
# ACCOUNT_LOCKOUT_DURATION_MINUTES=1
# Send lockout notification emails when an account is locked
# ACCOUNT_LOCKOUT_NOTIFICATION_ENABLED=true
# Minimum response time for failed login attempts (milliseconds)
# Helps reduce timing-based account enumeration
# FAILED_LOGIN_MIN_RESPONSE_MS=250

# Self-Service Password Reset
# Enable forgot-password and reset-password workflows
# Set to false to disable public self-service reset UI/API endpoints.
# PASSWORD_RESET_ENABLED=true
# Password reset token validity (minutes)
# PASSWORD_RESET_TOKEN_EXPIRY_MINUTES=60
# Max password reset requests allowed per email in each window
# PASSWORD_RESET_RATE_LIMIT=5
# Rate limit window length (minutes)
# PASSWORD_RESET_RATE_WINDOW_MINUTES=15
# Invalidate active sessions after successful password reset
# PASSWORD_RESET_INVALIDATE_SESSIONS=true
# Minimum response time for forgot-password requests (milliseconds)
# Helps reduce timing-based account enumeration
# PASSWORD_RESET_MIN_RESPONSE_MS=250

# SMTP Email Delivery (for password reset + lockout notifications)
# Enable SMTP delivery
# SMTP_ENABLED=false
# SMTP_HOST=smtp.example.com
# SMTP_PORT=587
# SMTP_USER=noreply@example.com
# SMTP_PASSWORD=changeme
# SMTP_FROM_EMAIL=noreply@example.com
# SMTP_FROM_NAME=ContextForge
# Use STARTTLS
# SMTP_USE_TLS=true
# Use implicit SSL/TLS (set true for port 465)
# SMTP_USE_SSL=false
# SMTP_TIMEOUT_SECONDS=15

# MCP Client Authentication
# Controls JWT authentication for /mcp endpoints
# MCP_CLIENT_AUTH_ENABLED=true
# TRUST_PROXY_AUTH=false
# PROXY_USER_HEADER=X-Authenticated-User

# SECURITY NOTE: MCP Access Control Dependencies
# Full MCP access control (visibility + team scoping + membership validation) requires:
#   1. MCP_CLIENT_AUTH_ENABLED=true (JWT auth extracts user identity and teams)
#   2. Valid Bearer token with teams claim for team-scoped access
# When MCP_CLIENT_AUTH_ENABLED=false:
#   - Access control relies on MCP_REQUIRE_AUTH + tool/resource visibility only
#   - Team membership validation is skipped (no JWT to extract teams from)
#   - Use TRUST_PROXY_AUTH=true with a reverse proxy for user identification

# Used to derive an AES encryption key for secure auth storage
# Must be a non-empty string (e.g. passphrase or random secret)
# AUTH_ENCRYPTION_SECRET=my-test-salt

# OAuth Configuration
# OAUTH_REQUEST_TIMEOUT=30
# OAUTH_MAX_RETRIES=3
# OAUTH_DEFAULT_TIMEOUT=3600

# OAuth Security Settings
# When MCP servers require OAuth authorization code flow,
# tokens are stored per-user to prevent cross-user token access.
# Users must individually authorize each OAuth-protected gateway.

# =============================================================================
# OAuth Dynamic Client Registration (DCR) and PKCE
# =============================================================================

# Enable Dynamic Client Registration (RFC 7591)
# When enabled, ContextForge can automatically register as an OAuth client with Authorization Servers
# that support DCR, eliminating the need for manual client credential configuration.
# DCR_ENABLED=true

# Auto-register when gateway has issuer but no client_id
# When true, gateway automatically registers with the Authorization Server when configured
# with an issuer URL but no client credentials.
# DCR_AUTO_REGISTER_ON_MISSING_CREDENTIALS=true

# Default scopes to request during DCR
# JSON array of OAuth scopes to request when auto-registering
# DCR_DEFAULT_SCOPES=["mcp:read"]

# Optional allowlist of issuer URLs for DCR (empty = allow any)
# JSON array of trusted Authorization Server issuer URLs
# Example: ["https://auth.example.com", "https://auth2.example.com"]
# Empty array [] allows DCR with any issuer (not recommended for production)
# DCR_ALLOWED_ISSUERS=[]

# Token endpoint authentication method for DCR
# Options: client_secret_basic (default), client_secret_post, none
# - client_secret_basic: Send credentials via HTTP Basic Auth header
# - client_secret_post: Send credentials in POST body
# - none: Public client (no client secret, PKCE-only)
# DCR_TOKEN_ENDPOINT_AUTH_METHOD=client_secret_basic

# AS metadata cache TTL in seconds (RFC 8414 discovery)
# How long to cache Authorization Server metadata after discovery
# DCR_METADATA_CACHE_TTL=3600

# Template for client_name in DCR requests
# {gateway_name} will be replaced with the actual gateway name
# DCR_CLIENT_NAME_TEMPLATE=ContextForge ({gateway_name})

# Request refresh_token even when AS metadata omits grant_types_supported
# Default: false (strict mode - only request refresh_token if AS explicitly advertises support)
# Set to true for Authorization Servers that support refresh tokens but don't advertise that in their metadata
# DCR_REQUEST_REFRESH_TOKEN_WHEN_UNSUPPORTED=false

# Enable OAuth AS metadata discovery (RFC 8414)
# When enabled, gateway automatically discovers Authorization Server endpoints
# from the issuer URL using well-known metadata endpoints
# OAUTH_DISCOVERY_ENABLED=true

# Preferred PKCE code challenge method
# Options: S256 (SHA-256, recommended), plain (not recommended)
# PKCE (Proof Key for Code Exchange) is always enabled for Authorization Code flows
# OAUTH_PREFERRED_CODE_CHALLENGE_METHOD=S256

# =============================================================================
# SSO (Single Sign-On) Configuration
# =============================================================================

# Master SSO switch - enable Single Sign-On authentication
# Options: true, false (default)
# When true: Enables SSO login options alongside local auth
# SSO_ENABLED=false

# GitHub OAuth Configuration
# Options: true, false (default)
# Requires: GitHub OAuth App (Settings > Developer settings > OAuth Apps)
# SSO_GITHUB_ENABLED=false
# SSO_GITHUB_CLIENT_ID=your-github-client-id
# SSO_GITHUB_CLIENT_SECRET=your-github-client-secret

# Google OAuth Configuration
# SSO_GOOGLE_ENABLED=false
# SSO_GOOGLE_CLIENT_ID=your-google-client-id.apps.googleusercontent.com
# SSO_GOOGLE_CLIENT_SECRET=your-google-client-secret

# IBM Security Verify OIDC Configuration
# SSO_IBM_VERIFY_ENABLED=false
# SSO_IBM_VERIFY_CLIENT_ID=your-ibm-verify-client-id
# SSO_IBM_VERIFY_CLIENT_SECRET=your-ibm-verify-client-secret
# SSO_IBM_VERIFY_ISSUER=https://your-tenant.verify.ibm.com/oidc/endpoint/default

# Okta OIDC Configuration
# SSO_OKTA_ENABLED=false
# SSO_OKTA_CLIENT_ID=your-okta-client-id
# SSO_OKTA_CLIENT_SECRET=your-okta-client-secret
# SSO_OKTA_ISSUER=https://your-okta-domain.okta.com

# Keycloak OIDC Configuration (with auto-discovery)
# SSO_KEYCLOAK_ENABLED=false
# SSO_KEYCLOAK_BASE_URL=https://keycloak.example.com
# Optional: browser-facing Keycloak URL when gateway uses an internal base URL (e.g., Docker DNS)
# SSO_KEYCLOAK_PUBLIC_BASE_URL=https://login.example.com
# SSO_KEYCLOAK_REALM=master
# SSO_KEYCLOAK_CLIENT_ID=mcp-gateway
# SSO_KEYCLOAK_CLIENT_SECRET=your-keycloak-client-secret
# SSO_KEYCLOAK_MAP_REALM_ROLES=true
# SSO_KEYCLOAK_MAP_CLIENT_ROLES=false
# SSO_KEYCLOAK_USERNAME_CLAIM=preferred_username
# SSO_KEYCLOAK_EMAIL_CLAIM=email
# SSO_KEYCLOAK_GROUPS_CLAIM=groups
# Optional: map Keycloak realm roles/groups to Gateway RBAC roles
# Example: {"gateway-admin":"platform_admin","gateway-developer":"developer","gateway-viewer":"viewer"}
# SSO_KEYCLOAK_ROLE_MAPPINGS={}
# Optional: fallback role when no mapping matches
# SSO_KEYCLOAK_DEFAULT_ROLE=
# If true, map team-scoped roles to the user's personal team automatically
# SSO_KEYCLOAK_RESOLVE_TEAM_SCOPE_TO_PERSONAL_TEAM=false

# Microsoft Entra ID (Azure AD) OIDC Configuration
# See docs/docs/manage/sso-microsoft-entra-id-tutorial.md for detailed setup instructions
# SSO_ENTRA_ENABLED=false
# SSO_ENTRA_CLIENT_ID=your-entra-application-client-id
# SSO_ENTRA_CLIENT_SECRET=your-entra-client-secret-value
# SSO_ENTRA_TENANT_ID=your-entra-tenant-id

# ─────────────────────────────────────────────────────────────────────────────
# EntraID Role Mapping Configuration
# ─────────────────────────────────────────────────────────────────────────────
# IMPORTANT: Configure group claims in Azure Portal > App Registration > Token Configuration
# Add "groups" claim to ID tokens for Security Groups, or use App Roles for semantic names.
#
# JWT claim containing groups (default: "groups" for Security Groups, use "roles" for App Roles)
# SSO_ENTRA_GROUPS_CLAIM=groups
#
# Admin Groups - members get platform_admin role and is_admin=true (full platform access)
# Accepts Object IDs (GUIDs) or App Role names. Case-insensitive matching.
# Example with Object IDs: ["a1b2c3d4-1234-5678-90ab-cdef12345678"]
# Example with App Roles:  ["Admin", "PlatformAdmin"]
# SSO_ENTRA_ADMIN_GROUPS=[]
#
# Role Mappings - map EntraID groups/roles to ContextForge RBAC roles
# Available roles: platform_admin (global), team_admin, developer, viewer (team scope)
# Format: JSON object {"group-id-or-name": "role-name"}
# Example with Object IDs:
#   SSO_ENTRA_ROLE_MAPPINGS={"e5f6a7b8-1234-5678-90ab-cdef12345678":"developer","c9d0e1f2-1234-5678-90ab-cdef12345678":"team_admin"}
# Example with App Roles (recommended - more readable):
#   SSO_ENTRA_ROLE_MAPPINGS={"Developer":"developer","TeamAdmin":"team_admin","Viewer":"viewer"}
# SSO_ENTRA_ROLE_MAPPINGS={}
#
# Default role for users without any group mappings (default: None = no automatic role)
# Set to "viewer" to give all EntraID users read-only access, or leave empty for explicit mapping only
# SSO_ENTRA_DEFAULT_ROLE=
#
# Synchronize role assignments on each login (default: true)
# When true: roles are updated based on current group membership (recommended for security)
# When false: roles are only assigned on first login (user creation)
# SSO_ENTRA_SYNC_ROLES_ON_LOGIN=true
#
# Group overage fallback (users with >200 Entra groups)
# When enabled, ContextForge calls Microsoft Graph /v1.0/me/getMemberObjects
# to resolve full group membership during SSO login.
# SSO_ENTRA_GRAPH_API_ENABLED=true
# SSO_ENTRA_GRAPH_API_TIMEOUT=10
# Maximum number of groups retained from Graph response (0 = no cap)
# SSO_ENTRA_GRAPH_API_MAX_GROUPS=0

# Generic OIDC Provider Configuration (Keycloak, Auth0, Authentik, etc.)
# SSO_GENERIC_ENABLED=false
# SSO_GENERIC_PROVIDER_ID=keycloak
# SSO_GENERIC_DISPLAY_NAME=Keycloak
# SSO_GENERIC_CLIENT_ID=your-oidc-client-id
# SSO_GENERIC_CLIENT_SECRET=your-oidc-client-secret
# SSO_GENERIC_AUTHORIZATION_URL=https://keycloak.company.com/auth/realms/master/protocol/openid-connect/auth
# SSO_GENERIC_TOKEN_URL=https://keycloak.company.com/auth/realms/master/protocol/openid-connect/token
# SSO_GENERIC_USERINFO_URL=https://keycloak.company.com/auth/realms/master/protocol/openid-connect/userinfo
# SSO_GENERIC_ISSUER=https://keycloak.company.com/auth/realms/master
# SSO_GENERIC_JWKS_URI=https://keycloak.company.com/auth/realms/master/protocol/openid-connect/certs
# SSO_GENERIC_SCOPE=openid profile email

# SSO General Settings
# SSO_AUTO_CREATE_USERS=true
# JSON array of trusted email domains, e.g., ["example.com", "company.org"]
# SSO_TRUSTED_DOMAINS=[]
# Keep local admin authentication when SSO is enabled
# SSO_PRESERVE_ADMIN_AUTH=true

# SSO Issuers Configuration
# Optional JSON array of issuer URLs for SSO providers
# Example: ["https://idp1.example.com", "https://idp2.example.com"]
# Default: null (not set)
# SSO_ISSUERS=["https://idp.example.com"]

# SSO Admin Assignment Settings
# Email domains that automatically get admin privileges, e.g., ["yourcompany.com"]
# SSO_AUTO_ADMIN_DOMAINS=[]
# GitHub organizations whose members get admin privileges, e.g., ["your-org", "partner-org"]
# SSO_GITHUB_ADMIN_ORGS=[]
# Google Workspace domains that get admin privileges, e.g., ["company.com"]
# SSO_GOOGLE_ADMIN_DOMAINS=[]
# Require admin approval for new SSO registrations
# SSO_REQUIRE_ADMIN_APPROVAL=false

# =============================================================================
# Personal Teams Configuration
# =============================================================================

# Enable automatic personal team creation for new users
# AUTO_CREATE_PERSONAL_TEAMS=true

# Personal team naming prefix (optional; empty default derives slug from display name)
# PERSONAL_TEAM_PREFIX=personal

# Allow users to create organizational teams (admins can always create teams)
# ALLOW_TEAM_CREATION=true

# Allow users to request to join public teams
# ALLOW_TEAM_JOIN_REQUESTS=true

# Allow team owners to send invitations
# ALLOW_TEAM_INVITATIONS=true

# Default global role assigned to admin users
# DEFAULT_ADMIN_ROLE=platform_admin

# Default global role assigned to non-admin users
# DEFAULT_USER_ROLE=platform_viewer

# Default team role assigned to team owners (e.g. personal team creator)
# DEFAULT_TEAM_OWNER_ROLE=team_admin

# Default team role assigned to team members
# DEFAULT_TEAM_MEMBER_ROLE=viewer

# Team Limits
# MAX_TEAMS_PER_USER=50
# MAX_MEMBERS_PER_TEAM=100

# Team Invitation Settings
# INVITATION_EXPIRY_DAYS=7
# REQUIRE_EMAIL_VERIFICATION_FOR_INVITES=true

# =============================================================================
# Admin UI and API Toggles
# =============================================================================

# Enable the web-based Admin UI at /admin
# Options: true, false (default)
# PRODUCTION: Set to false for security unless needed
# Project defaults block enables this for local dev
# MCPGATEWAY_UI_ENABLED=false

# Enable Admin REST API endpoints (/tools, /servers, /resources, etc.)
# Options: true, false (default)
# Required for: Admin UI functionality, programmatic management
# Project defaults block enables this for local dev
# MCPGATEWAY_ADMIN_API_ENABLED=false

# Serve UI assets from local files (no external CDN) for air-gapped deployments
# Options: true, false (default)
# When enabled, UI loads CSS/JS from local files instead of external CDNs
# Requires container build with downloaded assets (automatic in Containerfile.lite)
# MCPGATEWAY_UI_AIRGAPPED=false

# Embedded UI mode (hides logout + team selector by default)
# Options: true, false (default)
# MCPGATEWAY_UI_EMBEDDED=false

# Comma-separated list of UI sections to hide
# Valid values: overview, servers, gateways, tools, prompts, resources, roots, mcp-registry, metrics, plugins, export-import, logs, version-info, maintenance, teams, users, agents, tokens, settings
# Example: MCPGATEWAY_UI_HIDE_SECTIONS=prompts,resources,teams
# MCPGATEWAY_UI_HIDE_SECTIONS=

# Comma-separated list of header items to hide
# Valid values: logout, team_selector, user_identity, theme_toggle
# Example: MCPGATEWAY_UI_HIDE_HEADER_ITEMS=logout,team_selector
# MCPGATEWAY_UI_HIDE_HEADER_ITEMS=

# Enable bulk import feature for mass tool/resource registration
# Options: true (default), false
# Allows importing multiple tools/resources in a single API call
# MCPGATEWAY_BULK_IMPORT_ENABLED=true

# Maximum number of tools allowed per bulk import request
# MCPGATEWAY_BULK_IMPORT_MAX_TOOLS=200

# Rate limiting for bulk import endpoint (requests per minute)
# MCPGATEWAY_BULK_IMPORT_RATE_LIMIT=10

# =============================================================================
# Tool Execution Cancellation
# =============================================================================

# Enable gateway-authoritative tool execution cancellation
# Options: true (default), false
# When enabled: Provides REST API endpoints for cancelling long-running tool executions
#   - POST /cancellation/cancel - Cancel a running tool execution
#   - GET /cancellation/status/{id} - Query tool execution status
# When disabled: Cancellation endpoints return 404, tool executions not tracked
# Features: Real asyncio task interruption, multi-worker coordination via Redis
# MCPGATEWAY_TOOL_CANCELLATION_ENABLED=true

# =============================================================================
# A2A (Agent-to-Agent) Configuration
# =============================================================================

# Enable A2A agent features (true/false)
# Allows registration and management of external AI agents
# MCPGATEWAY_A2A_ENABLED=true

# Maximum number of A2A agents allowed
# MCPGATEWAY_A2A_MAX_AGENTS=100

# Default timeout for A2A agent HTTP requests (seconds)
# MCPGATEWAY_A2A_DEFAULT_TIMEOUT=30

# Maximum retry attempts for failed A2A agent calls
# MCPGATEWAY_A2A_MAX_RETRIES=3

# Enable A2A agent metrics collection (true/false)
# MCPGATEWAY_A2A_METRICS_ENABLED=true

# =============================================================================
# MCP Server Catalog Configuration
# =============================================================================

# Enable MCP server catalog feature
# Allows defining a catalog of pre-configured MCP servers in a YAML file
# for easy discovery and management via the Admin UI
# Options: true (default), false
# MCPGATEWAY_CATALOG_ENABLED=true

# Path to the catalog configuration file
# YAML file containing MCP server definitions
# Default: mcp-catalog.yml
# MCPGATEWAY_CATALOG_FILE=mcp-catalog.yml

# Automatically health check catalog servers on startup and periodically
# Options: true (default), false
# MCPGATEWAY_CATALOG_AUTO_HEALTH_CHECK=true

# Catalog cache TTL in seconds
# How long to cache catalog data before refreshing
# Default: 3600 (1 hour)
# MCPGATEWAY_CATALOG_CACHE_TTL=3600

# Number of catalog servers to display per page
# Default: 100
# MCPGATEWAY_CATALOG_PAGE_SIZE=100

# =============================================================================
# Elicitation Support (MCP 2025-06-18)
# =============================================================================

# Enable elicitation passthrough - allows upstream MCP servers to request
# structured user input through connected clients (e.g., Claude Desktop)
# Per MCP spec 2025-06-18, elicitation enables interactive workflows where
# servers can dynamically gather information from users during operations
# MCPGATEWAY_ELICITATION_ENABLED=true

# Default timeout for user responses (seconds)
# How long to wait for users to respond to elicitation requests
# MCPGATEWAY_ELICITATION_TIMEOUT=60

# Maximum concurrent elicitation requests
# Prevents resource exhaustion from too many pending user input requests
# MCPGATEWAY_ELICITATION_MAX_CONCURRENT=100

# =============================================================================
# Header Passthrough Configuration
# =============================================================================

# SECURITY WARNING: Header passthrough is disabled by default for security.
# Only enable if you understand the security implications and have reviewed
# which headers should be passed through to backing MCP servers.
# ENABLE_HEADER_PASSTHROUGH=false

# Enable overwriting of base headers (advanced usage only)
# When disabled, passthrough headers cannot override gateway headers like Content-Type, Authorization
# ENABLE_OVERWRITE_BASE_HEADERS=false

# Default headers to pass through (when feature is enabled)
# JSON array format recommended: ["X-Tenant-Id", "X-Trace-Id"]
# Comma-separated also supported: X-Tenant-Id,X-Trace-Id
# NOTE: Authorization header removed from defaults for security
# DEFAULT_PASSTHROUGH_HEADERS=["X-Tenant-Id", "X-Trace-Id"]

# Passthrough headers source priority
# Controls where header configuration is read from:
# - "db": Database wins if configured, env as fallback (default, backward compatible)
# - "env": Environment variable always wins (ideal for Kubernetes/containerized deployments)
# - "merge": Union of both sources - env provides base, DB can add more headers
# PASSTHROUGH_HEADERS_SOURCE=db

# =============================================================================
# Security and CORS
# =============================================================================

# Skip SSL/TLS certificate verification for upstream requests
# Options: true, false (default)
# WARNING: Only use in development or with self-signed certificates!
# PRODUCTION: Must be false for security
# SKIP_SSL_VERIFY=false

# CORS allowed origins (JSON array of URLs)
# Controls which domains can make cross-origin requests to the gateway
# Format: JSON array starting with [ and ending with ]
# Example: ["http://localhost:3000", "https://app.example.com"]
# Use ["*"] to allow all origins (NOT RECOMMENDED)
# ALLOWED_ORIGINS='["http://localhost", "http://localhost:4444"]'

# Enable CORS (Cross-Origin Resource Sharing) handling
# Options: true (default), false
# Required for: Web browser clients, cross-domain API access
# CORS_ENABLED=true

# CORS allow credentials (true/false)
# CORS_ALLOW_CREDENTIALS=true

# Environment setting (development/production) - affects security defaults
# development: Auto-configures CORS for localhost:3000, localhost:8080, etc.
# production: Uses APP_DOMAIN for HTTPS origins, enforces secure cookies
# ENVIRONMENT is already defined in Basic Server Configuration section

# Domain configuration for production CORS origins
# In production, automatically creates origins: https://APP_DOMAIN, https://app.APP_DOMAIN, https://admin.APP_DOMAIN
# For production: set to your actual domain (e.g., mycompany.com)
# APP_DOMAIN is already defined in Basic Server Configuration section

# Security settings for cookies
# production: Automatically enables secure cookies regardless of this setting
# development: Set to false for HTTP development, true for HTTPS
# Project defaults block sets SECURE_COOKIES=false for local dev
# SECURE_COOKIES=true

# Cookie SameSite attribute for CSRF protection
# strict: Maximum security, may break some OAuth flows
# lax: Good balance of security and compatibility (recommended)
# none: Requires Secure=true, allows cross-site usage
# COOKIE_SAMESITE=lax

# =============================================================================
# Query Parameter Authentication (INSECURE)
# =============================================================================
# WARNING: Query parameter authentication exposes API keys in URLs.
# API keys may appear in proxy logs, browser history, and server access logs.
# See CWE-598: Use of GET Request Method With Sensitive Query Strings.
# Only use when the upstream MCP server (e.g., Tavily) requires this method.

# Enable query parameter authentication for gateway peers
# Options: true, false (default)
# SECURITY: Disabled by default. Only enable with explicit allowlist.
# INSECURE_ALLOW_QUERYPARAM_AUTH=false

# Allowlist of hosts permitted to use query parameter authentication
# Format: JSON array of hostnames, e.g., ["mcp.tavily.com", "api.example.com"]
# Empty list [] allows any host when feature is enabled (NOT RECOMMENDED)
# PRODUCTION: Always configure an explicit allowlist
# INSECURE_QUERYPARAM_AUTH_ALLOWED_HOSTS=[]

# =============================================================================
# Security Headers Configuration
# =============================================================================

# Enable security headers middleware (true/false)
# SECURITY_HEADERS_ENABLED=true

# X-Frame-Options setting - Controls iframe embedding (also sets CSP frame-ancestors)
# DENY: Prevents all iframe embedding (recommended for security) → frame-ancestors 'none'
# SAMEORIGIN: Allows embedding from same domain only → frame-ancestors 'self'
# "" (empty string) / null / none: Removes iframe restrictions (no headers sent, allows embedding)
# ALLOW-FROM uri: Allows specific domain (deprecated, use CSP instead)
# ALLOW-ALL: Allows all embedding → frame-ancestors * file: http: https:
#
# Both X-Frame-Options header and CSP frame-ancestors directive are automatically synced.
# Modern browsers prioritize CSP frame-ancestors over X-Frame-Options.
# X_FRAME_OPTIONS=DENY

# Other security headers (true/false)
# X_CONTENT_TYPE_OPTIONS_ENABLED=true
# X_XSS_PROTECTION_ENABLED=true
# X_DOWNLOAD_OPTIONS_ENABLED=true

# HSTS (HTTP Strict Transport Security) settings
# HSTS_ENABLED=true
# HSTS max age in seconds (31536000 = 1 year)
# HSTS_MAX_AGE=31536000
# HSTS_INCLUDE_SUBDOMAINS=true

# Remove server identification headers (true/false)
# REMOVE_SERVER_HEADERS=true

# Enable HTTP Basic Auth for docs endpoints (in addition to Bearer token auth)
# Uses the same credentials as BASIC_AUTH_USER and BASIC_AUTH_PASSWORD
# DOCS_ALLOW_BASIC_AUTH is already defined in Basic Server Configuration section

# =============================================================================
# Response Compression Configuration
# =============================================================================

# Enable response compression (Brotli, Zstd, GZip)
# Options: true (default), false
# Reduces bandwidth by 30-70% for text-based responses (JSON, HTML, CSS, JS)
# Automatically negotiates compression algorithm based on client Accept-Encoding header
# Priority: Brotli (best compression) > Zstd (fast) > GZip (universal fallback)
# COMPRESSION_ENABLED=true

# Minimum response size in bytes to compress
# Responses smaller than this won't be compressed (compression overhead not worth it)
# Default: 500 bytes
# Set to 0 to compress all responses
# COMPRESSION_MINIMUM_SIZE=500

# GZip compression level (1-9)
# 1 = fastest compression, larger files
# 6 = balanced (recommended default)
# 9 = best compression, slower
# Default: 6
# COMPRESSION_GZIP_LEVEL=6

# Brotli compression quality (0-11)
# 0-3 = fast compression (lower quality)
# 4-9 = balanced compression (recommended)
# 10-11 = maximum compression (slower)
# Default: 4 (balanced)
# Note: Brotli offers 15-20% better compression than GZip at similar speeds
# COMPRESSION_BROTLI_QUALITY=4

# Zstd compression level (1-22)
# 1-3 = fast compression
# 4-9 = balanced compression
# 10+ = slower, maximum compression
# Default: 3 (fast)
# Note: Zstd is the fastest algorithm with good compression ratio
# COMPRESSION_ZSTD_LEVEL=3

# =============================================================================
# HTTPX Client Connection Pool Configuration
# =============================================================================
# Controls HTTP client settings for outbound requests (federation, health checks,
# A2A, SSO, MCP server connections, etc.). Most requests use a shared singleton
# client for ~20x better performance. SSE/streaming MCP connections use factory
# clients with the same settings for proper connection lifecycle management.

# Maximum total connections in the pool (default: 200, range: 10-1000)
# Formula: concurrent_outbound_requests × 1.5
# HTTPX_MAX_CONNECTIONS=200

# Maximum keepalive connections (default: 100, range: 1-500)
# Connections held open for reuse; typically 50% of max_connections
# HTTPX_MAX_KEEPALIVE_CONNECTIONS=100

# Keepalive connection expiry in seconds (default: 30.0, range: 5.0-300.0)
# How long idle connections stay in the pool before being closed
# HTTPX_KEEPALIVE_EXPIRY=30.0

# Connection timeout in seconds (default: 5.0, range: 1.0-60.0)
# Time to establish a new TCP connection (5s for LAN, increase for WAN)
# HTTPX_CONNECT_TIMEOUT=5.0

# Read timeout in seconds (default: 120.0, range: 1.0-600.0)
# Time to wait for response data after connection established
# Set high to accommodate slow MCP tool calls (60-90s+)
# HTTPX_READ_TIMEOUT=120.0

# Write timeout in seconds (default: 30.0, range: 1.0-600.0)
# Time to wait when sending request data
# HTTPX_WRITE_TIMEOUT=30.0

# Pool timeout in seconds (default: 10.0, range: 1.0-120.0)
# Time to wait for a connection from the pool (fail fast on exhaustion)
# HTTPX_POOL_TIMEOUT=10.0

# Enable HTTP/2 support (default: false)
# HTTP/2 provides multiplexing but may not be supported by all upstream servers
# HTTPX_HTTP2_ENABLED=false

# Admin operations read timeout in seconds (default: 30.0, range: 1.0-120.0)
# Shorter timeout for admin UI operations (model fetching, health checks)
# Use this to fail fast on admin pages instead of waiting for HTTPX_READ_TIMEOUT
# HTTPX_ADMIN_READ_TIMEOUT=30.0

# =============================================================================
# Retry Config for HTTP Requests
# =============================================================================

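# Maximum number of retry attempts
# RETRY_MAX_ATTEMPTS=3
# Base delay between retries (seconds)
# RETRY_BASE_DELAY=1.0
# Maximum delay between retries (seconds)
# RETRY_MAX_DELAY=60.0
# Maximum jitter, as a fraction of the delay
# RETRY_JITTER_MAX=0.5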

# =============================================================================
# Logging
# =============================================================================

# Logging verbosity level
# Options: DEBUG, INFO, WARNING, ERROR (default), CRITICAL
# DEBUG: Detailed diagnostic info (verbose)
# INFO: General operational messages
# WARNING: Warning messages for potential issues
# ERROR: Error messages for failures (recommended for production)
# CRITICAL: Only critical failures
# PRODUCTION: Use ERROR to minimize I/O overhead and improve performance
# LOG_LEVEL=ERROR

# Log output format
# Options: json (default), text
# json: Structured JSON logs (good for log aggregation)
# text: Human-readable plain text
# LOG_FORMAT=json

# Enable file logging (in addition to console output)
# Options: true, false (default)
# LOG_TO_FILE=false

# Enable request payload logging for debugging
# Options: true, false (default)
# When enabled, logs HTTP request method, headers, query params, and body
# Sensitive data (passwords, tokens, etc.) is automatically masked
# LOG_REQUESTS=false

# File write mode when LOG_TO_FILE=true
# Options: a+ (append, default), w (overwrite on startup)
# LOG_FILEMODE=a+
# Project defaults block sets LOG_FILE and LOG_FOLDER for local debug logs
# LOG_FILE=mcpgateway.log
# LOG_FOLDER=logs
# LOG_ROTATION_ENABLED=false
# LOG_MAX_SIZE_MB=1
# LOG_BACKUP_COUNT=5
# LOG_BUFFER_SIZE_MB=1.0

# Masking value used for sensitive data in logs
# MASKED_AUTH_VALUE=*****

# Maximum request body size to log in detailed mode (bytes)
# Controls how much of the request body is parsed and logged when LOG_REQUESTS=true
# Separate from LOG_MAX_SIZE_MB which is for log file rotation
# Default: 16384 (16KB), Range: 1024-1048576 (1KB-1MB)
# LOG_DETAILED_MAX_BODY_SIZE=16384

# Path prefixes to skip from detailed request logging (JSON array or comma-separated)
# Use to exclude high-volume or low-value endpoints from logging overhead
# Examples: '[]' (empty), '["/metrics","/health"]', or "/metrics,/health"
# Default: [] (no additional endpoints skipped beyond built-in health checks)
# LOG_DETAILED_SKIP_ENDPOINTS=[]

# Sampling rate for detailed request logging (0.0-1.0)
# When LOG_REQUESTS=true, only log a fraction of requests to reduce overhead
# 1.0 = log all requests, 0.5 = log 50%, 0.1 = log 10%
# Default: 1.0 (log all requests when detailed logging is enabled)
# LOG_DETAILED_SAMPLE_RATE=1.0

# Enable user identity resolution via database lookup during request logging
# When false (default), only uses cached user identity from request context
# When true, falls back to DB lookup if no cached identity (adds overhead)
# Default: false (avoid implicit DB queries for better performance)
# LOG_RESOLVE_USER_IDENTITY=false

# ═══════════════════════════════════════════════════════════════════════════════
# Structured Log Database Persistence
# ═══════════════════════════════════════════════════════════════════════════════
# Persist structured logs to the database for search, tracing, and metrics.
# Controlled by STRUCTURED_LOGGING_DATABASE_ENABLED. Options: true, false (default)
#
# When ENABLED, you get:
#   - Log Search API (/api/logs/search) - search logs by level, component, user, time
#   - Request Tracing (/api/logs/trace/{id}) - trace all logs for a correlation ID
#   - Performance Metrics - aggregated p50/p95/p99 latencies, error rates
#   - Admin UI log viewer with filtering and search
#
# When DISABLED:
#   - Logs only go to console/file (no database writes)
#   - Better performance (no DB I/O per log entry)
#   - Log search/trace/metrics APIs return empty results
#   - Use this if you have an external log aggregator (ELK, Datadog, etc.)
#
# PERFORMANCE NOTE: Each log entry triggers a synchronous database write.
# Disable this in high-throughput production environments or use external logging.
# STRUCTURED_LOGGING_ENABLED=true
# STRUCTURED_LOGGING_DATABASE_ENABLED=false
# STRUCTURED_LOGGING_EXTERNAL_ENABLED=false

# Log Search Configuration
# Maximum results per log search query
# LOG_SEARCH_MAX_RESULTS=1000

# Number of days to retain logs in the database
# LOG_RETENTION_DAYS=30

# External Log Integration Configuration
# Send logs to Elasticsearch
# ELASTICSEARCH_ENABLED=false
# ELASTICSEARCH_URL=
# ELASTICSEARCH_INDEX_PREFIX=mcpgateway-logs

# Send logs to syslog
# SYSLOG_ENABLED=false
# SYSLOG_HOST=
# SYSLOG_PORT=514

# Send logs to webhook endpoints
# WEBHOOK_LOGGING_ENABLED=false
# WEBHOOK_LOGGING_URLS=[]

# Correlation ID / Request Tracking
# Enable automatic correlation ID tracking for unified request tracing
# Options: true (default), false
# CORRELATION_ID_ENABLED=true
# HTTP header name for correlation ID (default: X-Correlation-ID)
# CORRELATION_ID_HEADER=X-Correlation-ID
# Preserve incoming correlation IDs from clients (default: true)
# CORRELATION_ID_PRESERVE=true
# Include correlation ID in HTTP response headers (default: true)
# CORRELATION_ID_RESPONSE_HEADER=true

# ═══════════════════════════════════════════════════════════════════════════════
# Database Query Logging (N+1 Detection)
# ═══════════════════════════════════════════════════════════════════════════════
# Enable database query logging to file for N+1 detection and performance analysis
# Use: make dev-query-log (starts server with logging enabled)
# Use: make query-log-analyze (analyze logs for N+1 patterns)
# DB_QUERY_LOG_ENABLED=false
# DB_QUERY_LOG_FILE=logs/db-queries.log
# DB_QUERY_LOG_JSON_FILE=logs/db-queries.jsonl
# DB_QUERY_LOG_FORMAT=both
# DB_QUERY_LOG_MIN_QUERIES=1
# DB_QUERY_LOG_INCLUDE_PARAMS=false
# DB_QUERY_LOG_DETECT_N1=true
# DB_QUERY_LOG_N1_THRESHOLD=3

# =============================================================================
# Metrics Aggregation Configuration
# =============================================================================
# Aggregates structured logs into performance metrics on a schedule.
# Requires STRUCTURED_LOGGING_DATABASE_ENABLED=true.
# METRICS_AGGREGATION_ENABLED=true

# Hours of structured logs to backfill into metrics on startup
# METRICS_AGGREGATION_BACKFILL_HOURS=6

# Time window for metrics aggregation (minutes)
# METRICS_AGGREGATION_WINDOW_MINUTES=5

# Automatically start the aggregation loop on application startup
# METRICS_AGGREGATION_AUTO_START=false

# =============================================================================
# Execution Metrics Recording
# =============================================================================
# Controls whether tool/resource/prompt/server/A2A execution metrics are written
# to the database. Each MCP operation (tool call, resource read, etc.) creates
# one database row with: entity_id, timestamp, response_time (seconds), is_success, error_message.
#
# Disable if you rely entirely on external observability (ELK, Datadog, Splunk)
# to reduce database I/O overhead.
#
# Note: This does NOT affect:
# - Log aggregation (METRICS_AGGREGATION_ENABLED) - aggregates StructuredLogEntry into PerformanceMetric
# - Prometheus metrics (ENABLE_METRICS) - /metrics/prometheus endpoint for Prometheus scraping
# - Observability metrics (OBSERVABILITY_METRICS_ENABLED) - internal observability system
#
# To disable log aggregation as well, also set METRICS_AGGREGATION_ENABLED=false
# DB_METRICS_RECORDING_ENABLED=true

# =============================================================================
# Metrics Buffer Configuration
# =============================================================================
# Batches tool/resource/prompt/server metric writes to reduce DB pressure under load

# Enable buffered metrics writes (default: true)
# When enabled, metrics are accumulated in memory and flushed periodically
# METRICS_BUFFER_ENABLED=true

# Seconds between automatic metrics buffer flushes (default: 60, range: 5-300)
# Lower values = more frequent writes, higher values = better batching
# METRICS_BUFFER_FLUSH_INTERVAL=60

# Maximum buffered metrics before forced flush (default: 1000, range: 100-10000)
# Prevents unbounded memory growth under very high load
# METRICS_BUFFER_MAX_SIZE=1000

# =============================================================================
# Metrics Cache Configuration
# =============================================================================
# Caches aggregate metrics queries to reduce database load under high traffic
# See GitHub Issue #1734 for performance optimization details

# Enable in-memory caching for aggregate metrics queries (default: true)
# When enabled, aggregate_metrics() results are cached to reduce database load
# METRICS_CACHE_ENABLED=true

# TTL for cached aggregate metrics in seconds (default: 60, range: 1-300)
# Lower values = fresher data, higher values = better performance
# Recommended: 60-300 seconds for high-traffic deployments (see Issue #1906)
# METRICS_CACHE_TTL_SECONDS=60

# =============================================================================
# Metrics Cleanup Configuration
# =============================================================================
# Automatically deletes old metrics data to prevent unbounded table growth

# Enable automatic cleanup of old metrics data (default: true)
# METRICS_CLEANUP_ENABLED=true

# Days to retain raw metrics when rollup is disabled (default: 7, range: 1-365)
# This is a fallback - when METRICS_DELETE_RAW_AFTER_ROLLUP=true, raw metrics
# are deleted based on METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS instead.
# METRICS_RETENTION_DAYS=7

# Hours between automatic cleanup runs (default: 1, range: 1-168)
# METRICS_CLEANUP_INTERVAL_HOURS=1

# Batch size for metrics deletion (default: 10000, range: 100-100000)
# Larger batches are faster but may cause longer table locks
# METRICS_CLEANUP_BATCH_SIZE=10000

# =============================================================================
# Metrics Rollup Configuration
# =============================================================================
# Aggregates raw metrics into hourly summaries for efficient historical queries
# Rollups preserve counts, averages, and percentiles (p50, p95, p99)

# Enable hourly metrics rollup for efficient historical queries (default: true)
# METRICS_ROLLUP_ENABLED=true

# Hours between rollup runs (default: 1, range: 1-24)
# METRICS_ROLLUP_INTERVAL_HOURS=1

# Days to retain hourly rollup data (default: 365, range: 30-3650)
# METRICS_ROLLUP_RETENTION_DAYS=365

# Hours to re-process on each rollup run to catch late-arriving data (default: 1, range: 1-48)
# Smaller = less CPU/IO overhead, larger = more tolerance for delayed metrics
# METRICS_ROLLUP_LATE_DATA_HOURS=1

# Delete raw metrics after hourly rollup exists (default: true)
# When true, raw metrics older than METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS are
# deleted once hourly rollups exist. Rollups preserve all analytics.
#
# Set to false only if you need raw metrics indefinitely (e.g., exact error
# messages, individual request debugging without external observability).
#
# If using ELK, Datadog, Splunk, or similar platforms for debugging, keep this
# true - your external platform handles detailed logs and traces.
# METRICS_DELETE_RAW_AFTER_ROLLUP=true

# Hours to retain raw metrics when hourly rollup exists (default: 1, range: 1-8760)
# After this period, raw metrics are deleted but hourly rollups remain.
# Increase to 168 if you need raw data for debugging without external observability.
# METRICS_DELETE_RAW_AFTER_ROLLUP_HOURS=1

# =============================================================================
# Authentication Cache Configuration
# =============================================================================
# Caches authentication data (user, team, revocation) to reduce database queries
# Uses Redis when available, falls back to in-memory cache

# Enable Redis/in-memory caching for authentication data (default: true)
# Significantly reduces database queries during authentication
# AUTH_CACHE_ENABLED=true

# TTL in seconds for cached user data (default: 60, range: 10-300)
# AUTH_CACHE_USER_TTL=60

# TTL in seconds for token revocation cache (default: 30, range: 5-120)
# Security-critical: keep short to limit exposure window for revoked tokens
# AUTH_CACHE_REVOCATION_TTL=30

# TTL in seconds for team membership cache (default: 60, range: 10-300)
# AUTH_CACHE_TEAM_TTL=60

# TTL in seconds for user role in team cache (default: 60, range: 10-300)
# Caches get_user_role_in_team() which is called 11+ times per team operation
# AUTH_CACHE_ROLE_TTL=60

# Enable caching for get_user_teams() (default: true)
# Set to false to disable teams list caching (useful for debugging)
# AUTH_CACHE_TEAMS_ENABLED=true

# TTL in seconds for user teams list cache (default: 60, range: 10-300)
# Caches get_user_teams() which is called 20+ times per request for auth checks
# AUTH_CACHE_TEAMS_TTL=60

# Batch auth DB queries into single call (default: true)
# Reduces 3 separate queries to 1, improving performance under load
# AUTH_CACHE_BATCH_QUERIES=true

# =============================================================================
# Registry Cache Configuration
# =============================================================================
# Caches registry list endpoints (tools, prompts, resources, agents, servers, gateways)
# Uses Redis when available, falls back to in-memory cache
# Reduces DB queries for frequently accessed list endpoints

# Enable registry caching (default: true)
# REGISTRY_CACHE_ENABLED=true

# TTL in seconds for tools list cache (default: 20, range: 5-300)
# REGISTRY_CACHE_TOOLS_TTL=20

# TTL in seconds for prompts list cache (default: 15, range: 5-300)
# REGISTRY_CACHE_PROMPTS_TTL=15

# TTL in seconds for resources list cache (default: 15, range: 5-300)
# REGISTRY_CACHE_RESOURCES_TTL=15

# TTL in seconds for A2A agents list cache (default: 20, range: 5-300)
# REGISTRY_CACHE_AGENTS_TTL=20

# TTL in seconds for servers list cache (default: 20, range: 5-300)
# REGISTRY_CACHE_SERVERS_TTL=20

# TTL in seconds for gateways list cache (default: 20, range: 5-300)
# REGISTRY_CACHE_GATEWAYS_TTL=20

# TTL in seconds for catalog servers list cache (default: 300, range: 60-600)
# Longer TTL since external catalog changes infrequently
# REGISTRY_CACHE_CATALOG_TTL=300

# =============================================================================
# Tool Lookup Cache Configuration
# =============================================================================
# Caches tool lookup by name in the invoke_tool hot path
# Uses in-memory L1 cache and optional Redis L2 cache when CACHE_TYPE=redis

# Enable tool lookup caching (default: true)
# TOOL_LOOKUP_CACHE_ENABLED=true

# TTL in seconds for tool lookup cache entries (default: 60, range: 5-600)
# TOOL_LOOKUP_CACHE_TTL_SECONDS=60

# TTL in seconds for negative cache entries (default: 10, range: 1-60)
# Used for missing/inactive/offline tool lookups
# TOOL_LOOKUP_CACHE_NEGATIVE_TTL_SECONDS=10

# Max entries for in-memory L1 tool cache (default: 10000, range: 100-1000000)
# TOOL_LOOKUP_CACHE_L1_MAXSIZE=10000

# Enable Redis L2 cache when CACHE_TYPE=redis (default: true)
# TOOL_LOOKUP_CACHE_L2_ENABLED=true

# =============================================================================
# Admin Stats Cache Configuration
# =============================================================================
# Caches admin dashboard statistics (entity counts, observability metrics)
# Reduces expensive aggregate queries under dashboard load

# Enable admin stats caching (default: true)
# ADMIN_STATS_CACHE_ENABLED=true

# TTL in seconds for system stats cache (default: 60, range: 10-300)
# ADMIN_STATS_CACHE_SYSTEM_TTL=60

# TTL in seconds for observability stats cache (default: 30, range: 10-120)
# ADMIN_STATS_CACHE_OBSERVABILITY_TTL=30

# TTL in seconds for tags listing cache (default: 120, range: 30-600)
# ADMIN_STATS_CACHE_TAGS_TTL=120

# TTL in seconds for plugin stats cache (default: 120, range: 30-600)
# ADMIN_STATS_CACHE_PLUGINS_TTL=120

# TTL in seconds for performance aggregates cache (default: 60, range: 15-300)
# ADMIN_STATS_CACHE_PERFORMANCE_TTL=60

# Team Member Count Cache
# Reduces N+1 queries in admin UI team listings

# Enable team member count caching (default: true)
# TEAM_MEMBER_COUNT_CACHE_ENABLED=true

# TTL in seconds for team member count cache (default: 300, range: 30-3600)
# TEAM_MEMBER_COUNT_CACHE_TTL=300

# Transport Protocol Configuration
# Options: all (default), sse, streamablehttp, http
# - all: Enable all transport protocols
# - sse: Server-Sent Events only
# - streamablehttp: Streamable HTTP only
# - http: Standard HTTP JSON-RPC only
# TRANSPORT_TYPE=all

# WebSocket keepalive ping interval in seconds
# Prevents connection timeout for idle WebSocket connections
# WEBSOCKET_PING_INTERVAL=30

# Enable legacy WebSocket JSON-RPC relay endpoint (/ws)
# SECURITY: Disabled by default. Enable only for clients that require /ws.
# MCPGATEWAY_WS_RELAY_ENABLED=false

# Enable reverse-proxy transport endpoints (/reverse-proxy/*)
# SECURITY: Disabled by default. Enable only when using mcpgateway.reverse_proxy.
# MCPGATEWAY_REVERSE_PROXY_ENABLED=false

# SSE client retry timeout in milliseconds
# Time client waits before reconnecting after SSE connection loss
# SSE_RETRY_TIMEOUT=5000

# Enable SSE keepalive events to prevent proxy/firewall timeouts
# Options: true (default), false
# SSE_KEEPALIVE_ENABLED=true

# SSE keepalive event interval in seconds
# How often to send keepalive events when SSE_KEEPALIVE_ENABLED=true
# SSE_KEEPALIVE_INTERVAL=30

# ─────────────────────────────────────────────────────────────────────────────
# SSE Connection Protection (CPU Spin Loop Mitigation - Layer 1)
# ─────────────────────────────────────────────────────────────────────────────
# These settings detect and close dead SSE connections before they trigger
# CPU spin loops in anyio's _deliver_cancellation method.
#
# Part of Issue #2360 mitigation. See: docs/docs/operations/cpu-spin-loop-mitigation.md
# Upstream issue: https://github.com/agronholm/anyio/issues/695

# SSE send timeout in seconds
# Timeout for ASGI send() calls - protects against sends that hang indefinitely
# when client connection is in a bad state. Does NOT affect MCP server response times.
# Set to 0 to disable. Default matches keepalive interval.
# SSE_SEND_TIMEOUT=30.0

# SSE rapid yield detection
# If more than SSE_RAPID_YIELD_MAX yields occur within SSE_RAPID_YIELD_WINDOW_MS,
# the connection is assumed dead and closed. Set SSE_RAPID_YIELD_MAX=0 to disable.
# SSE_RAPID_YIELD_WINDOW_MS=1000
# SSE_RAPID_YIELD_MAX=50

# Streamable HTTP Configuration
# Enable stateful sessions (stores session state server-side)
# Options: true, false (default)
# false: Stateless mode (better for scaling)
# true: Stateful mode (requires CACHE_TYPE=redis for multi-worker deployments)
# USE_STATEFUL_SESSIONS=false

# Multi-Worker Session Affinity (ADR-038)
# Routes client requests to the same worker for session continuity in multi-worker deployments
# Requires: CACHE_TYPE=redis, USE_STATEFUL_SESSIONS=true, Redis accessible at REDIS_URL
# IMPORTANT: Redis must be enabled (CACHE_TYPE=redis) for session affinity to work
# MCPGATEWAY_SESSION_AFFINITY_ENABLED=false

# Session ownership TTL in seconds (default: 300 = 5 minutes)
# How long a worker owns a session before it expires
# MCPGATEWAY_SESSION_AFFINITY_TTL=300

# Maximum sessions per worker when affinity is enabled (default: 1)
# Limits concurrent sessions per worker to prevent resource exhaustion
# MCPGATEWAY_SESSION_AFFINITY_MAX_SESSIONS=1

# Forwarded request timeout in seconds (default: 30)
# Timeout when forwarding requests between workers via Redis Pub/Sub
# MCPGATEWAY_POOL_RPC_FORWARD_TIMEOUT=30

# Enable JSON response format for Streamable HTTP
# Options: true (default), false
# true: Return JSON responses, false: Return SSE stream
# JSON_RESPONSE_ENABLED=true

# Event store configuration for stateful sessions
# Ring buffer size per stream (default: 100)
# Controls how many events are kept in memory before oldest are evicted
# STREAMABLE_HTTP_MAX_EVENTS_PER_STREAM=100

# Stream TTL in seconds (default: 3600 = 1 hour)
# How long event streams are kept in Redis before automatic cleanup
# STREAMABLE_HTTP_EVENT_TTL=3600

# Federation Configuration

# Timeout for federation requests in seconds
# Default: 120 seconds (matches config.py)
# FEDERATION_TIMEOUT=120

# Resource Configuration
# RESOURCE_CACHE_SIZE=1000
# RESOURCE_CACHE_TTL=3600
# MAX_RESOURCE_SIZE=10485760

# Allowed MIME types for resources (JSON array)
# Controls which content types are allowed for resource handling
# Default includes common text, image, and data formats
# Example: ["text/plain", "text/markdown", "application/json", "image/png"]
# To add custom types: ["text/plain", "application/pdf", "video/mp4"]
# ALLOWED_MIME_TYPES=["text/plain", "text/markdown", "text/html", "application/json", "application/xml", "image/png", "image/jpeg", "image/gif"]

# Tool Configuration
# TOOL_TIMEOUT=60
# MAX_TOOL_RETRIES=3
# TOOL_RATE_LIMIT=100
# TOOL_CONCURRENT_LIMIT=10
# GATEWAY_TOOL_NAME_SEPARATOR=-

# Prompt Configuration
# PROMPT_CACHE_SIZE=100
# MAX_PROMPT_SIZE=102400
# PROMPT_RENDER_TIMEOUT=10

# =============================================================================
# MCP Server Health Check Configuration
# =============================================================================

# Interval between health checks in seconds (default: 60)
# Project defaults block sets HEALTH_CHECK_INTERVAL=300 for local dev
# HEALTH_CHECK_INTERVAL=60

# Health check timeout in seconds (default: 5)
# HEALTH_CHECK_TIMEOUT=5

# Per-check timeout (seconds) to bound total time of one gateway health check (default: 5.0)
# GATEWAY_HEALTH_CHECK_TIMEOUT=5.0

# Consecutive failures before marking gateway offline (default: 3)
# UNHEALTHY_THRESHOLD=3

# Gateway URL validation timeout in seconds (default: 5)
# GATEWAY_VALIDATION_TIMEOUT=5

# Maximum redirects allowed during gateway validation (default: 5)
# GATEWAY_MAX_REDIRECTS=5

# Maximum concurrent health checks per worker (default: 10)
# MAX_CONCURRENT_HEALTH_CHECKS=10

# Enable automatic refresh of tools/prompts/resources from the MCP servers during health checks (default: false)
# If the MCP servers' tools/prompts/resources rarely change, keep this disabled to reduce load on those servers
# AUTO_REFRESH_SERVERS=false

# Default refresh interval in seconds for gateway tools/resources/prompts sync
# Minimum: 60 seconds
# GATEWAY_AUTO_REFRESH_INTERVAL=300

# File lock name for gateway service leader election
# Used to coordinate multiple gateway instances when running in cluster mode
# Default: "gateway_service_leader.lock"
# FILELOCK_NAME=gateway_service_leader.lock


# =============================================================================
# MCP Session Pool Configuration
# =============================================================================

# Enable MCP session pooling for reduced latency (10-20x improvement)
# Sessions are isolated per user/tenant via identity hashing
# Default: false (enable explicitly after testing)
# MCP_SESSION_POOL_ENABLED=false

# Max sessions per (URL, identity, transport) tuple
# Default: 10
# MCP_SESSION_POOL_MAX_PER_KEY=10

# Session TTL before forced close (seconds)
# Default: 300
# MCP_SESSION_POOL_TTL=300.0

# Idle time before session health check (seconds)
# Auto-aligned with min(HEALTH_CHECK_INTERVAL, this value)
# Default: 60
# MCP_SESSION_POOL_HEALTH_CHECK_INTERVAL=60.0

# Timeout waiting for available session slot (seconds)
# Default: 30
# MCP_SESSION_POOL_ACQUIRE_TIMEOUT=30.0

# Timeout creating new session (seconds)
# Default: 30
# MCP_SESSION_POOL_CREATE_TIMEOUT=30.0

# Circuit breaker: failures before opening circuit
# Default: 5
# MCP_SESSION_POOL_CIRCUIT_BREAKER_THRESHOLD=5

# Circuit breaker: seconds before reset
# Default: 60
# MCP_SESSION_POOL_CIRCUIT_BREAKER_RESET=60.0

# Evict idle pool keys after this time (seconds)
# Prevents unbounded growth with rotating tokens
# Default: 600
# MCP_SESSION_POOL_IDLE_EVICTION=600.0

# Transport timeout for pooled sessions (seconds)
# Applies to all HTTP operations (connect, read, write) on pooled sessions.
# Use a higher value for deployments with long-running tool calls.
# Default: 30 (matches MCP SDK default)
# MCP_SESSION_POOL_TRANSPORT_TIMEOUT=30.0

# Force explicit RPC (list_tools) on gateway health checks
# Off by default: pool's internal staleness check is sufficient
# Enable for stricter verification at ~5ms latency cost per check
# Default: false
# MCP_SESSION_POOL_EXPLICIT_HEALTH_RPC=false

# Configurable health check chain - ordered list of methods to try (JSON array)
# Options: ping, list_tools, list_prompts, list_resources, skip
# Default: ["ping", "skip"] (try lightweight ping, skip if unsupported)
# Examples:
#   ["ping", "skip"] - Modern servers (recommended, fastest)
#   ["ping", "list_tools", "skip"] - Legacy server support
#   ["skip"] - No health check (maximum performance, use with caution)
#   ["ping"] - Strict (fail if ping unsupported)
# MCP_SESSION_POOL_HEALTH_CHECK_METHODS=["ping", "skip"]

# Timeout in seconds for each health check attempt
# Default: 5.0
# MCP_SESSION_POOL_HEALTH_CHECK_TIMEOUT=5.0

# Headers used to derive identity hash for pooled sessions (JSON array)
# MCP_SESSION_POOL_IDENTITY_HEADERS=["authorization", "x-tenant-id", "x-user-id", "x-api-key", "cookie"]

# ─────────────────────────────────────────────────────────────────────────────
# Cleanup Timeouts (CPU Spin Loop Mitigation - Layer 2)
# ─────────────────────────────────────────────────────────────────────────────
# Limit how long cleanup waits for stuck tasks. Shorter timeouts = faster
# recovery from spin loops but may interrupt legitimate cleanup.
#
# Part of Issue #2360 mitigation. See: docs/docs/operations/cpu-spin-loop-mitigation.md
# Upstream issue: https://github.com/agronholm/anyio/issues/695

# Timeout for session/transport cleanup operations (seconds).
# Controls how long to wait for session.__aexit__() and transport.__aexit__()
# when closing sessions.
#
# IMPORTANT: Does NOT affect tool execution time - only cleanup of
# idle/released sessions. Tool execution uses TOOL_TIMEOUT instead.
#
# Tuning:
#   - Increase (10s) if you see frequent "cleanup timed out" warnings
#   - Decrease (0.5-2s) for faster recovery from CPU spin loops
# Default: 5.0
# MCP_SESSION_POOL_CLEANUP_TIMEOUT=5.0

# Timeout for SSE task group cleanup (seconds).
# Controls how long to wait for internal tasks to respond before forcing cleanup.
# Only affects cancelled connections, not normal SSE operation.
# Default: 5.0
# SSE_TASK_GROUP_CLEANUP_TIMEOUT=5.0


# =============================================================================
# EXPERIMENTAL: anyio Monkey-Patch (CPU Spin Loop Mitigation - Layer 3)
# =============================================================================
# Last resort workaround that patches anyio to limit _deliver_cancellation iterations.
# Use only if Layers 1-2 don't fully resolve the issue.
#
# Part of Issue #2360 mitigation. See: docs/docs/operations/cpu-spin-loop-mitigation.md
# Upstream issue: https://github.com/agronholm/anyio/issues/695
#
# WARNING: This is EXPERIMENTAL and may be removed when upstream fixes the issue.
# =============================================================================
# Trade-offs when enabled:
#   - Prevents indefinite CPU spin (good)
#   - May leave some tasks uncancelled (usually harmless)
#   - Worker recycling (GUNICORN_MAX_REQUESTS) cleans up orphaned tasks
#
# Default: false
# ANYIO_CANCEL_DELIVERY_PATCH_ENABLED=false

# Maximum iterations for _deliver_cancellation before forcing termination.
# Only used when ANYIO_CANCEL_DELIVERY_PATCH_ENABLED=true.
#   - Higher (100+) = more attempts to cancel, longer potential spin
#   - Lower (50) = faster recovery, more orphaned tasks
# Default: 100
# ANYIO_CANCEL_DELIVERY_MAX_ITERATIONS=100


# =============================================================================
# Default Root Paths
# =============================================================================

# Default root paths (JSON array)
# List of default root paths for resource resolution
# Example: ["/api/v1", "/mcp"]
# Default: []
# DEFAULT_ROOTS=[]


# =============================================================================
# OpenTelemetry Observability Configuration
# =============================================================================

# Enable distributed tracing and metrics collection
# Options: true, false (default)
# OTEL_ENABLE_OBSERVABILITY=false

# Traces exporter backend
# Options: otlp (default), jaeger, zipkin, console, none
# - otlp: OpenTelemetry Protocol (works with many backends)
# - jaeger: Direct Jaeger integration
# - zipkin: Direct Zipkin integration
# - console: Print to stdout (debugging)
# - none: Disable tracing
# OTEL_TRACES_EXPORTER=otlp

# OTLP endpoint for traces and metrics
# Examples:
# - Phoenix: http://localhost:4317
# - Jaeger: http://localhost:4317
# - Tempo: http://localhost:4317
# Project defaults block sets OTEL_EXPORTER_OTLP_ENDPOINT for local tracing
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317

# OTLP protocol
# Options: grpc (default), http
# OTEL_EXPORTER_OTLP_PROTOCOL=grpc

# Use insecure connection (no TLS) for OTLP
# Options: true (default for localhost), false (use TLS)
# OTEL_EXPORTER_OTLP_INSECURE=true
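# Extra headers sent with OTLP exports (comma-separated key=value pairs)
# OTEL_EXPORTER_OTLP_HEADERS=key1=value1,key2=value2

# Exporter-specific endpoints (used when OTEL_TRACES_EXPORTER is jaeger or zipkin)
# OTEL_EXPORTER_JAEGER_ENDPOINT=http://localhost:14268/api/traces
# OTEL_EXPORTER_ZIPKIN_ENDPOINT=http://localhost:9411/api/v2/spans

# Service name and resource attributes (comma-separated key=value pairs)
# OTEL_SERVICE_NAME=mcp-gateway
# OTEL_RESOURCE_ATTRIBUTES=service.version=1.0.0,environment=production

# Batch span processor tuning: max queue size, max export batch size, schedule delay (milliseconds)
# OTEL_BSP_MAX_QUEUE_SIZE=2048
# OTEL_BSP_MAX_EXPORT_BATCH_SIZE=512
# OTEL_BSP_SCHEDULE_DELAY=5000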

# Prometheus Metrics Configuration
# Enable Prometheus-compatible metrics endpoint at /metrics/prometheus
# Options: true, false (default)
# When true: Exposes metrics at /metrics/prometheus (requires JWT authentication)
# When false: Returns HTTP 503 on metrics endpoint
# Enable this only when a Prometheus stack is configured to scrape.
# Prometheus scrape config needs: authorization: { type: Bearer, credentials: <JWT> }
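# Generate a scrape token: python -m mcpgateway.utils.create_jwt_token --username prometheus@monitoring --exp 0 --secret $JWT_SECRET_KEY
# NOTE: if --exp 0 produces a token without an exp claim, that token is rejected while
# REQUIRE_TOKEN_EXPIRATION=true (the default); in that case pass a positive --exp and rotate the token.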
# ENABLE_METRICS=false

# Comma-separated regex patterns for endpoints to exclude from metrics collection
# Use this to avoid high-cardinality issues with dynamic paths or reduce overhead
# Examples:
# - Exclude SSE endpoints: /servers/.*/sse
# - Exclude static files: /static/.*
# - Exclude health checks: .*health.*
# - Multiple patterns: /servers/.*/sse,/static/.*,.*health.*
# Default: "" (no exclusions)
# METRICS_EXCLUDED_HANDLERS=

# Prometheus metrics namespace (prefix for all metric names)
# Used to group metrics by application or organization
# Example: mycompany_gateway_http_requests_total
# Default: "default"
# METRICS_NAMESPACE=default

# Prometheus metrics subsystem (secondary prefix for metric names)
# Used for further categorization within namespace
# Example: mycompany_api_http_requests_total (if subsystem=api)
# Default: "" (no subsystem)
# METRICS_SUBSYSTEM=

# Custom static labels for app_info gauge metric
# Format: comma-separated "key=value" pairs (low-cardinality values only)
# WARNING: Never use high-cardinality values (user IDs, request IDs, timestamps)
# Examples:
# - Single label: environment=production
# - Multiple labels: environment=production,region=us-east-1,team=platform
# - K8s example: cluster=prod-us-east,namespace=mcp-gateway
# Default: "" (no custom labels)
# METRICS_CUSTOM_LABELS=

# -----------------------------------------------------------------------------
# Plugin Framework Settings
# -----------------------------------------------------------------------------
# The plugin framework has its own configuration via pydantic-settings with the
# PLUGINS_ env var prefix. These settings allow the plugin framework to operate
# independently of the gateway configuration (mcpgateway.config).
#
# When the plugin framework is used standalone (e.g., via the mcpplugins CLI or
# as a library), only these PLUGINS_-prefixed variables are needed. When running
# inside the gateway, both the gateway settings (above) AND these framework
# settings are in effect.
#
# The plugin framework settings share some env var names with the gateway
# (e.g. PLUGINS_ENABLED, PLUGINS_CLI_MARKUP_MODE). Other settings mirror
# gateway-level HTTPX_*/SKIP_SSL_VERIFY but are scoped to plugin requests:
#   HTTPX_CONNECT_TIMEOUT → PLUGINS_HTTPX_CONNECT_TIMEOUT
#   HTTPX_READ_TIMEOUT    → PLUGINS_HTTPX_READ_TIMEOUT
#   SKIP_SSL_VERIFY       → PLUGINS_SKIP_SSL_VERIFY

# Plugin Framework Configuration
# Enable the plugin system for extending gateway functionality
# Options: true, false (default)
# When true: Loads and executes plugins from PLUGINS_CONFIG_FILE
# PLUGINS_ENABLED=false

# Allow plugin HTTP_AUTH_CHECK_PERMISSION grants to override built-in RBAC decisions.
# Disabled by default: plugin grants are audit-only unless this is explicitly enabled.
# PLUGINS_CAN_OVERRIDE_RBAC=false

# Path to the plugin configuration file
# Contains plugin definitions, hooks, and settings
# Default: plugins/config.yaml
# PLUGINS_CONFIG_FILE=plugins/config.yaml

# Plugin execution timeout in seconds
# PLUGINS_PLUGIN_TIMEOUT=30

# Plugin framework log level
# PLUGINS_LOG_LEVEL=INFO

# Skip SSL/TLS certificate verification for plugin HTTP requests
# WARNING: Only use in development or with self-signed certificates
# PLUGINS_SKIP_SSL_VERIFY=false

# HTTP client pool settings for plugin framework
# These mirror the gateway HTTPX_* settings but are scoped to plugin requests
# PLUGINS_HTTPX_MAX_CONNECTIONS=200
# PLUGINS_HTTPX_MAX_KEEPALIVE_CONNECTIONS=100
# PLUGINS_HTTPX_KEEPALIVE_EXPIRY=30.0
# PLUGINS_HTTPX_CONNECT_TIMEOUT=5.0
# PLUGINS_HTTPX_READ_TIMEOUT=120.0
# PLUGINS_HTTPX_WRITE_TIMEOUT=30.0
# PLUGINS_HTTPX_POOL_TIMEOUT=10.0

# Optional defaults for mTLS when connecting to external MCP plugins (STREAMABLEHTTP transport)
# Provide file paths inside the container. Plugin-specific TLS blocks override these defaults.
# PLUGINS_CLIENT_MTLS_CA_BUNDLE=/app/certs/plugins/ca.crt
# PLUGINS_CLIENT_MTLS_CERTFILE=/app/certs/plugins/gateway-client.pem
# PLUGINS_CLIENT_MTLS_KEYFILE=/app/certs/plugins/gateway-client.key
# PLUGINS_CLIENT_MTLS_KEYFILE_PASSWORD=
# PLUGINS_CLIENT_MTLS_VERIFY=true
# PLUGINS_CLIENT_MTLS_CHECK_HOSTNAME=true

# Optional defaults for plugin server TLS when exposing plugins over HTTP
# PLUGINS_SERVER_SSL_ENABLED=false
# PLUGINS_SERVER_SSL_KEYFILE=/app/certs/plugins/server.key
# PLUGINS_SERVER_SSL_CERTFILE=/app/certs/plugins/server.pem
# PLUGINS_SERVER_SSL_CA_CERTS=/app/certs/plugins/ca.crt
# PLUGINS_SERVER_SSL_CERT_REQS=2
# PLUGINS_SERVER_SSL_KEYFILE_PASSWORD=

# Plugin MCP server bind settings
# PLUGINS_SERVER_HOST=0.0.0.0
# PLUGINS_SERVER_PORT=9000
# PLUGINS_SERVER_UDS=/tmp/mcpgateway-plugins.sock

# Plugin server runtime (external MCP server)
# PLUGINS_TRANSPORT=stdio
# PLUGINS_CONFIG_PATH=./resources/plugins/config.yaml

# Optional defaults for mTLS when connecting to external plugins over gRPC
# PLUGINS_GRPC_CLIENT_MTLS_CA_BUNDLE=/app/certs/plugins/grpc-ca.crt
# PLUGINS_GRPC_CLIENT_MTLS_CERTFILE=/app/certs/plugins/grpc-client.pem
# PLUGINS_GRPC_CLIENT_MTLS_KEYFILE=/app/certs/plugins/grpc-client.key
# PLUGINS_GRPC_CLIENT_MTLS_KEYFILE_PASSWORD=
# PLUGINS_GRPC_CLIENT_MTLS_VERIFY=true

# Optional defaults for plugin gRPC server TLS
# PLUGINS_GRPC_SERVER_SSL_ENABLED=false
# PLUGINS_GRPC_SERVER_SSL_KEYFILE=/app/certs/plugins/grpc-server.key
# PLUGINS_GRPC_SERVER_SSL_CERTFILE=/app/certs/plugins/grpc-server.pem
# PLUGINS_GRPC_SERVER_SSL_CA_CERTS=/app/certs/plugins/grpc-ca.crt
# PLUGINS_GRPC_SERVER_SSL_CLIENT_AUTH=none
# PLUGINS_GRPC_SERVER_SSL_KEYFILE_PASSWORD=

# Plugin gRPC server bind settings
# PLUGINS_GRPC_SERVER_HOST=0.0.0.0
# PLUGINS_GRPC_SERVER_PORT=50051
# PLUGINS_GRPC_SERVER_UDS=/tmp/mcpgateway-plugins-grpc.sock

# Unix domain socket transport for plugin communication
# PLUGINS_UNIX_SOCKET_PATH=/tmp/mcpgateway-plugins-unix.sock

# Enable auto-completion for plugins CLI
# PLUGINS_CLI_COMPLETION=false

# Set markup mode for plugins CLI
# Valid options:
#  rich: use rich markup
#  markdown: allow markdown in help strings
#  disabled: disable markup
# If unset (commented out), uses "rich" if rich is detected, otherwise disables it.
# Project defaults block sets PLUGINS_CLI_MARKUP_MODE=rich
PLUGINS_CLI_MARKUP_MODE=rich

# =============================================================================
# Well-Known URI Configuration
# =============================================================================

# Enable well-known URI endpoints (/.well-known/*)
# WELL_KNOWN_ENABLED=true

# robots.txt content - Default blocks all crawlers (private API)
# Use multiline with proper escaping or keep on one line
# WELL_KNOWN_ROBOTS_TXT="User-agent: *\nDisallow: /\n\n# ContextForge is a private API gateway\n# Public crawling is disabled by default"

# security.txt content - Define your security contact information
# Format: RFC 9116 (https://www.rfc-editor.org/rfc/rfc9116.html)
# Leave empty to disable security.txt
# Example:
# WELL_KNOWN_SECURITY_TXT="Contact: mailto:security@example.com\nExpires: 2025-12-31T23:59:59Z\nPreferred-Languages: en\nCanonical: https://example.com/.well-known/security.txt"
# WELL_KNOWN_SECURITY_TXT=""

# Enable security.txt endpoint (auto-enabled when content is provided)
# WELL_KNOWN_SECURITY_TXT_ENABLED=false

# Additional custom well-known files (JSON format)
# Example: {"ai.txt": "AI Usage: This service uses AI for tool orchestration...", "dnt-policy.txt": "We respect DNT headers..."}
# WELL_KNOWN_CUSTOM_FILES="{}"

# Cache control for well-known files (seconds) - 3600 = 1 hour
# WELL_KNOWN_CACHE_MAX_AGE=3600

# =============================================================================
# Well-Known URI Examples
# =============================================================================

# Example 1: Basic security.txt
# WELL_KNOWN_SECURITY_TXT="Contact: mailto:security@mycompany.com\nContact: https://mycompany.com/security\nEncryption: https://mycompany.com/pgp-key.txt\nPreferred-Languages: en, es\nCanonical: https://api.mycompany.com/.well-known/security.txt"

# Example 2: Custom AI policy
# WELL_KNOWN_CUSTOM_FILES={"ai.txt": "# AI Usage Policy\n\nThis ContextForge uses AI for:\n- Tool orchestration\n- Response generation\n- Error handling\n\nWe do not use AI for:\n- User data analysis\n- Behavioral tracking\n- Decision making without human oversight"}

# Example 3: Allow specific crawlers
# WELL_KNOWN_ROBOTS_TXT="User-agent: internal-monitor\nAllow: /health\nAllow: /metrics\n\nUser-agent: *\nDisallow: /"

# Example 4: Multiple custom files
# WELL_KNOWN_CUSTOM_FILES={"ai.txt": "# AI Usage Policy\n\nThis ContextForge uses AI for:\n- Tool orchestration\n- Response generation\n- Error handling\n\nWe do not use AI for:\n- User data analysis\n- Behavioral tracking\n- Decision making without human oversight", "dnt-policy.txt": "# Do Not Track Policy\n\nWe respect the DNT header.\nNo tracking cookies are used.\nOnly essential session data is stored.", "change-password": "https://mycompany.com/account/password"}

# =============================================================================
# Startup Tuning
# =============================================================================

# Batch size for gateway/tool slug refresh at startup
# SLUG_REFRESH_BATCH_SIZE=1000

# =============================================================================
# Validation Settings
# =============================================================================

# These settings control input validation and security patterns
# Most users won't need to change these defaults

# HTML/JavaScript injection patterns (regex)
# Used to detect potentially dangerous HTML/JS content
# VALIDATION_DANGEROUS_HTML_PATTERN - Pattern to detect dangerous HTML tags
# VALIDATION_DANGEROUS_JS_PATTERN - Pattern to detect JavaScript injection attempts
#
# Default dangerous HTML pattern (regex)
# VALIDATION_DANGEROUS_HTML_PATTERN="<(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)\\b|</*(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)>"

# Default dangerous JS pattern (regex)
# VALIDATION_DANGEROUS_JS_PATTERN="(?i)(?:^|\\s|[\\\"'`<>=])(javascript:|vbscript:|data:\\s*[^,]*[;\\s]*(javascript|vbscript)|\\bon[a-z]+\\s*=|<\\s*script\\b)"

# Allowed URL schemes for external requests
# Controls which URL schemes are permitted for gateway operations
# Default: ["http://", "https://", "ws://", "wss://"]
# VALIDATION_ALLOWED_URL_SCHEMES=["http://", "https://", "ws://", "wss://"]

# Character validation patterns (regex)
# Used to validate various input fields
# VALIDATION_NAME_PATTERN - Pattern for validating names (allows spaces)
# VALIDATION_IDENTIFIER_PATTERN - Pattern for validating IDs (no spaces)
# VALIDATION_SAFE_URI_PATTERN - Pattern for safe URI characters
# VALIDATION_UNSAFE_URI_PATTERN - Pattern to detect unsafe URI characters
# VALIDATION_TOOL_NAME_PATTERN - MCP tool naming pattern
# VALIDATION_TOOL_METHOD_PATTERN - MCP tool method naming pattern
#
# Name pattern (allows spaces)
# VALIDATION_NAME_PATTERN="^[a-zA-Z0-9_.\\-\\s]+$"

# Identifier pattern (no spaces)
# VALIDATION_IDENTIFIER_PATTERN="^[a-zA-Z0-9_\\-\\.]+$"

# Safe URI pattern
# VALIDATION_SAFE_URI_PATTERN="^[a-zA-Z0-9_\\-.:/?=&%{}]+$"

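# Unsafe URI pattern (matches < > " ' and backslash)
# The regex needs an escaped backslash inside the character class; in a double-quoted
# .env value each literal backslash is written twice, hence the four backslashes below.
# VALIDATION_UNSAFE_URI_PATTERN="[<>\"'\\\\]"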

# MCP tool naming pattern per SEP-986
# VALIDATION_TOOL_NAME_PATTERN="^[a-zA-Z0-9_][a-zA-Z0-9._/-]*$"

# MCP tool method naming pattern
# VALIDATION_TOOL_METHOD_PATTERN="^[a-zA-Z][a-zA-Z0-9_\\./-]*$"

# Size limits for various inputs (in characters or bytes)
# VALIDATION_MAX_NAME_LENGTH=255
# VALIDATION_MAX_DESCRIPTION_LENGTH=8192
# VALIDATION_MAX_TEMPLATE_LENGTH=65536
# VALIDATION_MAX_CONTENT_LENGTH=1048576
# VALIDATION_MAX_JSON_DEPTH=10
# VALIDATION_MAX_URL_LENGTH=2048
# VALIDATION_MAX_RPC_PARAM_SIZE=262144
# VALIDATION_MAX_METHOD_LENGTH=128

# Rate limiting for validation operations
# Maximum requests per minute for validation endpoints
# VALIDATION_MAX_REQUESTS_PER_MINUTE=60

# Allowed MIME types for validation (JSON array)
# Controls which content types pass validation checks
# VALIDATION_ALLOWED_MIME_TYPES=["text/plain", "text/html", "text/css", "text/markdown", "text/javascript", "application/json", "application/xml", "application/pdf", "image/png", "image/jpeg", "image/gif", "image/svg+xml", "application/octet-stream"]

# =============================================================================
# Non-Settings Environment Variables
# =============================================================================

# --- Runtime / launcher envs --------------------------------------------------

# Runtime/launcher env (docker-entrypoint.sh)
# HTTP server to use (for containers)
# Options: gunicorn (default, stable), granian (alternative, Rust-based)
# HTTP_SERVER=gunicorn

# Disable access logging for performance
# Options: true (default), false
# When true: Disables both gunicorn and uvicorn access logs
# Access logs create massive I/O overhead under high concurrency
# Default: true (disabled for performance)
# Set to false to enable access logging for debugging
# DISABLE_ACCESS_LOG=true

# Force start even if another instance is running
# Options: true, false (default)
# Bypasses the lock file check at /tmp/mcpgateway-gunicorn.lock or /tmp/mcpgateway-granian.lock
# FORCE_START=false

# --- Gunicorn Production Server Configuration --------------------------------

# These settings are used by run-gunicorn.sh for production deployments.
# They control the Gunicorn WSGI server behavior.

# Number of worker processes
# Options: "auto" (default, 2*CPU+1 capped at 16), or any positive integer
# Recommendation: 2-4 x $(NUM_CORES) for CPU-bound, more for I/O-bound workloads
# GUNICORN_WORKERS=auto

# Worker timeout in seconds
# Workers that don't respond within this time are killed and restarted
# Increase for long-running requests (e.g., LLM streaming, large file uploads)
# Default: 600 (10 minutes)
# GUNICORN_TIMEOUT=600

# Maximum requests per worker before automatic restart
# Helps prevent memory leaks by periodically recycling workers
# Default: 100000
# GUNICORN_MAX_REQUESTS=100000

# Random jitter added to max requests (prevents thundering herd on restart)
# Default: 100
# GUNICORN_MAX_REQUESTS_JITTER=100

# Preload application before forking workers
# Options: true (default), false
# true: Saves memory (shared code), runs migrations once before forking
# false: Each worker loads app independently (more memory, better isolation)
# GUNICORN_PRELOAD_APP=true

# Developer mode with hot reload
# Options: true, false (default)
# Enables --reload flag and reduces workers for easier debugging
# WARNING: Disables preload_app. Not for production!
# GUNICORN_DEV_MODE=false

# --- Granian Runtime Configuration -------------------------------------------

# These settings are used by run-granian.sh for production deployments.
# They control the Granian server behavior.
# GRANIAN_WORKERS=auto
# GRANIAN_RUNTIME_MODE=auto
# GRANIAN_RUNTIME_THREADS=1
# GRANIAN_BLOCKING_THREADS=1
# GRANIAN_HTTP=auto
# GRANIAN_LOOP=uvloop
# GRANIAN_TASK_IMPL=auto
# GRANIAN_HTTP1_PIPELINE_FLUSH=true
# GRANIAN_HTTP1_BUFFER_SIZE=524288
# GRANIAN_BACKLOG=2048
# GRANIAN_BACKPRESSURE=512
# GRANIAN_RESPAWN_FAILED=true
# GRANIAN_WORKERS_LIFETIME=
# GRANIAN_WORKERS_MAX_RSS=
# GRANIAN_DEV_MODE=false
# GRANIAN_LOG_LEVEL=info

# --- SSL/TLS Configuration (launcher) ----------------------------------------

# Enable HTTPS for production deployments (run-gunicorn.sh / run-granian.sh)

# Enable TLS/SSL
# Options: true, false (default)
# SSL=false

# Path to SSL certificate file (PEM format)
# CERT_FILE=certs/cert.pem

# Path to SSL private key file (PEM format)
# KEY_FILE=certs/key.pem

# Passphrase for encrypted private key (optional)
# If your key is passphrase-protected, set this value
# The key will be decrypted by the SSL key manager before Gunicorn starts
# KEY_FILE_PASSWORD=
# CERT_PASSPHRASE=

# --- Direct env reads (application code) -------------------------------------

# Content type for outgoing HTTP requests to upstream services
# Options: application/json (default), application/x-www-form-urlencoded, multipart/form-data
# Direct env read (mcpgateway/config.py)
# FORGE_CONTENT_TYPE=application/json

# SQLAlchemy echo commands - debug only, used to identify N+1 issues, etc.
# Direct env read (mcpgateway/db.py)
# SQLALCHEMY_ECHO=0

# Copy resource attributes to span attributes (for Arize compatibility)
# Some observability backends like Arize require certain attributes as span attributes
# rather than resource attributes. Enable this to copy arize.project.name and model_id.
# Direct env read (mcpgateway/observability.py)
# OTEL_COPY_RESOURCE_ATTRS_TO_SPANS=false

# Deployment environment label for observability resource attributes
# Direct env read (mcpgateway/observability.py)
# DEPLOYMENT_ENV=development

# Jaeger exporter auth (only used when OTEL_TRACES_EXPORTER=jaeger)
# Direct env read (mcpgateway/observability.py)
# OTEL_EXPORTER_JAEGER_USER=
# OTEL_EXPORTER_JAEGER_PASSWORD=

# Test mode for observability (disables tracing when set to 1)
# Direct env read (mcpgateway/observability.py)
# MCP_TESTING=0

# --- Auxiliary tools and CLIs (non-gateway runtime) --------------------------

# These are used by helper tools, CLIs, and SDK wrappers (not the main gateway server).

# Gateway CLI defaults (mcpgateway/cli.py)
# MCG_HOST=127.0.0.1
# MCG_PORT=4444

# Export/import CLI auth (mcpgateway/cli_export_import.py)
# MCPGATEWAY_BEARER_TOKEN=

# MCP wrapper for tool calls (mcpgateway/wrapper.py)
# MCP_SERVER_URL=
# MCP_AUTH=
# MCP_TOOL_CALL_TIMEOUT=60
# MCP_WRAPPER_LOG_LEVEL=INFO
# CONCURRENCY=10

# Reverse proxy helper (mcpgateway/reverse_proxy.py)
# REVERSE_PROXY_GATEWAY=
# REVERSE_PROXY_TOKEN=
# REVERSE_PROXY_RECONNECT_DELAY=1
# REVERSE_PROXY_MAX_RETRIES=0
# REVERSE_PROXY_LOG_LEVEL=INFO

# DB readiness helper (mcpgateway/utils/db_isready.py)
# DB_WAIT_MAX_TRIES=30
# DB_WAIT_INTERVAL=2
# DB_CONNECT_TIMEOUT=2
# DB_MAX_BACKOFF_SECONDS=30

# Builder / deploy tooling (mcpgateway/tools/builder/*)
# MCP_DEPLOY_DIR=./deploy
# MCP_DEBUG=
# CONTAINER=false

# ToolOps LLM provider envs (mcpgateway/toolops/utils/llm_util.py)
# LLM_PROVIDER=openai
# NOTE: MAX_TOEKNS is misspelled in code; use the exact env name shown below.
# NOTE: *TEMPERATURE envs are currently not read (commented in code) but listed for completeness.
#
# OpenAI
# OPENAI_API_KEY=
# OPENAI_BASE_URL=https://api.openai.com
# OPENAI_MODEL=
# OPENAI_TEMPERATURE=0.7
# OPENAI_MAX_RETRIES=2
# OPENAI_MAX_TOEKNS=600
#
# Azure OpenAI
# AZURE_OPENAI_API_KEY=
# AZURE_OPENAI_ENDPOINT=
# AZURE_OPENAI_API_VERSION=
# AZURE_OPENAI_DEPLOYMENT=
# AZURE_OPENAI_MODEL=
# AZURE_OPENAI_TEMPERATURE=0.7
# AZURE_OPENAI_MAX_RETRIES=2
# AZURE_OPENAI_MAX_TOEKNS=600
#
# Anthropic
# ANTHROPIC_API_KEY=
# ANTHROPIC_MODEL=
# ANTHROPIC_TEMPERATURE=0.7
# ANTHROPIC_MAX_RETRIES=2
# ANTHROPIC_MAX_TOKENS=4096
#
# AWS Bedrock
# AWS_BEDROCK_MODEL_ID=
# AWS_BEDROCK_REGION=
# AWS_BEDROCK_TEMPERATURE=0.7
# AWS_BEDROCK_MAX_TOKENS=4096
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=
# AWS_SESSION_TOKEN=
#
# Ollama
# OLLAMA_BASE_URL=http://localhost:11434
# OLLAMA_MODEL=
# OLLAMA_TEMPERATURE=0.7
#
# Watsonx
# WATSONX_APIKEY=
# WATSONX_URL=
# WATSONX_PROJECT_ID=
# WATSONX_MODEL_ID=
# WATSONX_MAX_NEW_TOKENS=1000
# WATSONX_DECODING_METHOD=greedy

# =============================================================================
# Development Configuration
# =============================================================================

# Enable development mode (relaxed security, verbose logging)
# Options: true, false (default)
# WARNING: Never use in production!
# DEV_MODE=false

# Enable auto-reload on code changes (for development)
# Options: true, false (default)
# Requires: Running with uvicorn directly (not gunicorn)
# RELOAD=false

# Enable Jinja2 template auto-reload (for development)
# Options: true, false (default)
# Set to true for development to auto-detect template changes
# Performance impact: Disabling reduces CPU usage for admin UI under load
# Note: `make dev` automatically sets this to true
# TEMPLATES_AUTO_RELOAD=false

# Enable debug mode (verbose error messages, stack traces)
# Options: true, false (default)
# WARNING: May expose sensitive information!
# DEBUG=false

# Header Passthrough (WARNING: Security implications)
# ENABLE_HEADER_PASSTHROUGH=false
# ENABLE_OVERWRITE_BASE_HEADERS=false
# DEFAULT_PASSTHROUGH_HEADERS=["X-Tenant-Id", "X-Trace-Id"]

# Authorization Header Conflict Resolution:
# When gateway uses auth, use X-Upstream-Authorization header to pass
# authorization to upstream servers (automatically renamed to Authorization)

# GlobalConfig In-Memory Cache TTL (Issue #1715)
# Caches GlobalConfig (passthrough headers settings) in memory to reduce DB queries.
# Under load (1000+ concurrent users), this eliminates 42,000+ redundant DB queries.
# Trade-off: Config changes take up to TTL seconds to propagate (use admin API to force refresh).
# Default: 60 seconds. Range: 5-3600 seconds.
# Project defaults block sets GLOBAL_CONFIG_CACHE_TTL=300 for local dev
# GLOBAL_CONFIG_CACHE_TTL=60

# A2A Stats In-Memory Cache TTL
# Caches A2A agent counts (total, active) to avoid redundant COUNT queries on /metrics calls.
# Trade-off: Agent count changes take up to TTL seconds to propagate (use admin API to force refresh).
# Default: 30 seconds. Range: 5-3600 seconds.
# A2A_STATS_CACHE_TTL=30

# Project defaults block sets MCPGATEWAY_UI_TOOL_TEST_TIMEOUT=120000 for local dev
# MCPGATEWAY_UI_TOOL_TEST_TIMEOUT=60000

# =============================================================================
# Security Validation Settings
# =============================================================================

# Minimum length for secret keys (JWT, encryption)
# MIN_SECRET_LENGTH=32

# Minimum length for passwords
# MIN_PASSWORD_LENGTH=12

# Enforce strong secrets
# true: fail startup on critical security validation issues
# false (default): allow startup with security warnings (kept for backward compatibility)
# Leaving this false is NOT RECOMMENDED for production!
# REQUIRE_STRONG_SECRETS=false

# =============================================================================
# ToolOps Configuration
# =============================================================================

# Enable the ToolOps functionality (true/false)
# When disabled, ToolOps features will be completely hidden from UI and APIs
# Default: false (must be explicitly enabled)
# TOOLOPS_ENABLED=false

# =============================================================================
# LLM Chat MCP Client Configuration
# =============================================================================

# Enable the LLM Chat functionality (true/false)
# When disabled, LLM chat features will be completely hidden from UI and APIs
# Default: true
# LLMCHAT_ENABLED=true

# Enable stdio transport for LLM Chat MCP server config.
# Disabled by default; enable only when trusted stdio subprocess execution is required.
# MCPGATEWAY_STDIO_TRANSPORT_ENABLED=false

# LLM Provider Configuration
# All LLM providers are now configured via Admin UI -> Settings -> LLM Settings.
# Add providers (OpenAI, Azure OpenAI, Anthropic, AWS Bedrock, Ollama, watsonx)
# and their models through the Admin UI. API keys and credentials are securely
# stored in the database.

# Redis Configuration for chat session storage and maintaining history
# CACHE_TYPE should be set to "redis" and REDIS_URL configured appropriately as mentioned in the caching section.

# Seconds for active_session key TTL
# LLMCHAT_SESSION_TTL=300
# Seconds for lock expiry
# LLMCHAT_SESSION_LOCK_TTL=30
# How many times to poll while waiting
# LLMCHAT_SESSION_LOCK_RETRIES=10
# Seconds between polls
# LLMCHAT_SESSION_LOCK_WAIT=0.2

# Seconds for chat history expiry
# LLMCHAT_CHAT_HISTORY_TTL=3600
# Maximum message history to store per user
# LLMCHAT_CHAT_HISTORY_MAX_MESSAGES=50

# =============================================================================
# LLM Settings (Internal API)
# =============================================================================
# These settings control the internal LLM API that allows the gateway to
# act as a unified LLM provider. Configure external providers in the Admin UI.

# API prefix for internal LLM endpoints (OpenAI-compatible)
# Default: /v1
# LLM_API_PREFIX=/v1

# Request timeout in seconds for LLM API calls
# Default: 120
# LLM_REQUEST_TIMEOUT=120

# Enable streaming responses for LLM Chat
# Default: true
# LLM_STREAMING_ENABLED=true

# Provider health check interval in seconds
# Default: 300 (5 minutes)
# LLM_HEALTH_CHECK_INTERVAL=300

# =============================================================================
# Pagination Configuration
# =============================================================================

# Default number of items per page for paginated endpoints
# Applies to: tools, resources, prompts, servers, gateways, users, teams, tokens, etc.
# Default: 50, Min: 1, Max: 1000
# PAGINATION_DEFAULT_PAGE_SIZE=50

# Maximum allowed items per page (prevents abuse)
# Default: 500, Min: 1, Max: 10000
# PAGINATION_MAX_PAGE_SIZE=500

# Minimum items per page
# Default: 1
# PAGINATION_MIN_PAGE_SIZE=1

# Threshold for switching from offset to cursor-based pagination
# When result set exceeds this count, use cursor-based pagination for performance
# Default: 10000
# PAGINATION_CURSOR_THRESHOLD=10000

# Enable cursor-based pagination globally
# Options: true (default), false
# When false, only offset-based pagination is used
# PAGINATION_CURSOR_ENABLED=true

# Default sort field for paginated queries
# Default: created_at
# PAGINATION_DEFAULT_SORT_FIELD=created_at

# Default sort order for paginated queries
# Options: asc, desc (default)
# PAGINATION_DEFAULT_SORT_ORDER=desc

# Maximum offset allowed for offset-based pagination (prevents abuse)
# Default: 100000 (100K records)
# PAGINATION_MAX_OFFSET=100000

# Cache pagination counts for performance (seconds)
# Set to 0 to disable caching
# Default: 300 (5 minutes)
# PAGINATION_COUNT_CACHE_TTL=300

# Enable pagination links in API responses
# Options: true (default), false
# PAGINATION_INCLUDE_LINKS=true

# Base URL for pagination links (defaults to request URL)
# PAGINATION_BASE_URL=https://api.example.com

# =============================================================================
# gRPC Support Settings (EXPERIMENTAL)
# =============================================================================

# Enable gRPC to MCP translation support (disabled by default)
# Requires: pip install mcp-contextforge-gateway[grpc]
# MCPGATEWAY_GRPC_ENABLED=false

# Enable gRPC server reflection by default for service discovery
# MCPGATEWAY_GRPC_REFLECTION_ENABLED=true

# Maximum gRPC message size in bytes (4MB default)
# MCPGATEWAY_GRPC_MAX_MESSAGE_SIZE=4194304

# Default gRPC call timeout in seconds
# MCPGATEWAY_GRPC_TIMEOUT=30

# Enable TLS for gRPC connections by default
# MCPGATEWAY_GRPC_TLS_ENABLED=false

# =============================================================================
# Audit Trail Logging
# =============================================================================

# Enable audit trail logging for compliance (CRUD operations on all resources)
# Default: false (disabled for performance - causes a DB write on EVERY API request)
# When enabled, logs all create, read, update, delete operations to the audit_trails table
# WARNING: This can generate millions of rows during load testing!
# Only enable for production compliance requirements (SOC2, HIPAA, etc.)
# AUDIT_TRAIL_ENABLED=false

# =============================================================================
# Permission Audit Logging
# =============================================================================

# Enable permission audit logging for RBAC checks (one DB row per permission check)
# Default: false (disabled for performance under load)
# PERMISSION_AUDIT_ENABLED=false

# =============================================================================
# Security Event Logging
# =============================================================================

# Enable security event logging (authentication attempts, authorization failures, etc.)
# Default: false (disabled for performance - can cause high DB write load)
# When enabled, the AuthContextMiddleware will log authentication events to the database
# This is INDEPENDENT of observability settings
# SECURITY_LOGGING_ENABLED=false

# Security logging level - controls what events are logged to the database
# Options:
#   - "all"            : Log ALL events including successful auth (WARNING: high DB load!)
#   - "failures_only"  : Log only authentication/authorization failures (recommended)
#   - "high_severity"  : Log only high/critical severity events
# Default: failures_only
# SECURITY_LOGGING_LEVEL=failures_only

# Failed auth attempts before high severity alert
# SECURITY_FAILED_AUTH_THRESHOLD=5

# Threat score threshold for alerts (0.0-1.0)
# SECURITY_THREAT_SCORE_ALERT=0.7

# Time window for rate limit checks (minutes)
# SECURITY_RATE_LIMIT_WINDOW_MINUTES=5

# =============================================================================
# Observability Settings
# =============================================================================

# Enable observability tracing and metrics collection
# When enabled, all HTTP requests will be traced with detailed timing, status codes, and context
# OBSERVABILITY_ENABLED=false

# Automatically trace HTTP requests
# OBSERVABILITY_TRACE_HTTP_REQUESTS=true

# Number of days to retain trace data
# OBSERVABILITY_TRACE_RETENTION_DAYS=7

# Maximum number of traces to retain (prevents unbounded growth)
# OBSERVABILITY_MAX_TRACES=100000

# Trace sampling rate (0.0-1.0) - 1.0 means trace everything, 0.1 means trace 10%
# OBSERVABILITY_SAMPLE_RATE=1.0

# Paths to include for tracing (JSON array of regex patterns)
# OBSERVABILITY_INCLUDE_PATHS=["^/rpc/?$","^/sse$","^/message$","^/mcp(?:/|$)","^/servers/[^/]+/mcp/?$","^/servers/[^/]+/sse$","^/servers/[^/]+/message$","^/a2a(?:/|$)"]

# Paths to exclude from tracing (JSON array of regex patterns, applied after include patterns)
# OBSERVABILITY_EXCLUDE_PATHS=["/health", "/healthz", "/ready", "/metrics", "/static/.*"]

# Enable metrics collection
# OBSERVABILITY_METRICS_ENABLED=true

# Enable event logging within spans
# OBSERVABILITY_EVENTS_ENABLED=true

# =============================================================================
# Performance Tracking Thresholds
# =============================================================================

# Enable performance tracking and metrics (internal)
# PERFORMANCE_TRACKING_ENABLED=true

# Alert thresholds (milliseconds)
# PERFORMANCE_THRESHOLD_DATABASE_QUERY_MS=100.0
# PERFORMANCE_THRESHOLD_TOOL_INVOCATION_MS=2000.0
# PERFORMANCE_THRESHOLD_RESOURCE_READ_MS=1000.0
# PERFORMANCE_THRESHOLD_HTTP_REQUEST_MS=500.0

# Alert if performance degrades by this multiplier vs baseline
# PERFORMANCE_DEGRADATION_MULTIPLIER=1.5

# =============================================================================
# Performance Monitoring Settings
# =============================================================================

# Enable performance tracking tab in admin UI (default: false)
# Shows real-time CPU, memory, disk, network, worker, and request metrics
# MCPGATEWAY_PERFORMANCE_TRACKING=false

# Metric collection interval in seconds (default: 10)
# How often to sample system metrics for historical data
# MCPGATEWAY_PERFORMANCE_COLLECTION_INTERVAL=10

# Snapshot retention period in hours (default: 24)
# How long to keep individual metric snapshots
# MCPGATEWAY_PERFORMANCE_RETENTION_HOURS=24

# Aggregate retention period in days (default: 90)
# How long to keep hourly/daily aggregated metrics
# MCPGATEWAY_PERFORMANCE_RETENTION_DAYS=90

# Maximum performance snapshots to retain (default: 10000)
# Prevents unbounded database growth
# MCPGATEWAY_PERFORMANCE_MAX_SNAPSHOTS=10000

# Enable distributed mode for multi-container deployments (default: false)
# Uses Redis to aggregate metrics from multiple workers/containers
# MCPGATEWAY_PERFORMANCE_DISTRIBUTED=false

# Enable network connection counting (default: true)
# psutil.net_connections() can be CPU intensive under heavy load
# Disable to skip network connection counting entirely
# MCPGATEWAY_PERFORMANCE_NET_CONNECTIONS_ENABLED=true

# Cache TTL for net_connections in seconds (default: 15)
# Throttles expensive psutil.net_connections() calls
# Higher values reduce CPU usage but may report stale connection counts
# MCPGATEWAY_PERFORMANCE_NET_CONNECTIONS_CACHE_TTL=15

# =============================================================================
# Ed25519 Key Support
# =============================================================================
# Enable Ed25519 signing for certificates
# ENABLE_ED25519_SIGNING=false

# Previous Ed25519 private key for signing rotation
# PREV_ED25519_PRIVATE_KEY=""

# Previous Ed25519 public key (derived automatically if private key is set)
# PREV_ED25519_PUBLIC_KEY=

# Current Ed25519 private key for signing
# ED25519_PRIVATE_KEY=

# Current Ed25519 public key (derived automatically if private key is set)
# ED25519_PUBLIC_KEY=


# =============================================================================
# Bootstrap Additional System Roles
# =============================================================================

# Enable bootstrapping of additional system roles
# Allows defining a set of roles to be added while bootstrapping the database
# Options: false (default), true
# MCPGATEWAY_BOOTSTRAP_ROLES_IN_DB_ENABLED=false

# Path to the additional roles configuration file
# The JSON file contains an array of JSON objects, as shown below
# Example:
# [{
#     "name": "example_role_1",
#     "description": "Read-only access to resources",
#     "scope": "team",
#     "permissions": ["teams.join", "tools.read", "resources.read"],
#     "is_system_role": true
# },
# {
#     "name": "example_role_2",
#     "description": "Read-only access to prompts",
#     "scope": "team",
#     "permissions": ["teams.join", "tools.read", "resources.read", "prompts.read"],
#     "is_system_role": true
# }]
# MCPGATEWAY_BOOTSTRAP_ROLES_IN_DB_FILE=additional_roles_in_db.json