# KalamDB Server Configuration
# This is an example configuration file with all available settings.
# Copy this file to config.toml and adjust values for your environment.
#
# NOTE: Runtime configuration only!
# - Namespace and storage location configuration is stored in system tables (via kalamdb-sql)
# - This file contains only server runtime settings (ports, paths, limits, etc.)

[server]
# Server bind address (default: 127.0.0.1)
host = "127.0.0.1"

# Server port (default: 8080)
port = 8080

# Number of worker threads (0 = number of CPU cores)
workers = 0

# Enable HTTP/2 protocol support (default: true)
# When true, server uses automatic HTTP/1.1 and HTTP/2 cleartext (h2c) negotiation
# When false, server only supports HTTP/1.1
# HTTP/2 offers:
# - Multiplexed requests (multiple requests on single connection)
# - Header compression (HPACK)
# - Binary protocol (more efficient parsing)
# - Server push support (for future features)
enable_http2 = true

# API version (default: "v1")
# Controls the versioned endpoint prefix (e.g., /v1/api/sql)
api_version = "v1"

[storage]
# Base data directory for all KalamDB storage
# Subdirectories are automatically created:
#   ./data/rocksdb   - RocksDB hot storage (write buffer)
#   ./data/storage   - Parquet cold storage (flushed segments)
#   ./data/snapshots - Raft snapshots (consensus state)
data_path = "./data"

# Templates for table storage paths (used by 'local' storage)
# Available placeholders: {namespace}, {tableName}, {userId}
# Final paths: {default_storage_path}/{template}
# Examples:
#   Shared table: ./data/storage/myapp/products
#   User table:   ./data/storage/myapp/preferences/user123
shared_tables_template = "{namespace}/{tableName}"
user_tables_template = "{namespace}/{tableName}/{userId}"

# Remote storage timeout settings (S3, GCS, Azure)
# These timeouts apply programmatically to all remote storage operations
[storage.remote_timeouts]
# Request timeout in seconds for all remote storage operations (default: 60s)
request_timeout_secs = 60

# Connect timeout in seconds for establishing connections (default: 10s)
connect_timeout_secs = 10

[storage.rocksdb]
# Write buffer size per column family in bytes (default: 2MB)
# MEMORY OPTIMIZATION: Reduced from 64MB for lower idle memory footprint
# Formula: memory ≈ write_buffer_size × max_write_buffers × num_column_families
# Lower values reduce memory with many column families
write_buffer_size = 2097152

# Maximum number of write buffers (default: 2)
# Allows writes to continue while buffers are being flushed
# MEMORY OPTIMIZATION: Reduced from 3 to 2
max_write_buffers = 2

# Block cache size for reads in bytes (default: 4MB)
# MEMORY OPTIMIZATION: Reduced from 256MB to 4MB
# IMPORTANT: This cache is SHARED across ALL column families
# Adding more column families does NOT increase memory proportionally
# Example: 100 CFs × 4MB cache = still only 4MB total (not 400MB)
block_cache_size = 4194304

# Maximum number of background compaction/flush jobs (default: 4)
max_background_jobs = 4
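
# Illustrative write-buffer sizing using the formula above (example only; the
# column family count depends on your schema): with 20 column families and the
# defaults in this section,
#   2 MB write_buffer_size × 2 max_write_buffers × 20 CFs ≈ 80 MB of memtable memory,
# plus the single shared 4 MB block cache.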

[datafusion]
# Memory limit for query execution in bytes (default: 256MB)
# Queries exceeding this limit will be terminated
memory_limit = 268435456

# Number of parallel threads for query execution (default: number of CPU cores)
# Set to 0 to auto-detect CPU count
query_parallelism = 0

# Maximum number of partitions per query (default: 8)
# Higher values increase parallelism but use more resources
max_partitions = 8

# Batch size for record processing (default: 2048 rows)
# MEMORY OPTIMIZATION: Reduced from 8192 for lower memory usage
# Lower batch size reduces memory pressure during query execution
batch_size = 2048

[flush]
# Default row limit for flush policies (default: 10000 rows)
# Tables without explicit flush policy will use this value
default_row_limit = 10000

# Default time interval for flush in seconds (default: 300s = 5 minutes)
# Tables will flush to Parquet after this duration
default_time_interval = 300

[retention]
# Default retention hours for soft-deleted rows (default: 168 hours = 7 days)
# Rows with _deleted=true will be kept in Parquet files for this duration
default_deleted_retention_hours = 168

[stream]
# Default TTL for stream table rows in seconds (default: 10 seconds)
# Stream tables are ephemeral - rows expire after this duration
default_ttl_seconds = 10

# Default maximum buffer size for stream tables (default: 10000 rows)
# Oldest rows are evicted when buffer exceeds this limit
default_max_buffer = 10000

# Stream eviction interval in seconds (default: 60 seconds = 1 minute)
# How often the background task checks and evicts expired events
eviction_interval_seconds = 60
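
# Worked example (illustrative): with default_ttl_seconds = 10 and
# eviction_interval_seconds = 60, a row becomes eligible for eviction 10 seconds
# after insertion, but it is only removed when the background sweep next runs,
# so an expired row may linger for up to one eviction interval (~60s) past its TTL.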

[manifest_cache]
# Eviction job interval in seconds (default: 300s = 5 minutes)
# How often the eviction job runs to clean up stale manifest entries
eviction_interval_seconds = 300

# Maximum number of manifest entries in hot cache (default: 500)
# Older entries are evicted when this limit is reached (LRU policy)
max_entries = 500

# TTL in days for manifest eviction (default: 7 days)
# Manifests not accessed for this many days will be removed from both
# hot cache (RAM) and RocksDB persistent cache
# Set to 0 to disable manifest eviction (not recommended for long-running servers)
eviction_ttl_days = 7

# Weight factor for user table manifests (default: 10)
# User tables are evicted N times faster than shared tables.
user_table_weight_factor = 10

[limits]
# Maximum message size for REST API requests in bytes (default: 1MB)
max_message_size = 1048576

# Maximum rows that can be returned in a single query (default: 1000)
max_query_limit = 1000

# Default LIMIT for queries without explicit LIMIT clause (default: 50)
default_query_limit = 50

[logging]
# Log level: error, warn, info, debug, trace (default: info)
level = "info"

# Directory for all log files (default: "./logs")
# Server will create server.log (or server.jsonl for JSON format), slow.log, etc.
logs_path = "./logs"

# Also log to console/stdout (default: true)
log_to_console = true

# Log format: compact, json (default: compact)
# - compact: Human-readable text format -> server.log
#   Format: [timestamp] [LEVEL] [thread - target:line] - message
# - json: JSON Lines format -> server.jsonl (queryable via system.server_logs)
#   Each line is a valid JSON object with timestamp, level, thread, target, line, message
#   (an illustrative line is shown after this section)
format = "compact"

# Slow query logging threshold in milliseconds (default: 1000ms = 1 second)
# Queries taking longer than this threshold will be logged to slow.log
# AND displayed as WARN in the console
# Set to a high value (e.g., 999999) to disable slow query logging
slow_query_threshold_ms = 1000
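
# Illustrative server.jsonl line when format = "json" (the values are made up;
# the field set is the one documented above - timestamp, level, thread, target, line, message):
# {"timestamp":"2025-01-01T12:00:00Z","level":"INFO","thread":"main","target":"kalamdb_server","line":42,"message":"Server started"}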

[logging.otlp]
# Export tracing spans to an OTLP collector (Jaeger all-in-one supports this)
enabled = false

# gRPC endpoint for Jaeger OTLP receiver (port 4317)
endpoint = "http://127.0.0.1:4317"

# Protocol: "grpc" or "http"
protocol = "grpc"

# Service name shown in Jaeger UI
service_name = "kalamdb-server"

# Export timeout in milliseconds
timeout_ms = 3000

[performance]
# Request timeout in seconds (default: 30s)
# Requests exceeding this duration will be terminated
request_timeout = 30

# Keep-alive timeout in seconds (default: 75s)
# HTTP keep-alive allows connection reuse, reducing TCP handshake overhead
keepalive_timeout = 75

# Maximum concurrent connections per worker (default: 25000)
# Includes both REST API and WebSocket connections
# For testing environments with high concurrency, consider 50000
max_connections = 25000

# TCP listen backlog - pending connections queue size (default: 4096)
# Controls how many connections can wait in the kernel queue before being accepted
# Increase for burst traffic or high-concurrency scenarios
# Recommended values:
# - Development/Testing: 4096-8192 (handles burst test loads)
# - Production: 4096-8192 (handles traffic spikes)
# - High traffic: 8192+ (enterprise scale)
# Industry standards: Nginx (511), Apache (511), Caddy (1024), Actix (2048)
backlog = 4096

# Max blocking threads per worker for CPU-intensive operations (default: 512)
# Used for RocksDB I/O and synchronous operations
# Increase for high-concurrency workloads or test environments
worker_max_blocking_threads = 512

# Client request timeout in seconds (default: 5)
# Time allowed for client to send complete request headers
client_request_timeout = 5

# Client disconnect timeout in seconds (default: 2)
# Time allowed for graceful connection shutdown
client_disconnect_timeout = 2

# Maximum HTTP header size in bytes (default: 16384 = 16KB)
# Increase if you have large JWT tokens or custom headers
max_header_size = 16384

[rate_limit]
# Maximum SQL queries per second per user (default: 100)
# Prevents query flooding from a single user
# For testing/development environments with high load, increase to 10000-100000
max_queries_per_sec = 100

# Maximum WebSocket messages per second per connection (default: 50)
# Prevents message flooding on WebSocket connections
# For testing/development environments with high load, increase to 500-1000
max_messages_per_sec = 50

# Maximum concurrent live query subscriptions per user (default: 10)
# Limits total active subscriptions to prevent resource exhaustion
# For testing/development environments, increase to 100-1000
max_subscriptions_per_user = 10

# Maximum authentication requests per IP per second (default: 20)
# Prevents brute force attacks and login flooding
# Applies to /auth/login, /auth/refresh, /setup endpoints
max_auth_requests_per_ip_per_sec = 20

# Maximum concurrent connections per IP address (default: 100)
# Prevents a single IP from exhausting all server connections
max_connections_per_ip = 100

# Maximum requests per second per IP BEFORE authentication (default: 200)
# ⚠️ CRITICAL: This is the main rate limit that triggers IP BANS
# Applied before auth to protect against unauthenticated floods
# If exceeded repeatedly, IP will be banned for ban_duration_seconds
# For testing/development with high request rates, set to 100000+
max_requests_per_ip_per_sec = 200

# Maximum request body size in bytes (default: 10MB)
# Prevents memory exhaustion from huge request payloads
request_body_limit_bytes = 10485760

# Duration in seconds to ban abusive IPs (default: 300 = 5 minutes)
# IPs that violate max_requests_per_ip_per_sec 10+ times are banned
ban_duration_seconds = 300

# Enable connection protection middleware (default: true)
# Set to false to completely disable rate limiting (NOT recommended for production)
enable_connection_protection = true

# Maximum cached entries for rate limiting state (default: 100,000)
cache_max_entries = 100000

# Time-to-idle for cached entries in seconds (default: 600 = 10 minutes)
cache_ttl_seconds = 600
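
# Summary of the ban behaviour described above (illustrative, with the defaults):
# an IP that repeatedly exceeds max_requests_per_ip_per_sec (200 req/s) accumulates
# violations, and after 10 or more violations it is banned for ban_duration_seconds
# (300s = 5 minutes).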

# ============================================================================
# Security Settings
# ============================================================================
# CORS, WebSocket, and request limit configuration

[security]
# Maximum request body size in bytes (default: 10MB)
# Prevents memory exhaustion from large payloads
max_request_body_size = 10485760

# Maximum WebSocket message size in bytes (default: 1MB)
# Prevents memory exhaustion from large WebSocket messages
max_ws_message_size = 1048576

# Allowed WebSocket origins (if different from CORS origins)
# Leave empty to use CORS allowed_origins for WebSocket validation
allowed_ws_origins = []

# Strict WebSocket origin checking (default: false)
# If true, rejects WebSocket connections without Origin header
strict_ws_origin_check = false

# CORS Configuration (uses actix-cors)
# See: https://docs.rs/actix-cors
[security.cors]
# Allowed origins for CORS requests
# Use ["*"] or empty [] for any origin (development mode)
# For production, specify exact origins: ["https://app.example.com", "https://admin.example.com"]
allowed_origins = []

# Allowed HTTP methods (default: common REST methods)
allowed_methods = ["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]

# Allowed HTTP headers
# Use ["*"] to allow any header
allowed_headers = ["Authorization", "Content-Type", "Accept", "Origin", "X-Requested-With"]

# Headers to expose to the browser (default: none)
# Example: ["X-Custom-Header", "X-Request-Id"]
expose_headers = []

# Allow credentials (cookies, authorization headers) (default: true)
# Note: If true, allowed_origins cannot be ["*"] in browsers
allow_credentials = true

# Preflight request cache max age in seconds (default: 3600 = 1 hour)
max_age = 3600

# Allow private network requests (default: false)
# Enables Access-Control-Request-Private-Network header support
allow_private_network = false

[authentication]
# Bcrypt cost factor for password hashing (default: 12, range: 10-14)
# Higher values = more secure but slower
# Changing this only affects NEW passwords
bcrypt_cost = 12

# Minimum password length (default: 8)
min_password_length = 8

# Maximum password length (default: 72, bcrypt limit)
# Note: Passwords longer than 72 bytes are truncated by bcrypt
max_password_length = 72

# Disable common password checking (default: false)
# If true, allows passwords like "password", "123456", etc.
# WARNING: Only disable for testing/development environments!
disable_common_password_check = false

# JWT configuration (for JWT Bearer token authentication)
# Secret key for JWT signature validation (minimum 32 characters recommended)
# IMPORTANT: Change this in production! Use a strong, random secret.
jwt_secret = "your-secret-key-at-least-32-chars-change-me-in-production"

# Allow initial server setup from non-localhost clients (default: false)
# Useful for Docker or remote hosts in trusted networks.
# WARNING: Only enable in trusted environments.
allow_remote_setup = false

# Comma-separated list of trusted JWT issuers (leave empty to accept any issuer)
# Add your OAuth provider domains here
# Example for Google OAuth: "https://accounts.google.com"
# Example for GitHub OAuth: "https://github.com"
# Multiple issuers (comma-separated): "https://accounts.google.com,https://github.com,https://kalamdb.io"
jwt_trusted_issuers = ""
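
# One common way to generate a strong jwt_secret (generic example, not a
# KalamDB-specific tool): `openssl rand -base64 48` prints a random 64-character
# string that can be pasted into jwt_secret above.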

[shutdown]
# Timeout settings for graceful shutdown

[shutdown.flush]
# Timeout in seconds to wait for flush jobs to complete during graceful shutdown (default: 300)
timeout = 300

# Maximum number of concurrent jobs (default: 10)
# Controls how many jobs can execute simultaneously
max_concurrent = 10

# Maximum number of retry attempts per job (default: 3)
# Jobs will be retried this many times before being marked as permanently failed
max_retries = 3

# Initial retry backoff delay in milliseconds (default: 100ms)
# Delay increases exponentially with each retry (100ms, 200ms, 400ms, etc.)
retry_backoff_ms = 100

# Phase 11, T026: SQL Handler Execution Configuration
[execution]
# Handler execution timeout in seconds (default: 30)
# Maximum time allowed for a single SQL statement to execute
# Prevents hung requests from blocking resources
handler_timeout_seconds = 30

# Maximum number of parameters per statement (default: 50)
# Prevents memory exhaustion from excessive parameter arrays
max_parameters = 50

# Maximum size per parameter in bytes (default: 524288 = 512KB)
# Prevents memory exhaustion from individual large parameters
max_parameter_size_bytes = 524288

# Maximum number of cached SQL logical plans (default: 1000)
# Bounds the memory used by the SQL plan cache
sql_plan_cache_max_entries = 1000

# Time-to-idle TTL for SQL cached plans in seconds (default: 900 = 15 minutes)
# Unused plans are evicted automatically after this idle period
sql_plan_cache_ttl_seconds = 900

# ============================================================================
# Cluster Configuration - Multi-Node Raft Replication (v0.2.0)
# ============================================================================
# Enable distributed clustering with Raft consensus for strong consistency.
# When the [cluster] section is present, the server runs as part of a multi-node cluster.
# When absent (default), the server runs in standalone mode with no overhead.
#
# IMPORTANT: All nodes in a cluster MUST have:
# - Matching cluster_id values
# - The same peers list (with this node's entry removed)
# - Matching sharding configuration (user_shards, shared_shards)
# - Unique node_id values
#
# Node with node_id=1 automatically becomes the bootstrap node.
# No explicit bootstrap flag needed - cluster auto-initializes.

# [cluster]
# # Unique cluster identifier - all nodes must share this
# cluster_id = "prod-cluster"
#
# # This node's unique ID within the cluster (must be >= 1)
# # Node with node_id=1 is the designated bootstrap node
# node_id = 1
#
# # RPC address for Raft inter-node communication
# rpc_addr = "0.0.0.0:9090"
#
# # API address for client HTTP requests (should match server.host:server.port)
# api_addr = "http://192.168.1.10:8080"
#
# # Optional mTLS for inter-node gRPC (Raft + cluster RPC)
# # When enabled, all three paths are required.
# # [cluster.rpc_tls]
# # enabled = true
# # ca_cert_path = "/etc/kalamdb/certs/cluster-ca.pem"
# # node_cert_path = "/etc/kalamdb/certs/node1.pem"
# # node_key_path = "/etc/kalamdb/certs/node1.key"
#
# # Number of user data shards (default: 8)
# # MEMORY OPTIMIZATION: Reduced from 32 (saves ~5-8 MB)
# # Each shard is a separate Raft group for user table data
# # Trade-off: Lower write parallelism (acceptable for dev/testing)
# user_shards = 8
#
# # Number of shared data shards (default: 1)
# # Each shard is a separate Raft group for shared table data
# shared_shards = 1
#
# # Raft heartbeat interval in milliseconds (default: 50)
# heartbeat_interval_ms = 50
#
# # Raft election timeout range [min, max] in milliseconds (default: [150, 300])
# election_timeout_ms = [150, 300]
#
# # Maximum entries per Raft snapshot (default: 10000)
# snapshot_threshold = 10000
#
# # Minimum number of nodes that must acknowledge writes (default: 1)
# # Set to 2 or 3 for strong consistency in a 3-node cluster
# # This ensures data is replicated to multiple nodes before acknowledging success
# min_replication_nodes = 3
#
# # Peer nodes (list all OTHER nodes in the cluster)
# [[cluster.peers]]
# node_id = 2
# rpc_addr = "192.168.1.11:9090"
# api_addr = "http://192.168.1.11:8080"
# # Optional TLS server-name override for this peer (SNI/hostname verification)
# # rpc_server_name = "node2.cluster.local"
#
# [[cluster.peers]]
# node_id = 3
# rpc_addr = "192.168.1.12:9090"
# api_addr = "http://192.168.1.12:8080"
# # rpc_server_name = "node3.cluster.local"

# ============================================================================
# Example: 3-Node Production Cluster Configuration
# ============================================================================
# For production, use an odd number of nodes (3 or 5) for optimal fault tolerance:
# - 3 nodes: tolerates 1 node failure
# - 5 nodes: tolerates 2 node failures
#
# Node 1 configuration (server1.toml):
# [cluster]
# cluster_id = "prod"
# node_id = 1
# rpc_addr = "node1.example.com:9090"
# api_addr = "http://node1.example.com:8080"
# user_shards = 32
# shared_shards = 1
# min_replication_nodes = 3
#
# [[cluster.peers]]
# node_id = 2
# rpc_addr = "node2.example.com:9090"
# api_addr = "http://node2.example.com:8080"
# rpc_server_name = "node2.example.com"
#
# [[cluster.peers]]
# node_id = 3
# rpc_addr = "node3.example.com:9090"
# api_addr = "http://node3.example.com:8080"
# rpc_server_name = "node3.example.com"
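
# Node 2 configuration (server2.toml) - a sketch derived from the rules above
# (same cluster_id and sharding, unique node_id, peers list with this node's own entry removed):
# [cluster]
# cluster_id = "prod"
# node_id = 2
# rpc_addr = "node2.example.com:9090"
# api_addr = "http://node2.example.com:8080"
# user_shards = 32
# shared_shards = 1
# min_replication_nodes = 3
#
# [[cluster.peers]]
# node_id = 1
# rpc_addr = "node1.example.com:9090"
# api_addr = "http://node1.example.com:8080"
# rpc_server_name = "node1.example.com"
#
# [[cluster.peers]]
# node_id = 3
# rpc_addr = "node3.example.com:9090"
# api_addr = "http://node3.example.com:8080"
# rpc_server_name = "node3.example.com"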