# KalamDB Server Configuration
# This is an example configuration file with all available settings.
# Copy this file to config.toml and adjust values for your environment.
#
# NOTE: Runtime configuration only!
# - Namespace and storage location configuration is stored in system tables (via kalamdb-system)
# - This file contains only server runtime settings (ports, paths, limits, etc.)

# Maximum lifetime (in seconds) for an open transaction before the server aborts it.
transaction_timeout_secs = 300

# Maximum in-memory transaction overlay size before new writes are rejected.
# 104857600 bytes = 100 MiB.
max_transaction_buffer_bytes = 104857600

[server]
# Server bind address (default: 127.0.0.1)
# This template binds to all interfaces.
host = "0.0.0.0"

# Server port (default: 2900)
port = 2900

# Public origin used by the Admin UI for both /v1/api and /v1/ws traffic.
# Set this when browsers reach KalamDB through a public hostname or reverse proxy.
# Example: "https://db.example.com"
# Empty = use "http://localhost:{port}"
public_origin = ""

# Number of worker threads (0 = number of CPU cores)
workers = 0

# Enable HTTP/2 protocol support (default: true)
# When true, server uses automatic HTTP/1.1 and HTTP/2 cleartext (h2c) negotiation
# When false, server only supports HTTP/1.1
# HTTP/2 offers:
# - Multiplexed requests (multiple requests on single connection)
# - Header compression (HPACK)
# - Binary protocol (more efficient parsing)
# - Server push support (for future features)
enable_http2 = true

# API version (default: "v1")
# Controls the versioned endpoint prefix (e.g., /v1/api/sql)
api_version = "v1"

# Optional path to the Admin UI static build output.
# Uncomment to serve the UI bundle directly from the server.
# ui_path = "./ui/dist"

[storage]
# Base data directory for all KalamDB storage
# Subdirectories are automatically created:
#   ./data/rocksdb   - RocksDB hot storage (write buffer)
#   ./data/storage   - Parquet cold storage (flushed segments)
#   ./data/snapshots - Raft snapshots (consensus state)
data_path = "./data"

# Templates for table storage paths (used by 'local' storage)
# Available placeholders: {namespace}, {tableName}, {userId}
# Final paths: {default_storage_path}/{template}
# Examples:
#   Shared table: ./data/storage/myapp/products
#   User table:   ./data/storage/myapp/preferences/user123
shared_tables_template = "{namespace}/{tableName}"
user_tables_template = "{namespace}/{tableName}/{userId}"

# Remote storage timeout settings (S3, GCS, Azure)
# These timeouts apply programmatically to all remote storage operations
[storage.remote_timeouts]
# Request timeout in seconds for all remote storage operations (default: 60s)
request_timeout_secs = 60
# Connect timeout in seconds for establishing connections (default: 10s)
connect_timeout_secs = 10

[storage.rocksdb]
# Block cache size for reads in bytes (default: 2MB)
# IMPORTANT: This cache is SHARED across ALL column families
# Adding more column families does NOT increase memory proportionally
block_cache_size = 2097152

# Maximum number of background compaction/flush jobs (default: 4)
max_background_jobs = 4

# Maximum number of open files RocksDB may keep open (default: 512)
max_open_files = 512

# Sync each write to the WAL before returning (default: false)
sync_writes = false

# Disable WAL for highest throughput at the cost of crash durability (default: false)
disable_wal = false

# Compact column families during startup (default: false)
compact_on_startup = false

[storage.rocksdb.cf_profiles.system_meta]
# Low-write system metadata tables and compatibility partitions.
write_buffer_size = 32768
max_write_buffers = 2

[storage.rocksdb.cf_profiles.system_index]
# Secondary indexes for system tables.
write_buffer_size = 32768
max_write_buffers = 2

[storage.rocksdb.cf_profiles.hot_data]
# User/shared/stream data CFs and topic message payloads.
# These stay warmer because they sit on the main read/write path.
write_buffer_size = 131072
max_write_buffers = 2

[storage.rocksdb.cf_profiles.hot_index]
# PK indexes and vector PK indexes. These are latency-sensitive, but smaller than data CFs.
write_buffer_size = 65536
max_write_buffers = 2

[storage.rocksdb.cf_profiles.raft]
# The single raft_data CF is append-heavy and benefits from a larger memtable.
write_buffer_size = 262144
max_write_buffers = 2

[datafusion]
# Memory limit for query execution in bytes (default: 64MB)
# KalamDB is optimised for mobile/OLTP workloads, not heavy analytics.
# 64 MB is sufficient for key-based lookups and small aggregates.
# Queries that exceed this limit are terminated immediately.
memory_limit = 67108864

# Number of parallel threads for query execution (default: 2)
# Low fixed value: mobile app queries are short-lived key lookups;
# high parallelism just increases context-switch overhead.
# Set to 0 to auto-detect CPU count (not recommended for shared servers).
query_parallelism = 2

# Maximum number of partitions per query (default: 4)
# Caps concurrent partition scans; reduces memory and CPU burst per query.
max_partitions = 4

# Batch size for record processing (default: 1024 rows)
# Smaller batches limit peak arrow allocation per operator.
batch_size = 1024

[flush]
# Default row limit for flush policies (default: 10000 rows)
# Tables without explicit flush policy will use this value
default_row_limit = 10000

# Default time interval for flush in seconds (default: 300s = 5 minutes)
# Tables will flush to Parquet after this duration
default_time_interval = 300

# How often the background scheduler checks for tables with pending writes
# and creates flush jobs (default: 60 seconds). Set to 0 to disable.
check_interval_seconds = 60

[flush.compaction]
# Optional post-flush Parquet compaction for trailing small segments.
# Disabled by default so flush latency and cold-storage writes stay unchanged.
enabled = false

# Compact only when the newest trailing run has at least this many small segments.
min_eligible_segments = 5

# Maximum number of newest trailing segments rewritten by one compaction job.
max_segments_per_run = 8

# User-table segments below this row count are considered small.
user_max_segment_rows = 10000

# Shared tables generally collect more rows per scope, so the default limit is higher.
shared_max_segment_rows = 25000

[stream]
# Default TTL for stream table rows in seconds (default: 10 seconds)
# Stream tables are ephemeral - rows expire after this duration
default_ttl_seconds = 10

# Default maximum buffer size for stream tables (default: 10000 rows)
# Oldest rows are evicted when buffer exceeds this limit
default_max_buffer = 10000

# Stream eviction interval in seconds (default: 60 seconds = 1 minute)
# How often the background task checks and evicts expired events
eviction_interval_seconds = 60

[manifest_cache]
# Eviction job interval in seconds (default: 300s = 5 minutes)
# How often the eviction job runs to clean up stale manifest entries
eviction_interval_seconds = 300

# Maximum number of manifest entries in hot cache (default: 500)
# Older entries are evicted when this limit is reached (LRU policy)
max_entries = 500

# TTL in days for manifest eviction (default: 7 days)
# Manifests not accessed for this many days will be removed from both
# hot cache (RAM) and RocksDB persistent cache
# Set to 0 to disable manifest eviction (not recommended for long-running servers)
eviction_ttl_days = 7

[limits]
# Maximum message size for REST API requests in bytes (default: 1MB)
max_message_size = 1048576

# Maximum rows that can be returned in a single query (default: 1000)
max_query_limit = 1000

# Default LIMIT for queries without explicit LIMIT clause (default: 50)
default_query_limit = 50

[logging]
# Log level: error, warn, info, debug, trace (default: info)
level = "info"

# Directory for all log files (default: "./logs")
# Server will create server.log (or server.jsonl for JSON format), slow.jsonl, etc.
logs_path = "./logs"

# Also log to console/stdout (default: true)
log_to_console = true

# Log format: compact, json (default: compact)
# - compact: Human-readable text format -> server.log
#   Format: [timestamp] [LEVEL] [thread - target:line] - message
# - json: JSON Lines format -> server.jsonl (queryable via system.server_logs)
#   Each line is a valid JSON object with timestamp, level, thread, target, line, message
format = "compact"

# Slow query logging threshold in milliseconds (default: 1000ms = 1 second)
# Queries taking longer than this threshold will be logged to slow.jsonl
# AND displayed as WARN in the console
# Set to a high value (e.g., 999999) to disable slow query logging
slow_query_threshold_ms = 1000

[logging.otlp]
# Export tracing spans to an OTLP collector (Jaeger all-in-one supports this)
enabled = false
# gRPC endpoint for Jaeger OTLP receiver (port 4317)
endpoint = "http://127.0.0.1:4317"
# Protocol: "grpc" or "http"
protocol = "grpc"
# Service name shown in Jaeger UI
service_name = "kalamdb-server"
# Export timeout in milliseconds
timeout_ms = 3000

# Optional per-target log level overrides.
# Example:
# [logging.targets]
# datafusion = "info"
# arrow = "warn"
# parquet = "warn"

[performance]
# Request timeout in seconds (default: 30s)
# Requests exceeding this duration will be terminated
request_timeout = 30

# Keep-alive timeout in seconds (default: 75s)
# HTTP keep-alive allows connection reuse, reducing TCP handshake overhead
keepalive_timeout = 75

# Maximum concurrent connections per worker (default: 25000)
# Includes both REST API and WebSocket connections
# For testing environments with high concurrency, consider 50000
max_connections = 25000

# TCP listen backlog - pending connections queue size (default: 4096)
# Controls how many connections can wait in the kernel queue before being accepted
# Increase for burst traffic or high-concurrency scenarios
# Recommended values:
# - Development/Testing: 4096-8192 (handles burst test loads)
# - Production: 4096-8192 (handles traffic spikes)
# - High traffic: 8192+ (enterprise scale)
# Industry standards: Nginx (511), Apache (511), Caddy (1024), Actix (2048)
backlog = 4096

# Max blocking threads for synchronous operations (default: 32)
# Applied to the outer Tokio runtime blocking pool and Actix worker blocking pools.
# Used for RocksDB, Parquet, and other synchronous operations.
# Increase for high-concurrency workloads or test environments.
worker_max_blocking_threads = 32

# Number of tokio runtime worker threads (default: 0 = auto, num_cpus capped at 4)
# Lower values reduce idle RSS from thread stacks (~2MB per thread).
# Set to 0 for auto-detection, or an explicit count for Docker/constrained environments.
# Can also be overridden via KALAMDB_TOKIO_WORKER_THREADS env var.
tokio_worker_threads = 0

# Client request timeout in seconds (default: 5)
# Time allowed for client to send complete request headers
client_request_timeout = 5

# Client disconnect timeout in seconds (default: 2)
# Time allowed for graceful connection shutdown
client_disconnect_timeout = 2

# Maximum HTTP header size in bytes (default: 16384 = 16KB)
# Increase if you have large JWT tokens or custom headers
max_header_size = 16384

[rate_limit]
# NOTE: Most limits in this section are deliberately set far above the
# documented defaults; see the explanation on max_queries_per_sec below.

# Maximum SQL queries per second per user (default: 100)
# Prevents query flooding from a single user
# The isolated SDK/full-suite server uses this example template, so keep these
# high enough to avoid test self-throttling on slower CI and Linux builders.
max_queries_per_sec = 100000

# Maximum WebSocket messages per second per connection (default: 50)
# Prevents message flooding on WebSocket connections
max_messages_per_sec = 10000

# Maximum concurrent live query subscriptions per user (default: 10)
# Limits total active subscriptions to prevent resource exhaustion
max_subscriptions_per_user = 200000

# Maximum authentication requests per IP per second (default: 20)
# Prevents brute force attacks and login flooding
# Applies to /auth/login, /auth/refresh, /setup endpoints
max_auth_requests_per_ip_per_sec = 200000

# Maximum concurrent connections per IP address (default: 100)
# Prevents a single IP from exhausting all server connections
max_connections_per_ip = 200000

# Maximum requests per second per IP BEFORE authentication (default: 200)
# ⚠️ CRITICAL: This is the main rate limit that triggers IP BANS
# Applied before auth to protect against unauthenticated floods
# If exceeded repeatedly, IP will be banned for ban_duration_seconds
max_requests_per_ip_per_sec = 200000

# Maximum request body size in bytes (default: 10MB)
# Prevents memory exhaustion from huge request payloads
# This template sets 104857600 bytes (100 MiB), i.e. 10x the stated default.
request_body_limit_bytes = 104857600

# Duration in seconds to ban abusive IPs (default: 300 = 5 minutes)
# IPs that violate max_requests_per_ip_per_sec 10+ times are banned
ban_duration_seconds = 300

# Enable connection protection middleware (default: true)
# Set to false to completely disable rate limiting (NOT recommended for production)
enable_connection_protection = true

# Maximum cached entries for rate limiting state (default: 1,000)
# MEMORY OPTIMIZATION: Reduced from 100k. Moka internal bookkeeping scales
# with max_capacity. 1k handles typical mobile-app deployments.
cache_max_entries = 1000

# Time-to-idle for cached entries in seconds (default: 600 = 10 minutes)
cache_ttl_seconds = 600

# ============================================================================
# Security Settings
# ============================================================================
# CORS, WebSocket, and request limit configuration

[security]
# Maximum request body size in bytes (default: 10MB)
# Prevents memory exhaustion from large payloads
max_request_body_size = 10485760

# Maximum WebSocket message size in bytes (default: 1MB)
# Prevents memory exhaustion from large WebSocket messages
max_ws_message_size = 1048576

# WebSocket origins use security.cors.allowed_origins.
# Strict WebSocket origin checking (default: false)
# If true, rejects WebSocket connections without Origin header
strict_ws_origin_check = false

# Trusted reverse proxy source IPs or CIDR ranges for forwarded client IP headers.
# Only peers in this list may supply X-Forwarded-For / X-Real-IP.
# Examples: ["10.0.1.9", "10.0.0.0/8", "192.168.0.0/24"]
trusted_proxy_ranges = []

# CORS Configuration (uses actix-cors)
# See: https://docs.rs/actix-cors
[security.cors]
# Allowed origins for CORS requests
# Bind-to-all-interfaces still needs an explicit browser allowlist.
# Add your public hostname(s) here for reverse-proxy or production deployments.
allowed_origins = [
    "http://localhost:4173",
    "http://127.0.0.1:4173",
    "http://localhost:5173",
    "http://127.0.0.1:5173",
    "http://localhost:5174",
    "http://127.0.0.1:5174",
    "http://localhost:2900",
    "http://127.0.0.1:2900",
]

# Allowed HTTP methods (default: common REST methods)
allowed_methods = ["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]

# Allowed HTTP headers
# Use ["*"] to allow any header
allowed_headers = ["Authorization", "Content-Type", "Accept", "Origin", "X-Requested-With"]

# Headers to expose to the browser (default: none)
# Example: ["X-Custom-Header", "X-Request-Id"]
expose_headers = []

# Allow credentials (cookies, authorization headers) (default: true)
# Note: If true, allowed_origins cannot be ["*"] in browsers
allow_credentials = true

# Preflight request cache max age in seconds (default: 3600 = 1 hour)
max_age = 3600

# Allow private network requests (default: false)
# Enables Access-Control-Request-Private-Network header support
allow_private_network = false

[websocket]
# Client heartbeat timeout in seconds (default: 10)
# How long to wait for client pong/activity before disconnecting.
# Increase for high connection counts (>10K) to avoid false timeouts
# caused by scheduling contention and TCP buffer pressure.
client_timeout_secs = 10

# Authentication timeout in seconds (default: 3)
# How long to wait for auth message after WebSocket connect
auth_timeout_secs = 3

# Heartbeat check interval in seconds (default: 5)
# How often the background heartbeat task iterates all connections.
# Pings are staggered across 4 groups, so each connection is pinged
# once every heartbeat_interval × 4 seconds.
# NOTE(review): with the values in this file that is a 20 s ping cadence versus
# client_timeout_secs = 10 - confirm the timeout is not driven by pong replies alone.
heartbeat_interval_secs = 5

[auth]
# Optional root password for remote root access.
# Prefer KALAMDB_ROOT_PASSWORD in production.
# root_password = "change-me"

# Bcrypt cost factor for password hashing (default: 12, range: 10-14)
# Higher values = more secure but slower
# Changing this only affects NEW passwords
bcrypt_cost = 12

# Minimum password length (default: 8)
min_password_length = 8

# Maximum accepted password length for local validation.
# Bcrypt still only uses the first 72 bytes of the password material.
max_password_length = 72

# JWT expiry in hours (default: 24)
jwt_expiry_hours = 24

# Whether auth cookies require HTTPS (default: true)
cookie_secure = true

# Enforce uppercase/lowercase/digit/special password complexity (default: false)
enforce_password_complexity = false

# JWT configuration (for JWT Bearer token authentication)
# Secret key for JWT signature validation (minimum 32 characters recommended)
# IMPORTANT: Change this in production! Use a strong, random secret.
jwt_secret = "your-secret-key-at-least-32-chars-change-me-in-production"

# Allow initial server setup from non-localhost clients (default: false)
# Useful for Docker or remote hosts in trusted networks.
# WARNING: Only enable in trusted environments.
allow_remote_setup = false

# Comma-separated list of trusted JWT issuers for external bearer tokens.
# The configured auth.oidc issuer is registered for audience validation at startup.
jwt_trusted_issuers = ""

# Optional pre-shared token required by pg_kalam gRPC connections.
# pg_auth_token = "change-me"

# Local username/password login (default: true).
[auth.local]
enabled = true

# Single external OpenID Connect provider.
# Use this for Dex, Keycloak, Okta, Auth0, Entra ID, Google, Firebase, or any
# provider with standards-compliant OIDC discovery.
#
# Configure browser redirect URIs at the provider, for example:
#   http://YOUR_ADMIN_UI_ORIGIN/ui/oauth/callback
#
# [auth.oidc]
# enabled = true
# display_name = "Dex"
# issuer = "http://127.0.0.1:5556"
# client_id = "client"
# Local Dex development normally uses a public PKCE client, so leave client_secret unset.
# client_secret = "optional-confidential-client-secret"
# scopes = ["openid", "email", "profile"]
# auto_provision = true
# default_role = "user"
# broker_device_flow_enabled = false
# Enable only when your provider advertises or supports device auth.
# device_authorization_endpoint = "http://127.0.0.1:5556/device/code"

[user_management]
# Days to keep a soft-deleted user before permanent purge (default: 30)
deletion_grace_period_days = 30

# Cron schedule for the background cleanup job (default: daily at 02:00)
cleanup_job_schedule = "0 2 * * *"

[files]
# Maximum size for a single uploaded FILE value (default: 25MB)
max_size_bytes = 26214400

# Maximum number of uploaded files accepted in one request (default: 20)
max_files_per_request = 20

# Maximum number of files stored in one subdirectory before rotation (default: 5000)
max_files_per_folder = 5000

# Temporary staging directory for uploads before durable placement
staging_path = "./data/tmp"

# Allowed MIME types for FILE uploads. Empty list allows any MIME type.
allowed_mime_types = []

[shutdown]
# Timeout settings for graceful shutdown

[shutdown.flush]
# Timeout in seconds to wait for flush jobs to complete during graceful shutdown (default: 300)
timeout = 300

[jobs]
# Maximum number of concurrent jobs (default: 10)
# Controls how many jobs can execute simultaneously
max_concurrent = 10

# Maximum number of retry attempts per job (default: 3)
# Jobs will be retried this many times before being marked as permanently failed
max_retries = 3

# Initial retry backoff delay in milliseconds (default: 100ms)
# Delay increases exponentially with each retry (100ms, 200ms, 400ms, etc.)
retry_backoff_ms = 100

# How often the jobs runner flushes all RocksDB memtables so stale WAL files
# can be reclaimed (default: 300 seconds). Set to 0 to disable.
wal_cleanup_interval_seconds = 300

# Phase 11, T026: SQL Handler Execution Configuration
[execution]
# Handler execution timeout in seconds (default: 30)
# Maximum time allowed for a single SQL statement to execute
# Prevents hung requests from blocking resources
handler_timeout_seconds = 30

# Maximum number of parameters per statement (default: 50)
# Prevents memory exhaustion from excessive parameter arrays
max_parameters = 50

# Maximum size per parameter in bytes (default: 524288 = 512KB)
# Prevents memory exhaustion from individual large parameters
max_parameter_size_bytes = 524288

# Maximum number of cached SQL logical plans (default: 200)
# Bound memory used by SQL plan cache
sql_plan_cache_max_entries = 200

# Time-to-idle TTL for SQL cached plans in seconds (default: 900 = 15 minutes)
# Unused plans are evicted automatically after this idle period
sql_plan_cache_ttl_seconds = 900

[topics]
# Visibility timeout for topic consumer claims (seconds).
# Keep this lower in the example template so local and CI topic-recovery tests
# exercise redelivery promptly without inflating test timeouts.
visibility_timeout_secs = 10

# Default topic time retention in seconds (default: 604800 = 7 days).
# Individual topics can override this with CREATE/ALTER TOPIC retention options.
default_retention_seconds = 604800

# Default topic byte retention per partition (default: 1073741824 = 1 GiB).
# Individual topics can set this to NULL to disable byte retention.
default_retention_max_bytes = 1073741824

# Topic retention scheduler interval in seconds (default: 3600 = 1 hour).
# Set to 0 to disable automatic topic retention scheduling.
retention_check_interval_seconds = 3600

# Maximum messages a topic retention job deletes per partition in one run.
retention_batch_size = 10000

# ============================================================================
# RPC TLS / mTLS Configuration
# ============================================================================
# Secures the shared gRPC listener used by Raft replication, cluster RPCs,
# and the PostgreSQL extension.
#
# Both cluster nodes and PG extension clients present a certificate signed by
# the same CA. The server identifies the caller from the certificate CN:
#   kalamdb-node-{node_id} → cluster node
#   kalamdb-pg-{name} → PG extension client
#
# All cert values accept EITHER a file path OR an inline PEM string.
# Inline detection: if the value starts with "-----BEGIN", it is used directly.
# Otherwise the value is treated as a file path and read from disk.

# [rpc_tls]
# enabled = true
# # CA cert — validates ALL incoming client certs (cluster nodes + PG extension)
# ca_cert = "/etc/kalamdb/certs/ca.pem"
# # This server's identity cert and key
# server_cert = "/etc/kalamdb/certs/node1.pem"
# server_key = "/etc/kalamdb/certs/node1.key"
# # Require clients to present a cert (full mTLS). Set false for server-only TLS.
# require_client_cert = true

# ============================================================================
# Cluster Configuration — Multi-Node Raft Replication
# ============================================================================
# When [cluster] is present the server joins a distributed cluster.
# When absent (default) the server runs standalone with no clustering overhead.
#
# All nodes in a cluster MUST have:
# - Matching cluster_id values
# - The same peers list (this node omitted)
# - Matching sharding configuration (user_shards, shared_shards)
# - Unique node_id values
#
# Node with node_id=1 is the bootstrap node (no explicit flag needed).
# [cluster]
# # Unique cluster identifier - all nodes must share this
# cluster_id = "prod-cluster"
#
# # This node's unique ID within the cluster (must be >= 1)
# # Node with node_id=1 is the designated bootstrap node
# node_id = 1
#
# # RPC address for Raft inter-node communication
# rpc_addr = "0.0.0.0:2910"
#
# # API address for client HTTP requests (should match server.host:server.port)
# api_addr = "http://192.168.1.10:2900"
#
# # Optional mTLS for inter-node gRPC (Raft + cluster RPC)
# # When enabled, all three paths are required.
# # Note: TLS for this node's identity lives in the top-level [rpc_tls] section, not here.
#
# # Number of user data shards (default: 8)
# # MEMORY OPTIMIZATION: Reduced from 32 (saves ~5-8 MB)
# # Each shard is a separate Raft group for user table data
# # Trade-off: Lower write parallelism (acceptable for dev/testing)
# user_shards = 8
#
# # Number of shared data shards (default: 1)
# # Each shard is a separate Raft group for shared table data
# shared_shards = 1
#
# # Raft heartbeat interval in milliseconds (default: 50)
# heartbeat_interval_ms = 50
#
# # Raft election timeout range [min, max] in milliseconds (default: [150, 300])
# election_timeout_ms = [150, 300]
#
# # Snapshot policy (default: "LogsSinceLast(1000)")
# # Supported values:
# # - "LogsSinceLast(1000)"
# # - "Never"
# snapshot_policy = "LogsSinceLast(1000)"
#
# # Number of snapshots to retain on disk (default: 3)
# max_snapshots_to_keep = 3
#
# # Timeout waiting for learner catchup or replication progress (default: 5000ms)
# replication_timeout_ms = 5000
#
# # Minimum interval between reconnect attempts to an unreachable peer (default: 3000ms)
# reconnect_interval_ms = 3000
#
# # Optional peer startup wait tuning.
# # peer_wait_max_retries = 60
# # peer_wait_initial_delay_ms = 500
# # peer_wait_max_delay_ms = 2000
#
# # Peer nodes (list all OTHER nodes in the cluster)
# [[cluster.peers]]
# node_id = 2
# rpc_addr = "192.168.1.11:2910"
# api_addr = "http://192.168.1.11:2900"
# # Optional TLS server-name override for this peer (SNI/hostname verification)
# # rpc_server_name = "node2.cluster.local"
#
# [[cluster.peers]]
# node_id = 3
# rpc_addr = "192.168.1.12:2910"
# api_addr = "http://192.168.1.12:2900"
# # rpc_server_name = "node3.cluster.local"

# ============================================================================
# Example: 3-Node Production Cluster Configuration
# ============================================================================
# For production, use an odd number of nodes (3 or 5) for optimal fault tolerance:
# - 3 nodes: tolerates 1 node failure
# - 5 nodes: tolerates 2 node failures
#
# Node 1 configuration (server1.toml):
# [cluster]
# cluster_id = "prod"
# node_id = 1
# rpc_addr = "node1.example.com:2910"
# api_addr = "http://node1.example.com:2900"
# user_shards = 32
# shared_shards = 1
# snapshot_policy = "LogsSinceLast(1000)"
# max_snapshots_to_keep = 3
# replication_timeout_ms = 5000
# reconnect_interval_ms = 3000
#
# [[cluster.peers]]
# node_id = 2
# rpc_addr = "node2.example.com:2910"
# api_addr = "http://node2.example.com:2900"
# rpc_server_name = "node2.example.com"
#
# [[cluster.peers]]
# node_id = 3
# rpc_addr = "node3.example.com:2910"
# api_addr = "http://node3.example.com:2900"
# rpc_server_name = "node3.example.com"