# Dream Server Configuration
# Copy this file to .env and edit values before starting:
#   cp .env.example .env
#
# The installer (install-core.sh) generates .env automatically with
# secure random secrets. This file documents all available variables.
#
# Format: one KEY=VALUE assignment per line; lines starting with "#" are
# comments. Commented-out assignments show the default / an example value.

# Dream Server version (auto-set by installer, used by dream-cli for compat checks)
# DREAM_VERSION=2.4.0

# LiteLLM proxy API key for internal service auth
# TARGET_API_KEY=not-needed

# ═══════════════════════════════════════════════════════════════════
# REQUIRED — these must be set or docker compose will refuse to start
# ═══════════════════════════════════════════════════════════════════
# Replace every CHANGEME below before starting the stack.

# Session signing for Open WebUI (generate: openssl rand -hex 32)
WEBUI_SECRET=CHANGEME

# n8n workflow automation credentials
N8N_USER=admin@dreamserver.local
N8N_PASS=CHANGEME

# LiteLLM API gateway key (generate: echo "sk-dream-$(openssl rand -hex 16)")
LITELLM_KEY=CHANGEME

# LiveKit real-time communication credentials
LIVEKIT_API_KEY=CHANGEME
LIVEKIT_API_SECRET=CHANGEME

# Dify agent platform secret key
DIFY_SECRET_KEY=CHANGEME

# SearXNG session secret (generate: openssl rand -hex 32)
SEARXNG_SECRET=CHANGEME

# OpenCode web UI password (generate: openssl rand -base64 16)
OPENCODE_SERVER_PASSWORD=CHANGEME

# OpenClaw gateway token (generate: openssl rand -hex 24)
OPENCLAW_TOKEN=CHANGEME

# ═══════════════════════════════════════════════════════════════════
# OpenClaw Security
# ═══════════════════════════════════════════════════════════════════
#
# OpenClaw device authentication (device pairing) defaults to ON. Token auth
# and device auth are separate gates: a valid OPENCLAW_TOKEN still
# auto-connects the local Control UI with device auth enabled, so you
# normally do NOT need to change this.
#
# Setting this to "true" DISABLES device pairing entirely. Anyone who can
# reach the gateway then gets an UNAUTHENTICATED agent with exec / read /
# write tools — especially dangerous with a widened BIND_ADDRESS
# (LAN / Tailscale / 0.0.0.0). Only enable as a deliberate, understood
# escape hatch; a loud warning banner is logged at startup when set.
# Leave unset (the secure default) unless you fully accept the risk.
# OPENCLAW_DANGEROUSLY_DISABLE_DEVICE_AUTH=

# ═══════════════════════════════════════════════════════════════════
# Network Binding
# ═══════════════════════════════════════════════════════════════════
# Bind address for all Docker port bindings.
#   127.0.0.1 = localhost only (secure default)
#   0.0.0.0   = accessible from LAN (headless servers, see SECURITY.md)
# BIND_ADDRESS=127.0.0.1

# ═══════════════════════════════════════════════════════════════════
# LLM Backend Mode
# ═══════════════════════════════════════════════════════════════════
#   local  = llama-server (default, requires GPU or CPU inference)
#   cloud  = LiteLLM -> cloud APIs (no local GPU needed)
#   hybrid = local primary, cloud fallback
DREAM_MODE=local

# Inference backend engine: llama-server (NVIDIA/CPU), lemonade (AMD), litellm (cloud)
# AMD hardware auto-selects lemonade for NPU/Vulkan/ROCm acceleration
LLM_BACKEND=llama-server

# API base path: /v1 for llama-server, /api/v1 for Lemonade (set by installer)
LLM_API_BASE_PATH=/v1
LLM_API_URL=http://llama-server:8080

# AMD local inference runtime selected by the installer.
# Typical runtime / backend / location combinations (presumably matching
# AMD_INFERENCE_RUNTIME / _BACKEND / _LOCATION below — confirm with installer):
#   Linux AMD:   lemonade / rocm / container
#   Windows AMD: lemonade or llama-server / vulkan / host
AMD_INFERENCE_RUNTIME=
AMD_INFERENCE_BACKEND=
AMD_INFERENCE_LOCATION=
AMD_INFERENCE_PORT=8080
AMD_INFERENCE_SUPPORTED_BACKENDS=
AMD_INFERENCE_RUNTIME_MODE=
AMD_INFERENCE_MANAGED=

# ═══════════════════════════════════════════════════════════════════
# Cloud API Keys (only needed for cloud/hybrid modes)
# ═══════════════════════════════════════════════════════════════════
ANTHROPIC_API_KEY=
OPENAI_API_KEY=
TOGETHER_API_KEY=
MINIMAX_API_KEY=

# ═══════════════════════════════════════════════════════════════════
# LLM Settings (llama-server)
# ═══════════════════════════════════════════════════════════════════
# Optional model family profile for installer-driven tier selection.
#   qwen   = keep the current stable DreamServer defaults
#   gemma4 = always use Gemma 4 tier mappings when possible
#   auto   = prefer Gemma 4 on capable hardware, keep Qwen fallback for minimum/cloud paths
MODEL_PROFILE=qwen

# Model GGUF filename override (installer normally rewrites this from tier + MODEL_PROFILE)
GGUF_FILE=Qwen3.5-9B-Q4_K_M.gguf

# Context window size (tokens)
CTX_SIZE=16384

# GPU backend: nvidia or amd
GPU_BACKEND=nvidia

# Unified system RAM in GB (macOS Apple Silicon only — written by env-generator.sh).
# Passed to llama.cpp for the Metal backend to size the unified-memory pool.
# HOST_RAM_GB=24

# Model name override (installer normally rewrites this from tier + MODEL_PROFILE)
LLM_MODEL=qwen3.5-9b

# Installer recommendation metadata. During fast-start bootstrap, LLM_MODEL/GGUF_FILE
# may temporarily point at the small bootstrap model while these fields keep the
# full hardware-selected target visible in the dashboard.
MODEL_RECOMMENDED_MODEL=qwen3.5-9b
MODEL_RECOMMENDED_GGUF=Qwen3.5-9B-Q4_K_M.gguf
MODEL_RECOMMENDED_CONTEXT=16384
MODEL_RECOMMENDATION_SOURCE=catalog_fit_pre_download
MODEL_RECOMMENDATION_POLICY=context-aware-largest-capable-general-v1
MODEL_RECOMMENDATION_CONFIDENCE=high
# NOTE(review): the sample reason text below says "32K context" while
# MODEL_RECOMMENDED_CONTEXT above is 16384 — confirm which one the installer
# actually emits. The value is free text (spaces, parentheses) and is left
# unquoted as the installer writes it; it is not safe to `source` from a shell.
MODEL_RECOMMENDATION_REASON=Catalog fit (context-aware-largest-capable-general-v1): Qwen 3.5 9B needs about 8GB including context/KV, fits 8.0GB GPU VRAM on nvidia, and gives 32K context. Throughput requires a local benchmark after first launch.
# Semicolon-separated alternatives; each entry looks like model:context:size-GB (TODO confirm field meaning).
MODEL_RECOMMENDED_ALTERNATIVES=qwen3.5-9b-q4:32768:8;phi4-mini-q4:128000:4.38;deepseek-r1-7b-q4:32768:7
MODEL_PERFORMANCE_SOURCE=benchmark_required
MODEL_PERFORMANCE_LABEL=Benchmark after first launch
MODEL_RUNTIME_PROFILE=
MODEL_RUNTIME_PROFILE_LABEL=
MODEL_RUNTIME_PROFILE_SOURCE=
SYSTEM_RAM_GB=32

# Optional llama.cpp image override (installer sets this automatically for runtime profiles)
# LLAMA_SERVER_IMAGE=ghcr.io/ggml-org/llama.cpp:server-cuda-b9014

# Optional explicit fallback if a custom llama.cpp image tag disappears
# LLAMA_SERVER_IMAGE_FALLBACK=ghcr.io/ggml-org/llama.cpp:server-cuda-b9014

# Optional AMD Lemonade image override. Default is pinned in config/backends/amd.json.
# LEMONADE_SERVER_IMAGE=ghcr.io/lemonade-sdk/lemonade-server:v10.2.0

# ─── Architecture-specific overrides ────────────────────────────────────────────
# WHISPER_IMAGE is a forward-compat escape hatch — the default 0.9.0-rc.3-cuda
# tag is multi-arch (amd64 + arm64) and works on both. Only override if you
# need to pin a specific speaches commit.
# WHISPER_IMAGE=ghcr.io/speaches-ai/speaches:0.9.0-rc.3-cuda

# llama-server inference tuning (advanced)
# LLAMA_BATCH_SIZE=2048 # Batch size for prompt processing (higher = faster prefill)
# LLAMA_THREADS=4 # CPU threads for non-GPU work
# LLAMA_PARALLEL=1 # Concurrent request slots (increase for multi-user)
# LLAMA_ARG_FLASH_ATTN=auto # auto | on | off. Use on for long-context CUDA/Metal/SYCL tuning.
# LLAMA_ARG_CACHE_TYPE_K=f16 # KV cache key type: f16 (default) or q8_0 for lower VRAM/RAM.
# LLAMA_ARG_CACHE_TYPE_V=f16 # KV cache value type: f16 (default) or q8_0 for lower VRAM/RAM.
# LLAMA_ARG_N_CPU_MOE=25 # Optional MoE only: keep first N MoE expert layers on CPU/RAM.
# LLAMA_ARG_NO_CACHE_PROMPT=1 # Optional profile flag for long-context MoE runs.
# LLAMA_ARG_CHECKPOINT_EVERY_N_TOKENS=-1 # Optional profile flag to disable context checkpoints.
# LLAMA_ARG_SPEC_TYPE=draft-mtp # Optional: only for MTP-capable GGUFs and llama.cpp builds.
# LLAMA_ARG_SPEC_DRAFT_N_MAX=3 # Optional: speculative draft cap; benchmark per model/runtime.
# LLAMA_CPU_LIMIT=12.0 # Auto-generated: capped to CPUs actually exposed by Docker
# LLAMA_CPU_RESERVATION=2.0 # Auto-generated: reservation capped to the same ceiling
# LLAMA_START_PERIOD=240s # Healthcheck grace window for cold model load (bump for 70B-class on slow NVMe)

# ═══════════════════════════════════════════════════════════════════
# Ports — all overridable, defaults shown
# ═══════════════════════════════════════════════════════════════════
# Each value is the host (external) port; the note gives the container port
# it maps to. Only the first two are set here; the rest show their defaults.
OLLAMA_PORT=11434 # llama-server API (external → internal 8080)
WEBUI_PORT=3000 # Open WebUI (external → internal 8080)
# SEARXNG_PORT=8888 # SearXNG metasearch (external → internal 8080)
# PERPLEXICA_PORT=3004 # Perplexica deep research (external → internal 3000)
# WHISPER_PORT=9000 # Whisper STT (external → internal 8000)
# TTS_PORT=8880 # Kokoro TTS (external → internal 8880)
# N8N_PORT=5678 # n8n workflows (external → internal 5678)
# QDRANT_PORT=6333 # Qdrant vector DB (external → internal 6333)
# QDRANT_GRPC_PORT=6334 # Qdrant gRPC (external → internal 6334)
# EMBEDDINGS_PORT=8090 # Text embeddings (external → internal 80)
# LITELLM_PORT=4000 # LiteLLM gateway (external → internal 4000)
# OPENCLAW_PORT=7860 # OpenClaw agent (external → internal 18789)
# SHIELD_PORT=8085 # Privacy Shield (external → internal 8085)
# DASHBOARD_API_PORT=3002 # Dashboard API (external → internal 3002)
# DASHBOARD_PORT=3001 # Dashboard UI (external → internal 3001)
# COMFYUI_PORT=8188 # ComfyUI image gen (external → internal 8188)
# TOKEN_SPY_PORT=3005 # Token Spy usage monitor (external → internal 8080)
# OPENCODE_PORT=3003 # OpenCode IDE web UI (host service)

# ═══════════════════════════════════════════════════════════════════
# Optional Security
# ═══════════════════════════════════════════════════════════════════
# Host Agent bind address (leave empty for platform-aware default).
#   macOS/Windows: defaults to 127.0.0.1 (Docker Desktop routes via loopback).
#   Linux: auto-detects Docker bridge gateway IP (e.g. 172.17.0.1) so containers
#          can reach the agent while LAN devices cannot. Falls back to 127.0.0.1 if
#          detection fails. Set explicitly to override.
# DREAM_AGENT_BIND=

# Hostname/IP dashboard-api containers use to reach dream-host-agent.
# Docker Desktop installs set this to host.docker.internal because the
# host-agent stays loopback-only on macOS/Windows.
# DREAM_AGENT_HOST=host.docker.internal

# Dashboard API key (generate: openssl rand -hex 32)
# DASHBOARD_API_KEY=

# Dream host agent key (generate: openssl rand -hex 32) — auto-generated by installer; rotating breaks host-agent authentication
# DREAM_AGENT_KEY=

# Session-cookie signing secret. Used by dashboard-api to mint
# HMAC-signed `dream-session` cookies at magic-link redemption, and
# by the Hermes auth-proxy to verify them via /api/auth/verify-session.
# Generate with: openssl rand -hex 32. Rotating invalidates all
# currently-issued session cookies (forces re-redemption).
# DREAM_SESSION_SECRET=

# Privacy Shield API key (generate: openssl rand -hex 32) - auto-generated by installer; shared between dashboard-api and privacy-shield containers
# SHIELD_API_KEY=

# Qdrant API key (generate: openssl rand -hex 32)
# QDRANT_API_KEY=

# ═══════════════════════════════════════════════════════════════════
# Langfuse (LLM Observability) — optional, disabled by default
# ═══════════════════════════════════════════════════════════════════
LANGFUSE_PORT=3006
LANGFUSE_ENABLED=false
# Every empty value below is auto-generated during install.
# The note lives on its own line on purpose: an empty value followed by an
# inline comment (`KEY= # note`) is read by some env-file parsers as the
# literal value "# note", which would turn the comment text into the secret.
LANGFUSE_NEXTAUTH_SECRET=
LANGFUSE_SALT=
LANGFUSE_ENCRYPTION_KEY=
LANGFUSE_DB_PASSWORD=
LANGFUSE_CLICKHOUSE_PASSWORD=
LANGFUSE_REDIS_PASSWORD=
LANGFUSE_MINIO_ACCESS_KEY=
LANGFUSE_MINIO_SECRET_KEY=
LANGFUSE_PROJECT_PUBLIC_KEY=
LANGFUSE_PROJECT_SECRET_KEY=
LANGFUSE_INIT_PROJECT_ID=
LANGFUSE_INIT_USER_EMAIL=admin@dreamserver.local
LANGFUSE_INIT_USER_PASSWORD=

# ═══════════════════════════════════════════════════════════════════
# Multi-GPU Settings (auto-populated by installer for multi-GPU)
# ═══════════════════════════════════════════════════════════════════
# GPU_ASSIGNMENT_JSON_B64= # Base64-encoded GPU assignment JSON
# LLAMA_SERVER_GPU_UUIDS= # GPU UUIDs for llama-server (NVIDIA, comma-separated)
# LLAMA_SERVER_GPU_INDICES= # GPU indices for llama-server (AMD, comma-separated)
# LLAMA_ARG_SPLIT_MODE=none # none | layer (pipeline) | row (tensor/hybrid)
# LLAMA_ARG_TENSOR_SPLIT= # Proportional VRAM weights (e.g. 3,1)
# COMFYUI_GPU_UUID= # GPU UUID for ComfyUI (NVIDIA)
# WHISPER_GPU_UUID= # GPU UUID for Whisper (NVIDIA)
# EMBEDDINGS_GPU_UUID= # GPU UUID for embeddings (NVIDIA)
# ROCR_VISIBLE_DEVICES= # AMD GPU indices visible to ROCm (comma-separated)
# VIDEO_GID=44 # Host 'video' group GID (AMD)
# RENDER_GID=992 # Host 'render' group GID (AMD)
# HSA_OVERRIDE_GFX_VERSION= # AMD gfx version override for ROCm compatibility
# LLM_MODEL_SIZE_MB= # Approximate model size in MB

# ═══════════════════════════════════════════════════════════════════
# Optional — Voice, Web UI, n8n
# ═══════════════════════════════════════════════════════════════════
# Whisper model (tiny, base, small, medium, large-v3-turbo)
# WHISPER_MODEL=base

# Whisper STT model (HuggingFace repo ID). Installer auto-selects by GPU:
#   NVIDIA  → deepdml/faster-whisper-large-v3-turbo-ct2 (faster, ~1.5GB)
#   AMD/CPU → Systran/faster-whisper-base (~130MB)
# Override to use a different model — must then run `dream restart` and
# manually download the new model via:
#   curl -X POST --max-time 3600 http://localhost:9000/v1/models/{model-id}
# where {model-id} is the repo ID you set in AUDIO_STT_MODEL.
# AUDIO_STT_MODEL=Systran/faster-whisper-base

# Kokoro TTS voice name
# TTS_VOICE=en_US-lessac-medium

# Open WebUI authentication (true/false)
# WEBUI_AUTH=true

# Open WebUI PWA / branding name. Shown as the app title, page title, and PWA
# install label (the text next to the icon on a phone's home screen).
# WEBUI_NAME=Dream

# Optional Open WebUI public URL — used for share links, OAuth callbacks, and
# PWA install metadata. Leave empty for traditional localhost usage. After
# enabling dream-proxy + mDNS, set this to the reachable chat subdomain:
# http://chat.{device-name}.local ({device-name} = DREAM_DEVICE_NAME).
# Override with a Tailscale Funnel / Cloudflare Tunnel / custom domain URL
# when fronted by one.
# WEBUI_URL=http://chat.dream.local

# Device name used by mDNS announcements and as the hostname segment for
# proxy/headless URLs. Letters / digits / hyphens only — short and memorable
# wins (e.g. "dream", "kitchen", "lab").
# DREAM_DEVICE_NAME=dream

# Web search
# ENABLE_WEB_SEARCH=true
# WEB_SEARCH_ENGINE=searxng

# Timezone for the stack (consumed by Open WebUI and n8n)
# TIMEZONE=UTC

# n8n
# N8N_HOST=localhost # n8n hostname
# N8N_WEBHOOK_URL=http://localhost:5678 # n8n webhook URL (for external access)

# RAG embedding model
# EMBEDDING_MODEL=BAAI/bge-base-en-v1.5

# Image generation via ComfyUI + SDXL Lightning
# ENABLE_IMAGE_GENERATION=true

# ═══════════════════════════════════════════════════════════════════
# AMD-specific (only needed with GPU_BACKEND=amd)
# ═══════════════════════════════════════════════════════════════════
# VIDEO_GID=44 # `getent group video | cut -d: -f3`
# RENDER_GID=992 # `getent group render | cut -d: -f3`
# HSA_OVERRIDE_GFX_VERSION=11.5.1 # Required for Strix Halo gfx1151
# HSA_XNACK=1 # Extended memory access for large KV caches
# ROCBLAS_USE_HIPBLASLT=1 # 2-3x faster prompt processing via hipBLASLt
# AMDGPU_TARGET=gfx1151 # GPU arch for llama.cpp build (gfx1151, gfx1100, etc.)
# LLAMA_CPP_REF=b8763 # llama.cpp release tag to build (pin to avoid breakage)

# ═══════════════════════════════════════════════════════════════════
# Advanced
# ═══════════════════════════════════════════════════════════════════
# User and group IDs the containers run as
# UID=1000
# GID=1000

# Privacy Shield
# PII_CACHE_ENABLED=true
# PII_CACHE_SIZE=1000
# PII_CACHE_TTL=300
# LOG_LEVEL=info

# Bootstrap model for OpenClaw (a small model, so startup is instant)
# BOOTSTRAP_MODEL=qwen3:8b-q4_K_M

# Internal URLs used by the Dashboard API (normally Docker-internal, not user-facing)
# KOKORO_URL=http://tts:8880
# N8N_URL=http://n8n:5678

# Docker memory limit for llama-server
# LLAMA_SERVER_MEMORY_LIMIT=64G

# CPU core cap for llama-server (macOS/CPU-only mode — static default 8.0).
# Adjust this to control how many CPU cores llama-server may use.
# LLAMA_CPU_LIMIT=8.0

# ═══════════════════════════════════════════════════════════════════
# Dream Proxy (LAN reverse proxy on port 80)
# ═══════════════════════════════════════════════════════════════════
# Opt-in Caddy reverse proxy that fronts dashboard / chat / API on
# port 80, so `http://{device-name}.local` (no port) works from any LAN
# device ({device-name} = DREAM_DEVICE_NAME, e.g. `dream.local`). Without
# this, services bind to 127.0.0.1, so `dream.local` resolves but nothing
# answers on port 80.
# Enable: `dream enable dream-proxy`. See docs/DREAM-PROXY.md.
#
# DREAM_PROXY_PORT=80 # Host port (standard HTTP)
# DREAM_PROXY_TLS_PORT=443 # HTTPS port (deferred; HTTP only in v1)
# DREAM_PROXY_BIND=0.0.0.0 # LAN-exposed by default — that's the point

# ═══════════════════════════════════════════════════════════════════
# Hermes Agent (Remote-capable Chat + Generalist Agent)
# ═══════════════════════════════════════════════════════════════════
# Hermes Agent (Nous Research, MIT) — chat-first generalist agent with
# persistent memory, autonomous skill creation, and 70+ tools. Dream
# Server runs the upstream image pinned by SHA and points it at the
# local LLM. Browser UI ships in the upstream image (port 9119).
# Enable: `dream enable hermes`. With hermes-proxy enabled (the default
# pairing), end users reach Hermes at http://hermes.{device-name}.local
# (port 80, auth-gated by the proxy). Hermes itself is internal-only —
# there's no HERMES_PORT knob in this stack.
#
# HERMES_LLM_BASE_URL=http://llama-server:8080/v1 # OpenAI-compat endpoint
# HERMES_LLM_API_KEY=sk-dream-hermes-local # Dummy (OpenAI SDK requires non-empty)
# HERMES_LANGUAGE=en # UI language
# (No HERMES_MODEL_NAME — the model lives in /opt/data/config.yaml,
# not in env. Edit the file after first start to switch models.)
#
# Hermes is reached on the LAN via dream-hermes-proxy (Caddy sidecar) which
# verifies the dream-session cookie against dashboard-api's verify endpoint
# (forward_auth → HMAC signature check via DREAM_SESSION_SECRET).
# Direct Hermes port is NOT bound to the host — only the proxy is LAN-facing.
# HERMES_PROXY_PORT=9120 # LAN entry — what users actually browse to
# HERMES_PROXY_UPSTREAM=dream-hermes:9119 # Where the proxy forwards (internal DNS)
# DREAM_AUTH_UPSTREAM=dream-dashboard-api:3002 # Where forward_auth verifies sessions

# ═══════════════════════════════════════════════════════════════════
# Tailscale (Remote Access)
# ═══════════════════════════════════════════════════════════════════
# Optional remote-access via Tailscale's mesh VPN. Generate an auth
# key at https://login.tailscale.com/admin/settings/keys and paste
# below. Once joined the device is reachable as
# {hostname}.{tailnet}.ts.net from any other tailnet member
# ({hostname} = TS_HOSTNAME, {tailnet} = your tailnet name, e.g. tail-xxxxx).
#
# IMPORTANT: joining the tailnet alone isn't enough. For the device to
# actually answer HTTP at {hostname}.tail-xxxxx.ts.net, the host has to
# be listening for non-loopback traffic. That means:
#   1. dream-proxy is enabled (Caddy fielding port 80 + routing /chat,
#      /api/*, /auth/* to backends)
#   2. BIND_ADDRESS=0.0.0.0 in this .env (loopback-only services don't
#      accept tailnet packets even with network_mode: host)
# See docs/TAILSCALE.md for the full prerequisites and architecture.
#
# TS_AUTHKEY is consumed exactly once on first join. After that the
# tailscale daemon caches the node key in data/tailscale/, so you can
# rotate or delete the auth key in the Tailscale admin.
#
# TS_AUTHKEY=tskey-auth-xxxxxxxxxxxxxxxxxxxxxx
# TS_HOSTNAME= # Defaults to DREAM_DEVICE_NAME
# TS_EXTRA_ARGS= # Optional, e.g. --advertise-tags=tag:dream