# Ingestion Configuration
# Set to true to disable Langflow ingestion and use traditional OpenRAG processor
# If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete)
DISABLE_INGEST_WITH_LANGFLOW=false

# Set to false to skip ingesting OpenRAG sample docs during onboarding
# Default: true
INGEST_SAMPLE_DATA=true

# Default OpenRAG docs source for sample ingestion during onboarding.
# - url: crawl DEFAULT_DOCS_URL using URL ingestion flow
# - files: ingest files from OPENRAG_DOCUMENTS_PATH
DEFAULT_DOCS_INGEST_SOURCE=url

# URL used when DEFAULT_DOCS_INGEST_SOURCE=url
DEFAULT_DOCS_URL=https://www.openr.ag/

# How many levels deep to crawl when DEFAULT_DOCS_INGEST_SOURCE=url (default: 2)
# DEFAULT_DOCS_CRAWL_DEPTH=2

# Check for newer OpenRAG docs and update knowledge on every startup.
# Default: false (manual fetch remains available in UI)
FETCH_OPENRAG_DOCS_AT_STARTUP=false

# Langflow HTTP timeout configuration (in seconds)
# For large documents (300+ pages), ingestion can take 30+ minutes
# Increase these values if you experience timeouts with very large PDFs
# Default: 2400 seconds (40 minutes) total timeout, 30 seconds connection timeout
# LANGFLOW_TIMEOUT=2400
# LANGFLOW_CONNECT_TIMEOUT=30

# OPTIONAL: Per-file processing timeout for document ingestion tasks (in seconds)
# Should be >= LANGFLOW_TIMEOUT to allow long-running ingestion to complete
# Default: 3600 seconds (60 minutes) total timeout
# INGESTION_TIMEOUT=3600

# OPTIONAL: OpenSearch JWT TTL for per-user document security tokens (in seconds)
# Default: INGESTION_TIMEOUT + 300 seconds
# OPENRAG_OPENSEARCH_JWT_TTL=3900

# OPTIONAL: Maximum number of files to upload / ingest (in batch) per task when adding knowledge via folder
# Default: 25
# UPLOAD_BATCH_SIZE=25

# Langflow secret key. Generate one as described at
# https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
LANGFLOW_SECRET_KEY=

# Flow IDs for the chat and ingestion flows
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
LANGFLOW_URL_INGEST_FLOW_ID=72c3d17c-2dac-4a73-b48a-6518473d7830
# Alternative ingest flow using docling. To use it, replace the active
# LANGFLOW_INGEST_FLOW_ID value above instead of enabling a second assignment
# (duplicate keys are resolved differently by different env-file loaders).
# LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c

# Base64 AES-256-GCM Master Encryption Key
# If not provided, OpenRAG will automatically generate a secure 32-byte key when you run the TUI or CLI wizard.
# If completely unset during normal execution, the master encryption system will remain inactive.
# This key is required to decrypt API credentials stored in local databases and provider indices.
OPENRAG_ENCRYPTION_KEY=

# Identifier for AAD tenant binding.
# Used to verify that encrypted files belong to the expected tenant.
# Default: openrag
OPENRAG_TENANT_ID=openrag

# If set to true, the application will fail to start if any critical
# prerequisites are not met (e.g. master encryption key missing).
# Default: false
OPENRAG_ENFORCE_PREREQUISITES=false

# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
# container startup from this value. Do not commit real secrets.
# Must match the hashed password in secureconfig; must be changed for a secure deployment!
# NOTE: if you set this by hand, it must be a complex password:
# The password must contain at least 8 characters, and must contain at least one uppercase letter, one lowercase letter, one digit, and one special character.
OPENSEARCH_PASSWORD=

# Path to persist Langflow database and state (flows, credentials, settings)
# Without this volume, flow edits will be lost on container restart
# Default: ./langflow-data
LANGFLOW_DATA_PATH=./langflow-data

# Langflow database URL. Defaults to SQLite stored in LANGFLOW_DATA_PATH.
# Override with a PostgreSQL URL for production deployments, e.g.:
# LANGFLOW_DATABASE_URL=postgresql://user:pass@host:5432/langflow
LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db

# OpenSearch Connection
OPENSEARCH_HOST=opensearch
OPENSEARCH_PORT=9200
# Optional: Use a different OpenSearch endpoint for Langflow
# LANGFLOW_OPENSEARCH_HOST=opensearch
# LANGFLOW_OPENSEARCH_PORT=9200
OPENSEARCH_USERNAME=admin

# OpenSearch index name for storing documents
# Default: documents
# Change this if you want to use a different index name or avoid conflicts
OPENSEARCH_INDEX_NAME=documents

# IBM AMS Authentication (IBM Watsonx Data embedded mode)
# Set IBM_AUTH_ENABLED=true to authenticate via the ibm-openrag-session cookie
# instead of Google OAuth. The raw IBM JWT is also passed directly to OpenSearch.
# When enabled, GOOGLE_OAUTH_CLIENT_ID/SECRET are not required for login.
IBM_AUTH_ENABLED=false

# URL to fetch IBM's JWT public key (used to validate ibm-openrag-session tokens).
IBM_JWT_PUBLIC_KEY_URL=

# Cookie name set by Traefik after successful IBM AMS authentication (default: ibm-openrag-session).
IBM_SESSION_COOKIE_NAME=ibm-openrag-session

# Header injected by Traefik on every forwarded request containing the OpenSearch Basic credentials
# (base64-encoded username:password). Override if your proxy uses a different header name.
IBM_CREDENTIALS_HEADER=X-IBM-LH-Credentials

# Optional: Use these to override authentication for local development/calls when IBM_AUTH_ENABLED=true
IBM_USERNAME=
IBM_PASSWORD=

# Set to true to disable OpenSearch security roles/mapping setup during onboarding/startup
IBM_AUTH_DEV_MODE=false

# Google OAuth credentials; create them at https://console.cloud.google.com/apis/credentials
GOOGLE_OAUTH_CLIENT_ID=
GOOGLE_OAUTH_CLIENT_SECRET=

# Azure app registration credentials for SharePoint/OneDrive
MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=

# AWS Access Key ID and Secret Access Key with access to your S3 instance
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=

# OPTIONAL: DNS name routable from Google (etc.) to handle continuous ingest
# (something like ngrok works). Setting this enables continuous ingestion.
WEBHOOK_BASE_URL=

# Model Provider API Keys
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
OLLAMA_ENDPOINT=
WATSONX_API_KEY=
WATSONX_ENDPOINT=
WATSONX_PROJECT_ID=

# Tune the ibm-watsonx-ai SDK's native 429 retry used during ingestion embedding.
# OPTIONAL: Max retries = retry attempts (default 10).
# WATSONX_MAX_RETRIES=10
# OPTIONAL: Delay time = base backoff seconds for the jittered exponential backoff (default 0.5).
# WATSONX_DELAY_TIME=0.5

# LLM Provider configuration. Providers can be "anthropic", "watsonx", "ibm" or "ollama".
# NOTE(review): OPENAI_API_KEY is defined above but "openai" is not listed here;
# confirm whether it is also a valid LLM_PROVIDER value.
LLM_PROVIDER=
LLM_MODEL=

# OPTIONAL: Custom system prompt for the chat agent. Overrides the default prompt baked into the Langflow flow.
# SYSTEM_PROMPT=

# Embedding provider configuration. Providers can be "watsonx", "ibm" or "ollama".
EMBEDDING_PROVIDER=
EMBEDDING_MODEL=

# Knowledge ingestion tuning
# OPTIONAL: Document chunk size in tokens (default configured in Langflow flow)
# CHUNK_SIZE=
# OPTIONAL: Overlap between chunks in tokens (default configured in Langflow flow)
# CHUNK_OVERLAP=
# OPTIONAL: Enable OCR for scanned PDFs and images (default: false)
# OCR_ENABLED=false
# OPTIONAL: Generate text descriptions for images found in documents (default: false)
# PICTURE_DESCRIPTIONS_ENABLED=false

# Port Configuration
# Change these values if you have port conflicts with other services on your system
# Default ports: Frontend=3000, Langflow=7860
FRONTEND_PORT=3000
LANGFLOW_PORT=7860

# Internal URL the OpenRAG backend uses to communicate with Langflow.
# When running via docker-compose this is set automatically to http://langflow:7860.
# Override only when running the backend outside Docker or pointing at a remote Langflow instance.
# LANGFLOW_URL=http://localhost:7860

# OPTIONAL: URL used for the OpenRAG link to Langflow in the UI
# If you change LANGFLOW_PORT, also update LANGFLOW_PUBLIC_URL to match the new port (e.g., http://localhost:7861)
LANGFLOW_PUBLIC_URL=

# OPTIONAL: Override the full docling-serve URL (e.g., for remote instances)
# If not set, auto-detects host and uses port 5001
# DOCLING_SERVE_URL=http://my-docling-server:5001
# DOCLING_SERVE_VERIFY_SSL=true

# OPTIONAL: OCR engine used by docling for scanned documents. Options: "easyocr" (default), "tesseract", "tesseract_cli"
# DOCLING_OCR_ENGINE=easyocr

# OPTIONAL: Override host for docling service (for special networking setups)
# HOST_DOCKER_INTERNAL=host.containers.internal

# Langflow auth
LANGFLOW_AUTO_LOGIN=False
LANGFLOW_SUPERUSER=
LANGFLOW_SUPERUSER_PASSWORD=
LANGFLOW_NEW_USER_IS_ACTIVE=False
LANGFLOW_ENABLE_SUPERUSER_CLI=False

# ─────────────────────────────────────────────────────────────────────────────
# Langflow Container Variables
# These variables are passed directly to the Langflow container by docker-compose.
# They are NOT read by the OpenRAG Python backend.
#
# Note: OPENSEARCH_URL is automatically constructed by docker-compose from
# LANGFLOW_OPENSEARCH_HOST and LANGFLOW_OPENSEARCH_PORT — do not set it manually.
# ─────────────────────────────────────────────────────────────────────────────

# Langfuse tracing (optional)
# Get keys from https://cloud.langfuse.com or your self-hosted instance
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
# Leave empty for Langfuse Cloud, or set for self-hosted (e.g., http://localhost:3002)
# Example of self hosting outside of the docker network: LANGFUSE_HOST=http://host.docker.internal:3000
LANGFUSE_HOST=

# OPTIONAL: A development-mode only configuration value that allows additional origins (domains/hosts) to make requests to the Next.js dev server (next dev).
# - A list of comma-separated hostnames may be specified
# - e.g. 9.46.110.49,10.21.103.227,openrag.example.com
NEXT_ALLOWED_DEV_ORIGINS=

# OPTIONAL: Number of worker processes for concurrent request handling
# - Increase for higher throughput; but, factor in CPU / memory constraints
# Descriptions are kept on their own lines so that uncommenting a key never
# leaves a trailing comment on the value line.
# Backend ingestion workers (default: min(4, CPU_COUNT // 2))
# MAX_WORKERS=4
# Langflow workers (default: 1)
# LANGFLOW_WORKERS=2
# Docling workers (default: 1)
# DOCLING_WORKERS=2

# OPTIONAL: Set the logging level for the Backend service
# - Supported values: DEBUG, INFO, WARNING, ERROR, CRITICAL
# - Default: INFO
# NOTE: an active LOG_LEVEL= entry also appears later in this file; change that
# entry rather than enabling a second assignment here.
# LOG_LEVEL=DEBUG

# OPTIONAL: Disable colored log output (https://no-color.org)
# - When set (to any value), ANSI color codes are omitted from log output
# NO_COLOR=1

# OPTIONAL: Enable or disable HTTP access logging events
# - e.g. INFO: 127.0.0.1:45132 - "GET /tasks HTTP/1.1" 200 OK
# - Default: true
# ACCESS_LOG=false

# OpenRAG Configuration
# The version tag for OpenRAG Docker images (e.g., 'latest', 'v1.0.0').
# This is automatically managed by the TUI during updates to match the Python package version,
# but can be pinned here to lock the deployment to a specific version when running OpenRAG with Make commands.
OPENRAG_VERSION=latest

# Path configuration (override defaults for custom directory layouts)
# OPENRAG_DATA_PATH=./data
# OPENRAG_KEYS_PATH=./keys
# OPENRAG_CONFIG_PATH=./config
# OPENRAG_FLOWS_PATH=./flows
# Flows backup path; defaults to /backup
# OPENRAG_FLOWS_BACKUP_PATH=

# Comma-separated list of host directories the TUI watches for documents to ingest.
# The TUI derives OPENRAG_DOCUMENTS_PATH (below) from the first entry for Docker's volume mount.
# OPENRAG_DOCUMENTS_PATHS=$HOME/.openrag/documents
#
# Single directory path mounted into the OpenRAG container by docker-compose.
# Set automatically by the TUI from OPENRAG_DOCUMENTS_PATHS; override only if running without the TUI.
# OPENRAG_DOCUMENTS_PATH=./openrag-documents

# Langflow Configuration
# API Key for authenticating with Langflow.
# If not provided, OpenRAG will automatically generate one using the superuser credentials
# (LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD).
LANGFLOW_KEY=

# Number of retry attempts when generating the Langflow API key (default: 15)
LANGFLOW_KEY_RETRIES=

# Delay in seconds between API key generation retry attempts (default: 2.0)
LANGFLOW_KEY_RETRY_DELAY=

# Version of Langflow (informational/future use)
# LANGFLOW_VERSION=

# Logging and Service
# Format for log messages (default: json)
LOG_FORMAT=

# Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
# NOTE: LOG_LEVEL is also documented (commented out) earlier in this file; keep
# a single active assignment, since duplicate keys are resolved differently by
# different env-file loaders.
LOG_LEVEL=

# Service name used in logs (default: openrag)
SERVICE_NAME=openrag

# Secret key for session management (auto-generated if not provided)
SESSION_SECRET=

# OPTIONAL: JWT signing key for token issuance and verification.
# If not set, OpenRAG generates an RSA key pair (private_key.pem / public_key.pem)
# under OPENRAG_KEYS_PATH at startup and uses RS256 signing.
# Accepted values:
#   - RSA private key in PEM format (PKCS#8 or PKCS#1) → RS256
#   - Plain string → HS256 (symmetric; JWKS endpoint returns an empty key set)
# When set, the on-disk key files are never read or created.
#
# PEM keys must be set as a double-quoted single-line value with literal \n
# separators. python-dotenv expands \n in double-quoted strings to real newlines,
# which is required by the PEM parser. Multiline values break Make's .env parsing,
# and single-quoted or unquoted values do not expand \n.
#
# Generate and format a test key with:
#   openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 2>/dev/null \
#     | awk '{printf "%s\\n", $0}' | sed 's/^/"/' | sed 's/$/"/'
#
# JWT_SIGNING_KEY=

# OPTIONAL: The OpenRAG application execution mode or deployment target.
# - Used to configure the operation of the application based on its execution context.
# Values:
# - "oss": Open Source Software (default)
# - "saas": Public Cloud
# - "on_prem": Private Cloud
OPENRAG_RUN_MODE=oss

# Permission/identity cache backend. Only "memory" is currently wired.
# When CACHE_BACKEND=memory the RBAC permission cache and OAuth-subject
# cache are per-process, so OpenRAG must run with UVICORN_WORKERS=1 (and
# replicaCount=1 in helm). Anything >1 hard-fails at startup until cache
# is shared across processes (planned: Redis).
CACHE_BACKEND=memory
# UVICORN_WORKERS=1

# Storage mode. Default `db` — workspace config + chat history live in
# the SQL DB only; no `config.yaml`, `conversations.json`, or
# `session_ownership.json` are ever written. Pre-existing JSON files
# are imported once on first boot, then ignored.
#   - db (default): DB-only, no JSON written
#   - hybrid: DB + JSON dual-write (Phase B fallback)
#   - files: legacy JSON-only path; DB untouched
OPENRAG_STORAGE_MODE=db

# RBAC kill switch. Default OFF — the system behaves like the pre-RBAC
# release: any authenticated user has full access; API-key role
# overrides are also bypassed. Set to `true` to turn the permissions
# system on for multi-user deployments. Available in all run modes.
# OPENRAG_RBAC_ENFORCE=true