# Ingestion Configuration
# Set to true to disable Langflow ingestion and use traditional OpenRAG processor
# If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete)
DISABLE_INGEST_WITH_LANGFLOW=false

# Set to false to skip ingesting OpenRAG sample docs during onboarding
# Default: true
INGEST_SAMPLE_DATA=true

# Default OpenRAG docs source for sample ingestion during onboarding.
# - url: crawl DEFAULT_DOCS_URL using URL ingestion flow
# - files: ingest files from OPENRAG_DOCUMENTS_PATH
DEFAULT_DOCS_INGEST_SOURCE=url

# URL used when DEFAULT_DOCS_INGEST_SOURCE=url
DEFAULT_DOCS_URL=https://www.openr.ag/

# Check for newer OpenRAG docs and update knowledge on every startup.
# Default: false (manual fetch remains available in UI)
FETCH_OPENRAG_DOCS_AT_STARTUP=false

# Langflow HTTP timeout configuration (in seconds)
# For large documents (300+ pages), ingestion can take 30+ minutes
# Increase these values if you experience timeouts with very large PDFs
# Default: 2400 seconds (40 minutes) total timeout, 30 seconds connection timeout
# LANGFLOW_TIMEOUT=2400
# LANGFLOW_CONNECT_TIMEOUT=30

# OPTIONAL: Per-file processing timeout for document ingestion tasks (in seconds)
# Should be >= LANGFLOW_TIMEOUT to allow long-running ingestion to complete
# Default: 3600 seconds (60 minutes) total timeout
# INGESTION_TIMEOUT=3600

# OPTIONAL: Maximum number of files to upload / ingest (in batch) per task when adding knowledge via folder
# Default: 25
# UPLOAD_BATCH_SIZE=25

# make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
LANGFLOW_SECRET_KEY=

# flow ids for chat and ingestion flows
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
LANGFLOW_URL_INGEST_FLOW_ID=72c3d17c-2dac-4a73-b48a-6518473d7830
# Ingest flow using docling
# LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c

# Base64 AES-256-GCM Master Encryption Key
# If not provided, OpenRAG will automatically generate a secure 32-byte key when you run the TUI or CLI wizard.
# If completely unset during normal execution, the master encryption system will remain inactive.
# This key is required to decrypt API credentials stored in local databases and provider indices.
OPENRAG_ENCRYPTION_KEY=

# Identifier for AAD tenant binding.
# Used to verify that encrypted files belong to the expected tenant.
# Default: openrag
OPENRAG_TENANT_ID=openrag

# If set to true, the application will fail to start if any critical
# prerequisites are not met (e.g. master encryption key missing).
# Default: false
OPENRAG_ENFORCE_PREREQUISITES=false

# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
# container startup from this value. Do not commit real secrets.
# must match the hashed password in secureconfig, must change for secure deployment!!!
# NOTE: if you set this by hand, it must be a complex password:
# The password must contain at least 8 characters, and must contain at least one uppercase letter, one lowercase letter, one digit, and one special character.
OPENSEARCH_PASSWORD=

# Path to persist OpenSearch data (indices, documents, cluster state)
# Default: ./opensearch-data
OPENSEARCH_DATA_PATH=./opensearch-data

# Path to persist Langflow database and state (flows, credentials, settings)
# Without this volume, flow edits will be lost on container restart
# Default: ./langflow-data
LANGFLOW_DATA_PATH=./langflow-data

# Langflow database URL. Defaults to SQLite stored in LANGFLOW_DATA_PATH.
# Override with a PostgreSQL URL for production deployments, e.g.:
# LANGFLOW_DATABASE_URL=postgresql://user:pass@host:5432/langflow
LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db

# OpenSearch Connection
OPENSEARCH_HOST=opensearch
OPENSEARCH_PORT=9200
# Optional: Use a different OpenSearch endpoint for Langflow
# LANGFLOW_OPENSEARCH_HOST=opensearch
# LANGFLOW_OPENSEARCH_PORT=9200
OPENSEARCH_USERNAME=admin

# OpenSearch index name for storing documents
# Default: documents
# Change this if you want to use a different index name or avoid conflicts
OPENSEARCH_INDEX_NAME=documents

# IBM AMS Authentication (IBM Watsonx Data embedded mode)
# Set IBM_AUTH_ENABLED=true to authenticate via the ibm-openrag-session cookie
# instead of Google OAuth. The raw IBM JWT is also passed directly to OpenSearch.
# When enabled, GOOGLE_OAUTH_CLIENT_ID/SECRET are not required for login.
IBM_AUTH_ENABLED=false
# URL to fetch IBM's JWT public key (used to validate ibm-openrag-session tokens).
IBM_JWT_PUBLIC_KEY_URL=
# Cookie name set by Traefik after successful IBM AMS authentication (default: ibm-openrag-session).
IBM_SESSION_COOKIE_NAME=ibm-openrag-session
# Header injected by Traefik on every forwarded request containing the OpenSearch Basic credentials
# (base64-encoded username:password). Override if your proxy uses a different header name.
IBM_CREDENTIALS_HEADER=X-IBM-LH-Credentials

# make here https://console.cloud.google.com/apis/credentials
GOOGLE_OAUTH_CLIENT_ID=
GOOGLE_OAUTH_CLIENT_SECRET=

# Azure app registration credentials for SharePoint/OneDrive
MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=

# AWS Access Key ID and Secret Access Key with access to your S3 instance
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=

# OPTIONAL: DNS routable from google (etc.) to handle continuous ingest (something like ngrok works). This enables continuous ingestion
WEBHOOK_BASE_URL=

# Model Provider API Keys
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
OLLAMA_ENDPOINT=
WATSONX_API_KEY=
WATSONX_ENDPOINT=
WATSONX_PROJECT_ID=

# LLM Provider configuration. Providers can be "anthropic", "watsonx", "ibm" or "ollama".
LLM_PROVIDER=
LLM_MODEL=

# Embedding provider configuration. Providers can be "watsonx", "ibm" or "ollama".
EMBEDDING_PROVIDER=
EMBEDDING_MODEL=

# Port Configuration
# Change these values if you have port conflicts with other services on your system
# Default ports: Frontend=3000, Langflow=7860
FRONTEND_PORT=3000
LANGFLOW_PORT=7860

# OPTIONAL url for openrag link to langflow in the UI
# If you change LANGFLOW_PORT, also update LANGFLOW_PUBLIC_URL to match the new port (e.g., http://localhost:7861)
LANGFLOW_PUBLIC_URL=

# OPTIONAL: Override the full docling-serve URL (e.g., for remote instances)
# If not set, auto-detects host and uses port 5001
# DOCLING_SERVE_URL=http://my-docling-server:5001

# OPTIONAL: Override host for docling service (for special networking setups)
# HOST_DOCKER_INTERNAL=host.containers.internal

# Langflow auth
LANGFLOW_AUTO_LOGIN=False
LANGFLOW_SUPERUSER=
LANGFLOW_SUPERUSER_PASSWORD=
LANGFLOW_NEW_USER_IS_ACTIVE=False
LANGFLOW_ENABLE_SUPERUSER_CLI=False

# Langfuse tracing (optional)
# Get keys from https://cloud.langfuse.com or your self-hosted instance
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
# Leave empty for Langfuse Cloud, or set for self-hosted (e.g., http://localhost:3002)
# Example of self hosting outside of the docker network: LANGFUSE_HOST=http://host.docker.internal:3000
LANGFUSE_HOST=

# OPTIONAL: A development-mode only configuration value that allows additional origins (domains/hosts) to make requests to the Next.js dev server (next dev).
# - A list of comma-separated hostnames may be specified
# - e.g. 9.46.110.49,10.21.103.227,openrag.example.com
NEXT_ALLOWED_DEV_ORIGINS=

# OPTIONAL: Number of worker processes for concurrent request handling
# - Increase for higher throughput; but, factor in CPU / memory constraints
# MAX_WORKERS=4 # Backend ingestion workers (default: min(4, CPU_COUNT // 2))
# LANGFLOW_WORKERS=2 # Langflow workers (default: 1)
# DOCLING_WORKERS=2 # Docling workers (default: 1)

# OPTIONAL: Set the logging level for the Backend service
# - Supported values: DEBUG, INFO, WARNING, ERROR, CRITICAL
# - Default: INFO
# LOG_LEVEL=DEBUG

# OPTIONAL: Disable colored log output (https://no-color.org)
# - When set (to any value), ANSI color codes are omitted from log output
# NO_COLOR=1

# OPTIONAL: Enable or disable HTTP access logging events
# - e.g. INFO: 127.0.0.1:45132 - "GET /tasks HTTP/1.1" 200 OK
# - Default: true
# ACCESS_LOG=false

# OpenRAG Configuration
# The version tag for OpenRAG Docker images (e.g., 'latest', 'v1.0.0').
# This is automatically managed by the TUI during updates to match the Python package version,
# but can be pinned here to lock the deployment to a specific version when running OpenRAG with Make commands.
OPENRAG_VERSION=latest

# The local documents path for ingestion.
OPENRAG_DOCUMENTS_PATH=./openrag-documents

# Langflow Configuration
# API Key for authenticating with Langflow.
# If not provided, OpenRAG will automatically generate one using the superuser credentials
# (LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD).
LANGFLOW_KEY=
# Number of retry attempts when generating the Langflow API key (default: 15)
LANGFLOW_KEY_RETRIES=
# Delay in seconds between API key generation retry attempts (default: 2.0)
LANGFLOW_KEY_RETRY_DELAY=
# Version of Langflow (informational/future use)
# LANGFLOW_VERSION=

# Logging and Service
# Format for log messages (default: json)
LOG_FORMAT=
# Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
LOG_LEVEL=
# Service name used in logs (default: openrag)
SERVICE_NAME=openrag
# Secret key for session management (auto-generated if not provided)
SESSION_SECRET=