# Ingestion Configuration
# Set to true to disable Langflow ingestion and use traditional OpenRAG processor
# If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete)
DISABLE_INGEST_WITH_LANGFLOW=false

# Set to false to skip ingesting OpenRAG sample docs during onboarding
# Default: true
INGEST_SAMPLE_DATA=true

# Default OpenRAG docs source for sample ingestion during onboarding.
# - url: crawl DEFAULT_DOCS_URL using URL ingestion flow
# - files: ingest files from OPENRAG_DOCUMENTS_PATH
DEFAULT_DOCS_INGEST_SOURCE=url

# URL used when DEFAULT_DOCS_INGEST_SOURCE=url
DEFAULT_DOCS_URL=https://www.openr.ag/
# How many levels deep to crawl when DEFAULT_DOCS_INGEST_SOURCE=url (default: 2)
# DEFAULT_DOCS_CRAWL_DEPTH=2


# Check for newer OpenRAG docs and update knowledge on every startup.
# Default: false (manual fetch remains available in UI)
FETCH_OPENRAG_DOCS_AT_STARTUP=false

# Langflow HTTP timeout configuration (in seconds)
# For large documents (300+ pages), ingestion can take 30+ minutes
# Increase these values if you experience timeouts with very large PDFs
# Default: 2400 seconds (40 minutes) total timeout, 30 seconds connection timeout
# LANGFLOW_TIMEOUT=2400
# LANGFLOW_CONNECT_TIMEOUT=30

# OPTIONAL: Per-file processing timeout for document ingestion tasks (in seconds)
# Should be >= LANGFLOW_TIMEOUT to allow long-running ingestion to complete
# Default: 3600 seconds (60 minutes) total timeout
# INGESTION_TIMEOUT=3600

# OPTIONAL: OpenSearch JWT TTL for per-user document security tokens (in seconds)
# Default: INGESTION_TIMEOUT + 300 seconds
# OPENRAG_OPENSEARCH_JWT_TTL=3900

# OPTIONAL: Maximum number of files to upload / ingest (in batch) per task when adding knowledge via folder
# Default: 25
# UPLOAD_BATCH_SIZE=25

# Langflow secret key. Generate one as described at https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
LANGFLOW_SECRET_KEY=

# flow ids for chat and ingestion flows
LANGFLOW_CHAT_FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
LANGFLOW_INGEST_FLOW_ID=5488df7c-b93f-4f87-a446-b67028bc0813
LANGFLOW_URL_INGEST_FLOW_ID=72c3d17c-2dac-4a73-b48a-6518473d7830
# Ingest flow using docling
# LANGFLOW_INGEST_FLOW_ID=1402618b-e6d1-4ff2-9a11-d6ce71186915
NUDGES_FLOW_ID=ebc01d31-1976-46ce-a385-b0240327226c

# Backend bind port for local `make backend`.
OPENRAG_BACKEND_PORT=8000

# URL Docker-hosted services use to call the backend in host-backend mode.
# If unset, auto-derived from OPENRAG_BACKEND_PORT. Only set this explicitly
# if you need to override the auto-derived value (e.g., custom hostname).
# OPENRAG_BACKEND_INTERNAL_URL=http://openrag-backend:8000

# --- Backend ingestion-callback proxy router --------------------------------
# When enabled, a tiny standalone uvicorn app is started in the same process as
# the backend (its own port) that proxies ONLY the Langflow ingest callback
# (POST /internal/ingest/chunks) to the real backend. Langflow is then pointed
# at this router instead of the backend internal URL, narrowing the surface
# Langflow can reach. Disabled by default.
# OPENRAG_BACKEND_ROUTER_ENABLE=false
# Port the router listens on (inside the backend container/pod).
# OPENRAG_BACKEND_ROUTER_PORT=8100
# OPENRAG_BACKEND_ROUTER_HOST=0.0.0.0
# Base URL Langflow calls back to. Defaults to the backend host on the router
# port (derived from OPENRAG_BACKEND_INTERNAL_URL); set only if the router is
# fronted by a separate Service/ingress.
# OPENRAG_BACKEND_ROUTER_URL=
# Upstream the router forwards callbacks to. The router is co-located with the
# backend (same process), so it defaults to loopback on the backend's port
# (port derived from OPENRAG_BACKEND_INTERNAL_URL, host forced to 127.0.0.1).
# Set only to override the upstream entirely.
# OPENRAG_BACKEND_ROUTER_UPSTREAM_URL=http://127.0.0.1:8000

# Base64 AES-256-GCM Master Encryption Key
# If not provided, OpenRAG will automatically generate a secure 32-byte key when you run the TUI or CLI wizard.
# If completely unset during normal execution, the master encryption system will remain inactive.
# This key is required to decrypt API credentials stored in local databases and provider indices.
OPENRAG_ENCRYPTION_KEY=

# Identifier for AAD tenant binding.
# Used to verify that encrypted files belong to the expected tenant.
# Default: openrag
OPENRAG_TENANT_ID=openrag

# If set to true, the application will fail to start if any critical
# prerequisites are not met (e.g. master encryption key missing).
# Default: false
OPENRAG_ENFORCE_PREREQUISITES=false

# Set a strong admin password for OpenSearch; a bcrypt hash is generated at
# container startup from this value. Do not commit real secrets.
# Must match the hashed password in securityconfig; change it for any secure deployment.
# NOTE: if you set this by hand, it must be a complex password:
# The password must contain at least 8 characters, and must contain at least one uppercase letter, one lowercase letter, one digit, and one special character.
OPENSEARCH_PASSWORD=

# Path to persist Langflow database and state (flows, credentials, settings)
# Without this volume, flow edits will be lost on container restart
# Default: ./langflow-data
LANGFLOW_DATA_PATH=./langflow-data

# Langflow database URL. Defaults to SQLite stored in LANGFLOW_DATA_PATH.
# Override with a PostgreSQL URL for production deployments, e.g.:
#   LANGFLOW_DATABASE_URL=postgresql://user:pass@host:5432/langflow
LANGFLOW_DATABASE_URL=sqlite:////app/langflow-data/langflow.db

# OpenSearch Connection
# OPENSEARCH_HOST=opensearch
# OPENSEARCH_PORT=9200
# OPENSEARCH_DASHBOARDS_PORT=5601
# OPENSEARCH_PERF_PORT=9600
OPENSEARCH_USERNAME=admin
# Disable the OpenSearch node-count readiness gate for single-node local setups.
OPENSEARCH_NODE_COUNT_CHECK_ENABLED=false

# OpenSearch index name for storing documents
# Default: documents
# Change this if you want to use a different index name or avoid conflicts.
# Must start with "documents" or "knowledge_filters" — the OpenSearch security
# role (securityconfig/roles.yml) only grants search access to indices
# matching those patterns; any other name causes a 403 AuthorizationException.
OPENSEARCH_INDEX_NAME=documents

# OpenSearch index layout for newly-created OpenRAG indices
# Shard count cannot be changed on an existing index without reindexing.
# Local/dev OpenSearch is single-node, so default to 1 shard / 0 replicas here
# (production deployments default to 2/2 in code; override as needed).
OPENRAG_OPENSEARCH_NUMBER_OF_SHARDS=1
OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS=0

# Reconcile existing OpenRAG indices' replica counts to
# OPENRAG_OPENSEARCH_NUMBER_OF_REPLICAS on startup. Defaults to true in code
# (for multi-node production); disabled here since local/dev is single-node.
OPENRAG_ENSURE_INDEX_REPLICAS_ON_STARTUP=false

# IBM AMS Authentication (IBM Watsonx Data embedded mode)
# Set IBM_AUTH_ENABLED=true to authenticate via the ibm-openrag-session cookie
# instead of Google OAuth. The raw IBM JWT is also passed directly to OpenSearch.
# When enabled, GOOGLE_OAUTH_CLIENT_ID/SECRET are not required for login.
IBM_AUTH_ENABLED=false
# URL to fetch IBM's JWT public key (used to validate ibm-openrag-session tokens).
IBM_JWT_PUBLIC_KEY_URL=
# Cookie name set by Traefik after successful IBM AMS authentication (default: ibm-openrag-session).
IBM_SESSION_COOKIE_NAME=ibm-openrag-session
# Header injected by Traefik on every forwarded request containing the OpenSearch Basic credentials
# (base64-encoded username:password). Override if your proxy uses a different header name.
IBM_CREDENTIALS_HEADER=X-IBM-LH-Credentials
# Optional: Use these to override authentication for local development/calls when IBM_AUTH_ENABLED=true
IBM_USERNAME=
IBM_PASSWORD=
# Set to true to disable OpenSearch security roles/mapping setup during onboarding/startup
IBM_AUTH_DEV_MODE=false

# Google OAuth client credentials. Create them at https://console.cloud.google.com/apis/credentials
GOOGLE_OAUTH_CLIENT_ID=
GOOGLE_OAUTH_CLIENT_SECRET=

# Azure app registration credentials for SharePoint/OneDrive
MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=

# AWS Access Key ID and Secret Access Key with access to your S3 instance
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=

# --- Azure Blob Storage connector (Enterprise/SaaS) ------------------------
# Enterprise/SaaS-only bucket connector, gated by IBM_AUTH_ENABLED (like the
# AWS S3 / IBM COS connectors). Credentials are normally entered per-connection
# in the UI; these env vars are OPTIONAL fallbacks / defaults for the config
# dialog. Two auth modes are supported: a connection string, OR account name +
# key (+ optional custom blob endpoint).
#
# Feature kill switch for the Azure Blob connector (default: true = enabled).
# Independent of IBM_AUTH_ENABLED: set to false to force-hide the connector in
# the UI even when IBM auth is on. When true, the connector still requires the
# Enterprise/SaaS gate (IBM_AUTH_ENABLED) or OPENRAG_DEV_AZURE_BLOB below --
# this flag is AND-ed with that gate, not an override.
# OPENRAG_AZURE_BLOB_ENABLED=true
#
# Local dev / Azurite testing: set OPENRAG_DEV_AZURE_BLOB=true to enable the
# connector without IBM_AUTH_ENABLED. NEVER use in production.
# OPENRAG_DEV_AZURE_BLOB=false
#
# Connection-string mode (covers real Azure accounts and the Azurite emulator):
#   - Real account: copy from Azure Portal -> Storage account -> Access keys
#   - Azurite (local dev): AZURE_STORAGE_CONNECTION_STRING="UseDevelopmentStorage=true"
# AZURE_STORAGE_CONNECTION_STRING=
#
# Account-key mode (set account name + key; endpoint optional):
#   - Azurite (host-side backend):
#     - AZURE_STORAGE_ACCOUNT_NAME=devstoreaccount1
#     - AZURE_STORAGE_ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==
#     - AZURE_STORAGE_ENDPOINT=http://127.0.0.1:10000/devstoreaccount1
#   - Azurite (in-compose backend):
#     - AZURE_STORAGE_ACCOUNT_NAME=devstoreaccount1
#     - AZURE_STORAGE_ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==
#     - AZURE_STORAGE_ENDPOINT=http://azurite:10000/devstoreaccount1
# AZURE_STORAGE_ACCOUNT_NAME=
# AZURE_STORAGE_ACCOUNT_KEY=
# AZURE_STORAGE_ENDPOINT=

# OPTIONAL: Base URL with a DNS name routable from Google (etc.) to handle continuous ingestion (something like ngrok works). Setting this enables continuous ingestion.
WEBHOOK_BASE_URL=

# Model Provider API Keys
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
OLLAMA_ENDPOINT=
WATSONX_API_KEY=
WATSONX_ENDPOINT=
WATSONX_PROJECT_ID=
# Tune the ibm-watsonx-ai SDK's native 429 retry used during ingestion embedding.
# OPTIONAL: Max retries = retry attempts (default 10).
# WATSONX_MAX_RETRIES=10
# OPTIONAL: Delay time = base backoff seconds for the jittered exponential backoff (default 0.5).
# WATSONX_DELAY_TIME=0.5

# LLM Provider configuration. Providers can be "anthropic", "watsonx", "ibm" or "ollama".
LLM_PROVIDER=
LLM_MODEL=

# OPTIONAL: Custom system prompt for the chat agent. Overrides the default prompt baked into the Langflow flow.
# SYSTEM_PROMPT=

# Embedding provider configuration. Providers can be "watsonx", "ibm" or "ollama".
EMBEDDING_PROVIDER=
EMBEDDING_MODEL=

# Knowledge ingestion tuning
# OPTIONAL: Document chunk size in tokens (default configured in Langflow flow)
# CHUNK_SIZE=
# OPTIONAL: Overlap between chunks in tokens (default configured in Langflow flow)
# CHUNK_OVERLAP=
# OPTIONAL: Enable OCR for scanned PDFs and images (default: false)
# OCR_ENABLED=false
# OPTIONAL: Generate text descriptions for images found in documents (default: false)
# PICTURE_DESCRIPTIONS_ENABLED=false

# Port Configuration
# Change these values if you have port conflicts with other services on your system
# Default ports: Frontend=3000, Langflow=7860
FRONTEND_PORT=3000
LANGFLOW_PORT=7860
# Internal URL the OpenRAG backend uses to communicate with Langflow.
# When running via docker-compose this is set automatically to http://langflow:7860.
# Override only when running the backend outside Docker or pointing at a remote Langflow instance.
# LANGFLOW_URL=http://localhost:7860
# OPTIONAL: URL used for the OpenRAG UI's link to Langflow.
# If you change LANGFLOW_PORT, also update LANGFLOW_PUBLIC_URL to match the new port (e.g., http://localhost:7861)
LANGFLOW_PUBLIC_URL=

# OPTIONAL: Override the full docling-serve URL (e.g., for remote instances)
# If not set, auto-detects host and uses port 5001
# DOCLING_SERVE_URL=http://my-docling-server:5001
# DOCLING_SERVE_VERIFY_SSL=true
# OPTIONAL: OCR engine used by docling for scanned documents. Options: "easyocr" (default), "tesseract", "tesseract_cli"
# DOCLING_OCR_ENGINE=easyocr

# OPTIONAL: Override host for docling service (for special networking setups)
# HOST_DOCKER_INTERNAL=host.containers.internal

# Langflow auth
LANGFLOW_AUTO_LOGIN=False
LANGFLOW_SUPERUSER=
LANGFLOW_SUPERUSER_PASSWORD=
LANGFLOW_NEW_USER_IS_ACTIVE=False
LANGFLOW_ENABLE_SUPERUSER_CLI=False

# ─────────────────────────────────────────────────────────────────────────────
# Langflow Container Variables
# These variables are passed directly to the Langflow container by docker-compose.
# They are NOT read by the OpenRAG Python backend.
# ─────────────────────────────────────────────────────────────────────────────

# Langfuse tracing (optional)
# Get keys from https://cloud.langfuse.com or your self-hosted instance
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
# Leave empty for Langfuse Cloud, or set for self-hosted (e.g., http://localhost:3002)
# Example of self hosting outside of the docker network: LANGFUSE_HOST=http://host.docker.internal:3000
LANGFUSE_HOST=

# OPTIONAL: A development-mode only configuration value that allows additional origins (domains/hosts) to make requests to the Next.js dev server (next dev).
# - A comma-separated list of hostnames may be specified
# - e.g. 9.46.110.49,10.21.103.227,openrag.example.com
NEXT_ALLOWED_DEV_ORIGINS=


# OPTIONAL: Number of worker processes for concurrent request handling
# - Increase for higher throughput; but, factor in CPU / memory constraints
# Backend ingestion workers (default: min(4, CPU_COUNT // 2))
# MAX_WORKERS=4
# Langflow workers (default: 1)
# LANGFLOW_WORKERS=2
# Docling workers (default: 1)
# DOCLING_WORKERS=2

# OPTIONAL: Set the logging level for the Backend service
# - Supported values: DEBUG, INFO, WARNING, ERROR, CRITICAL
# - Default: INFO
# LOG_LEVEL=DEBUG

# OPTIONAL: Disable colored log output (https://no-color.org)
# - When set (to any value), ANSI color codes are omitted from log output
# NO_COLOR=1

# OPTIONAL: Enable or disable HTTP access logging events
# - e.g. INFO:     127.0.0.1:45132 - "GET /tasks HTTP/1.1" 200 OK
# - Default: true
# ACCESS_LOG=false

# OpenRAG Configuration
# The version tag for OpenRAG Docker images (e.g., 'latest', 'v1.0.0').
# This is automatically managed by the TUI during updates to match the Python package version,
# but can be pinned here to lock the deployment to a specific version when running OpenRAG with Make commands.
OPENRAG_VERSION=latest

# Docker Compose project name. Change this to run multiple instances of OpenRAG alongside each other.
COMPOSE_PROJECT_NAME=openrag

# Path configuration (override defaults for custom directory layouts)
# OPENRAG_DATA_PATH=./data
# OPENRAG_KEYS_PATH=./keys
# OPENRAG_CONFIG_PATH=./config
# OPENRAG_FLOWS_PATH=./flows
# Flows backup directory; defaults to <OPENRAG_FLOWS_PATH>/backup when unset.
# OPENRAG_FLOWS_BACKUP_PATH=
# Comma-separated list of host directories the TUI watches for documents to ingest.
# The TUI derives OPENRAG_DOCUMENTS_PATH (below) from the first entry for Docker's volume mount.
# OPENRAG_DOCUMENTS_PATHS=$HOME/.openrag/documents
#
# Single directory path mounted into the OpenRAG container by docker-compose.
# Set automatically by the TUI from OPENRAG_DOCUMENTS_PATHS; override only if running without the TUI.
# OPENRAG_DOCUMENTS_PATH=./openrag-documents

# Langflow Configuration
# API Key for authenticating with Langflow.
# If not provided, OpenRAG will automatically generate one using the superuser credentials
# (LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD).
LANGFLOW_KEY=

# Number of retry attempts when generating the Langflow API key (default: 15)
LANGFLOW_KEY_RETRIES=

# Delay in seconds between API key generation retry attempts (default: 2.0)
LANGFLOW_KEY_RETRY_DELAY=

# Version of Langflow (informational/future use)
# LANGFLOW_VERSION=

# Logging and Service
# Format for log messages (default: json)
LOG_FORMAT=
# Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO)
LOG_LEVEL=
# Service name used in logs (default: openrag)
SERVICE_NAME=openrag
# Secret key for session management (auto-generated if not provided)
SESSION_SECRET=

# OPTIONAL: JWT signing key for token issuance and verification.
# If not set, OpenRAG generates an RSA key pair (private_key.pem / public_key.pem)
# under OPENRAG_KEYS_PATH at startup and uses RS256 signing.
# Accepted values:
#   - RSA private key in PEM format (PKCS#8 or PKCS#1) → RS256
#   - Plain string → HS256 (symmetric; JWKS endpoint returns an empty key set)
# When set, the on-disk key files are never read or created.
#
# PEM keys must be set as a double-quoted single-line value with literal \n
# separators. python-dotenv expands \n in double-quoted strings to real newlines,
# which is required by the PEM parser. Multiline values break Make's .env parsing,
# and single-quoted or unquoted values do not expand \n.
#
# Generate and format a test key with:
#   openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 2>/dev/null \
#     | awk '{printf "%s\\n", $0}' | sed 's/^/"/' | sed 's/$/"/'
#
# JWT_SIGNING_KEY=

# OPTIONAL: The OpenRAG application execution mode or deployment target.
# - Used to configure the operation of the application based on its execution context.
# Values:
# - "oss": Open Source Software (default)
# - "saas": Public Cloud
# - "on_prem": Private Cloud
OPENRAG_RUN_MODE=oss

# Permission/identity cache backend. Only "memory" is currently wired.
# When CACHE_BACKEND=memory the RBAC permission cache and OAuth-subject
# cache are per-process, so OpenRAG must run with UVICORN_WORKERS=1 (and
# replicaCount=1 in helm). Anything >1 hard-fails at startup until cache
# is shared across processes (planned: Redis).
CACHE_BACKEND=memory
# UVICORN_WORKERS=1

# Storage mode. Default `db` — workspace config + chat history live in
# the SQL DB only; no `config.yaml`, `conversations.json`, or
# `session_ownership.json` are ever written. Pre-existing JSON files
# are imported once on first boot, then ignored.
#   - db     (default) DB-only, no JSON written
#   - hybrid          DB + JSON dual-write (Phase B fallback)
#   - files           legacy JSON-only path; DB untouched
OPENRAG_STORAGE_MODE=db

# TLS verification when OpenRAG fetches JWT signing keys from the token iss URL
# (gateway-forwarded JWT / RBAC role sync). Default false (internal issuers).
# Set true when the issuer uses a cert trusted by the pod CA bundle.
# OPENRAG_JWT_ISSUER_VERIFY_TLS=true

# Verify forwarded JWT signatures via iss JWKS (default false).
# Set true when OpenRAG must verify signatures (no trusted upstream auth).
# OPENRAG_JWT_VERIFY_SIGNATURE=true

# RBAC kill switch. Default OFF — the system behaves like the pre-RBAC
# release: any authenticated user has full access; API-key role
# overrides are also bypassed. Set to `true` to turn the permissions
# system on for multi-user deployments. Available in all run modes.
# OPENRAG_RBAC_ENFORCE=true


# Default role for new users (JWT role sync off). Does not change existing
# users unless OPENRAG_SYNC_DEFAULT_ROLE is also enabled.
# OPENRAG_DEFAULT_ROLE=user
# OPENRAG_NOAUTH_ROLE=admin

# Dev-only (OSS run mode): run scripts/sync_default_user_roles.py manually
# to migrate users when OPENRAG_DEFAULT_ROLE changes. Ignored in saas/on_prem.
# OPENRAG_SYNC_DEFAULT_ROLE=false