# arXiv Paper Curator - Environment Configuration (EXAMPLE)
# Copy this file to .env and adjust values as needed
#
# Format: one KEY=VALUE assignment per line; comments start with '#' in
# column 0 on their own line (do not append comments after a value).
# NOTE(review): keys with a double underscore (SECTION__KEY) look like a
# nested-settings delimiter - confirm against the application's settings loader.

# Application Settings
DEBUG=true
ENVIRONMENT=development

# PostgreSQL Database
POSTGRES_DATABASE_URL=postgresql+psycopg2://rag_user:rag_password@postgres:5432/rag_db

# External Services
# NOTE(review): both OPENSEARCH_HOST and OPENSEARCH__HOST are defined with the
# same value - confirm which spelling each consumer reads before removing either.
OPENSEARCH_HOST=http://opensearch:9200
OPENSEARCH__HOST=http://opensearch:9200
OLLAMA_HOST=http://ollama:11434

# arXiv API Configuration
ARXIV__MAX_RESULTS=15
ARXIV__BASE_URL=https://export.arxiv.org/api/query
ARXIV__PDF_CACHE_DIR=./data/arxiv_pdfs
ARXIV__RATE_LIMIT_DELAY=3.0
ARXIV__TIMEOUT_SECONDS=30
ARXIV__SEARCH_CATEGORY=cs.AI
ARXIV__DOWNLOAD_MAX_RETRIES=3
ARXIV__DOWNLOAD_RETRY_DELAY_BASE=5.0
ARXIV__MAX_CONCURRENT_DOWNLOADS=5
ARXIV__MAX_CONCURRENT_PARSING=1

# PDF Parser Configuration
PDF_PARSER__MAX_PAGES=30
PDF_PARSER__MAX_FILE_SIZE_MB=20
PDF_PARSER__DO_OCR=false
PDF_PARSER__DO_TABLE_STRUCTURE=true

# OpenSearch Configuration (Single hybrid index for all search types)
OPENSEARCH__INDEX_NAME=arxiv-papers
OPENSEARCH__CHUNK_INDEX_SUFFIX=chunks
OPENSEARCH__MAX_TEXT_SIZE=1000000

# Vector Search Settings
OPENSEARCH__VECTOR_DIMENSION=1024
OPENSEARCH__VECTOR_SPACE_TYPE=cosinesimil

# Hybrid Search Settings
OPENSEARCH__RRF_PIPELINE_NAME=hybrid-rrf-pipeline
OPENSEARCH__HYBRID_SEARCH_SIZE_MULTIPLIER=2

# Text Chunking Configuration
CHUNKING__CHUNK_SIZE=600
CHUNKING__OVERLAP_SIZE=100
CHUNKING__MIN_CHUNK_SIZE=100
CHUNKING__SECTION_BASED=true

# Jina AI Embeddings (Required for hybrid search)
JINA_API_KEY=your_jina_api_key_here

# Ollama Configuration
OLLAMA_MODEL=llama3.2:1b
OLLAMA_TIMEOUT=300

# Langfuse v3 Tracing Configuration - Official SDK Standard (single underscore)
LANGFUSE_ENABLED=true
LANGFUSE_HOST=http://localhost:3001
LANGFUSE_PUBLIC_KEY=pk-lf-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
LANGFUSE_SECRET_KEY=sk-lf-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
LANGFUSE_FLUSH_AT=15
LANGFUSE_FLUSH_INTERVAL=1.0
LANGFUSE_DEBUG=true

# Langfuse Server Configuration (for docker-compose)
# WARNING: Change these values in production! These are development defaults.
LANGFUSE_NEXTAUTH_SECRET=changeme-v3-nextauth-secret-min-32-chars-recommended
LANGFUSE_SALT=changeme-v3-salt-min-32-chars-recommended-for-security
# REQUIRED: Generate a real key before starting: openssl rand -hex 32
LANGFUSE_ENCRYPTION_KEY=0000000000000000000000000000000000000000000000000000000000000000
LANGFUSE_REDIS_PASSWORD=langfuse_redis_password
LANGFUSE_MINIO_ACCESS_KEY=langfuse_minio
LANGFUSE_MINIO_SECRET_KEY=langfuse_minio_secret

# Redis Cache Configuration
REDIS__HOST=redis
REDIS__PORT=6379
# Leave empty since compose.yml Redis has no --requirepass configured
REDIS__PASSWORD=
REDIS__DB=0
REDIS__TTL_HOURS=6

# Telegram Bot Configuration (Week 7)
# Get your bot token from @BotFather on Telegram
TELEGRAM__ENABLED=true
TELEGRAM__BOT_TOKEN=your_telegram_bot_token_here

# Airflow Settings
AIRFLOW__CORE__EXECUTOR=LocalExecutor
AIRFLOW__CORE__LOAD_EXAMPLES=false
# NOTE(review): development convenience; confirm this is disabled in production.
AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
# NOTE(review): Airflow's own variable is AIRFLOW_HOME (single underscore);
# AIRFLOW__HOME does not match the AIRFLOW__{SECTION}__{KEY} pattern - confirm
# what, if anything, reads this key.
AIRFLOW__HOME=/opt/airflow
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://rag_user:rag_password@postgres:5432/rag_db