# PEP 517 build configuration: maturin is the only build requirement and also
# the build backend. Its project-specific settings live under [tool.maturin].
[build-system]
requires = ["maturin>=1.5,<2.0"]
build-backend = "maturin"

[project]
# Distribution name used with `pip install`; the import package is `headroom`.
name = "headroom-ai"
version = "0.26.0"
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
readme = "README.md"
# SPDX license expression.
license = "Apache-2.0"
# Minimum supported interpreter. Keep the Python classifiers, `target-version`
# in [tool.ruff] and `python_version` in [tool.mypy] in step with this floor.
requires-python = ">=3.10"
authors = [
    { name = "Headroom Contributors" }
]
maintainers = [
    { name = "Headroom Contributors" }
]
# Search keywords published with the package metadata.
keywords = [
    "llm",
    "openai",
    "anthropic",
    "claude",
    "gpt",
    "context",
    "token",
    "optimization",
    "compression",
    "caching",
    "proxy",
    "ai",
    "machine-learning",
]
# Trove classifiers. No "License ::" classifier is listed on purpose: the
# license is declared through the SPDX `license` expression in this table, and
# PEP 639 deprecates license classifiers when a license expression is present
# (some build backends reject the combination outright).
# The "Programming Language :: Python :: 3.x" entries should cover exactly the
# versions allowed by `requires-python`.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    # NOTE(review): the wheel ships a compiled extension (see [tool.maturin]);
    # confirm that "OS Independent" is still the intended claim.
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
# Runtime requirements installed with every `pip install headroom-ai`.
# Environment markers (the part after `;`) restrict an entry to specific
# Python versions; everything heavier lives in [project.optional-dependencies].
dependencies = [
    # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN)
    "tiktoken>=0.5.0",            # Tokenizer for all compressors
    "pydantic>=2.0.0",            # Config and data models
    # litellm's own metadata pins requires-python <3.14, and headroom only uses it for
    # model registry / pricing / non-core providers — all lazily imported and
    # ImportError-guarded. Marking it 3.14-optional lets headroom install on Python 3.14
    # (core compression + the Anthropic proxy path never import litellm). See GH #956.
    # The same specifier is repeated in the [dev] extra; keep the two in sync.
    "litellm>=1.86.2,<2.0; python_version < '3.14'",  # model registry, pricing, providers (lazy)
    "click>=8.1.0",               # CLI framework
    "rich>=13.0.0",               # Rich terminal output
    "opentelemetry-api>=1.24.0",  # Safe no-op OTEL API for instrumentation
    "ast-grep-cli>=0.30.0",       # AST-aware code slicing (CodeCompressor); binary wheel
    "tomli>=2.0.0; python_version < '3.11'",  # tomllib backport for helper scripts
]

[project.optional-dependencies]
# Each key below is an opt-in extra: pip install "headroom-ai[<extra>]".
# Proxy server (most common install: pip install headroom-ai[proxy])
proxy = [
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0,<1.0",
    "httpx[http2]>=0.24.0",
    "openai>=2.14.0",             # OpenAI API format support
    "mcp>=1.0.0",                 # MCP server (headroom_compress, retrieve, stats)
    "magika>=0.6.0",              # ML content detection for ContentRouter
    "zstandard>=0.20.0",          # Decompress zstd request bodies (Codex, etc.)
    "websockets>=13.0",           # WebSocket proxy for /v1/responses (Codex gpt-5.4+)
    "onnxruntime>=1.16.0",        # Kompress ONNX INT8 text compression (no torch needed)
    "transformers>=4.30.0,<6.0",  # Tokenizer only (for Kompress)
    "watchdog>=4.0.0",            # File watcher for live code graph reindexing (--code-graph)
    "sqlite-vec>=0.1.6",          # Vector index for memory (--memory). Lightweight, no torch.
]
# Production ASGI/WSGI server — Unix-only (gunicorn does not support Windows).
# Kept separate from [proxy] so that dev, CI, and Windows users are not forced
# to install a non-functional package.  Production deployments should use:
#   pip install headroom-ai[proxy,proxy-prod]
# This extra already depends on headroom-ai[proxy], so
# `pip install headroom-ai[proxy-prod]` alone resolves to the same set. On
# Windows the `sys_platform != 'win32'` marker drops gunicorn, leaving only
# the [proxy] requirements.
proxy-prod = [
    "headroom-ai[proxy]",
    "gunicorn>=21.0.0; sys_platform != 'win32'",
]
# AST-based code compression (tree-sitter)
code = [
    "tree-sitter-language-pack>=0.10.0",
]
# ML-based compression with Kompress (ModernBERT).
# (The legacy [llmlingua] extra was removed in 0.9.x — no live code path used it.
# Use [ml] for the supported ML compression dependencies.)
# The transformers specifier is duplicated in [proxy] and [voice]; keep all
# three in sync when changing it.
ml = [
    "torch>=2.0.0",
    "transformers>=4.30.0,<6.0",
    # transformers >= 5.x requires huggingface-hub >= 1.5.0,<2.0; pinning
    # the floor here prevents Kompress from silently falling back to
    # "unavailable" when a sibling install (e.g. `pip install
    # strands-agents`) drags huggingface-hub backwards.
    "huggingface-hub>=1.5.0,<2.0",
]
# Memory system (hierarchical memory with vector search)
# These three specifiers are repeated verbatim in [dev]; sqlite-vec is also
# part of [proxy]. Keep the copies in sync.
memory = [
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0,<6.0",
]
# Qdrant + Neo4j memory backend helpers
# Not part of [all]; install explicitly with headroom-ai[memory-stack].
memory-stack = [
    "mem0ai>=1.0.0,<2.0",
    "qdrant-client>=1.9.0,<2.0",
    "neo4j>=5.20.0,<7.0",
]
# Apple-Silicon GPU (MPS) offload for the memory embedder. Opt in at runtime with
# HEADROOM_EMBEDDER_RUNTIME=pytorch_mps. macOS-only; intentionally excluded from [all].
# The `sys_platform == 'darwin'` markers make this extra install nothing on
# other platforms. sentence-transformers carries the same `<6.0` ceiling as the
# [memory], [evals] and [dev] extras, so installing this extra on its own
# cannot pull in a major version the rest of the project has not opted into.
pytorch-mps = [
    "torch>=2.0.0; sys_platform == 'darwin'",
    "sentence-transformers>=2.2.0,<6.0; sys_platform == 'darwin'",
]
# Semantic relevance scoring with embeddings.
# Uses `fastembed` (BAAI/bge-small-en-v1.5 by default — 33M params,
# 384 dims, ~30 MB int8-quantized ONNX). Same library + model used by
# the Rust SmartCrusher (`fastembed` crate), giving byte-equal embeddings
# across the language boundary. Replaced sentence-transformers in
# Stage 3c.1 — fastembed is faster (~2-3x), smaller (no torch
# dependency), and outranks all-MiniLM-L6-v2 on MTEB by ~6 points.
relevance = [
    "fastembed>=0.4.0",
    "numpy>=1.24.0",
]
# Image compression (ML-based routing + OCR)
#
# OCR backend uses ONNX Runtime regardless of Python version. The
# rapidocr ecosystem split into two flavors after 1.4.x:
#   * rapidocr-onnxruntime 1.4.x — bundled-ORT package, capped at
#     Python <3.13 by its requires-python metadata. Drop-in for our
#     existing v1 tuple-shaped API call.
#   * rapidocr 3.x — engine-agnostic core, supports Python 3.13+.
#     Returns a RapidOCROutput dataclass (txts, scores, boxes, ...).
#     Needs `onnxruntime` installed separately to use the ORT backend.
#
# `headroom/image/compressor.py` adapts both API shapes at runtime via
# a try/except cascade. See issue #372 for context.
#
# The two python_version markers below are complementary (<3.13 / >=3.13),
# so exactly one OCR flavor is installed on any supported interpreter.
image = [
    "pillow>=10.0.0",
    "sentencepiece>=0.1.99",  # Required by SigLIP tokenizer (SiglipTokenizer)
    # Python 3.10–3.12 (every interpreter this project supports below 3.13):
    # keep the proven ORT-bundled package directly.
    # ~15 MB ONNX models auto-downloaded on first use.
    "rapidocr-onnxruntime>=1.4.0,<2; python_version<'3.13'",
    # Python 3.13+: rapidocr-onnxruntime is unavailable (its wheels
    # declare requires-python<3.13). Use the successor `rapidocr` 3.x
    # core + `onnxruntime` engine; same ORT backend, just split into
    # two packages. Total install size and inference speed unchanged.
    "rapidocr>=3.0,<4; python_version>='3.13'",
    "onnxruntime>=1.7,<2; python_version>='3.13'",
]
# Report generation
reports = [
    "jinja2>=3.0.0",
]
# Binary spreadsheet ingestion (.xlsx / .xls -> tabular text)
spreadsheet = [
    "openpyxl>=3.1.0",      # .xlsx
    "xlrd>=2.0.1",          # legacy .xls
]
# OpenTelemetry metrics export
# Floors match the core `opentelemetry-api>=1.24.0` dependency.
otel = [
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
]
# any-llm multi-provider backend (requires Python 3.11+)
# On Python 3.10 the marker makes this extra install nothing.
anyllm = [
    "any-llm-sdk>=1.0.0; python_version >= '3.11'",
]
# LangChain integration
langchain = [
    "langchain-core>=1.3.3,<4.0",
    "langchain-openai>=1.1.14,<2.0",
]
# Agno agent framework integration
agno = [
    "agno>=1.0.0",
]
# AWS Strands Agents SDK integration
strands = [
    "strands-agents>=0.1.0",
]
# MCP server for Claude Code integration
# Same `mcp` floor as [proxy]; httpx is requested here without the `http2` extra.
mcp = [
    "mcp>=1.0.0",
    "httpx>=0.24.0",
]
# Voice filler detection
# onnxruntime and transformers use the same specifiers as [proxy]; the torch
# floor matches [ml].
voice = [
    "onnxruntime>=1.16.0",
    "transformers>=4.30.0,<6.0",
    "torch>=2.0.0",
]
# Voice training (includes voice deps + training extras)
# The self-reference pulls in everything from [voice].
voice-train = [
    "headroom-ai[voice]",
    "datasets>=2.14.0",
    "accelerate>=0.20.0",
]
# Evaluation framework
# NOTE(review): [proxy] requires openai>=2.14.0; the lower floor here only
# takes effect when this extra is installed without [proxy]. Confirm the
# evaluation code really works across that whole range.
evals = [
    "datasets>=2.14.0",
    "sentence-transformers>=2.2.0,<6.0",
    "numpy>=1.24.0",
    "scikit-learn>=1.3.0",
    "anthropic>=0.18.0",
    "openai>=1.0.0",
]
# AWS Bedrock backend
bedrock = [
    "boto3>=1.28.0",
]
# HTML content extraction
html = [
    "trafilatura>=1.6.0",
]
# Comprehensive LLM benchmarks
# NOTE(review): same openai/anthropic floors as [evals]; openai is lower than
# the [proxy] floor (>=2.14.0).
benchmark = [
    "lm-eval[api]>=0.4.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
]
# Development dependencies
# Several entries repeat specifiers from the core dependencies and from the
# [proxy], [otel], [memory] and [spreadsheet] extras instead of referencing
# those extras; keep the copies in sync when bumping a version.
dev = [
    # Test runner, linting, type checking, git hooks
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "pytest-asyncio>=0.21.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
    # Provider SDKs
    "openai>=1.0.0",
    "anthropic>=0.18.0",
    "litellm>=1.86.2,<2.0; python_version < '3.14'",  # see core deps note (GH #956)
    # Same specifiers as [proxy]
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0,<1.0",
    "httpx[http2]>=0.24.0",
    "websockets>=13.0",
    # Same specifiers as [otel]
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
    "ollama>=0.4.0",
    "langchain-ollama>=0.2.0",
    # Same specifiers as [memory]
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0,<6.0",
    "numpy>=1.24.0",
    "openpyxl>=3.1.0",  # exercises spreadsheet_ingest (.xlsx) in the test suite
]
# Convenience bundle of the feature extras listed in the string below. Despite
# the name it is not every extra: proxy-prod, memory-stack, pytorch-mps, anyllm,
# langchain, agno, strands, voice-train, bedrock and dev are NOT included and
# must be requested explicitly.
all = [
    "headroom-ai[proxy,code,ml,memory,relevance,image,reports,otel,evals,voice,html,benchmark,mcp,spreadsheet]",
]

# Console entry point: the `headroom` command calls `main` from the
# `headroom.cli` module.
[project.scripts]
headroom = "headroom.cli:main"

# Project links published with the package metadata (label = URL).
[project.urls]
Homepage = "https://headroom-docs.vercel.app"
Documentation = "https://headroom-docs.vercel.app/docs"
Repository = "https://github.com/chopratejas/headroom"
Issues = "https://github.com/chopratejas/headroom/issues"
Changelog = "https://github.com/chopratejas/headroom/blob/main/CHANGELOG.md"
# llms.txt convention (llmstxt.org) — point AI agents / LLM crawlers
# at the auto-generated docs index so they can resolve install paths
# and entry points without a follow-up fetch.
# The key is quoted because the label contains spaces and a slash, which are
# not allowed in a bare TOML key.
"AI / LLM Index" = "https://headroom-docs.vercel.app/llms.txt"

# Packaging note (applies to the [tool.maturin] table, not to the uv index
# table that directly follows):
# Maturin builds a single wheel containing both the Python source under
# `headroom/` AND the compiled Rust extension `headroom/_core.so` (cdylib
# from `crates/headroom-py`). One `pip install headroom-ai` ships everything
# atomically — no separate `headroom-core-py` package, no chicken-and-egg,
# no PIP_FIND_LINKS plumbing. Phase A0's runtime fail-loud check still
# exists but only fires if someone forces an sdist install on a platform
# without a wheel and the rust toolchain isn't available to compile it.
#
# Package index for uv:
# Pin the project's package index to public PyPI. Without this, `uv lock`
# inherits the developer's user-level `~/.config/uv/uv.toml` index
# setting — including private/internal mirrors like
# `pypi.netflix.net/simple` — and bakes those URLs into uv.lock, which
# then breaks CI on every public runner that can't reach the mirror.
# Declaring the index in pyproject.toml makes the project authoritative
# regardless of who runs `uv lock`.
[[tool.uv.index]]
name = "pypi"
url = "https://pypi.org/simple/"
default = true

[tool.maturin]
# Package layout and sdist contents. With `python-source = "."` and the
# package directory `headroom/` at repo root, maturin includes every file
# under `headroom/` in the wheel — that picks up the dashboard HTML
# templates and bundled YAML configs. `LICENSE` and `NOTICE` are listed
# explicitly because maturin sdists do not get the package-directory
# treatment wheels do, and PEP 639 auto-discovery emits both files into
# `License-File:` metadata — PyPI rejects sdists whose declared license
# files are missing from the tarball with `400 License-File X does not
# exist in distribution file`.
include = [
    { path = "LICENSE", format = "sdist" },
    { path = "NOTICE", format = "sdist" },
]
python-source = "."
# Dotted import path of the compiled extension inside the Python package.
module-name = "headroom._core"
# The cdylib source lives under `crates/headroom-py`. Maturin invokes
# `cargo build` with this manifest to produce the `_core` cdylib, then injects
# the resulting `.so` into the wheel at `headroom/_core.so`.
manifest-path = "crates/headroom-py/Cargo.toml"
# Cargo feature enabled for every maturin build. Building without it is not
# supported — a bare `cargo build` won't produce a usable Python extension.
features = ["extension-module"]
# Binding type, stated explicitly. "pyo3" is also maturin's default, which is
# correct here (see `crates/headroom-py/src/`).
bindings = "pyo3"

[tool.ruff]
# Oldest Python version ruff should target; matches `requires-python = ">=3.10"`.
target-version = "py310"
line-length = 100

[tool.ruff.lint]
# Rule families that are enabled.
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
# Individual rules switched off within the selected families.
ignore = [
    "E501",  # line too long (handled by formatter)
    "B008",  # do not perform function calls in argument defaults
    "B905",  # zip without strict parameter
]

[tool.ruff.lint.isort]
# Sort `headroom` imports as first-party.
known-first-party = ["headroom"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# Global mypy defaults. The [[tool.mypy.overrides]] tables that follow relax
# them for specific modules.
[tool.mypy]
# Matches the `requires-python = ">=3.10"` floor.
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
ignore_missing_imports = true

# Per-module overrides for modules with dynamic typing patterns
# (untyped function definitions are allowed here; other checks still apply).
[[tool.mypy.overrides]]
module = [
    "headroom.proxy.server",
    "headroom.proxy.cost",
    "headroom.proxy.prometheus_metrics",
    "headroom.proxy.semantic_cache",
    "headroom.proxy.rate_limiter",
    "headroom.proxy.request_logger",
    "headroom.proxy.helpers",
    "headroom.integrations.langchain",
    "headroom.integrations.mcp",
    "headroom.ccr.mcp_server",
    "headroom.relevance.embedding",
    "headroom.reporting.generator",
]
disallow_untyped_defs = false

# Same relaxation as above, plus `warn_return_any` turned off.
[[tool.mypy.overrides]]
module = [
    "headroom.tokenizers.*",
    "headroom.providers.litellm",
    "headroom.providers.google",
]
disallow_untyped_defs = false
warn_return_any = false

# Handler mixins use self.* from HeadroomProxy via duck typing — mypy can't resolve these
# (all errors in these modules are suppressed via `ignore_errors`).
[[tool.mypy.overrides]]
module = ["headroom.proxy.handlers.*"]
disallow_untyped_defs = false
ignore_errors = true

# Ignore third-party stubs with syntax errors
[[tool.mypy.overrides]]
module = ["mlx.*"]
ignore_errors = true

[tool.pytest.ini_options]
# Test discovery: files named test_*.py under tests/, functions named test_*.
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
# Default command-line flags: verbose output, short tracebacks.
addopts = "-v --tb=short"
# pytest-asyncio setting (pytest-asyncio is installed through the [dev] extra).
asyncio_mode = "auto"
filterwarnings = [
    # pyo3 Unsendable parsers emit an unraisable warning when GC drops them on a
    # test-teardown thread; this is a test-harness artifact, not a production issue
    # (production threads are long-lived and drop their parsers on themselves).
    "ignore::pytest.PytestUnraisableExceptionWarning",
]
# Custom markers registered for the suite (format: "name: description").
markers = [
    "slow: slow tests (model loads, large fixtures)",
    "real_llm: tests that hit real LLM APIs; skipped unless explicitly enabled",
    "live: opt-in multi-turn tests that hit real upstream APIs; require provider keys",
]

[tool.coverage.run]
# Measure the `headroom` package, including branch coverage.
source = ["headroom"]
branch = true
# Paths left out of measurement.
omit = [
    "headroom/cli.py",
    "*/tests/*",
]

[tool.coverage.report]
# Regexes for lines excluded from the report. Setting `exclude_lines` replaces
# coverage.py's built-in defaults, which is why "pragma: no cover" is listed
# explicitly. In the last pattern `.` matches either quote character around
# __main__.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    "if __name__ == .__main__.:",
]