[build-system]
# The [project] table (PEP 621) and [tool.setuptools.packages.find] below are only
# understood by setuptools 61+, so the floor must not be lower than that.
requires = [
    "packaging",
    "setuptools >= 61.0.0",
    "wheel",
]
build-backend = "setuptools.build_meta"

[project]
name = "orbit"
version = "0.2.1"
description = "Orbit RL infrastructure for large-scale post-training"
readme = "README.md"
requires-python = ">=3.12,<3.13"
license = { file = "LICENSE" }
authors = [
    { name = "Orbit Team" },
]
classifiers = [
    "Programming Language :: Python :: 3.12",
    "Environment :: GPU :: NVIDIA CUDA",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: System :: Distributed Computing",
]
# Entries without a version specifier are resolved through [tool.uv.sources] and/or
# [tool.uv] override-dependencies where a matching entry exists there.
dependencies = [
    "accelerate==1.13.0",
    "blobfile==3.0.0",
    "cachetools==7.1.1",
    "datasets==4.8.5",
    "deep-ep",
    "fsspec==2026.2.0",
    "func_timeout==4.3.5",
    "httpx[http2]==0.28.1",
    "mcp[cli]==1.27.1",
    "megatron-bridge",
    "megatron-core",
    "memray",
    "nvidia-resiliency-ext",
    "omegaconf==2.3.0",
    "pillow==12.2.0",
    "pybase64==1.4.3",
    "pylatexenc==2.10",
    "pytest-asyncio",
    "pyyaml==6.0.3",
    "qwen_vl_utils==0.0.14",
    "ray[default]==2.55.1",
    "ring_flash_attn",
    "sglang",
    "sglang-router==0.3.2",
    "starlette==0.52.1",
    "tensorboard==2.20.0",
    "timm==1.0.17",
    "transformer-engine",
    "transformers==4.57.1",
    "wandb==0.27.0",
]

[project.optional-dependencies]
fsdp = [
    "torch==2.11.0",
]
# Opt-in one-shot install: `uv sync --extra allinone` (with `source env.sh`) builds
# the full CUDA-13 kernel + ML layer that CUDA-13-install.md otherwise installs by
# hand, so no manual layer is needed. Default `uv sync` is unchanged by this extra.
# Build sources/flags for these live in [tool.uv.sources], no-build-isolation-package,
# and [tool.uv.extra-build-dependencies] below. (numpy<2 is pinned globally in
# [tool.uv] override-dependencies because orbit/Megatron require numpy 1.x regardless.)
allinone = [
    # CUDA kernels — source-built against the pinned torch 2.11 / CUDA 13 layer.
    "flash-attn==2.8.3",
    "mamba-ssm==2.3.1",
    "causal-conv1d==1.6.2.post1",
    "flash-linear-attention==0.5.0",
    "fast-hadamard-transform",
    "deep-gemm-official",
    "sgl-kernel==0.3.21",
    "torch-memory-saver==0.0.9",
    "apex",
    "tilelang==0.1.9",
    "tile-kernels==1.0.0",
    # ML libs (CUDA-13-install.md steps 6/11)
    "open-clip-torch==3.3.0",
    "trl",
    "math-verify==0.9.0",
    "latex2sympy2-extended==1.11.0",
    "liger-kernel==0.8.0",
    "ftfy",
    "nvtx",
    # megatron.bridge imports modelopt at module load (the manual path gets it via the
    # NVIDIA-NeMo Megatron-Bridge priming step).
    "nvidia-modelopt==0.44.0",
    # build-time tools for the no-build-isolation source builds
    "ninja",
    "pybind11",
    "cmake>=3.31",
    "scikit-build-core",
]

[tool.uv]
# Keep transitive packages from silently selecting an untested CUDA/Torch build.
override-dependencies = [
    # Megatron requires numpy 1.x; pin hard so no transitive dep pulls 2.x back.
    "numpy==1.26.4",
    "cuda-python==13.2.0",
    "flashinfer-python==0.6.3",
    "megatron-core",
    "nvidia-cublas==13.4.1.1",
    "nvidia-cuda-cupti==13.2.75",
    "nvidia-cuda-nvrtc==13.2.78",
    "nvidia-cuda-runtime==13.2.75",
    "nvidia-cudnn-cu13==9.22.0.52",
    "nvidia-cufft==12.2.0.46",
    "nvidia-curand==10.4.2.55",
    "nvidia-cusolver==12.2.0.1",
    "nvidia-cusparse==12.7.10.1",
    "nvidia-nccl-cu13==2.30.4",
    "nvidia-nvjitlink==13.2.78",
    "nvidia-nvtx==13.2.75",
    "timm==1.0.17",
    "transformers==4.57.1",
    "torch==2.11.0",
    "torchvision==0.26.0",
    "torchaudio==2.11.0",
    "triton==3.6.0",
]
no-build-isolation-package = [
    "causal-conv1d",
    "deep-ep",
    "deep-gemm-official",
    "fast-hadamard-transform",
    "flash-attn",
    "flash-linear-attention",
    "mamba-ssm",
    "sgl-kernel",
    "torch-memory-saver",
    "transformer-engine",
    "transformer-engine-torch",
]

[tool.uv.extra-build-dependencies]
transformer-engine = ["pybind11", "setuptools", "torch", "wheel"]
fast-hadamard-transform = ["setuptools", "torch", "wheel", "packaging"]
flash-attn = ["setuptools", "torch", "wheel", "packaging", "psutil", "ninja"]
mamba-ssm = ["setuptools", "torch", "wheel", "packaging", "ninja"]
causal-conv1d = ["setuptools", "torch", "wheel", "packaging", "ninja"]
deep-gemm-official = ["setuptools", "torch", "wheel", "packaging"]
deep-ep = ["setuptools", "torch", "wheel", "packaging"]
sgl-kernel = ["setuptools", "torch", "wheel", "scikit-build-core", "cmake", "ninja"]
torch-memory-saver = ["setuptools", "torch", "wheel", "packaging"]
flash-linear-attention = ["setuptools", "torch", "wheel"]

# Every git source is pinned to a full commit SHA. The megatron-core, megatron-bridge
# and sglang revs are repeated under [tool.orbit.release.backend-pins.*] further down;
# update both places together.
[tool.uv.sources]
deep-ep = { git = "https://github.com/deepseek-ai/DeepEP.git", rev = "d4f41e4e93602a15e95f55f6ee8df8f1aaa0e4bb" }
megatron-bridge = { git = "https://github.com/Sphere-AI-Lab/Megatron-Bridge.git", rev = "85c84cbc26d4c983a3d6e46c804f02e2a99af5a2" }
megatron-core = { git = "https://github.com/Sphere-AI-Lab/Megatron-LM.git", rev = "00eb75b0c803b0fc8e5413d736529d9d3b82b6bd" }
nvidia-resiliency-ext = { git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git", rev = "63154570cea17f8805a7fd15cc3b8cc2919ba575" }
sglang = { git = "https://github.com/Sphere-AI-Lab/sglang.git", rev = "9c83ae8be07cbb1eb6898ce608ae244e3be375b4", subdirectory = "python" }
# TransformerEngine built from source (verified end-to-end). The orbit-build-wheels
# prebuilt is cu13/torch-2.11-correct too, but swapping it in perturbed libcudart load
# order and exposed a flaky tilelang<->flashinfer find_loaded_library("libcudart")
# race (tilelang's libcudart_stub.so shadows the real lib). Keep source build until
# that orbit/tilelang issue is fixed.
# (Prebuilt alt: { url = "https://github.com/liulixinkerry/orbit-build-wheels/releases/download/cu132-torch211-ubuntu2204/transformer_engine-2.14.0-cp312-cp312-linux_x86_64.whl" })
transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "71bbefbf153418f943640df0f7373625dc93fa46" }
fast-hadamard-transform = { git = "https://github.com/Dao-AILab/fast-hadamard-transform.git", rev = "e7706faf8d1c3b9f241e36860640ad1dac644ede" }
deep-gemm-official = { git = "https://github.com/liulixinkerry/DeepGEMM.git", rev = "18db30e9db9703b906e9ab3803fbbc1a64be1520" }
sgl-kernel = { git = "https://github.com/Sphere-AI-Lab/sglang.git", rev = "9c83ae8be07cbb1eb6898ce608ae244e3be375b4", subdirectory = "sgl-kernel" }
torch-memory-saver = { git = "https://github.com/fzyzcjy/torch_memory_saver.git", rev = "dc6876905830430b5054325fa4211ff302169c6b" }
sglang-router = { url = "https://github.com/zhuzilin/sgl-router/releases/download/v0.3.2-5f8d397/sglang_router-0.3.2-cp38-abi3-manylinux_2_28_x86_64.whl" }
# apex orbit-build-wheels prebuilt is cu13-clean; source build needs fragile --config-settings, so use the public wheel.
apex = { url = "https://github.com/liulixinkerry/orbit-build-wheels/releases/download/cu132-torch211-ubuntu2204/apex-0.1-cp312-cp312-linux_x86_64.whl" }

[tool.setuptools]
include-package-data = true

[tool.setuptools.packages.find]
include = [
    "orbit*",
    "orbit_plugins*",
    "tools*",
]

# Project-specific release metadata. The refs below mirror the revs pinned for the
# same packages in [tool.uv.sources].
[tool.orbit.release.backend-pins.megatron-core]
source = "https://github.com/Sphere-AI-Lab/Megatron-LM.git"
tested-ref = "00eb75b0c803b0fc8e5413d736529d9d3b82b6bd"

[tool.orbit.release.backend-pins.megatron-bridge]
source = "https://github.com/Sphere-AI-Lab/Megatron-Bridge.git"
tested-ref = "85c84cbc26d4c983a3d6e46c804f02e2a99af5a2"

[tool.orbit.release.backend-pins.sglang]
source = "https://github.com/Sphere-AI-Lab/sglang.git"
subdirectory = "python"
tested-ref = "9c83ae8be07cbb1eb6898ce608ae244e3be375b4"

[tool.isort]
profile = "black"  # black-compatible
line_length = 119  # should match black parameters
ignore_whitespace = true  # ignore whitespace for compatibility with the initial style
# Must agree with requires-python (3.12 only). An older target would make isort treat
# stdlib modules added after that version as THIRDPARTY (the default_section below).
py_version = 312
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
default_section = "THIRDPARTY"
extend_skip = ["setup.py"]
known_first_party = ["orbit", "orbit_plugins"]
known_third_party = ["megatron", "wandb", "ray", "transformers"]
src_paths = ["orbit", "orbit_plugins"]

[tool.black]
line_length = 119

[tool.ruff]
line-length = 320  # Follow-up
select = [
    "E",  # Pycodestyle Errors (Structural/Fundamental Errors like bad indentation)
    "F",  # Pyflakes (Core Errors: Unused imports, undefined names)
    "B",  # Flake8-Bugbear (Logic Bugs: Variable shadowing, dangerous default arguments)
    "UP",  # pyupgrade (Modernization and compatibility issues)
]
ignore = [
    "E402",  # module-import-not-at-top-of-file
    "E501",  # Line too long
    # Follow-up handle it later
]

[tool.pytest.ini_options]
# --durations=0 displays the execution time of all tests, slowest first.
# -vv will also display tests with duration = 0.00s
addopts = "--verbose --pyargs --durations=0 --strict-markers"  # always add these arguments to pytest
testpaths = ["./tests"]  # must be an explicit path to avoid importing another "tests" module
# directories to ignore when discovering tests
norecursedirs = [
    "external",
    "examples",
    "docs",
    "scripts",
    "tools",
    "tutorials",
    "*.egg",
    ".*",
    "_darcs",
    "build",
    "CVS",
    "dist",
    "venv",
    "{arch}",
]
asyncio_mode = "auto"
# NOTE: we do not use pytest markers currently. tests are mainly separated by folders, i.e. tests/fast vs tests/e2e