FROM nvidia/cuda:12.1.0-devel-ubuntu20.04

ARG PYTHON_VERSION=3.11
ARG WITH_TORCHVISION=0
ENV DEBIAN_FRONTEND=noninteractive

# Some dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential cmake git curl ca-certificates \
    software-properties-common bzip2 libffi-dev libgomp1 libssl-dev
# For MKL, add mkl mkl-include

RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh
RUN ~/miniconda.sh -b -p /opt/conda && rm ~/miniconda.sh
RUN /opt/conda/bin/conda install -y python=$PYTHON_VERSION nomkl numpy pyyaml scipy ipython ninja cython typing && \
    /opt/conda/bin/conda install -y -c pytorch magma-cuda110 && \
    /opt/conda/bin/conda clean -ya
ENV PATH=/opt/conda/bin:$PATH

# Remove nccl, etc.; we still need cufft, cublas, curand, cusolver
RUN apt-get remove -y libnccl-dev libnccl2 libnpp-11-0 --allow-change-held-packages

RUN git clone -b 'v2.1.0' --single-branch --depth 1 https://github.com/pytorch/pytorch.git
WORKDIR /pytorch
# # 2023-08-16 nightly release
# RUN git init \
#     && git remote add origin https://github.com/pytorch/pytorch.git \
#     && git fetch --depth 1 origin 3af011b858f5e5c40fd8e9d41fa7f31a928b3b47 \
#     && git checkout 3af011b858f5e5c40fd8e9d41fa7f31a928b3b47
RUN git submodule update --init --recursive
RUN pip install -r requirements.txt
RUN pip install pybind11

# Build PyTorch for the latest archs only (makes a smaller image):
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6"
# 8.0: A100, A30 only; L40: 8.9, H100: 9.0
#ENV TORCH_CUDA_ARCH_LIST="8.0"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
#ENV BUILD_SHARED_LIBS=OFF
ENV DEBUG=0
ENV USE_CUDNN=0
ENV USE_OPENCV=0
ENV USE_NCCL=1
ENV BUILD_TEST=0
ENV USE_FFMPEG=0
ENV USE_MKLDNN=0
ENV USE_DISTRIBUTED=1
ENV BUILD_CAFFE2=0
ENV BUILD_CAFFE2_OPS=0
ENV CAFFE2_USE_CUDA=0
ENV MAX_JOBS=64
ENV BLAS=Eigen
# conda lives at /opt/conda; ENV does not expand $(...) command substitutions,
# so spell the prefix out instead of "$(dirname $(which conda))/../"
ENV CMAKE_PREFIX_PATH=/opt/conda

# new
ENV USE_CUPTI_SO=0
ENV USE_RCCL=0
# unclear if IBVERBS is used/consumed
ENV USE_IBVERBS=0
ENV USE_KINETO=0
ENV BUILD_LIBTORCH_CPU_WITH_DEBUG=0
ENV USE_NVRTC=0

# more new
ENV USE_ITT=0
# NNPACK is for CPU or mobile neural nets
ENV USE_NNPACK=0
ENV USE_QNNPACK=0
ENV USE_PYTORCH_QNNPACK=0
ENV USE_XNNPACK=0

# "also sets up special cufft linkage":
# essentially bundles cufft and cusparse, worse overall
# ENV ATEN_STATIC_CUDA=1
# ENV USE_CUDA_STATIC_LINK=1

# INTERN doesn't seem to propagate?
ENV INTERN_DISABLE_ONNX=1
ENV ONNX_ML=0
#ENV BUILD_PYTHON=OFF
ENV BUILD_NVFUSER=0

# this makes sure the git hash isn't part of the version
ENV PYTORCH_BUILD_VERSION="2.1.0"
ENV PYTORCH_BUILD_NUMBER=0

# flags that no longer exist
# ENV USE_MKL=0
# ENV BUILD_CAFFE2_MOBILE=0
# ENV USE_NVTX=0

# RUN pip install magic-wormhole && apt-get install -yy zip patchelf
# COPY ./build.sh /build.sh
# ENTRYPOINT ["/bin/bash", "/build.sh"]

RUN pip wheel -vv --debug . 2>&1 | tee compile-log

RUN apt-get install -yy zip patchelf && \
    patchelf --remove-needed libcurand.so.10 torch/lib/libtorch_cuda.so && \
    patchelf --remove-needed libcurand.so.10 torch/lib/libtorch_global_deps.so
#RUN zip -o ./torch-*.whl torch/lib/libtorch_{cuda,global_deps}.so

# nvrtc is unfortunately required by mlc/tvm even though there shouldn't be any runtime compilation irl
RUN cp /usr/local/cuda/lib64/libcublas.so.12 \
    /usr/local/cuda/lib64/libcublasLt.so.12 \
    /usr/local/cuda/lib64/libcudart.so.12 \
    /usr/local/cuda/lib64/libnvToolsExt.so.1 \
    /usr/local/cuda/lib64/libnvrtc.so.12 \
    /usr/local/cuda/lib64/libcufft.so.11 \
    /usr/local/cuda/lib64/libcusparse.so.12 \
    /usr/local/cuda/lib64/libnvJitLink.so.12 \
    /usr/lib/x86_64-linux-gnu/libgomp.so.1 \
    torch/lib
RUN bash -c 'zip -8 -o ./torch-*.whl torch/lib/lib{cu{dart,blas{,Lt},fft,sparse},nv{rtc,ToolsExt,JitLink},gomp,torch_{cuda,global_deps}}.so*'

# once the image is built, just copy the wheel over
CMD cp torch-*.whl /output/
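
# Example usage (a sketch; the image tag and host output directory below are arbitrary
# names, not part of this Dockerfile):
#   docker build -t pytorch-wheel-builder .
#   docker run --rm -v "$PWD/out:/output" pytorch-wheel-builder
# The CMD above then copies the built torch-*.whl into the mounted /output directory.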