# CI workflow for pushes and pull requests targeting master.
#
# Jobs:
#   build-and-test-cpu  - builds and runs the CPU test program on Linux, macOS
#                         and Windows (with OpenMP; also without on non-Windows).
#   build-cuda-windows  - unpacks CUDA 12.4 redistributable archives on a
#                         Windows runner and compiles the CUDA targets.
#   build-cuda-fp32/bf16/fp16 - compile the CUDA targets for each PRECISION
#                         inside the nvidia/cuda 12.4.1 devel container.
#   build-cuda-kernels  - compiles everything under dev/cuda.
#
# The CUDA jobs only compile; none of them executes the resulting binaries.
name: Build and test

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build-and-test-cpu:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]

    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Windows is skipped here; the Windows build goes through the MSVC
      # environment set up in the "for Windows" compile step below.
      - name: Install OpenMP
        if: matrix.os != 'windows-latest'
        run: |
          if [ "${{ runner.os }}" == "Linux" ]; then
            sudo apt-get update && sudo apt-get install -y libomp-dev
          elif [ "${{ runner.os }}" == "macOS" ]; then
            brew install libomp
          fi

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run preprocessing
        run: python prepro_tinyshakespeare.py

      # NOTE(review): presumably this writes the files that test_gpt2 reads
      # later in the job - confirm against train_gpt2.py.
      - name: Train model
        run: python train_gpt2.py --device=cpu

      # Fetches a prebuilt GNU Make 4.4.1 archive; the later Windows steps
      # invoke it as make-4.4.1\dist\make.
      - name: Download Win32 Make.exe
        if: matrix.os == 'windows-latest'
        run: |
          $wc = New-Object System.Net.WebClient
          $url = 'https://github.com/maweil/MakeForWindows/releases/download/v4.4.1/make-bin-win64.zip'
          $output = './make-bin-win64.zip'
          $wc.DownloadFile($url, $output)

      - name: Unzip Win32 Makefile
        if: matrix.os == 'windows-latest'
        run: |
          unzip make-bin-win64.zip

      - name: Compile training and testing program
        if: matrix.os != 'windows-latest'
        run: make test_gpt2 train_gpt2

      # vcvars64.bat is called first so the MSVC toolchain environment is set
      # in the same cmd session that then runs make.
      - name: Compile training and testing program for Windows
        if: matrix.os == 'windows-latest'
        shell: cmd
        run: |
          call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvars64.bat"
          make-4.4.1\dist\make WIN_CI_BUILD=1 test_gpt2 train_gpt2

      - name: Execute testing program (With OpenMP)
        if: matrix.os != 'windows-latest'
        run: OMP_NUM_THREADS=8 ./test_gpt2

      # The build output is named test_gpt2 (no extension); it is copied to
      # test_gpt2.exe before being run from cmd.
      - name: Execute Windows testing program (With OpenMP)
        if: matrix.os == 'windows-latest'
        shell: cmd
        run: |
          copy test_gpt2 test_gpt2.exe
          test_gpt2.exe

      # Rebuild with NO_OMP=1 and run again to cover the non-OpenMP path
      # (non-Windows only).
      - name: Compile training and testing program without OpenMP
        if: matrix.os != 'windows-latest'
        run: NO_OMP=1 make test_gpt2 train_gpt2

      - name: Execute testing program (No OpenMP)
        if: matrix.os != 'windows-latest'
        run: ./test_gpt2

  build-cuda-windows:
    runs-on: windows-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download Win32 Make.exe
        run: |
          $wc = New-Object System.Net.WebClient
          $url = 'https://github.com/maweil/MakeForWindows/releases/download/v4.4.1/make-bin-win64.zip'
          $output = './make-bin-win64.zip'
          $wc.DownloadFile($url, $output)

      - name: Unzip Win32 Makefile
        run: |
          unzip make-bin-win64.zip

      # Downloads the individual CUDA 12.4 redistributable archives, unzips
      # them under the v12.4 directory, then xcopy-merges the contents of each
      # extracted "*-archive" folder into the v12.4 root.
      - name: Install Cuda Toolkit 12.4 on Windows
        run: |
          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
          choco install unzip -y
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y

      # Default installation path for CUDA Toolkit is C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4
      #
      # Appends the toolkit's bin and libnvvp directories to GITHUB_PATH and
      # sets CUDA_PATH / CUDA_PATH_V12_4 through GITHUB_ENV so that later
      # steps see them. PowerShell does not treat backslash as an escape
      # character, so every path is written with single backslashes.
      - name: Add Path
        run: |
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

      # Compile-only: builds the fp32 and default CUDA targets with the
      # downloaded make after loading the MSVC environment.
      - name: Build Cuda targets
        shell: cmd
        working-directory: ${{ github.workspace }}
        run: |
          call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvars64.bat"
          make-4.4.1\dist\make -j WIN_CI_BUILD=1 train_gpt2fp32cu test_gpt2fp32cu test_gpt2cu train_gpt2cu profile_gpt2cu

  build-cuda-fp32:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build FP32 checkpoint
        run: make train_gpt2fp32cu test_gpt2fp32cu

      - name: Build FP32 precision
        run: PRECISION=FP32 make train_gpt2cu test_gpt2cu profile_gpt2cu

  build-cuda-bf16:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build project
        run: PRECISION=BF16 make test_gpt2cu train_gpt2cu profile_gpt2cu

  build-cuda-fp16:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build project
        run: PRECISION=FP16 make test_gpt2cu train_gpt2cu profile_gpt2cu

  build-cuda-kernels:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # No sudo here: the step runs apt-get directly inside the container.
      - name: Install OpenMP and OpenMPI
        run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev

      - name: Build project
        run: make -j4 -C dev/cuda