cmake_minimum_required(VERSION 3.25.2)

set(MATX_VERSION 0.9.4)

# Used for config file generation: NOT_SUBPROJECT is ON only when PROJECT_NAME is
# still undefined at this point, i.e. no project() call has run before this file.
if(NOT DEFINED PROJECT_NAME)
    set(NOT_SUBPROJECT ON)
else()
    set(NOT_SUBPROJECT OFF)
endif()

# CMake 3.24 can auto-detect GPUs, but it's not standard on any distribution. For now, rapids-cmake has a utility
# function to do it, so we grab that as a dependency. The user can optionally override GPU_ARCH to specify
# their own. We check if rapids-cmake exists for projects that already include it so we don't have conflicting
# directories
if(NOT DEFINED rapids-cmake-dir)
    include(FetchContent)
    # Tell FetchContent to just use the local copy of rapids-cmake:
    FetchContent_Declare(rapids-cmake SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cmake/rapids-cmake")
    # Tell FetchContent to download remote copy of rapids-cmake:
    #FetchContent_Declare(rapids-cmake URL https://github.com/rapidsai/rapids-cmake/archive/refs/heads/branch-24.12.zip)
    FetchContent_MakeAvailable(rapids-cmake)
else()
    # The include() commands below search the module path for the corresponding .cmake files
    list(APPEND CMAKE_MODULE_PATH "${rapids-cmake-dir}")
endif()

include(${rapids-cmake-dir}/cpm/cccl.cmake)
include(${rapids-cmake-dir}/cpm/nvbench.cmake)
include(rapids-cmake)
include(rapids-cpm)
include(rapids-cuda)
include(rapids-export)
include(rapids-find)

# Pin dependency versions using the project's own versions.json
rapids_cpm_init(OVERRIDE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/versions.json")

# Default to the GPU(s) present on the build machine unless the user chose architectures
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES "native")
    message(STATUS "Using native GPU architecture since CMAKE_CUDA_ARCHITECTURES not defined")
endif()

# This needs to go after rapids initialization otherwise we get a rapids_export_parse_version error
project(MATX
        LANGUAGES CUDA CXX
        DESCRIPTION "A modern and efficient header-only C++ library for numerical computing on GPU"
        VERSION ${MATX_VERSION}
        HOMEPAGE_URL "https://github.com/NVIDIA/MatX")

# Fallback when the variable is defined but evaluates to false (e.g. empty or OFF)
if (NOT CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES "70;80")
endif()

message(STATUS "Using GPU architectures ${CMAKE_CUDA_ARCHITECTURES}")

rapids_cmake_write_version_file(include/matx/version_config.h)

# Command line options.
# Signature is option(<variable> "<help_text>" [value]); several entries previously passed
# the value before the help text, which stored "OFF" as the help string. The effective
# defaults are unchanged.
option(MATX_BUILD_EXAMPLES "Build examples" OFF)
option(MATX_BUILD_TESTS "Build unit tests" OFF)
option(MATX_BUILD_BENCHMARKS "Build benchmarks" OFF)
option(MATX_NVTX_FLAGS "Enable NVTX Macros" OFF)
option(MATX_BUILD_DOCS "Build documentation" OFF)
option(MATX_BUILD_32_BIT "Build with 32-bit indexing support" OFF)
option(MATX_MULTI_GPU "Multi-GPU support" OFF)
option(MATX_EN_VISUALIZATION "Enable visualization support" OFF)
#option(MATX_EN_CUTLASS OFF)
option(MATX_EN_CUTENSOR "Enable cuTENSOR support" OFF)
option(MATX_EN_CUDSS "Enable cuDSS support" OFF)
option(MATX_EN_FILEIO "Enable file I/O support" OFF)
option(MATX_EN_NVTIFF "Enable nvTiff support" OFF)
option(MATX_EN_X86_FFTW "Enable x86 FFTW support" OFF)
option(MATX_EN_NVPL "Enable NVIDIA Performance Libraries for optimized ARM CPU support" OFF)
option(MATX_EN_BLIS "Enable BLIS support" OFF)
option(MATX_EN_OPENBLAS "Enable OpenBLAS (BLAS + LAPACK) support" OFF)
option(MATX_DISABLE_CUB_CACHE "Disable caching for CUB allocations" ON)
option(MATX_EN_COVERAGE "Enable code coverage reporting" OFF)
option(MATX_EN_COMPLEX_OP_NAN_CHECKS "Enable full NaN/Inf handling for complex multiplication and division" OFF)
option(MATX_EN_CUDA_LINEINFO "Enable line information for CUDA kernels via -lineinfo nvcc flag" OFF)
option(MATX_EN_EXTENDED_LAMBDA "Enable extended lambda support for device/host lambdas" ON)
option(MATX_EN_JIT "Enable CUDA JIT compilation support via NVRTC" OFF)
option(MATX_EN_MATHDX "Enable MathDx support for kernel fusion" OFF)
option(MATX_EN_UNSAFE_ALIAS_DETECTION "Enable aliased memory detection" OFF)
option(MATX_DISABLE_EXCEPTIONS "Disable C++ exceptions and log errors instead" OFF)

set(MATX_EN_PYBIND11 OFF CACHE BOOL "Enable pybind11 support")

# Optional install locations for externally provided libraries
set(cudss_DIR "" CACHE PATH "Directory where cuDSS is installed.")
set(cutensor_DIR "" CACHE PATH "Directory where cuTENSOR is installed.")
set(cutensornet_DIR "" CACHE PATH "Directory where cuTensorNet is installed.")
set(eigen_DIR "" CACHE PATH "Directory where Eigen is installed")
set(blas_DIR "" CACHE PATH "Directory where a BLAS library (NVPL/OpenBLAS/BLIS) is installed (install prefix)")

# Enable compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Documentation is configured as a separate project living in docs_input/
if (MATX_BUILD_DOCS)
    project(MATX_DOCS VERSION ${MATX_VERSION})
    add_subdirectory(docs_input)
endif()

# Check for <format> header availability (required for logging support)
include(CheckIncludeFileCXX)
set(CMAKE_REQUIRED_FLAGS "-std=c++20")
check_include_file_cxx("format" MATX_HAS_STD_FORMAT)
if (NOT MATX_HAS_STD_FORMAT)
    message(WARNING "C++20 <format> header not found. MatX logging will be disabled. "
                    "Consider using a newer compiler (GCC 13+ or Clang 15+) for full logging support.")
else()
    message(STATUS "C++20 <format> header found. MatX logging will be enabled.")
endif()

# Reject g++ older than 9. The previous test (NOT version VERSION_GREATER 8) let g++ 8.x
# through whenever the compiler reported a full version such as 8.4.0.
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
    # GCC_VERSION is kept defined in case other project files read it -- TODO confirm
    set(GCC_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
    if (GCC_VERSION VERSION_LESS 9)
        message(FATAL_ERROR "${PROJECT_NAME} requires g++ 9 or higher if using g++ host compiler")
    endif()
endif()

# CPM is required for all package management
include(public/cpm-cmake/cmake/CPM.cmake)

# Helper for selecting build type
include(cmake/BuildType.cmake)

rapids_find_package(
    CUDAToolkit REQUIRED
    BUILD_EXPORT_SET matx-exports
    INSTALL_EXPORT_SET matx-exports)

# Create our transitive target to pass build properties to external users and our own build environment
add_library(matx INTERFACE)
add_library(matx::matx ALIAS matx)

# NOTE(review): the generator expressions in this file were reduced to a bare "$" in the
# text under review. They are restored here as BUILD_INTERFACE/INSTALL_INTERFACE pairs;
# confirm the exact paths (in particular include/matx/kernels) and the cuda_std_20
# feature against version control.
target_include_directories(matx INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
                                          "$<INSTALL_INTERFACE:include>")
target_include_directories(matx INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/matx/kernels>"
                                          "$<INSTALL_INTERFACE:include/matx/kernels>")
target_compile_features(matx INTERFACE cxx_std_20 $<BUILD_INTERFACE:cuda_std_20>)

# Enforce the minimum CUDA toolkit version. (An earlier note here cited 11.2 as the
# minimum for async allocation; the check itself requires 11.5.)
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
    message(FATAL_ERROR "MatX requires CUDA 11.5 or higher. Please update before using.")
endif()

message(STATUS "Finding CCCL...")
rapids_cpm_cccl(
    BUILD_EXPORT_SET matx-exports
    INSTALL_EXPORT_SET matx-exports
)

target_link_libraries(matx INTERFACE CCCL::CCCL)

# Set flags for compiling tests faster (only for nvcc)
if (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
    set(MATX_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} --threads 0 -ftemplate-backtrace-limit=0 --extended-lambda)
endif()

# Hack because CMake doesn't have short-circuit evaluation
if (NOT CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR MATX_EN_CUDA_LINEINFO)
    # Propagate -lineinfo to all MatX consumers
    # NOTE(review): language condition restored as COMPILE_LANGUAGE:CUDA -- confirm
    target_compile_options(matx INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>)
endif()

# Enable extended lambda support for device/host lambdas (required for apply operator)
if (MATX_EN_EXTENDED_LAMBDA)
    target_compile_options(matx INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
endif()

# Set preferred compiler warning flags. nvc++ doesn't support most warnings
string(FIND "${CMAKE_CUDA_HOST_COMPILER}" "nvc++" IS_NVCPP)
if (NOT ${IS_NVCPP} GREATER -1)
    set(WARN_FLAGS  -Wall
                    -Wextra
                    -Wcast-align
                    -Wunused
                    -Wshadow
                    -Wno-unknown-pragmas
                    -Wnon-virtual-dtor)

    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
        set(WARN_FLAGS ${WARN_FLAGS}
                    -Wconversion
                    -Wmisleading-indentation
                    -Wduplicated-cond
                    -Wduplicated-branches
                    -Wlogical-op
                    -Wnull-dereference)
    endif()
endif()

# NOTE(review): the per-language conditions below (CUDA for the first two, CXX for the
# last) were restored from context -- confirm against version control.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
    # Was message((STATUS ...)): the extra parentheses kept STATUS from being the mode keyword
    message(STATUS "Using Clang compiler")
    # Workaround for clang bug: https://github.com/llvm/llvm-project/issues/58491
    set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CUDA>:-Wno-unused-command-line-argument>)
else()
    # Left unquoted on purpose: "-Werror" and "all-warnings" must stay two list items so
    # they reach the compiler as two separate arguments.
    set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CUDA>:-Werror all-warnings>)
endif()

set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CXX>:-Werror>)

# CUTLASS slows down compile times when used, so leave it as optional for now
# if (MATX_EN_CUTLASS)
#   include(cmake/GetCUTLASS.cmake)
#   set (CUTLASS_INC ${cutlass_SOURCE_DIR}/include/ ${cutlass_SOURCE_DIR}/tools/util/include/)
#   target_compile_definitions(matx INTERFACE MATX_ENABLE_CUTLASS=1)
# else()
#   set (CUTLASS_INC "")
#   target_compile_definitions(matx INTERFACE MATX_ENABLE_CUTLASS=0)
# endif()
# CUTLASS support is not maintained. Remove the option to avoid confusion

# Each simple feature toggle below forwards a same-named preprocessor definition to
# every consumer of the matx interface target.
if (MATX_NVTX_FLAGS)
    add_definitions(-DMATX_NVTX_FLAGS)
    target_compile_definitions(matx INTERFACE MATX_NVTX_FLAGS)
endif()

if (MATX_DISABLE_EXCEPTIONS)
    target_compile_definitions(matx INTERFACE MATX_DISABLE_EXCEPTIONS)
endif()

# Index width also selects which NVPL integer interface (lp64 / ilp64) is linked below
if (MATX_BUILD_32_BIT)
    set(MATX_NVPL_INT_TYPE "lp64")
    target_compile_definitions(matx INTERFACE MATX_INDEX_32_BIT)
else()
    set(MATX_NVPL_INT_TYPE "ilp64")
endif()

if (MATX_EN_UNSAFE_ALIAS_DETECTION)
    target_compile_definitions(matx INTERFACE MATX_EN_UNSAFE_ALIAS_DETECTION)
endif()

# Host support
if (MATX_EN_NVPL OR MATX_EN_X86_FFTW OR MATX_EN_BLIS OR MATX_EN_OPENBLAS)
    message(STATUS "Enabling OpenMP support")
    find_package(OpenMP REQUIRED)
    target_link_libraries(matx INTERFACE OpenMP::OpenMP_CXX)
    target_compile_options(matx INTERFACE ${OpenMP_CXX_FLAGS})
    target_compile_definitions(matx INTERFACE MATX_EN_OMP)

    # Count the enabled host BLAS back ends so we can warn when more than one is requested
    set(BLAS_FLAGS MATX_EN_NVPL MATX_EN_BLIS MATX_EN_OPENBLAS)
    set(ENABLED_BLAS_COUNT 0)
    foreach(BLAS_FLAG IN LISTS BLAS_FLAGS)
        if(${BLAS_FLAG})
            math(EXPR ENABLED_BLAS_COUNT "${ENABLED_BLAS_COUNT} + 1")
        endif()
    endforeach()
    if(ENABLED_BLAS_COUNT GREATER 1)
        message(AUTHOR_WARNING "Multiple Host BLAS libraries (${ENABLED_BLAS_COUNT}) are enabled. Only 1 will be used.")
    endif()

    # NVPL takes precedence; FFTW/BLIS/OpenBLAS are only considered when it is off
    if (MATX_EN_NVPL)
        message(STATUS "Enabling NVPL library support for ARM CPUs with ${MATX_NVPL_INT_TYPE} interface")
        find_package(nvpl REQUIRED COMPONENTS fft blas lapack HINTS ${blas_DIR})
        if (NOT MATX_BUILD_32_BIT)
            target_compile_definitions(matx INTERFACE NVPL_ILP64)
        endif()
        target_compile_definitions(matx INTERFACE NVPL_LAPACK_COMPLEX_CUSTOM)
        target_link_libraries(matx INTERFACE nvpl::fftw nvpl::blas_${MATX_NVPL_INT_TYPE}_omp nvpl::lapack_${MATX_NVPL_INT_TYPE}_omp)
        target_compile_definitions(matx INTERFACE MATX_EN_NVPL)
    else()
        # FFTW
        if (MATX_EN_X86_FFTW)
            message(STATUS "Enabling x86 FFTW")
            find_library(FFTW_LIB fftw3 REQUIRED)
            find_library(FFTWF_LIB fftw3f REQUIRED)
            find_library(FFTW_OMP_LIB fftw3_omp REQUIRED)
            find_library(FFTWF_OMP_LIB fftw3f_omp REQUIRED)
            target_link_libraries(matx INTERFACE ${FFTW_LIB} ${FFTWF_LIB} ${FFTW_OMP_LIB} ${FFTWF_OMP_LIB})
            target_compile_definitions(matx INTERFACE MATX_EN_X86_FFTW)
        endif()

        # BLAS (BLIS wins over OpenBLAS when both are enabled)
        if (MATX_EN_BLIS)
            message(STATUS "Enabling BLIS")
            include(cmake/FindBLIS.cmake)
            target_link_libraries(matx INTERFACE BLIS::BLIS)
            target_compile_definitions(matx INTERFACE MATX_EN_BLIS)
        elseif(MATX_EN_OPENBLAS)
            message(STATUS "Enabling OpenBLAS")
            include(cmake/FindOpenBLAS.cmake)
            target_link_libraries(matx INTERFACE OpenBLAS::OpenBLAS)
            target_compile_definitions(matx INTERFACE MATX_EN_OPENBLAS)
        endif()
    endif()
endif()

if (MATX_DISABLE_CUB_CACHE)
    target_compile_definitions(matx INTERFACE MATX_DISABLE_CUB_CACHE)
endif()

if (MATX_EN_COMPLEX_OP_NAN_CHECKS)
    target_compile_definitions(matx INTERFACE MATX_EN_COMPLEX_OP_NAN_CHECKS)
endif()

if (MATX_EN_COVERAGE)
    target_compile_options(matx INTERFACE -fprofile-arcs -ftest-coverage)
    target_link_options(matx INTERFACE -lgcov --coverage)
endif()

if (MATX_EN_NVTIFF)
    # Add nvtiff support if installed; a missing library only disables the feature
    find_library(NVTIFF_LIBRARY nvtiff)
    if (NOT NVTIFF_LIBRARY)
        message(STATUS "Cannot find nvtiff library. Disabling MatX nvtiff features.")
    else()
        message(STATUS "Found nvtiff library at ${NVTIFF_LIBRARY}. Enabling MatX nvtiff features.")
        target_compile_definitions(matx INTERFACE MATX_ENABLE_NVTIFF)
    endif()
endif()

# Get the tensor libraries if we need them
if (MATX_EN_CUTENSOR)
    set(CUTENSORNET_VERSION 25.09.1.12)
    set(CUTENSOR_VERSION 2.3.1.0)

    include(cmake/FindcuTENSOR.cmake)
    include(cmake/FindcuTensorNet.cmake)
    target_compile_definitions(matx INTERFACE MATX_EN_CUTENSOR)

    target_link_libraries(matx INTERFACE cuTENSOR::cuTENSOR)
    target_link_libraries(matx INTERFACE cuTensorNet::cuTensorNet)

    # CUDA toolkit and most accompanying libraries like cuTENSOR use the old rpath instead of RUNPATH.
    # We switch to that format here for compatibility
    target_link_libraries(matx INTERFACE "-Wl,--disable-new-dtags")
endif()

# Enable JIT compilation support (MathDx implies JIT)
if (MATX_EN_JIT OR MATX_EN_MATHDX)
    message(STATUS "Enabling JIT compilation support via NVRTC")
    target_compile_definitions(matx INTERFACE MATX_EN_JIT)

    # Add NVRTC configuration as compiler definitions, using the first listed architecture.
    # NOTE(review): this can be the literal "native" when CMAKE_CUDA_ARCHITECTURES was
    # left at that value -- confirm the JIT code handles it.
    list(GET CMAKE_CUDA_ARCHITECTURES 0 NVRTC_CUDA_ARCH)
    # Strip -real or -virtual postfix if present
    string(REGEX REPLACE "-real$" "" NVRTC_CUDA_ARCH "${NVRTC_CUDA_ARCH}")
    string(REGEX REPLACE "-virtual$" "" NVRTC_CUDA_ARCH "${NVRTC_CUDA_ARCH}")

    target_compile_definitions(matx INTERFACE NVRTC_CUDA_ARCH="${NVRTC_CUDA_ARCH}")
    target_compile_definitions(matx INTERFACE NVRTC_CXX_STANDARD="${CMAKE_CXX_STANDARD}")

    # Link NVRTC library
    target_link_libraries(matx INTERFACE CUDA::nvrtc)
endif()

if (MATX_EN_MATHDX)
    set(MathDx_VERSION 25.06)
    set(MathDx_NANO 0)
    include(cmake/FindMathDx.cmake)

    target_compile_definitions(matx INTERFACE MATX_EN_MATHDX)

    # Link libmathdx if available
    if(TARGET libmathdx::libmathdx)
        target_link_libraries(matx INTERFACE libmathdx::libmathdx)
        message(STATUS "Linked libmathdx to matx target")
    endif()

    # Link mathdx components
    target_link_libraries(matx INTERFACE mathdx::cufftdx)
endif()

if (MATX_EN_CUDSS)
    set(cuDSS_VERSION 0.7.0.20)
    include(cmake/FindcuDSS.cmake)
    target_compile_definitions(matx INTERFACE MATX_EN_CUDSS)
    target_link_libraries(matx INTERFACE cuDSS::cuDSS)
endif()

if (MATX_MULTI_GPU)
    include(cmake/FindNvshmem.cmake)
    find_package(Nvshmem REQUIRED)
endif()

# Find python3 and pybind11 for generating unit tests and benchmarks
if (MATX_EN_FILEIO OR MATX_EN_VISUALIZATION OR MATX_EN_PYBIND11 OR MATX_BUILD_EXAMPLES OR MATX_BUILD_TESTS OR MATX_BUILD_BENCHMARKS)
    message(STATUS "Enabling pybind11 support")
    set(MATX_EN_PYBIND11 ON)
    target_compile_definitions(matx INTERFACE MATX_ENABLE_PYBIND11)
    target_compile_definitions(matx INTERFACE MATX_ENABLE_FILEIO)

    include(cmake/GetPyBind11.cmake)
    find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
    find_package(pybind11 REQUIRED)

    # Export pybind11 dependency for downstream users after make install
    rapids_export_package(INSTALL pybind11 matx-exports GLOBAL_TARGETS pybind11::embed pybind11::pybind11)
    rapids_export_package(BUILD pybind11 matx-exports GLOBAL_TARGETS pybind11::embed pybind11::pybind11)

    # Check for python libs
    include(cmake/CheckPythonLibs.cmake)
    check_python_libs("numpy")
    check_optional_python_libs("cupy")

    target_link_libraries(matx INTERFACE pybind11::embed)

    # Visualization requires Python libraries
    if (MATX_EN_VISUALIZATION)
        target_compile_definitions(matx INTERFACE MATX_ENABLE_VIZ)
        check_python_libs("plotly.express")
    endif()
endif()

# Add in all CUDA linker dependencies
target_link_libraries(matx INTERFACE    CUDA::cublas
                                        CUDA::cublasLt
                                        CUDA::cuda_driver
                                        CUDA::cudart
                                        CUDA::cufft
                                        CUDA::curand
                                        CUDA::cusolver
                                        CUDA::cusparse)

# The former CUDA::nvToolsExt fallback for CMake < 3.25.0 was unreachable: this file
# already requires CMake 3.25.2 via cmake_minimum_required().
target_link_libraries(matx INTERFACE CUDA::nvtx3)

# Build config files if the user isn't adding this as a subdirectory.
# At this point our transitive target
# should have all build properties needed based on the options passed in
if (NOT_SUBPROJECT)
    include(GNUInstallDirs)
    include(CMakePackageConfigHelpers)

    # Install the interface target, the public headers and the generated version header.
    # NOTE(review): the version header is generated as include/matx/version_config.h but
    # is installed directly into include/ -- confirm that layout is intended.
    install(
        TARGETS matx
        EXPORT matx-exports)
    install(
        DIRECTORY include/
        DESTINATION include)
    install(
        FILES "${CMAKE_BINARY_DIR}/include/matx/version_config.h"
        DESTINATION include)

    # Text handed to rapids_export() through its DOCUMENTATION argument
    set(doc_string [=[ Provide targets for MatX. [MatX](https://github.com/NVIDIA/MatX) provides a Python-like syntax for near-native speed numerical computing on NVIDIA GPUs. ]=])

    # install export targets
    rapids_export(
        INSTALL matx
        EXPORT_SET matx-exports
        GLOBAL_TARGETS matx
        NAMESPACE matx::
        DOCUMENTATION doc_string)

    # build export targets
    rapids_export(
        BUILD matx
        EXPORT_SET matx-exports
        GLOBAL_TARGETS matx
        NAMESPACE matx::
        DOCUMENTATION doc_string)
endif()

# Optional sub-builds, each gated by its own option

if (MATX_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif()

if (MATX_BUILD_BENCHMARKS)
    # nvbench has to be fetched before the benchmark directory is processed
    rapids_cpm_nvbench()
    add_subdirectory(bench)
endif()

if (MATX_BUILD_TESTS)
    # GoogleTest has to be fetched before the test directory is processed
    include(cmake/GetGTest.cmake)
    add_subdirectory(test)
endif()