From ae924d1754075f5a1be81437b3db75094b031d78 Mon Sep 17 00:00:00 2001
From: Josh Romero
Date: Mon, 15 Sep 2025 14:42:04 -0700
Subject: [PATCH 1/3] Making required changes for CUDA 13 support.

Signed-off-by: Josh Romero
---
Reviewer notes (free text between the "---" marker and the diffstat; not part
of the commit message):

* CMakeLists.txt: adds a branch for CMAKE_CUDA_COMPILER_VERSION >= 13.0 whose
  default CUDECOMP_CUDA_CC_LIST_DEFAULTS is "80;90;100" (the >= 12.8 branch
  keeps "70;80;90;100"), and appends ${NVHPC_CUDA_INCLUDE_DIR}/cccl to the
  include directories of target cudecomp.
* include/internal/cuda_wrap.h: every cuFunctionTable member is now declared
  through DECLARE_CUDA_PFN(symbol, version), which expands to
  "PFN_<symbol>_v<version> pfn_<symbol> = nullptr". The macro is #undef'd at
  the end of the header, so it is not left defined for code that follows the
  include.
* src/cuda_wrap.cc: LOAD_SYM gains a "version" argument. Only the
  CUDART_VERSION >= 13000 variant uses it (forwarded to
  cudaGetDriverEntryPointByVersion); the two older variants accept and ignore
  it and still call cudaGetDriverEntryPoint.
* The per-symbol version numbers are written twice (DECLARE_CUDA_PFN in the
  header, LOAD_SYM in the source). LOAD_SYM stores through a (void**) cast, so
  the compiler cannot detect a mismatch between the two lists; change them
  together.
* The pre-12000 LOAD_SYM variant only checks the CHECK_CUDA result; it has no
  driverStatus check like the other two variants.
* NOTE(review): the "#include" context line in the cuda_wrap.h hunk has no
  header name and the From:/Signed-off-by: lines have no address. It looks
  like angle-bracketed text was stripped from this copy - restore it from the
  original patch before applying.
* NOTE(review): line breaks, indentation and blank context lines in this copy
  were re-inserted to match the hunk line counts; confirm exact whitespace
  against the repository (or apply with whitespace tolerance).

 CMakeLists.txt               |  5 ++++-
 include/internal/cuda_wrap.h | 30 +++++++++++++++------------
 src/cuda_wrap.cc             | 40 +++++++++++++++++++++---------------
 3 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 68b8b74..9aee092 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,9 @@ project(cudecomp LANGUAGES ${LANGS})
 
 # Set up CUDA compute capabilities by CUDA version. Users can override defaults with CUDECOMP_CUDA_CC_LIST
 if (NOT CUDECOMP_CUDA_CC_LIST)
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
+  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
+    set(CUDECOMP_CUDA_CC_LIST_DEFAULTS "80;90;100")
+  elseif (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
     set(CUDECOMP_CUDA_CC_LIST_DEFAULTS "70;80;90;100")
   elseif (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.8)
     set(CUDECOMP_CUDA_CC_LIST_DEFAULTS "70;80;90")
@@ -174,6 +176,7 @@ target_include_directories(cudecomp
   ${CMAKE_CURRENT_SOURCE_DIR}/include
   ${MPI_CXX_INCLUDE_DIRS}
   ${NVHPC_CUDA_INCLUDE_DIR}
+  ${NVHPC_CUDA_INCLUDE_DIR}/cccl
   ${NVHPC_CUTENSOR_INCLUDE_DIR}
   ${NCCL_INCLUDE_DIR}
   )
diff --git a/include/internal/cuda_wrap.h b/include/internal/cuda_wrap.h
index 8a693b7..d99ac75 100644
--- a/include/internal/cuda_wrap.h
+++ b/include/internal/cuda_wrap.h
@@ -22,23 +22,25 @@
 #include
 #endif
 
+#define DECLARE_CUDA_PFN(symbol, version) PFN_##symbol##_v##version pfn_##symbol = nullptr
+
 namespace cudecomp {
 
 struct cuFunctionTable {
 #if CUDART_VERSION >= 11030
-  PFN_cuDeviceGet pfn_cuDeviceGet = nullptr;
-  PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute = nullptr;
-  PFN_cuGetErrorString pfn_cuGetErrorString = nullptr;
-  PFN_cuMemAddressFree pfn_cuMemAddressFree = nullptr;
-  PFN_cuMemAddressReserve pfn_cuMemAddressReserve = nullptr;
-  PFN_cuMemCreate pfn_cuMemCreate = nullptr;
-  PFN_cuMemGetAddressRange pfn_cuMemGetAddressRange = nullptr;
-  PFN_cuMemGetAllocationGranularity pfn_cuMemGetAllocationGranularity = nullptr;
-  PFN_cuMemMap pfn_cuMemMap = nullptr;
-  PFN_cuMemRetainAllocationHandle pfn_cuMemRetainAllocationHandle = nullptr;
-  PFN_cuMemRelease pfn_cuMemRelease = nullptr;
-  PFN_cuMemSetAccess pfn_cuMemSetAccess = nullptr;
-  PFN_cuMemUnmap pfn_cuMemUnmap = nullptr;
+  DECLARE_CUDA_PFN(cuDeviceGet, 2000);
+  DECLARE_CUDA_PFN(cuDeviceGetAttribute, 2000);
+  DECLARE_CUDA_PFN(cuGetErrorString, 6000);
+  DECLARE_CUDA_PFN(cuMemAddressFree, 10020);
+  DECLARE_CUDA_PFN(cuMemAddressReserve, 10020);
+  DECLARE_CUDA_PFN(cuMemCreate, 10020);
+  DECLARE_CUDA_PFN(cuMemGetAddressRange, 3020);
+  DECLARE_CUDA_PFN(cuMemGetAllocationGranularity, 10020);
+  DECLARE_CUDA_PFN(cuMemMap, 10020);
+  DECLARE_CUDA_PFN(cuMemRetainAllocationHandle, 11000);
+  DECLARE_CUDA_PFN(cuMemRelease, 10020);
+  DECLARE_CUDA_PFN(cuMemSetAccess, 10020);
+  DECLARE_CUDA_PFN(cuMemUnmap, 10020);
 #endif
 };
 
@@ -48,4 +50,6 @@ void initCuFunctionTable();
 
 } // namespace cudecomp
 
+#undef DECLARE_CUDA_PFN
+
 #endif // CUDECOMP_CUDA_WRAP_H
diff --git a/src/cuda_wrap.cc b/src/cuda_wrap.cc
index dcacfe1..dc82da8 100644
--- a/src/cuda_wrap.cc
+++ b/src/cuda_wrap.cc
@@ -21,15 +21,23 @@
 #include "internal/cuda_wrap.h"
 #include "internal/exceptions.h"
 
-#if CUDART_VERSION >= 12000
-#define LOAD_SYM(symbol) \
+#if CUDART_VERSION >= 13000
+#define LOAD_SYM(symbol, version) \
+  do { \
+    cudaDriverEntryPointQueryResult driverStatus = cudaDriverEntryPointSymbolNotFound; \
+    CHECK_CUDA(cudaGetDriverEntryPointByVersion(#symbol, (void**)(&cuFnTable.pfn_##symbol), version, \
+                                                cudaEnableDefault,&driverStatus)); \
+    if (driverStatus != cudaDriverEntryPointSuccess) { THROW_CUDA_ERROR("cudaGetDriverEntryPointByVersion failed."); } \
+  } while (false)
+#elif CUDART_VERSION >= 12000
+#define LOAD_SYM(symbol, version) \
   do { \
     cudaDriverEntryPointQueryResult driverStatus = cudaDriverEntryPointSymbolNotFound; \
     CHECK_CUDA(cudaGetDriverEntryPoint(#symbol, (void**)(&cuFnTable.pfn_##symbol), cudaEnableDefault, &driverStatus)); \
     if (driverStatus != cudaDriverEntryPointSuccess) { THROW_CUDA_ERROR("cudaGetDriverEntryPoint failed."); } \
   } while (false)
 #else
-#define LOAD_SYM(symbol) \
+#define LOAD_SYM(symbol, version) \
   do { \
     CHECK_CUDA(cudaGetDriverEntryPoint(#symbol, (void**)(&cuFnTable.pfn_##symbol), cudaEnableDefault)); \
   } while (false)
@@ -41,19 +49,19 @@ cuFunctionTable cuFnTable; // global table of required CUDA driver functions
 
 void initCuFunctionTable() {
 #if CUDART_VERSION >= 11030
-  LOAD_SYM(cuDeviceGet);
-  LOAD_SYM(cuDeviceGetAttribute);
-  LOAD_SYM(cuGetErrorString);
-  LOAD_SYM(cuMemAddressFree);
-  LOAD_SYM(cuMemAddressReserve);
-  LOAD_SYM(cuMemCreate);
-  LOAD_SYM(cuMemGetAddressRange);
-  LOAD_SYM(cuMemGetAllocationGranularity);
-  LOAD_SYM(cuMemMap);
-  LOAD_SYM(cuMemRetainAllocationHandle);
-  LOAD_SYM(cuMemRelease);
-  LOAD_SYM(cuMemSetAccess);
-  LOAD_SYM(cuMemUnmap);
+  LOAD_SYM(cuDeviceGet, 2000);
+  LOAD_SYM(cuDeviceGetAttribute, 2000);
+  LOAD_SYM(cuGetErrorString, 6000);
+  LOAD_SYM(cuMemAddressFree, 10020);
+  LOAD_SYM(cuMemAddressReserve, 10020);
+  LOAD_SYM(cuMemCreate, 10020);
+  LOAD_SYM(cuMemGetAddressRange, 3020);
+  LOAD_SYM(cuMemGetAllocationGranularity, 10020);
+  LOAD_SYM(cuMemMap, 10020);
+  LOAD_SYM(cuMemRetainAllocationHandle, 11000);
+  LOAD_SYM(cuMemRelease, 10020);
+  LOAD_SYM(cuMemSetAccess, 10020);
+  LOAD_SYM(cuMemUnmap, 10020);
 #endif
 }
 
From d22d6dc660618ee9bf4fa55c0300063455a2dcf0 Mon Sep 17 00:00:00 2001
From: Josh Romero
Date: Mon, 15 Sep 2025 15:07:22 -0700
Subject: [PATCH 2/3] Formatting fixes.

Signed-off-by: Josh Romero
---
Reviewer notes (free text; not part of the commit message):

* Whitespace-only change: inserts a space after the comma in
  "cudaEnableDefault,&driverStatus" inside the CUDART_VERSION >= 13000
  LOAD_SYM macro that patch 1/3 introduced.

 src/cuda_wrap.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cuda_wrap.cc b/src/cuda_wrap.cc
index dc82da8..a24dca5 100644
--- a/src/cuda_wrap.cc
+++ b/src/cuda_wrap.cc
@@ -26,7 +26,7 @@
   do { \
     cudaDriverEntryPointQueryResult driverStatus = cudaDriverEntryPointSymbolNotFound; \
     CHECK_CUDA(cudaGetDriverEntryPointByVersion(#symbol, (void**)(&cuFnTable.pfn_##symbol), version, \
-                                                cudaEnableDefault,&driverStatus)); \
+                                                cudaEnableDefault, &driverStatus)); \
     if (driverStatus != cudaDriverEntryPointSuccess) { THROW_CUDA_ERROR("cudaGetDriverEntryPointByVersion failed."); } \
   } while (false)
 #elif CUDART_VERSION >= 12000
From 8b65110eed434e3c6856ca9670d7750c78457e63 Mon Sep 17 00:00:00 2001
From: Josh Romero
Date: Mon, 6 Oct 2025 15:36:14 -0700
Subject: [PATCH 3/3] Remove CCCL directory modification from CMake files.

Signed-off-by: Josh Romero
---
Reviewer notes (free text; not part of the commit message):

* Removes the ${NVHPC_CUDA_INCLUDE_DIR}/cccl include directory that patch 1/3
  added. After this patch the include-directory entries shown in this hunk
  are the same as before the series.

 CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9aee092..b06e54c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,7 +176,6 @@ target_include_directories(cudecomp
   ${CMAKE_CURRENT_SOURCE_DIR}/include
   ${MPI_CXX_INCLUDE_DIRS}
   ${NVHPC_CUDA_INCLUDE_DIR}
-  ${NVHPC_CUDA_INCLUDE_DIR}/cccl
   ${NVHPC_CUTENSOR_INCLUDE_DIR}
   ${NCCL_INCLUDE_DIR}
   )