Skip to content

Commit 9c24af4

Browse files
authored
Making required changes for CUDA 13 support. (#86)
* Making required changes for CUDA 13 support.
  Signed-off-by: Josh Romero <joshr@nvidia.com>
* Formatting fixes.
  Signed-off-by: Josh Romero <joshr@nvidia.com>
* Remove CCCL directory modification from CMake files.
  Signed-off-by: Josh Romero <joshr@nvidia.com>

---------

Signed-off-by: Josh Romero <joshr@nvidia.com>
1 parent 001ef17 commit 9c24af4

3 files changed

Lines changed: 44 additions & 30 deletions

File tree

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,9 @@ project(cudecomp LANGUAGES ${LANGS})
4444

4545
# Set up CUDA compute capabilities by CUDA version. Users can override defaults with CUDECOMP_CUDA_CC_LIST
4646
if (NOT CUDECOMP_CUDA_CC_LIST)
47-
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
47+
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
48+
set(CUDECOMP_CUDA_CC_LIST_DEFAULTS "80;90;100")
49+
elseif (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
4850
set(CUDECOMP_CUDA_CC_LIST_DEFAULTS "70;80;90;100")
4951
elseif (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.8)
5052
set(CUDECOMP_CUDA_CC_LIST_DEFAULTS "70;80;90")

include/internal/cuda_wrap.h

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,25 @@
2222
#include <cudaTypedefs.h>
2323
#endif
2424

25+
#define DECLARE_CUDA_PFN(symbol, version) PFN_##symbol##_v##version pfn_##symbol = nullptr
26+
2527
namespace cudecomp {
2628

2729
struct cuFunctionTable {
2830
#if CUDART_VERSION >= 11030
29-
PFN_cuDeviceGet pfn_cuDeviceGet = nullptr;
30-
PFN_cuDeviceGetAttribute pfn_cuDeviceGetAttribute = nullptr;
31-
PFN_cuGetErrorString pfn_cuGetErrorString = nullptr;
32-
PFN_cuMemAddressFree pfn_cuMemAddressFree = nullptr;
33-
PFN_cuMemAddressReserve pfn_cuMemAddressReserve = nullptr;
34-
PFN_cuMemCreate pfn_cuMemCreate = nullptr;
35-
PFN_cuMemGetAddressRange pfn_cuMemGetAddressRange = nullptr;
36-
PFN_cuMemGetAllocationGranularity pfn_cuMemGetAllocationGranularity = nullptr;
37-
PFN_cuMemMap pfn_cuMemMap = nullptr;
38-
PFN_cuMemRetainAllocationHandle pfn_cuMemRetainAllocationHandle = nullptr;
39-
PFN_cuMemRelease pfn_cuMemRelease = nullptr;
40-
PFN_cuMemSetAccess pfn_cuMemSetAccess = nullptr;
41-
PFN_cuMemUnmap pfn_cuMemUnmap = nullptr;
31+
DECLARE_CUDA_PFN(cuDeviceGet, 2000);
32+
DECLARE_CUDA_PFN(cuDeviceGetAttribute, 2000);
33+
DECLARE_CUDA_PFN(cuGetErrorString, 6000);
34+
DECLARE_CUDA_PFN(cuMemAddressFree, 10020);
35+
DECLARE_CUDA_PFN(cuMemAddressReserve, 10020);
36+
DECLARE_CUDA_PFN(cuMemCreate, 10020);
37+
DECLARE_CUDA_PFN(cuMemGetAddressRange, 3020);
38+
DECLARE_CUDA_PFN(cuMemGetAllocationGranularity, 10020);
39+
DECLARE_CUDA_PFN(cuMemMap, 10020);
40+
DECLARE_CUDA_PFN(cuMemRetainAllocationHandle, 11000);
41+
DECLARE_CUDA_PFN(cuMemRelease, 10020);
42+
DECLARE_CUDA_PFN(cuMemSetAccess, 10020);
43+
DECLARE_CUDA_PFN(cuMemUnmap, 10020);
4244
#endif
4345
};
4446

@@ -48,4 +50,6 @@ void initCuFunctionTable();
4850

4951
} // namespace cudecomp
5052

53+
#undef DECLARE_CUDA_PFN
54+
5155
#endif // CUDECOMP_CUDA_WRAP_H

src/cuda_wrap.cc

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,23 @@
2121
#include "internal/cuda_wrap.h"
2222
#include "internal/exceptions.h"
2323

24-
#if CUDART_VERSION >= 12000
25-
#define LOAD_SYM(symbol) \
24+
#if CUDART_VERSION >= 13000
25+
#define LOAD_SYM(symbol, version) \
26+
do { \
27+
cudaDriverEntryPointQueryResult driverStatus = cudaDriverEntryPointSymbolNotFound; \
28+
CHECK_CUDA(cudaGetDriverEntryPointByVersion(#symbol, (void**)(&cuFnTable.pfn_##symbol), version, \
29+
cudaEnableDefault, &driverStatus)); \
30+
if (driverStatus != cudaDriverEntryPointSuccess) { THROW_CUDA_ERROR("cudaGetDriverEntryPointByVersion failed."); } \
31+
} while (false)
32+
#elif CUDART_VERSION >= 12000
33+
#define LOAD_SYM(symbol, version) \
2634
do { \
2735
cudaDriverEntryPointQueryResult driverStatus = cudaDriverEntryPointSymbolNotFound; \
2836
CHECK_CUDA(cudaGetDriverEntryPoint(#symbol, (void**)(&cuFnTable.pfn_##symbol), cudaEnableDefault, &driverStatus)); \
2937
if (driverStatus != cudaDriverEntryPointSuccess) { THROW_CUDA_ERROR("cudaGetDriverEntryPoint failed."); } \
3038
} while (false)
3139
#else
32-
#define LOAD_SYM(symbol) \
40+
#define LOAD_SYM(symbol, version) \
3341
do { \
3442
CHECK_CUDA(cudaGetDriverEntryPoint(#symbol, (void**)(&cuFnTable.pfn_##symbol), cudaEnableDefault)); \
3543
} while (false)
@@ -41,19 +49,19 @@ cuFunctionTable cuFnTable; // global table of required CUDA driver functions
4149

4250
void initCuFunctionTable() {
4351
#if CUDART_VERSION >= 11030
44-
LOAD_SYM(cuDeviceGet);
45-
LOAD_SYM(cuDeviceGetAttribute);
46-
LOAD_SYM(cuGetErrorString);
47-
LOAD_SYM(cuMemAddressFree);
48-
LOAD_SYM(cuMemAddressReserve);
49-
LOAD_SYM(cuMemCreate);
50-
LOAD_SYM(cuMemGetAddressRange);
51-
LOAD_SYM(cuMemGetAllocationGranularity);
52-
LOAD_SYM(cuMemMap);
53-
LOAD_SYM(cuMemRetainAllocationHandle);
54-
LOAD_SYM(cuMemRelease);
55-
LOAD_SYM(cuMemSetAccess);
56-
LOAD_SYM(cuMemUnmap);
52+
LOAD_SYM(cuDeviceGet, 2000);
53+
LOAD_SYM(cuDeviceGetAttribute, 2000);
54+
LOAD_SYM(cuGetErrorString, 6000);
55+
LOAD_SYM(cuMemAddressFree, 10020);
56+
LOAD_SYM(cuMemAddressReserve, 10020);
57+
LOAD_SYM(cuMemCreate, 10020);
58+
LOAD_SYM(cuMemGetAddressRange, 3020);
59+
LOAD_SYM(cuMemGetAllocationGranularity, 10020);
60+
LOAD_SYM(cuMemMap, 10020);
61+
LOAD_SYM(cuMemRetainAllocationHandle, 11000);
62+
LOAD_SYM(cuMemRelease, 10020);
63+
LOAD_SYM(cuMemSetAccess, 10020);
64+
LOAD_SYM(cuMemUnmap, 10020);
5765
#endif
5866
}
5967

0 commit comments

Comments
 (0)