From cd8cee7b599303fd70ac6be69431598c5626efdc Mon Sep 17 00:00:00 2001 From: David Schneller Date: Sun, 5 Oct 2025 00:13:32 +0200 Subject: [PATCH 1/3] CUDA 13 fixes --- interfaces/cuda/Control.cu | 2 -- interfaces/cuda/Copy.cu | 17 ++++++++++++++++- interfaces/cuda/Memory.cu | 20 ++++++++++++++++---- interfaces/hip/Control.cpp | 2 -- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/interfaces/cuda/Control.cu b/interfaces/cuda/Control.cu index b55b6f0..f154787 100644 --- a/interfaces/cuda/Control.cu +++ b/interfaces/cuda/Control.cu @@ -125,11 +125,9 @@ std::string ConcreteAPI::getDeviceInfoAsText(int deviceId) { info << "memPitch: " << property.memPitch << '\n'; info << "maxThreadsPerBlock: " << property.maxThreadsPerBlock << '\n'; info << "totalConstMem: " << property.totalConstMem << '\n'; - info << "clockRate: " << property.clockRate << '\n'; info << "multiProcessorCount: " << property.multiProcessorCount << '\n'; info << "integrated: " << property.integrated << '\n'; info << "canMapHostMemory: " << property.canMapHostMemory << '\n'; - info << "computeMode: " << property.computeMode << '\n'; info << "concurrentKernels: " << property.concurrentKernels << '\n'; info << "pciBusID: " << property.pciBusID << '\n'; info << "pciDeviceID: " << property.pciDeviceID << '\n'; diff --git a/interfaces/cuda/Copy.cu b/interfaces/cuda/Copy.cu index 0b7de7f..4292cb6 100644 --- a/interfaces/cuda/Copy.cu +++ b/interfaces/cuda/Copy.cu @@ -71,9 +71,24 @@ void ConcreteAPI::prefetchUnifiedMemTo(Destination type, const void *devPtr, siz void *streamPtr) { isFlagSet(status); cudaStream_t stream = (streamPtr == nullptr) ? 0 : (static_cast(streamPtr)); + + cudaMemLocation location{}; + if (type == Destination::Host) { + location.id = cudaCpuDeviceId; + location.type = cudaMemLocationTypeHost; + } + else if (allowedConcurrentManagedAccess) { + location.id = currentDeviceId; + location.type = cudaMemLocationTypeDevice; + } + cudaMemPrefetchAsync(devPtr, count, - type == Destination::CurrentDevice ? currentDeviceId : cudaCpuDeviceId, +#if CUDART_VERSION >= 13000 + location, 0, +#else + location.type, +#endif stream); CHECK_ERR; } diff --git a/interfaces/cuda/Memory.cu b/interfaces/cuda/Memory.cu index 8ab88c5..2402197 100644 --- a/interfaces/cuda/Memory.cu +++ b/interfaces/cuda/Memory.cu @@ -96,14 +96,26 @@ void *ConcreteAPI::allocUnifiedMem(size_t size, bool compress, Destination hint) void *devPtr; cudaMallocManaged(&devPtr, size, cudaMemAttachGlobal); CHECK_ERR; + + cudaMemLocation location{}; if (hint == Destination::Host) { - cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId); - CHECK_ERR; + location.id = cudaCpuDeviceId; + location.type = cudaMemLocationTypeHost; } else if (allowedConcurrentManagedAccess) { - cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, currentDeviceId); - CHECK_ERR; + location.id = currentDeviceId; + location.type = cudaMemLocationTypeDevice; } + + cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, +#if CUDART_VERSION >= 13000 + location +#else + location.type +#endif + ); + CHECK_ERR; + statistics.allocatedMemBytes += size; statistics.allocatedUnifiedMemBytes += size; memToSizeMap[devPtr] = size; diff --git a/interfaces/hip/Control.cpp b/interfaces/hip/Control.cpp index 0e4774e..b2721ac 100644 --- a/interfaces/hip/Control.cpp +++ b/interfaces/hip/Control.cpp @@ -134,11 +134,9 @@ std::string ConcreteAPI::getDeviceInfoAsText(int deviceId) { info << "memPitch: " << property.memPitch << '\n'; info << "maxThreadsPerBlock: " << property.maxThreadsPerBlock << '\n'; info << "totalConstMem: " << property.totalConstMem << '\n'; - info << "clockRate: " << property.clockRate << '\n'; info << "multiProcessorCount: " << property.multiProcessorCount << '\n'; info << "integrated: " << property.integrated << '\n'; info << "canMapHostMemory: " << property.canMapHostMemory << '\n'; - info << "computeMode: " << property.computeMode << '\n'; info << "concurrentKernels: " << property.concurrentKernels << '\n'; info << "pciBusID: " << property.pciBusID << '\n'; info << "pciDeviceID: " << property.pciDeviceID << '\n'; From 7f23c6da466454c5161f208ee68ae534450e0e80 Mon Sep 17 00:00:00 2001 From: David Schneller Date: Fri, 17 Oct 2025 17:31:30 +0200 Subject: [PATCH 2/3] Fix ACPP OMP flags --- sycl.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sycl.cmake b/sycl.cmake index b16397e..3813fb4 100644 --- a/sycl.cmake +++ b/sycl.cmake @@ -51,7 +51,11 @@ if ((${DEVICE_BACKEND} STREQUAL "acpp") OR (${DEVICE_BACKEND} STREQUAL "hipsycl" find_package(AdaptiveCpp REQUIRED) find_package(OpenMP REQUIRED) target_compile_options(device PRIVATE -Wno-unknown-cuda-version) - target_link_libraries(device PUBLIC ${OpenMP_CXX_FLAGS}) + + # we need to link both to OpenMP_CXX and the OpenMP_CXX_FLAGS (otherwise some symbols won't be found) + target_link_libraries(device PRIVATE OpenMP::OpenMP_CXX) + target_link_libraries(device PRIVATE ${OpenMP_CXX_FLAGS}) + add_sycl_to_target(TARGET device SOURCES ${DEVICE_SOURCE_FILES}) else() find_package(DpcppFlags REQUIRED) From c6861478c6dd9457f19165c4028c6a237f7e2da4 Mon Sep 17 00:00:00 2001 From: David Schneller Date: Fri, 17 Oct 2025 18:06:04 +0200 Subject: [PATCH 3/3] More backwards compatibility --- interfaces/cuda/Copy.cu | 6 +++++- interfaces/cuda/Memory.cu | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/interfaces/cuda/Copy.cu b/interfaces/cuda/Copy.cu index 4292cb6..8a4c30a 100644 --- a/interfaces/cuda/Copy.cu +++ b/interfaces/cuda/Copy.cu @@ -75,11 +75,15 @@ void ConcreteAPI::prefetchUnifiedMemTo(Destination type, const void *devPtr, siz cudaMemLocation location{}; if (type == Destination::Host) { location.id = cudaCpuDeviceId; +#if CUDART_VERSION >= 13000 location.type = cudaMemLocationTypeHost; +#endif } else if (allowedConcurrentManagedAccess) { location.id = currentDeviceId; +#if CUDART_VERSION >= 13000 location.type = cudaMemLocationTypeDevice; +#endif } cudaMemPrefetchAsync(devPtr, @@ -87,7 +91,7 @@ void ConcreteAPI::prefetchUnifiedMemTo(Destination type, const void *devPtr, siz #if CUDART_VERSION >= 13000 location, 0, #else - location.type, + location.id, #endif stream); CHECK_ERR; diff --git a/interfaces/cuda/Memory.cu b/interfaces/cuda/Memory.cu index 2402197..eba255e 100644 --- a/interfaces/cuda/Memory.cu +++ b/interfaces/cuda/Memory.cu @@ -100,18 +100,22 @@ void *ConcreteAPI::allocUnifiedMem(size_t size, bool compress, Destination hint) cudaMemLocation location{}; if (hint == Destination::Host) { location.id = cudaCpuDeviceId; +#if CUDART_VERSION >= 13000 location.type = cudaMemLocationTypeHost; +#endif } else if (allowedConcurrentManagedAccess) { location.id = currentDeviceId; +#if CUDART_VERSION >= 13000 location.type = cudaMemLocationTypeDevice; +#endif } cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, #if CUDART_VERSION >= 13000 location #else - location.type + location.id #endif ); CHECK_ERR;