Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions interfaces/cuda/Control.cu
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,9 @@ std::string ConcreteAPI::getDeviceInfoAsText(int deviceId) {
info << "memPitch: " << property.memPitch << '\n';
info << "maxThreadsPerBlock: " << property.maxThreadsPerBlock << '\n';
info << "totalConstMem: " << property.totalConstMem << '\n';
info << "clockRate: " << property.clockRate << '\n';
info << "multiProcessorCount: " << property.multiProcessorCount << '\n';
info << "integrated: " << property.integrated << '\n';
info << "canMapHostMemory: " << property.canMapHostMemory << '\n';
info << "computeMode: " << property.computeMode << '\n';
info << "concurrentKernels: " << property.concurrentKernels << '\n';
info << "pciBusID: " << property.pciBusID << '\n';
info << "pciDeviceID: " << property.pciDeviceID << '\n';
Expand Down
21 changes: 20 additions & 1 deletion interfaces/cuda/Copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,28 @@ void ConcreteAPI::prefetchUnifiedMemTo(Destination type, const void *devPtr, siz
void *streamPtr) {
isFlagSet<InterfaceInitialized>(status);
cudaStream_t stream = (streamPtr == nullptr) ? 0 : (static_cast<cudaStream_t>(streamPtr));

cudaMemLocation location{};
if (type == Destination::Host) {
location.id = cudaCpuDeviceId;
#if CUDART_VERSION >= 13000
location.type = cudaMemLocationTypeHost;
#endif
}
else if (allowedConcurrentManagedAccess) {
location.id = currentDeviceId;
#if CUDART_VERSION >= 13000
location.type = cudaMemLocationTypeDevice;
#endif
}

cudaMemPrefetchAsync(devPtr,
count,
type == Destination::CurrentDevice ? currentDeviceId : cudaCpuDeviceId,
#if CUDART_VERSION >= 13000
location, 0,
#else
location.id,
#endif
stream);
CHECK_ERR;
}
Expand Down
24 changes: 20 additions & 4 deletions interfaces/cuda/Memory.cu
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,30 @@ void *ConcreteAPI::allocUnifiedMem(size_t size, bool compress, Destination hint)
void *devPtr;
cudaMallocManaged(&devPtr, size, cudaMemAttachGlobal);
CHECK_ERR;

cudaMemLocation location{};
if (hint == Destination::Host) {
cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId);
CHECK_ERR;
location.id = cudaCpuDeviceId;
#if CUDART_VERSION >= 13000
location.type = cudaMemLocationTypeHost;
#endif
}
else if (allowedConcurrentManagedAccess) {
cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, currentDeviceId);
CHECK_ERR;
location.id = currentDeviceId;
#if CUDART_VERSION >= 13000
location.type = cudaMemLocationTypeDevice;
#endif
}

cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation,
#if CUDART_VERSION >= 13000
location
#else
location.id
#endif
);
CHECK_ERR;

statistics.allocatedMemBytes += size;
statistics.allocatedUnifiedMemBytes += size;
memToSizeMap[devPtr] = size;
Expand Down
2 changes: 0 additions & 2 deletions interfaces/hip/Control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,9 @@ std::string ConcreteAPI::getDeviceInfoAsText(int deviceId) {
info << "memPitch: " << property.memPitch << '\n';
info << "maxThreadsPerBlock: " << property.maxThreadsPerBlock << '\n';
info << "totalConstMem: " << property.totalConstMem << '\n';
info << "clockRate: " << property.clockRate << '\n';
info << "multiProcessorCount: " << property.multiProcessorCount << '\n';
info << "integrated: " << property.integrated << '\n';
info << "canMapHostMemory: " << property.canMapHostMemory << '\n';
info << "computeMode: " << property.computeMode << '\n';
info << "concurrentKernels: " << property.concurrentKernels << '\n';
info << "pciBusID: " << property.pciBusID << '\n';
info << "pciDeviceID: " << property.pciDeviceID << '\n';
Expand Down
6 changes: 5 additions & 1 deletion sycl.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ if ((${DEVICE_BACKEND} STREQUAL "acpp") OR (${DEVICE_BACKEND} STREQUAL "hipsycl"
find_package(AdaptiveCpp REQUIRED)
find_package(OpenMP REQUIRED)
target_compile_options(device PRIVATE -Wno-unknown-cuda-version)
target_link_libraries(device PUBLIC ${OpenMP_CXX_FLAGS})

# We need to link against both the OpenMP::OpenMP_CXX target and the raw ${OpenMP_CXX_FLAGS}; otherwise some symbols won't be found.
target_link_libraries(device PRIVATE OpenMP::OpenMP_CXX)
target_link_libraries(device PRIVATE ${OpenMP_CXX_FLAGS})

add_sycl_to_target(TARGET device SOURCES ${DEVICE_SOURCE_FILES})
else()
find_package(DpcppFlags REQUIRED)
Expand Down