From 729609c0369e5b20bb08c8b4d6cf42071397ad04 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 19 Jan 2026 21:49:07 +0100 Subject: [PATCH 1/2] [HIPIFY][CUDA 13.1][Driver] `CUDA 13.1.0` support - Step 1 - `Driver` - Part 1 - `Data Types` + Updated the regenerated `hipify-perl` and `Driver` `CUDA2HIP` docs accordingly --- bin/hipify-perl | 11 ++++++ ...A_Driver_API_functions_supported_by_HIP.md | 11 ++++++ src/CUDA2HIP_Driver_API_types.cpp | 37 +++++++++++++++++++ src/Statistics.cpp | 1 + src/Statistics.h | 1 + 5 files changed, 61 insertions(+) diff --git a/bin/hipify-perl b/bin/hipify-perl index 7e874941..38d43db7 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -9128,6 +9128,7 @@ sub simpleSubstitutions { subst("CU_STREAM_CAPTURE_STATUS_INVALIDATED", "hipStreamCaptureStatusInvalidated", "numeric_literal"); subst("CU_STREAM_CAPTURE_STATUS_NONE", "hipStreamCaptureStatusNone", "numeric_literal"); subst("CU_STREAM_DEFAULT", "hipStreamDefault", "numeric_literal"); + subst("CU_STREAM_MEM_OP_ATOMIC_REDUCTION", "hipStreamMemOpAtomicReduction", "numeric_literal"); subst("CU_STREAM_MEM_OP_BARRIER", "hipStreamMemOpBarrier", "numeric_literal"); subst("CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES", "hipStreamMemOpFlushRemoteWrites", "numeric_literal"); subst("CU_STREAM_MEM_OP_WAIT_VALUE_32", "hipStreamMemOpWaitValue32", "numeric_literal"); @@ -12738,6 +12739,11 @@ sub warnRemovedFunctions { "CUstreamMemoryBarrier_flags_enum", "CUstreamMemoryBarrier_flags", "CUstreamMemOpMemoryBarrierParams_st", + "CUstreamMemOpAtomicReductionParams_st", + "CUstreamAtomicReductionOpType_enum", + "CUstreamAtomicReductionOpType", + "CUstreamAtomicReductionDataType_enum", + "CUstreamAtomicReductionDataType", "CUshared_carveout_enum", "CUshared_carveout", "CUprocessState_enum", @@ -12978,6 +12984,11 @@ sub warnRemovedFunctions { "CU_STREAM_WAIT_VALUE_FLUSH", "CU_STREAM_MEMORY_BARRIER_TYPE_SYS", "CU_STREAM_MEMORY_BARRIER_TYPE_GPU", + "CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_64", + 
"CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_32", + "CU_STREAM_ATOMIC_REDUCTION_OP_OR", + "CU_STREAM_ATOMIC_REDUCTION_OP_AND", + "CU_STREAM_ATOMIC_REDUCTION_OP_ADD", "CU_SHAREDMEM_CARVEOUT_MAX_SHARED", "CU_SHAREDMEM_CARVEOUT_MAX_L1", "CU_SHAREDMEM_CARVEOUT_DEFAULT", diff --git a/docs/reference/tables/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/reference/tables/CUDA_Driver_API_functions_supported_by_HIP.md index 7955d298..0ca3ccf0 100644 --- a/docs/reference/tables/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/reference/tables/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1068,6 +1068,11 @@ |`CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE`| | | | |`hipSharedMemBankSizeEightByte`|1.6.0| | | | | | |`CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE`| | | | |`hipSharedMemBankSizeFourByte`|1.6.0| | | | | | |`CU_STREAM_ADD_CAPTURE_DEPENDENCIES`|11.3| | | |`hipStreamAddCaptureDependencies`|5.0.0| | | | | | +|`CU_STREAM_ATOMIC_REDUCTION_OP_ADD`|13.1| | | | | | | | | | | +|`CU_STREAM_ATOMIC_REDUCTION_OP_AND`|13.1| | | | | | | | | | | +|`CU_STREAM_ATOMIC_REDUCTION_OP_OR`|13.1| | | | | | | | | | | +|`CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_32`|13.1| | | | | | | | | | | +|`CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_64`|13.1| | | | | | | | | | | |`CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW`|11.0| | | |`hipLaunchAttributeAccessPolicyWindow`|6.2.0| | | | | | |`CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN`|12.0| | | |`hipLaunchAttributeMemSyncDomain`|7.1.0| | | | | | |`CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP`|12.0| | | |`hipLaunchAttributeMemSyncDomainMap`|7.1.0| | | | | | @@ -1083,6 +1088,7 @@ |`CU_STREAM_LEGACY`| | | | |`hipStreamLegacy`|6.2.0| | | | | | |`CU_STREAM_MEMORY_BARRIER_TYPE_GPU`|11.7| | | | | | | | | | | |`CU_STREAM_MEMORY_BARRIER_TYPE_SYS`|11.7| | | | | | | | | | | +|`CU_STREAM_MEM_OP_ATOMIC_REDUCTION`|13.1| | | |`hipStreamMemOpAtomicReduction`| | | | | | | |`CU_STREAM_MEM_OP_BARRIER`|11.7| | | |`hipStreamMemOpBarrier`|6.4.0| | | | | | |`CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES`|8.0| | | 
|`hipStreamMemOpFlushRemoteWrites`|6.4.0| | | | | | |`CU_STREAM_MEM_OP_WAIT_VALUE_32`|8.0| | | |`hipStreamMemOpWaitValue32`|6.4.0| | | | | | @@ -1536,6 +1542,10 @@ |`CUsharedconfig`| | | | |`hipSharedMemConfig`|1.6.0| | | | | | |`CUsharedconfig_enum`| | | | |`hipSharedMemConfig`|1.6.0| | | | | | |`CUstream`| | | | |`hipStream_t`|1.5.0| | | | | | +|`CUstreamAtomicReductionDataType`|13.1| | | | | | | | | | | +|`CUstreamAtomicReductionDataType_enum`|13.1| | | | | | | | | | | +|`CUstreamAtomicReductionOpType`|13.1| | | | | | | | | | | +|`CUstreamAtomicReductionOpType_enum`|13.1| | | | | | | | | | | |`CUstreamAttrID`|11.0| | | |`hipLaunchAttributeID`|6.2.0| | | | | | |`CUstreamAttrID_enum`|11.0| | |11.8|`hipLaunchAttributeID`|6.2.0| | | | | | |`CUstreamAttrValue`|11.0| | | |`hipLaunchAttributeValue`|6.2.0| |7.1.0| | | | @@ -1551,6 +1561,7 @@ |`CUstreamCaptureMode_enum`|10.1| | | |`hipStreamCaptureMode`|4.3.0| | | | | | |`CUstreamCaptureStatus`|10.0| | | |`hipStreamCaptureStatus`|4.3.0| | | | | | |`CUstreamCaptureStatus_enum`|10.0| | | |`hipStreamCaptureStatus`|4.3.0| | | | | | +|`CUstreamMemOpAtomicReductionParams_st`|13.1| | | | | | | | | | | |`CUstreamMemOpMemoryBarrierParams_st`|11.7| | | | | | | | | | | |`CUstreamMemoryBarrier_flags`|11.7| | | | | | | | | | | |`CUstreamMemoryBarrier_flags_enum`|11.7| | | | | | | | | | | diff --git a/src/CUDA2HIP_Driver_API_types.cpp b/src/CUDA2HIP_Driver_API_types.cpp index d23928fb..a72a0433 100644 --- a/src/CUDA2HIP_Driver_API_types.cpp +++ b/src/CUDA2HIP_Driver_API_types.cpp @@ -541,6 +541,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // cudaLogsCallbackHandle {"CUlogsCallbackHandle", {"hipLogsCallbackHandle", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUstreamMemOpAtomicReductionParams_st", {"hipStreamMemOpAtomicReductionParams_st", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 2. 
Unions {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, @@ -2090,6 +2093,7 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { {"CU_STREAM_MEM_OP_WAIT_VALUE_64", {"hipStreamMemOpWaitValue64", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 4 {"CU_STREAM_MEM_OP_WRITE_VALUE_64", {"hipStreamMemOpWriteValue64", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 5 {"CU_STREAM_MEM_OP_BARRIER", {"hipStreamMemOpBarrier", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 6 + {"CU_STREAM_MEM_OP_ATOMIC_REDUCTION", {"hipStreamMemOpAtomicReduction", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES}}, // 8 // cudaStreamCaptureStatus {"CUstreamCaptureStatus", {"hipStreamCaptureStatus", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES}}, @@ -3103,6 +3107,28 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP { // cudaAtomicCapabilityVector32x4 {"CU_ATOMIC_CAPABILITY_VECTOR_32x4", {"hipAtomicCapabilityVector32x4", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUstreamAtomicReductionOpType", {"hipStreamAtomicReductionOpType", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUstreamAtomicReductionOpType_enum", {"hipStreamAtomicReductionOpType", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUstreamAtomicReductionOpType enum values + // + {"CU_STREAM_ATOMIC_REDUCTION_OP_OR", {"hipStreamAtomicReductionOpOr", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_STREAM_ATOMIC_REDUCTION_OP_AND", {"hipStreamAtomicReductionOpAnd", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_STREAM_ATOMIC_REDUCTION_OP_ADD", {"hipStreamAtomicReductionOpAdd", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CUstreamAtomicReductionDataType", {"hipStreamAtomicReductionDataType", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + 
{"CUstreamAtomicReductionDataType_enum", {"hipStreamAtomicReductionDataType", "", CONV_TYPE, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // CUstreamAtomicReductionDataType enum values + // + {"CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_32", {"hipStreamAtomicReductionUnsigned32", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // + {"CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_64", {"hipStreamAtomicReductionUnsigned64", "", CONV_NUMERIC_LITERAL, API_DRIVER, SEC::DATA_TYPES, HIP_UNSUPPORTED}}, + // 4. Typedefs // no analogue @@ -4387,6 +4413,17 @@ const std::map CUDA_DRIVER_TYPE_NAME_VER_MAP { {"CU_MEM_ALLOCATION_TYPE_MANAGED", {CUDA_130, CUDA_0, CUDA_0 }}, {"CUcheckpointGpuPair_st", {CUDA_130, CUDA_0, CUDA_0 }}, {"CUcheckpointGpuPair", {CUDA_130, CUDA_0, CUDA_0 }}, + {"CU_STREAM_MEM_OP_ATOMIC_REDUCTION", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CUstreamAtomicReductionOpType", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CUstreamAtomicReductionOpType_enum", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CU_STREAM_ATOMIC_REDUCTION_OP_OR", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CU_STREAM_ATOMIC_REDUCTION_OP_AND", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CU_STREAM_ATOMIC_REDUCTION_OP_ADD", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CUstreamAtomicReductionDataType", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CUstreamAtomicReductionDataType_enum", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_32", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CU_STREAM_ATOMIC_REDUCTION_UNSIGNED_64", {CUDA_131, CUDA_0, CUDA_0 }}, + {"CUstreamMemOpAtomicReductionParams_st", {CUDA_131, CUDA_0, CUDA_0 }}, }; const std::map HIP_DRIVER_TYPE_NAME_VER_MAP { diff --git a/src/Statistics.cpp b/src/Statistics.cpp index 62d0abf9..c33a1ebc 100644 --- a/src/Statistics.cpp +++ b/src/Statistics.cpp @@ -518,6 +518,7 @@ std::string Statistics::getCudaVersion(const cudaVersions &ver) { case CUDA_128: return "12.8"; case CUDA_129: return "12.9"; case CUDA_130: return "13.0"; + case CUDA_131: return "13.1"; case CUDNN_10: return "1.0.0"; case 
CUDNN_20: return "2.0.0"; case CUDNN_30: return "3.0.0"; diff --git a/src/Statistics.h b/src/Statistics.h index c491cffc..a4b9c4f9 100644 --- a/src/Statistics.h +++ b/src/Statistics.h @@ -258,6 +258,7 @@ enum cudaVersions { CUDA_128 = 12080, CUDA_129 = 12090, CUDA_130 = 13000, + CUDA_131 = 13010, CUDA_LATEST = CUDA_129, CUDA_PARTIALLY_SUPPORTED = CUDA_130, CUDNN_10 = 100, From 546426c02203d932612ef7a6c926bc072149f2c9 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 19 Jan 2026 21:54:49 +0100 Subject: [PATCH 2/2] [HIPIFY][TENSOR][doc] `cuTensor 2.4.1.0` is the latest supported version + No API changes since `2.4.0.0` --- docs/building/build-hipify-clang-linux.rst | 10 +++++----- docs/building/build-hipify-clang-windows.rst | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/building/build-hipify-clang-linux.rst b/docs/building/build-hipify-clang-linux.rst index 5c7aa9a7..d24fcb20 100644 --- a/docs/building/build-hipify-clang-linux.rst +++ b/docs/building/build-hipify-clang-linux.rst @@ -134,8 +134,8 @@ Linux testing On Linux, the following configurations are tested: -* Ubuntu 22-24: LLVM 13.0.0 - 21.1.8, CUDA 7.0 - 12.9.1, cuDNN 8.0.5 - 9.16.0, cuTensor 1.0.1.0 - 2.4.0.0 -* Ubuntu 20-21: LLVM 9.0.0 - 20.1.8, CUDA 7.0 - 12.8.1, cuDNN 5.1.10 - 9.16.0, cuTensor 1.0.1.0 - 2.4.0.0 +* Ubuntu 22-24: LLVM 13.0.0 - 21.1.8, CUDA 7.0 - 12.9.1, cuDNN 8.0.5 - 9.16.0, cuTensor 1.0.1.0 - 2.4.1.0 +* Ubuntu 20-21: LLVM 9.0.0 - 20.1.8, CUDA 7.0 - 12.8.1, cuDNN 5.1.10 - 9.16.0, cuTensor 1.0.1.0 - 2.4.1.0 * Ubuntu 16-19: LLVM 8.0.0 - 14.0.6, CUDA 7.0 - 10.2, cuDNN 5.1.10 - 8.0.5 * Ubuntu 14: LLVM 4.0.0 - 7.1.0, CUDA 7.0 - 9.0, cuDNN 5.0.5 - 7.6.5 @@ -160,7 +160,7 @@ Here's how to build ``hipify-clang`` with testing support on ``Ubuntu 24.04.02`` -DCMAKE_PREFIX_PATH=$ROOT_DIR/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.9.1 \ -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-9.16.0 \ - -DCUDA_TENSOR_ROOT_DIR=/usr/local/cutensor-2.4.0.0 \ + 
-DCUDA_TENSOR_ROOT_DIR=/usr/local/cutensor-2.4.1.0 \ -DLLVM_EXTERNAL_LIT=$ROOT_DIR/build/bin/llvm-lit \ ../hipify @@ -200,7 +200,7 @@ The corresponding successful output is (assuming ROOT_DIR is ``/usr/llvm/21.1.8` -- - CUDA Toolkit path : /usr/local/cuda-12.9.1 -- - CUDA Samples path : -- - cuDNN path : /usr/local/cudnn-9.16.0 - -- - cuTENSOR path : /usr/local/cuTensor/2.4.0.0 + -- - cuTENSOR path : /usr/local/cuTensor/2.4.1.0 -- - CUB path : -- Found CUDAToolkit: /usr/local/cuda-12.9.1/targets/x86_64-linux/include (found version "12.9.86") -- Performing Test CMAKE_HAVE_LIBC_PTHREAD @@ -210,7 +210,7 @@ The corresponding successful output is (assuming ROOT_DIR is ``/usr/llvm/21.1.8` -- - CUDA Toolkit path : /usr/local/cuda-12.9.1 -- - CUDA Samples path : OFF -- - cuDNN path : /usr/local/cudnn-9.16.0/include - -- - cuTENSOR path : /usr/local/cuTensor/2.4.0.0/include + -- - cuTENSOR path : /usr/local/cuTensor/2.4.1.0/include -- - CUB path : /usr/local/cuda-12.9.1/include/cub -- Configuring done (0.6s) -- Generating done (0.0s) diff --git a/docs/building/build-hipify-clang-windows.rst b/docs/building/build-hipify-clang-windows.rst index d87f5b11..0cca3d93 100644 --- a/docs/building/build-hipify-clang-windows.rst +++ b/docs/building/build-hipify-clang-windows.rst @@ -92,7 +92,7 @@ We recommend that you build ``LLVM+Clang`` from sources, as prebuilt binaries ar .. 
code-block:: bash - -DCUDA_TENSOR_ROOT_DIR=D:/CUDA/cuTensor/2.4.0.0 + -DCUDA_TENSOR_ROOT_DIR=D:/CUDA/cuTensor/2.4.1.0 - [Optional] Install `cuDNN `_ belonging to the version corresponding to the CUDA version: @@ -264,7 +264,7 @@ Building with testing support using ``Visual Studio 17 2022`` on ``Windows 11``: -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.9" \ -DCUDA_SDK_ROOT_DIR="C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.9" \ -DCUDA_DNN_ROOT_DIR=D:/CUDA/cuDNN/9.16.0 \ - -DCUDA_TENSOR_ROOT_DIR=D:/CUDA/cuTensor/2.4.0.0 \ + -DCUDA_TENSOR_ROOT_DIR=D:/CUDA/cuTensor/2.4.1.0 \ -DLLVM_EXTERNAL_LIT=%ROOT_DIR%/build/Release/bin/llvm-lit.py \ ../hipify @@ -302,14 +302,14 @@ The corresponding successful output is (assuming %ROOT_DIR% is ``D:/LLVM/21.1.8` -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.9 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.9 -- - cuDNN path : D:/CUDA/cuDNN/9.16.0 - -- - cuTENSOR path : D:/CUDA/cuTensor/2.4.0.0 + -- - cuTENSOR path : D:/CUDA/cuTensor/2.4.1.0 -- - CUB path : -- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.9/include (found version "12.9.86") -- Found CUDA config: -- - CUDA Toolkit path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.9 -- - CUDA Samples path : C:/ProgramData/NVIDIA Corporation/CUDA Samples/v12.9 -- - cuDNN path : D:/CUDA/cuDNN/9.16.0/include - -- - cuTENSOR path : D:/CUDA/cuTensor/2.4.0.0/include + -- - cuTENSOR path : D:/CUDA/cuTensor/2.4.1.0/include -- - CUB path : C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.9/include/cub -- Configuring done (4.4s) -- Generating done (0.1s)