From 94f3676cbdf1ef0efb4014a22b8e5a3208853666 Mon Sep 17 00:00:00 2001 From: Kumar Saurabh Date: Thu, 28 Aug 2025 14:24:54 -0500 Subject: [PATCH 1/5] better cmake list for automatically detecting gpu architecture --- CMakeLists.txt | 43 ++++++++++++++++++++++++++++++------------- README.md | 1 + install.sh | 11 +++++++++-- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cbda13f..433e612 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,22 +94,39 @@ else() list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native") endif() -# GPU arch targets -set(TARGETS "gfx900;gfx906") -if(HIP_VERSION VERSION_GREATER_EQUAL "3.7") - set(TARGETS "${TARGETS};gfx908") -endif() -if(HIP_VERSION VERSION_GREATER_EQUAL "4.3") - set(TARGETS "${TARGETS};gfx90a") -endif() -if (HIP_VERSION VERSION_GREATER_EQUAL "5.7") - set(TARGETS "${TARGETS};gfx942") +set(ARCHS "") # use plural to indicate list +if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "") + string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}") + list(TRANSFORM ARCHS STRIP) + list(REMOVE_DUPLICATES ARCHS) + message(STATUS "Using manually specified GPU targets: ${ARCHS}") +else() + message(STATUS "Detecting available architecture") + find_program(ROCM_AGENT rocm_agent_enumerator) + if(ROCM_AGENT) + execute_process( + COMMAND ${ROCM_AGENT} + OUTPUT_VARIABLE ROCM_ARCHS + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + string(REPLACE "\n" ";" ARCHS ${ROCM_ARCHS}) + list(REMOVE_DUPLICATES ARCHS) + foreach(match ${ARCHS}) + string(REGEX REPLACE "Name:\\s+" "" arch "${match}") + list(APPEND ARCH "${arch}") + endforeach() + endif() endif() -if (HIP_VERSION VERSION_GREATER_EQUAL "6.5") - set(TARGETS "${TARGETS};gfx950;gfx1100") + +if(ARCHS STREQUAL "") + message(FATAL_ERROR "No GPU architectures detected via rocminfo and no BUILD_ARCH specified. Use -DBUILD_ARCH=gfxXXX") endif() -foreach(target ${TARGETS}) +message(STATUS "Building for GPU architecture: ${ARCHS}") + +# Generate HIP_HIPCC_FLAGS +foreach(target ${ARCHS}) list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}") endforeach() diff --git a/README.md b/README.md index 2f7fb27..6152147 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ cd rocHPL # --with-rocm= - Path to ROCm install (Default: /opt/rocm) # --with-rocblas= - Path to rocBLAS library (Default: /opt/rocm/rocblas) # --with-mpi= - Path to external MPI install (Default: clone+build OpenMPI) +# --arch="arch-list" - Specify comma separated architecture list to build (Default: detect from rocminfo)" # --verbose-print - Verbose output during HPL setup (Default: true) # --progress-report - Print progress report to terminal during HPL run (Default: true) # --detailed-timing - Record detailed timers during HPL run (Default: true) diff --git a/install.sh b/install.sh index 754d529..73a5f75 100755 --- a/install.sh +++ b/install.sh @@ -17,6 +17,7 @@ function display_help() echo " [--with-rocm=] Path to ROCm install (Default: /opt/rocm)" echo " [--with-rocblas=] Path to rocBLAS library (Default: /opt/rocm/rocblas)" echo " [--with-mpi=] Path to external MPI install (Default: clone+build OpenMPI)" + echo " [--arch] Specify comma separated architecture list to build (Default: detect from rocminfo)" echo " [--verbose-print] Verbose output during HPL setup (Default: true)" echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)" echo " [--detailed-timing] Record detailed timers during HPL run (Default: true)" @@ -230,7 +231,7 @@ enable_tracing=false # check if we have a modern version of getopt that can handle whitespace and long parameters getopt -T if [[ $? -eq 4 ]]; then - GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,verbose-print:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@") + GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-rocblas:,verbose-print:,arch:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@") else echo "Need a new version of getopt" exit_with_error 1 @@ -261,6 +262,9 @@ while true; do --with-mpi) with_mpi=${2} shift 2 ;; + --arch) + arch=${2} + shift 2 ;; --with-rocblas) with_rocblas=${2} shift 2 ;; @@ -311,6 +315,7 @@ pushd . fi + # ################################################# # configure & build # ################################################# @@ -323,7 +328,6 @@ pushd . else cmake_common_options="${cmake_common_options} -DCMAKE_BUILD_TYPE=Debug" fi - shopt -s nocasematch if [[ "${verbose_print}" == on || "${verbose_print}" == true || "${verbose_print}" == 1 || "${verbose_print}" == enabled ]]; then cmake_common_options="${cmake_common_options} -DHPL_VERBOSE_PRINT=ON" @@ -337,6 +341,9 @@ pushd . if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then cmake_common_options="${cmake_common_options} -DHPL_TRACING=ON" fi + if [[ -n "${arch}" ]]; then + cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch}" + fi shopt -u nocasematch # Build library with AMD toolchain because of existence of device kernels From e5eb3e348679734d87a8654f441e94275350e769 Mon Sep 17 00:00:00 2001 From: Kumar Saurabh Date: Thu, 28 Aug 2025 14:30:34 -0500 Subject: [PATCH 2/5] Adding -name to agent-enumerator --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 433e612..5c78474 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,7 +105,7 @@ else() find_program(ROCM_AGENT rocm_agent_enumerator) if(ROCM_AGENT) execute_process( - COMMAND ${ROCM_AGENT} + COMMAND "${ROCM_AGENT}" -name OUTPUT_VARIABLE ROCM_ARCHS ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE From dbcc257e0213f86a6ee1b3efac12ce384dd20839 Mon Sep 17 00:00:00 2001 From: Kumar Saurabh Date: Thu, 28 Aug 2025 14:33:18 -0500 Subject: [PATCH 3/5] fix instruction --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c78474..bb4dc1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,7 +120,7 @@ else() endif() if(ARCHS STREQUAL "") - message(FATAL_ERROR "No GPU architectures detected via rocminfo and no BUILD_ARCH specified. Use -DBUILD_ARCH=gfxXXX") + message(FATAL_ERROR "No GPU architectures detected via rocminfo and no BUILD_ARCH specified. Use ./install.sh --arch=gfxXXX") endif() message(STATUS "Building for GPU architecture: ${ARCHS}") From 8177ee17fdabf10188f2b90a6030c2de15684411 Mon Sep 17 00:00:00 2001 From: Kumar Saurabh Date: Thu, 28 Aug 2025 15:00:27 -0500 Subject: [PATCH 4/5] Minor clarification --- README.md | 2 +- install.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6152147..24cddef 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ cd rocHPL # --with-rocm= - Path to ROCm install (Default: /opt/rocm) # --with-rocblas= - Path to rocBLAS library (Default: /opt/rocm/rocblas) # --with-mpi= - Path to external MPI install (Default: clone+build OpenMPI) -# --arch="arch-list" - Specify comma separated architecture list to build (Default: detect from rocminfo)" +# --arch="arch-list" - Specify comma separated architecture list to build (Default: detect from rocm_agent_enumerator)" # --verbose-print - Verbose output during HPL setup (Default: true) # --progress-report - Print progress report to terminal during HPL run (Default: true) # --detailed-timing - Record detailed timers during HPL run (Default: true) diff --git a/install.sh b/install.sh index 73a5f75..0c4f7e9 100755 --- a/install.sh +++ b/install.sh @@ -17,7 +17,7 @@ function display_help() echo " [--with-rocm=] Path to ROCm install (Default: /opt/rocm)" echo " [--with-rocblas=] Path to rocBLAS library (Default: /opt/rocm/rocblas)" echo " [--with-mpi=] Path to external MPI install (Default: clone+build OpenMPI)" - echo " [--arch] Specify comma separated architecture list to build (Default: detect from rocminfo)" + echo " [--arch] Specify comma separated architecture list to build (Default: detect from rocm_agent_enumerator)" echo " [--verbose-print] Verbose output during HPL setup (Default: true)" echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)" echo " [--detailed-timing] Record detailed timers during HPL run (Default: true)" From a16df021ccf9118e434f4c71b0cb31bd89ca5db0 Mon Sep 17 00:00:00 2001 From: Kumar Saurabh Date: Thu, 28 Aug 2025 15:26:53 -0500 Subject: [PATCH 5/5] Using rocminfo instead to agent_enumerator --- CMakeLists.txt | 19 ++++++++++++++----- README.md | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb4dc1d..7bf427e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,16 +102,25 @@ if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "") message(STATUS "Using manually specified GPU targets: ${ARCHS}") else() message(STATUS "Detecting available architecture") - find_program(ROCM_AGENT rocm_agent_enumerator) - if(ROCM_AGENT) + ############ Find using rocminfo ##################### + find_program(ROCMINFO_EXECUTABLE rocminfo) + if(ROCMINFO_EXECUTABLE) execute_process( - COMMAND "${ROCM_AGENT}" -name - OUTPUT_VARIABLE ROCM_ARCHS + COMMAND ${ROCMINFO_EXECUTABLE} + OUTPUT_VARIABLE ROCMINFO_OUTPUT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE ) - string(REPLACE "\n" ";" ARCHS ${ROCM_ARCHS}) + + # 1) Only match lines where the token follows "Name:" + string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}") + + # 2) Strip the leading "Name: " to keep just gfx tokens + string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}") + + # 3) Remove duplicates list(REMOVE_DUPLICATES ARCHS) + foreach(match ${ARCHS}) string(REGEX REPLACE "Name:\\s+" "" arch "${match}") list(APPEND ARCH "${arch}") diff --git a/README.md b/README.md index 24cddef..6152147 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ cd rocHPL # --with-rocm= - Path to ROCm install (Default: /opt/rocm) # --with-rocblas= - Path to rocBLAS library (Default: /opt/rocm/rocblas) # --with-mpi= - Path to external MPI install (Default: clone+build OpenMPI) -# --arch="arch-list" - Specify comma separated architecture list to build (Default: detect from rocm_agent_enumerator)" +# --arch="arch-list" - Specify comma separated architecture list to build (Default: detect from rocminfo)" # --verbose-print - Verbose output during HPL setup (Default: true) # --progress-report - Print progress report to terminal during HPL run (Default: true) # --detailed-timing - Record detailed timers during HPL run (Default: true)