From f4ed884107531f293fed8ba5c59a8eebca6a3a23 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Sun, 25 Jan 2026 13:32:34 -0800 Subject: [PATCH 01/22] Enable Windows ROCm build of whisper.cpp Add ROCm support and CI improvements ci: automate library bundling in ROCm build workflow Replace manual copying of ROCm libraries and shared objects with an automated CMake-based bundling step using GET_RUNTIME_DEPENDENCIES. This ensures all linked libraries (e.g., libamdhip64, librocm_sysdeps) are recursively detected and bundled into build/bin, filtering out system libs like libc.so, while improving portability and reducing maintenance for dependency management. build: enhance library bundling to handle symlinks for portable distribution - Updated CMake script to resolve and copy real targets of symlinks, then recreate symlinks in build dir - Modified chmod to only affect real .so* files, ignoring symlinks - Removed outdated comments and improved script clarity for better portability of whisper-cli binaries Replaced file(CREATE_LINK): we now use execute_process(COMMAND ln -sf ...), which is standard on Linux. Finally, changed the Bundle Linked Libraries step to use the Linux ldd tool instead of CMake; this supersedes the CMake-based bundling described above. The goal is to make the smallest possible changes to whisper.cpp, without modifying existing files such as the CMake files or adding more files.
--- .github/workflows/build.yml | 575 +++++++++++++++++++++++++++++++++++- 1 file changed, 568 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5c1cf93ba2a..692e5e7697e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -45,6 +45,14 @@ on: options: - full-ci - release-only + gfx_target: + description: 'AMD GPU targets (comma-separated)' + required: false + default: 'gfx1151,gfx1150,gfx120X,gfx110X' + rocm_version: + description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' + required: false + default: 'latest' concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -57,6 +65,8 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} ubuntu_image: "ubuntu:22.04" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" + GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} jobs: determine-tag: @@ -64,13 +74,11 @@ jobs: outputs: tag_name: ${{ steps.tag.outputs.name }} should_release: ${{ steps.tag.outputs.should_release }} - steps: - name: Checkout with full history uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Determine tag name id: tag shell: bash @@ -114,6 +122,58 @@ jobs: echo "name=$TAG_NAME" >> $GITHUB_OUTPUT echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT + prepare-matrix: + runs-on: ubuntu-latest + outputs: + windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} + ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} + should_build_windows: ${{ steps.set-matrix.outputs.should_build_windows }} + should_build_ubuntu: ${{ steps.set-matrix.outputs.should_build_ubuntu }} + steps: + - name: Set matrix + id: set-matrix + run: | + targets="${{ env.GFX_TARGETS }}" + operating_systems="windows,ubuntu" + + echo "Input targets: $targets" + echo "Input operating systems: 
$operating_systems" + + target_array=$(echo "$targets" \ + | tr ',' '\n' \ + | sed 's/^ *//;s/ *$//' \ + | sed 's/^"//;s/"$//' \ + | jq -R . \ + | jq -s .) + + windows_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') + + # 3. Create Ubuntu Matrix + ubuntu_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') + + # Check which operating systems to build + should_build_windows="false" + should_build_ubuntu="false" + + if [[ "$operating_systems" == *"windows"* ]]; then + should_build_windows="true" + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT + fi + + if [[ "$operating_systems" == *"ubuntu"* ]]; then + should_build_ubuntu="true" + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT + fi + + echo "should_build_windows=$should_build_windows" >> $GITHUB_OUTPUT + echo "should_build_ubuntu=$should_build_ubuntu" >> $GITHUB_OUTPUT + + echo "Windows build: $should_build_windows" + echo "Ubuntu build: $should_build_ubuntu" + echo "Generated matrix: $matrix_targets" + #linux/amd64 ubuntu-22: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -394,11 +454,7 @@ jobs: strategy: fail-fast: false matrix: - build: [Debug, Release] - #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - # TODO: arm/v7 disabled due to clang bug - # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 - arch: [linux/amd64, linux/arm64, linux/ppc64le] + ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} steps: - name: Clone @@ -426,6 +482,263 @@ jobs: cmake . 
-DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang make ctest -L gh --output-on-failure' + + ubuntu-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: ubuntu-22.04 + needs: [determine-tag, prepare-matrix] + # Check if we should run (based on the prepare-matrix output) + strategy: + # Uses the matrix generated in prepare-matrix (gfx_target, arch, build, sdl2) + matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Free disk space + run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh | bash + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt update + sudo apt install -y cmake ninja-build unzip curl build-essential libsdl2-dev git patchelf + + - name: Download and extract ROCm directly to /opt/rocm + run: | + # Determine ROCm version to use + rocm_version="${{ env.ROCM_VERSION }}" + current_target="${{ matrix.gfx_target }}" + + # Add appropriate suffixes for different GPU targets + s3_target="$current_target" + if [ "$current_target" = "gfx110X" ]; then + s3_target="${current_target}-dgpu" + echo "Using S3 target with -dgpu suffix: $s3_target" + elif [ "$current_target" = "gfx120X" ]; then + s3_target="${current_target}-all" + echo "Using S3 target with -all suffix: $s3_target" + fi + + if [ "$rocm_version" = "latest" ]; then + echo "Auto-detecting latest ROCm version for target: $current_target" + s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-linux-${s3_target}-7") + + # Extract all files + files=$(echo "$s3_response" | grep -oP '(?<=<Key>)[^<]*' | grep "therock-dist-linux-${s3_target}-") + + # Extract 
versions and sort them properly + latest_file="" + latest_major=0 + latest_minor=0 + latest_patch=0 + latest_rc=0 + latest_is_alpha=false + + while IFS= read -r file; do + if [[ "$file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + version="${BASH_REMATCH[1]}" + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') + rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') + is_alpha=false + if [[ "$version" =~ a ]]; then is_alpha=true; fi + + is_newer=false + if [ "$major" -gt "$latest_major" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then + if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; + fi + fi + + if [ "$is_newer" = true ]; then + latest_file="$file" + latest_major="$major" + latest_minor="$minor" + latest_patch="$patch" + latest_rc="$rc" + latest_is_alpha="$is_alpha" + fi + fi + done <<< "$files" + + echo "Found latest file: $latest_file" + + if [[ "$latest_file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + rocm_version="${BASH_REMATCH[1]}" + echo "Detected latest ROCm version: $rocm_version" + else + echo "Failed to extract ROCm version from latest file: $latest_file" + exit 1 + fi + + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" + else + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-${s3_target}-${rocm_version}.tar.gz" + fi + + echo "DETECTED_ROCM_VERSION=$rocm_version" >> 
$GITHUB_ENV + + # Create directory and stream extraction + sudo mkdir -p /opt/rocm + curl -sL "$rocm_url" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + + - name: Set ROCm environment variables + run: | + echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $GITHUB_ENV + echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH + + - name: Find ROCm bitcode path + run: | + # Dynamically find the directory containing device libraries (amdgcn/bitcode) + BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) + + if [ -z "$BITCODE_PATH" ]; then + echo "::error::Could not find 'bitcode' directory in /opt/rocm" + find /opt/rocm -maxdepth 5 + exit 1 + fi + + echo "Found bitcode at: $BITCODE_PATH" + echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV + + - name: Configure CMake + run: | + + # Map GPU targets + current_target="${{ matrix.gfx_target }}" + echo "Input target: $current_target" + + if [ "$current_target" = "gfx110X" ]; then + mapped_target="gfx1100;gfx1101;gfx1102" + elif [ "$current_target" = "gfx1151" ]; then + mapped_target="gfx1151" + elif [ "$current_target" = "gfx1150" ]; then + mapped_target="gfx1150" + elif [ "$current_target" = "gfx120X" ]; then + mapped_target="gfx1200;gfx1201" + else + mapped_target="$current_target" + fi + echo "Mapped target: $mapped_target" + + cmake -S . 
-B build -G Ninja \ + -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DGPU_TARGETS="$mapped_target" \ + -DBUILD_SHARED_LIBS=ON \ + -DGGML_HIP=ON \ + -DGGML_ROCM=1 \ + -DWHISPER_SDL2=${{ matrix.sdl2 }} + + - name: Build + run: cmake --build build --config ${{ matrix.build }} -j $(nproc) + + # Copy Backend ROCm Folders --- + - name: Copy ROCm core libs to build directory + run: | + build_bin_path="build/bin" + rocm_bin_path="/opt/rocm/bin" + rocm_lib_path="/opt/rocm/lib" + + # Copy rocblas/library folder + rocblas_lib_path="$rocm_lib_path/rocblas/library" + if [ -d "$rocblas_lib_path" ]; then + dest_rocblas_path="$build_bin_path/rocblas/library" + mkdir -p "$(dirname "$dest_rocblas_path")" + cp -r "$rocblas_lib_path" "$(dirname "$dest_rocblas_path")/" + echo "Copied: rocblas/library" + fi + + # Copy hipblaslt/library folder + hipblaslt_lib_path="$rocm_lib_path/hipblaslt/library" + if [ -d "$hipblaslt_lib_path" ]; then + dest_hipblaslt_path="$build_bin_path/hipblaslt/library" + mkdir -p "$(dirname "$dest_hipblaslt_path")" + cp -r "$hipblaslt_lib_path" "$(dirname "$dest_hipblaslt_path")/" + echo "Copied: hipblaslt/library" + fi + + # Copy required shared libraries + # We use generic wildcards to catch versioned .so files + echo "Copying shared libraries..." 
+ cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocsolver.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libroctx64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhipblaslt.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true + + # Copy LLVM runtime libs often needed + cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true + + - name: Bundle Linked Libraries + run: | + build_bin="build/bin" + echo "Scanning dependencies for whisper-cli..." + + ldd "$build_bin/whisper-cli" | grep "=> /" | while read -r line; do + + soname=$(echo "$line" | awk '{print $1}') + path=$(echo "$line" | awk '{print $3}') + + if [[ "$soname" =~ ^(libc\.so|libm\.so|libdl\.so|librt\.so|libpthread\.so|libstdc\+\+|libgcc_s|ld-linux) ]]; then + continue + fi + echo "Bundling: $soname" + echo " Source: $path" + cp -L "$path" "$build_bin/$soname" + done + chmod +x "$build_bin"/*.so* + + - name: Set RPATH for portable distribution + run: | + cd build/bin + # Set RPATH to $ORIGIN so the binary looks for .so files in its own directory + # wildcards catch whisper-cli, whisper-bench, etc., plus shared libs + for file in *.so* whisper-*; do + if [ -f "$file" ] && [ ! 
-L "$file" ]; then + # Only patch ELF files (executables and shared objects) + if file "$file" | grep -q "ELF"; then + patchelf --set-rpath '$ORIGIN' "$file" 2>/dev/null || true + echo "Patched RPATH for $file" + fi + fi + done + + - name: Pack bin artifacts + run: | + cd build/bin + SAFE_ARCH=$(echo "${{ matrix.arch }}" | tr '/' '-') + + # 1. Save SAFE_ARCH to GITHUB_ENV so the next step can use it + echo "SAFE_ARCH=$SAFE_ARCH" >> $GITHUB_ENV + + # Include target in filename (e.g., whisper-bin-gfx1100-linux-amd64.zip) + zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} + path: whisper-bin-*.zip ubuntu-22-gcc-sanitized: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -1001,6 +1314,252 @@ jobs: with: name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip + + windows-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: windows-latest + needs: [determine-tag, prepare-matrix] + strategy: + matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: Install Ninja + run: choco install ninja + + - name: Fetch SDL2 and Patch Header (Robust) + if: matrix.sdl2 == 'ON' + shell: pwsh + run: | + $sdlVer = "${{ matrix.s2ver }}" + $url = "https://github.com/libsdl-org/SDL/releases/download/release-$sdlVer/SDL2-devel-$sdlVer-VC.zip" + + Write-Host "Downloading SDL2 from $url..." + Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" + + Write-Host "Extracting SDL2..." 
+ 7z x sdl2.zip + + # 1. Locate CMake config dynamically + $cmakeFile = Get-ChildItem -Path . -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if ($cmakeFile) { + $cmakeDir = $cmakeFile.DirectoryName + Write-Host "Found SDL2 CMake dir at: $cmakeDir" + echo "SDL2_DIR=$cmakeDir" >> $env:GITHUB_ENV + } else { + Write-Error "FATAL: Could not find sdl2-config.cmake inside extracted files!" + exit 1 + } + + # 2. Find and Patch SDL_endian.h recursively + Write-Host "Searching for SDL_endian.h..." + $headerFile = Get-ChildItem -Path . -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + + if ($headerFile) { + Write-Host "Found header at: $($headerFile.FullName)" + $content = Get-Content $headerFile.FullName -Raw + + # The Fix: Comment out the extern declaration + if ($content -match 'extern void _m_prefetch') { + $content = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' + Set-Content -Path $headerFile.FullName -Value $content + Write-Host "SUCCESS: Patched _m_prefetch in SDL_endian.h" + } else { + Write-Host "WARNING: _m_prefetch string not found. It might be a different SDL version or already patched." + } + } else { + # Debug output if file is missing + Write-Host "Listing root directories:" + Get-ChildItem -Path . -Directory | Format-Table Name + Write-Error "FATAL: Could not locate SDL_endian.h in the workspace." 
+ exit 1 + } + + - name: Download ROCm nightly tarball + run: | + # Determine ROCm version to use + $rocmVersion = "${{ env.ROCM_VERSION }}" + $currentTarget = "${{ matrix.gfx_target }}" + + $s3Target = $currentTarget + if ($currentTarget -eq "gfx110X") { + $s3Target = "$currentTarget-dgpu" + Write-Host "Using S3 target with -dgpu suffix: $s3Target" + } elseif ($currentTarget -eq "gfx120X") { + $s3Target = "$currentTarget-all" + Write-Host "Using S3 target with -all suffix: $s3Target" + } + + if ($rocmVersion -eq "latest") { + Write-Host "Auto-detecting latest ROCm version for target: $currentTarget" + $s3Response = (Invoke-WebRequest "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-windows-$s3Target-7").Content + $files = $s3Response -split '<Key>' | Where-Object {$_ -match '</Key>'} | ForEach-Object { ($_ -split '</Key>')[0] } + + $versionFiles = @() + foreach ($file in $files) { + if ($file -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $version = $matches[1] + $versionFiles += [PSCustomObject]@{ + File = $file + Version = $version + Major = [int]($version -split '\.')[0] + Minor = [int]($version -split '\.')[1] + Patch = [int](($version -split '\.')[2] -replace '(?:a|rc).*', '') + RC = [int]($version -replace '.*(?:a|rc)', '') + IsAlpha = $version -match 'a' + } + } + } + + $latestFile = ($versionFiles | Sort-Object Major, Minor, Patch, @{Expression={if($_.IsAlpha){1}else{0}}}, RC | Select-Object -Last 1).File + Write-Host "Found latest file: $latestFile" + + if ($latestFile -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $rocmVersion = $matches[1] + Write-Host "Detected latest ROCm version: $rocmVersion" + } else { + Write-Error "Failed to extract ROCm version from latest file: $latestFile" + exit 1 + } + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" + } else { + $rocmUrl = 
"https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-$s3Target-$rocmVersion.tar.gz" + } + + echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV + Invoke-WebRequest -Uri $rocmUrl -OutFile "rocm.tar.gz" + + - name: Extract ROCm to C:\opt\rocm + run: | + New-Item -ItemType Directory -Force -Path "C:\opt\rocm" + tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + + - name: Configure + shell: pwsh + run: | + $currentTarget = "${{ matrix.gfx_target }}" + Write-Host "Input target: $currentTarget" + + if ($currentTarget -eq "gfx110X") { + $mapped_target = "gfx1100;gfx1101;gfx1102" + } elseif ($currentTarget -eq "gfx1151") { + $mapped_target = "gfx1151" + } elseif ($currentTarget -eq "gfx1150") { + $mapped_target = "gfx1150" + } elseif ($currentTarget -eq "gfx120X") { + $mapped_target = "gfx1200;gfx1201" + } else { + $mapped_target = $currentTarget + } + Write-Host "Mapped target: $mapped_target" + + # Set up environment variables and PATH + $env:HIP_PATH = "C:\opt\rocm" + $env:HIP_PLATFORM = "amd" + # Ensure bin comes before llvm\bin for consistency + $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" + + # Define CMake arguments + $cmakeArgs = @( + "-S", ".", + "-B", "build", + "-G", "Ninja Multi-Config", + "-DGPU_TARGETS=$mapped_target", + "-DGGML_HIP=ON", + "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", + "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_HIP_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", + "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", + "-DGGML_ROCM=1", + "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", + "-DBUILD_SHARED_LIBS=ON", + "-DWHISPER_SDL2=${{ matrix.sdl2 }}" + ) + # Run CMake + cmake @cmakeArgs + + - name: Build + shell: pwsh + run: | + cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS + + - name: Copy 
ROCm core DLLs to build directory + run: | + $rocmVersion = if ($env:DETECTED_ROCM_VERSION) { $env:DETECTED_ROCM_VERSION } else { $env:ROCM_VERSION } + $buildBinPath = "build/bin/${{ matrix.build }}" + $rocmBinPath = "C:\opt\rocm\bin" + + Write-Host "Copying ROCm core DLL files..." + + if (Test-Path $rocmBinPath) { + # Copy files matching patterns + $filesToCopy = @( + "amdhip64_*.dll", + "amd_comgr*.dll", + "libhipblas.dll", + "rocblas.dll", + "rocsolver.dll", + "hipblaslt.dll", + "libhipblaslt.dll", + "hipblas.dll" + ) + + foreach ($pattern in $filesToCopy) { + $matchingFiles = Get-ChildItem -Path $rocmBinPath -Name $pattern -ErrorAction SilentlyContinue + if ($matchingFiles) { + foreach ($file in $matchingFiles) { + Copy-Item (Join-Path $rocmBinPath $file) (Join-Path $buildBinPath $file) + Write-Host "Copied: $file" + } + } + } + + # Copy rocblas/library + $rocblasLibPath = Join-Path $rocmBinPath "rocblas\library" + if (Test-Path $rocblasLibPath) { + Copy-Item -Path $rocblasLibPath -Destination (Join-Path $buildBinPath "rocblas\library") -Recurse -Force + Write-Host "Copied: rocblas\library" + } + + # Copy hipblaslt/library + $hipblasltLibPath = Join-Path $rocmBinPath "hipblaslt\library" + if (Test-Path $hipblasltLibPath) { + Copy-Item -Path $hipblasltLibPath -Destination (Join-Path $buildBinPath "hipblaslt\library") -Recurse -Force + Write-Host "Copied: hipblaslt\library" + } + } + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" + + - name: Pack bin artifacts + shell: pwsh + run: | + # Create unique zip name with target suffix + $zipName = "whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip" + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName + + - name: Upload binaries + 
if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + # Unique artifact name per matrix job + name: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip + path: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip emscripten: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -1265,6 +1824,8 @@ jobs: - windows - windows-blas - windows-cublas + - windows-rocm + - ubuntu-rocm steps: - name: Clone From acc507e3c012e46e654feb78eafdf2a59fa6d2b8 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 29 Jan 2026 12:17:59 -0800 Subject: [PATCH 02/22] ci: copy ROCm sysdep libs and ensure build directory exists Add copying of ROCm system dependency libraries (e.g., elf, drm, numa) to the build bundle in CI to include required shared libraries for proper ROCm functionality. Also ensure the build directory is created if it doesn't exist to avoid copy failures. --- .github/workflows/build.yml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 692e5e7697e..7fdee1cd50e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -648,12 +648,16 @@ jobs: run: cmake --build build --config ${{ matrix.build }} -j $(nproc) # Copy Backend ROCm Folders --- - - name: Copy ROCm core libs to build directory + - name: Copy ROCm core and sysdep libs to build directory run: | build_bin_path="build/bin" rocm_bin_path="/opt/rocm/bin" rocm_lib_path="/opt/rocm/lib" + rocm_sysdeps_path="/opt/rocm/lib/rocm_sysdeps/lib" + # Ensure build directory exists + mkdir -p "$build_bin_path" + # Copy rocblas/library folder rocblas_lib_path="$rocm_lib_path/rocblas/library" if [ -d "$rocblas_lib_path" ]; then @@ -672,9 +676,8 @@ jobs: echo "Copied: hipblaslt/library" fi - # Copy required shared libraries - # We use generic wildcards to catch versioned .so files - echo 
"Copying shared libraries..." + # Copy standard ROCm shared libraries + echo "Copying core shared libraries..." cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true @@ -684,10 +687,16 @@ jobs: cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true - # Copy LLVM runtime libs often needed + # Copy LLVM runtime libs cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true + if [ -d "$rocm_sysdeps_path" ]; then + echo "Copying sysdep libraries from $rocm_sysdeps_path..." + # Using a broad wildcard ensures we grab elf.so.1, drm.so.2, numa.so.1, etc. + cp -v $rocm_sysdeps_path/librocm_sysdeps_*.so* "$build_bin_path/" + fi + - name: Bundle Linked Libraries run: | build_bin="build/bin" From e7166266d26c6cbeb0350de0e6b998e37ffebf11 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 12:04:15 -0800 Subject: [PATCH 03/22] Added workflows and actions for self hosted runners. 
--- .../cleanup-processes-linux/action.yml | 19 +++ .../cleanup-processes-windows/action.yml | 15 +++ .github/workflows/build.yml | 111 ++++++++++++++++++ .github/workflows/runner_heartbeat.yml | 59 ++++++++++ ci/run.sh | 2 +- 5 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 .github/actions/cleanup-processes-linux/action.yml create mode 100644 .github/actions/cleanup-processes-windows/action.yml create mode 100644 .github/workflows/runner_heartbeat.yml diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml new file mode 100644 index 00000000000..58649fcc41b --- /dev/null +++ b/.github/actions/cleanup-processes-linux/action.yml @@ -0,0 +1,19 @@ +name: 'Cleanup GPU Processes (Linux)' +description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: bash + run: | + echo "=== Cleaning up stale processes ===" + pkill -f "whisper-cli" 2>/dev/null || true + pkill -f "whisper-bench" 2>/dev/null || true + pkill -f "whisper-server" 2>/dev/null || true + pkill -f "ctest.*whisper" 2>/dev/null || true + if command -v rocm-smi &>/dev/null; then + echo "=== GPU process check ===" + rocm-smi --showpids 2>/dev/null || true + fi + echo "=== Cleanup complete ===" diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml new file mode 100644 index 00000000000..91a9424dd22 --- /dev/null +++ b/.github/actions/cleanup-processes-windows/action.yml @@ -0,0 +1,15 @@ +name: 'Cleanup GPU Processes (Windows)' +description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: pwsh + run: | + Write-Host "=== Cleaning up stale processes ===" + $processNames = @("whisper-cli", "whisper-bench", "whisper-server", "ctest") + foreach ($name in $processNames) { + Get-Process 
-Name $name -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + } + Write-Host "=== Cleanup complete ===" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7fdee1cd50e..8a61f838c23 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2148,3 +2148,114 @@ jobs: run: | vulkaninfo --summary GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + + # AMD ROCm GPU Testing (self-hosted runners) + test-rocm-linux: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Linux] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Linux] + # - gfx_target: gfx1200 + # runner: [rdna4, Linux] + # - gfx_target: gfx1150 + # runner: [rai300_400, Linux] + + concurrency: + group: rocm-test-linux-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-linux + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Verify ROCm installation + run: | + echo "=== ROCm Environment ===" + rocm-smi || echo "rocm-smi not found" + rocminfo | head -40 || echo "rocminfo not found" + hipcc --version || echo "hipcc not found" + echo "=== GPU Info ===" + rocm-smi --showproductname 2>/dev/null || true + + - name: Test + id: ggml-ci + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-linux + + test-rocm-windows: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Windows] + # Uncomment when runners are available: + # - gfx_target: 
gfx1100 + # runner: [navi31, Windows] + # - gfx_target: gfx1200 + # runner: [rdna4, Windows] + # - gfx_target: gfx1150 + # runner: [rai300_400, Windows] + + concurrency: + group: rocm-test-windows-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-windows + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Verify ROCm installation + shell: pwsh + run: | + Write-Host "=== ROCm Environment ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + & "$env:HIP_PATH\bin\hipcc.exe" --version 2>$null + + - name: Configure ROCm environment + shell: pwsh + run: | + $rocmPath = $env:HIP_PATH + if (-not $rocmPath) { $rocmPath = "C:\opt\rocm" } + echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "ROCM_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV + echo "$rocmPath\bin" >> $env:GITHUB_PATH + echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH + + - name: Test + id: ggml-ci + shell: bash + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-windows diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml new file mode 100644 index 00000000000..dff7d7f05a3 --- /dev/null +++ b/.github/workflows/runner_heartbeat.yml @@ -0,0 +1,59 @@ +name: Runner Heartbeat + +on: + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +jobs: + check-rocm-linux: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Linux] + name: rai300-400-linux + - runner: [stx-halo, Linux] + name: stx-halo-linux + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + run: | + echo "=== Runner Heartbeat: ${{ 
matrix.name }} ===" + echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "Hostname: $(hostname)" + echo "=== GPU Status ===" + rocm-smi 2>/dev/null || echo "rocm-smi not available" + echo "=== Disk Space ===" + df -h / /mnt 2>/dev/null || df -h / + echo "=== Memory ===" + free -h + echo "=== ROCm Version ===" + cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" + + check-rocm-windows: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Windows] + name: rai300-400-windows + - runner: [stx-halo, Windows] + name: stx-halo-windows + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + shell: pwsh + run: | + Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" + Write-Host "Timestamp: $(Get-Date -Format o)" + Write-Host "Hostname: $env:COMPUTERNAME" + Write-Host "=== GPU Status ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + Write-Host "=== Disk Space ===" + Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize + Write-Host "=== Memory ===" + $os = Get-CimInstance Win32_OperatingSystem + Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/ci/run.sh b/ci/run.sh index cbe28442e16..59e2cefad52 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -223,7 +223,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time cmake --build . --config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 1c5061f545f28e15e0b4c46477c6fe32555177df Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 12:49:28 -0800 Subject: [PATCH 04/22] Reorder jobs. 
--- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ecec06d4dfd..e9e1006ba18 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2154,13 +2154,13 @@ jobs: timeout-minutes: 120 steps: - - name: Cleanup before run - uses: ./.github/actions/cleanup-processes-linux - - name: Clone id: checkout uses: actions/checkout@v4 + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-linux + - name: Verify ROCm installation run: | echo "=== ROCm Environment ===" @@ -2204,13 +2204,13 @@ jobs: timeout-minutes: 120 steps: - - name: Cleanup before run - uses: ./.github/actions/cleanup-processes-windows - - name: Clone id: checkout uses: actions/checkout@v4 + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-windows + - name: Verify ROCm installation shell: pwsh run: | From a4a6a712848aaa80b49d0eacf56999cec06614c7 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 13:05:11 -0800 Subject: [PATCH 05/22] adding exports for rocm path and hip path. 
--- .github/workflows/build.yml | 4 ++-- ci/run.sh | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e9e1006ba18..553a16884ec 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1003,7 +1003,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-bin-${{ matrix.arch }}.zip @@ -1089,7 +1089,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.blas == 'ON' && matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-blas-bin-${{ matrix.arch }}.zip diff --git a/ci/run.sh b/ci/run.sh index 59e2cefad52..7e69ec91f3b 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -79,6 +79,12 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then fi CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}" + + # Set HIP environment if not already set + export HIP_PLATFORM=${HIP_PLATFORM:-amd} + export ROCM_PATH=${ROCM_PATH:-/opt/rocm} + export HIP_PATH=${HIP_PATH:-/opt/rocm} + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi if [ ! 
-z ${GG_BUILD_SYCL} ]; then From 1e0ec6db17ceeb4ffc77510b44b94072efa2c370 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 13:11:14 -0800 Subject: [PATCH 06/22] adding rocm lib path to ld_library_path --- ci/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/run.sh b/ci/run.sh index 7e69ec91f3b..85ade166658 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -84,6 +84,7 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then export HIP_PLATFORM=${HIP_PLATFORM:-amd} export ROCM_PATH=${ROCM_PATH:-/opt/rocm} export HIP_PATH=${HIP_PATH:-/opt/rocm} + export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${LD_LIBRARY_PATH} CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi From bc289aa7b93e29d3fcba9213df7c924f347e8de6 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 2 Mar 2026 22:56:26 -0800 Subject: [PATCH 07/22] Fixed a merge conflict. --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 553a16884ec..347041a836d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -438,11 +438,12 @@ jobs: strategy: fail-fast: false matrix: - ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + arch: [linux/amd64] + build: [Release] steps: - name: Clone - uses: actions/checkout@v6 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 From c8a540dff94588c919fa9ba6967659efe8dd18f8 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Tue, 3 Mar 2026 10:45:03 -0800 Subject: [PATCH 08/22] Add AMD ROCm GPU build and test CI infrastructure - Add self-hosted runner test jobs (test-rocm-linux, test-rocm-windows) for gfx1151/gfx1150 - Add cleanup composite actions for Linux and Windows runners - Add runner heartbeat monitoring workflow - Configure ci/run.sh with ROCm environment (HIP_PLATFORM, LD_LIBRARY_PATH, cmake flags) - Add Windows ROCm build support to 
build.yml - Fix conditional expression syntax warnings in build.yml --- .../cleanup-processes-linux/action.yml | 19 + .../cleanup-processes-windows/action.yml | 15 + .github/workflows/build.yml | 702 +++++++++++++++++- .github/workflows/runner_heartbeat.yml | 59 ++ ci/run.sh | 9 +- 5 files changed, 793 insertions(+), 11 deletions(-) create mode 100644 .github/actions/cleanup-processes-linux/action.yml create mode 100644 .github/actions/cleanup-processes-windows/action.yml create mode 100644 .github/workflows/runner_heartbeat.yml diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml new file mode 100644 index 00000000000..58649fcc41b --- /dev/null +++ b/.github/actions/cleanup-processes-linux/action.yml @@ -0,0 +1,19 @@ +name: 'Cleanup GPU Processes (Linux)' +description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: bash + run: | + echo "=== Cleaning up stale processes ===" + pkill -f "whisper-cli" 2>/dev/null || true + pkill -f "whisper-bench" 2>/dev/null || true + pkill -f "whisper-server" 2>/dev/null || true + pkill -f "ctest.*whisper" 2>/dev/null || true + if command -v rocm-smi &>/dev/null; then + echo "=== GPU process check ===" + rocm-smi --showpids 2>/dev/null || true + fi + echo "=== Cleanup complete ===" diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml new file mode 100644 index 00000000000..91a9424dd22 --- /dev/null +++ b/.github/actions/cleanup-processes-windows/action.yml @@ -0,0 +1,15 @@ +name: 'Cleanup GPU Processes (Windows)' +description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: pwsh + run: | + Write-Host "=== Cleaning up stale processes ===" + $processNames = @("whisper-cli", "whisper-bench", "whisper-server", 
"ctest") + foreach ($name in $processNames) { + Get-Process -Name $name -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + } + Write-Host "=== Cleanup complete ===" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8ce887fd111..347041a836d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -45,6 +45,14 @@ on: options: - full-ci - release-only + gfx_target: + description: 'AMD GPU targets (comma-separated)' + required: false + default: 'gfx1151,gfx1150,gfx120X,gfx110X' + rocm_version: + description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' + required: false + default: 'latest' concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -57,6 +65,8 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} ubuntu_image: "ubuntu:22.04" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" + GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} jobs: determine-tag: @@ -64,13 +74,11 @@ jobs: outputs: tag_name: ${{ steps.tag.outputs.name }} should_release: ${{ steps.tag.outputs.should_release }} - steps: - name: Checkout with full history uses: actions/checkout@v6 with: fetch-depth: 0 - - name: Determine tag name id: tag shell: bash @@ -114,6 +122,58 @@ jobs: echo "name=$TAG_NAME" >> $GITHUB_OUTPUT echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT + prepare-matrix: + runs-on: ubuntu-latest + outputs: + windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} + ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} + should_build_windows: ${{ steps.set-matrix.outputs.should_build_windows }} + should_build_ubuntu: ${{ steps.set-matrix.outputs.should_build_ubuntu }} + steps: + - name: Set matrix + id: set-matrix + run: | + targets="${{ env.GFX_TARGETS }}" + operating_systems="windows,ubuntu" + + echo 
"Input targets: $targets" + echo "Input operating systems: $operating_systems" + + target_array=$(echo "$targets" \ + | tr ',' '\n' \ + | sed 's/^ *//;s/ *$//' \ + | sed 's/^"//;s/"$//' \ + | jq -R . \ + | jq -s .) + + windows_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') + + # Create Ubuntu matrix (same GPU targets, Linux arch) + ubuntu_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') + + # Check which operating systems to build + should_build_windows="false" + should_build_ubuntu="false" + + if [[ "$operating_systems" == *"windows"* ]]; then + should_build_windows="true" + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT + fi + + if [[ "$operating_systems" == *"ubuntu"* ]]; then + should_build_ubuntu="true" + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT + fi + + echo "should_build_windows=$should_build_windows" >> $GITHUB_OUTPUT + echo "should_build_ubuntu=$should_build_ubuntu" >> $GITHUB_OUTPUT + + echo "Windows build: $should_build_windows" + echo "Ubuntu build: $should_build_ubuntu" + echo "Generated matrices: windows=$windows_matrix ubuntu=$ubuntu_matrix" + #linux/amd64 ubuntu-22: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -378,15 +438,12 @@ jobs: strategy: fail-fast: false matrix: - build: [Debug, Release] - #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - # TODO: arm/v7 disabled due to clang bug - # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 - arch: [linux/amd64, linux/arm64, linux/ppc64le] + arch: [linux/amd64] + build: [Release] steps: - name: Clone - uses: actions/checkout@v6 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -406,6 +463,272 @@ jobs: cmake .
-DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang make ctest -L gh --output-on-failure' + + ubuntu-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: ubuntu-22.04 + needs: [determine-tag, prepare-matrix] + # Check if we should run (based on the prepare-matrix output) + strategy: + # Uses the matrix generated in prepare-matrix (gfx_target, arch, build, sdl2) + matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Free disk space + run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh | bash + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt update + sudo apt install -y cmake ninja-build unzip curl build-essential libsdl2-dev git patchelf + + - name: Download and extract ROCm directly to /opt/rocm + run: | + # Determine ROCm version to use + rocm_version="${{ env.ROCM_VERSION }}" + current_target="${{ matrix.gfx_target }}" + + # Add appropriate suffixes for different GPU targets + s3_target="$current_target" + if [ "$current_target" = "gfx110X" ]; then + s3_target="${current_target}-dgpu" + echo "Using S3 target with -dgpu suffix: $s3_target" + elif [ "$current_target" = "gfx120X" ]; then + s3_target="${current_target}-all" + echo "Using S3 target with -all suffix: $s3_target" + fi + + if [ "$rocm_version" = "latest" ]; then + echo "Auto-detecting latest ROCm version for target: $current_target" + s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-linux-${s3_target}-7") + + # Extract every object name (<Key> element) from the S3 listing XML + files=$(echo "$s3_response" | grep -oP '(?<=<Key>)[^<]*' | grep "therock-dist-linux-${s3_target}-") + + # Extract
versions and sort them properly + latest_file="" + latest_major=0 + latest_minor=0 + latest_patch=0 + latest_rc=0 + latest_is_alpha=false + + while IFS= read -r file; do + if [[ "$file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + version="${BASH_REMATCH[1]}" + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') + rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') + is_alpha=false + if [[ "$version" =~ a ]]; then is_alpha=true; fi + + is_newer=false + if [ "$major" -gt "$latest_major" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then + if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; + fi + fi + + if [ "$is_newer" = true ]; then + latest_file="$file" + latest_major="$major" + latest_minor="$minor" + latest_patch="$patch" + latest_rc="$rc" + latest_is_alpha="$is_alpha" + fi + fi + done <<< "$files" + + echo "Found latest file: $latest_file" + + if [[ "$latest_file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + rocm_version="${BASH_REMATCH[1]}" + echo "Detected latest ROCm version: $rocm_version" + else + echo "Failed to extract ROCm version from latest file: $latest_file" + exit 1 + fi + + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" + else + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-${s3_target}-${rocm_version}.tar.gz" + fi + + echo "DETECTED_ROCM_VERSION=$rocm_version" >> 
$GITHUB_ENV + + # Create directory and stream extraction + sudo mkdir -p /opt/rocm + curl -sL "$rocm_url" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + + - name: Set ROCm environment variables + run: | + echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $GITHUB_ENV + printf '%s\n' /opt/rocm/bin /opt/rocm/llvm/bin >> $GITHUB_PATH + + - name: Find ROCm bitcode path + run: | + # Dynamically find the directory containing device libraries (amdgcn/bitcode) + BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) + + if [ -z "$BITCODE_PATH" ]; then + echo "::error::Could not find 'bitcode' directory in /opt/rocm" + find /opt/rocm -maxdepth 5 + exit 1 + fi + + echo "Found bitcode at: $BITCODE_PATH" + echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV + + - name: Configure CMake + run: | + + # Map GPU targets + current_target="${{ matrix.gfx_target }}" + echo "Input target: $current_target" + + if [ "$current_target" = "gfx110X" ]; then + mapped_target="gfx1100;gfx1101;gfx1102" + elif [ "$current_target" = "gfx1151" ]; then + mapped_target="gfx1151" + elif [ "$current_target" = "gfx1150" ]; then + mapped_target="gfx1150" + elif [ "$current_target" = "gfx120X" ]; then + mapped_target="gfx1200;gfx1201" + else + mapped_target="$current_target" + fi + echo "Mapped target: $mapped_target" + + cmake -S .
-B build -G Ninja \ + -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DGPU_TARGETS="$mapped_target" \ + -DBUILD_SHARED_LIBS=ON \ + -DGGML_HIP=ON \ + -DGGML_ROCM=1 \ + -DWHISPER_SDL2=${{ matrix.sdl2 }} + + - name: Build + run: cmake --build build --config ${{ matrix.build }} -j $(nproc) + + # Copy Backend ROCm Folders --- + - name: Copy ROCm core and sysdep libs to build directory + run: | + build_bin_path="build/bin" + rocm_bin_path="/opt/rocm/bin" + rocm_lib_path="/opt/rocm/lib" + rocm_sysdeps_path="/opt/rocm/lib/rocm_sysdeps/lib" + + # Ensure build directory exists + mkdir -p "$build_bin_path" + + # Copy rocblas/library folder + rocblas_lib_path="$rocm_lib_path/rocblas/library" + if [ -d "$rocblas_lib_path" ]; then + dest_rocblas_path="$build_bin_path/rocblas/library" + mkdir -p "$(dirname "$dest_rocblas_path")" + cp -r "$rocblas_lib_path" "$(dirname "$dest_rocblas_path")/" + echo "Copied: rocblas/library" + fi + + # Copy hipblaslt/library folder + hipblaslt_lib_path="$rocm_lib_path/hipblaslt/library" + if [ -d "$hipblaslt_lib_path" ]; then + dest_hipblaslt_path="$build_bin_path/hipblaslt/library" + mkdir -p "$(dirname "$dest_hipblaslt_path")" + cp -r "$hipblaslt_lib_path" "$(dirname "$dest_hipblaslt_path")/" + echo "Copied: hipblaslt/library" + fi + + # Copy standard ROCm shared libraries + echo "Copying core shared libraries..." 
+ cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocsolver.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libroctx64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhipblaslt.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true + + # Copy LLVM runtime libs + cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true + + if [ -d "$rocm_sysdeps_path" ]; then + echo "Copying sysdep libraries from $rocm_sysdeps_path..." + # Using a broad wildcard ensures we grab elf.so.1, drm.so.2, numa.so.1, etc. + cp -v $rocm_sysdeps_path/librocm_sysdeps_*.so* "$build_bin_path/" + fi + + - name: Bundle Linked Libraries + run: | + build_bin="build/bin" + echo "Scanning dependencies for whisper-cli..." + + ldd "$build_bin/whisper-cli" | grep "=> /" | while read -r line; do + + soname=$(echo "$line" | awk '{print $1}') + path=$(echo "$line" | awk '{print $3}') + + if [[ "$soname" =~ ^(libc\.so|libm\.so|libdl\.so|librt\.so|libpthread\.so|libstdc\+\+|libgcc_s|ld-linux) ]]; then + continue + fi + echo "Bundling: $soname" + echo " Source: $path" + cp -L "$path" "$build_bin/$soname" + done + chmod +x "$build_bin"/*.so* + + - name: Set RPATH for portable distribution + run: | + cd build/bin + # Set RPATH to $ORIGIN so the binary looks for .so files in its own directory + # wildcards catch whisper-cli, whisper-bench, etc., plus shared libs + for file in *.so* whisper-*; do + if [ -f "$file" ] && [ ! 
-L "$file" ]; then + # Only patch ELF files (executables and shared objects) + if file "$file" | grep -q "ELF"; then + patchelf --set-rpath '$ORIGIN' "$file" 2>/dev/null || true + echo "Patched RPATH for $file" + fi + fi + done + + - name: Pack bin artifacts + run: | + cd build/bin + SAFE_ARCH=$(echo "${{ matrix.arch }}" | tr '/' '-') + + # 1. Save SAFE_ARCH to GITHUB_ENV so the next step can use it + echo "SAFE_ARCH=$SAFE_ARCH" >> $GITHUB_ENV + + # Include target in filename (e.g., whisper-bin-gfx1100-linux-amd64.zip) + zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} + path: whisper-bin-*.zip ubuntu-22-gcc-sanitized: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -681,7 +1004,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-bin-${{ matrix.arch }}.zip @@ -767,7 +1090,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.blas == 'ON' && matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-blas-bin-${{ matrix.arch }}.zip @@ -981,6 +1304,252 @@ jobs: with: name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip + + windows-rocm: + if: ${{ 
github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: windows-latest + needs: [determine-tag, prepare-matrix] + strategy: + matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: Install Ninja + run: choco install ninja + + - name: Fetch SDL2 and Patch Header (Robust) + if: matrix.sdl2 == 'ON' + shell: pwsh + run: | + $sdlVer = "${{ matrix.s2ver }}" + $url = "https://github.com/libsdl-org/SDL/releases/download/release-$sdlVer/SDL2-devel-$sdlVer-VC.zip" + + Write-Host "Downloading SDL2 from $url..." + Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" + + Write-Host "Extracting SDL2..." + 7z x sdl2.zip + + # 1. Locate CMake config dynamically + $cmakeFile = Get-ChildItem -Path . -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if ($cmakeFile) { + $cmakeDir = $cmakeFile.DirectoryName + Write-Host "Found SDL2 CMake dir at: $cmakeDir" + echo "SDL2_DIR=$cmakeDir" >> $env:GITHUB_ENV + } else { + Write-Error "FATAL: Could not find sdl2-config.cmake inside extracted files!" + exit 1 + } + + # 2. Find and Patch SDL_endian.h recursively + Write-Host "Searching for SDL_endian.h..." + $headerFile = Get-ChildItem -Path . -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + + if ($headerFile) { + Write-Host "Found header at: $($headerFile.FullName)" + $content = Get-Content $headerFile.FullName -Raw + + # The Fix: Comment out the extern declaration + if ($content -match 'extern void _m_prefetch') { + $content = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' + Set-Content -Path $headerFile.FullName -Value $content + Write-Host "SUCCESS: Patched _m_prefetch in SDL_endian.h" + } else { + Write-Host "WARNING: _m_prefetch string not found. 
It might be a different SDL version or already patched." + } + } else { + # Debug output if file is missing + Write-Host "Listing root directories:" + Get-ChildItem -Path . -Directory | Format-Table Name + Write-Error "FATAL: Could not locate SDL_endian.h in the workspace." + exit 1 + } + + - name: Download ROCm nightly tarball + run: | + # Determine ROCm version to use + $rocmVersion = "${{ env.ROCM_VERSION }}" + $currentTarget = "${{ matrix.gfx_target }}" + + $s3Target = $currentTarget + if ($currentTarget -eq "gfx110X") { + $s3Target = "$currentTarget-dgpu" + Write-Host "Using S3 target with -dgpu suffix: $s3Target" + } elseif ($currentTarget -eq "gfx120X") { + $s3Target = "$currentTarget-all" + Write-Host "Using S3 target with -all suffix: $s3Target" + } + + if ($rocmVersion -eq "latest") { + Write-Host "Auto-detecting latest ROCm version for target: $currentTarget" + $s3Response = (Invoke-WebRequest "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-windows-$s3Target-7").Content + $files = $s3Response -split '<Key>' | Where-Object {$_ -match '</Key>'} | ForEach-Object { ($_ -split '</Key>')[0] } + + $versionFiles = @() + foreach ($file in $files) { + if ($file -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $version = $matches[1] + $versionFiles += [PSCustomObject]@{ + File = $file + Version = $version + Major = [int]($version -split '\.')[0] + Minor = [int]($version -split '\.')[1] + Patch = [int](($version -split '\.')[2] -replace '(?:a|rc).*', '') + RC = [int]($version -replace '.*(?:a|rc)', '') + IsAlpha = $version -match 'a' + } + } + } + + $latestFile = ($versionFiles | Sort-Object Major, Minor, Patch, @{Expression={if($_.IsAlpha){1}else{0}}}, RC | Select-Object -Last 1).File + Write-Host "Found latest file: $latestFile" + + if ($latestFile -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $rocmVersion = $matches[1] + Write-Host "Detected latest ROCm version: $rocmVersion" + } else
{ + Write-Error "Failed to extract ROCm version from latest file: $latestFile" + exit 1 + } + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" + } else { + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-$s3Target-$rocmVersion.tar.gz" + } + + echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV + Invoke-WebRequest -Uri $rocmUrl -OutFile "rocm.tar.gz" + + - name: Extract ROCm to C:\opt\rocm + run: | + New-Item -ItemType Directory -Force -Path "C:\opt\rocm" + tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + + - name: Configure + shell: pwsh + run: | + $currentTarget = "${{ matrix.gfx_target }}" + Write-Host "Input target: $currentTarget" + + if ($currentTarget -eq "gfx110X") { + $mapped_target = "gfx1100;gfx1101;gfx1102" + } elseif ($currentTarget -eq "gfx1151") { + $mapped_target = "gfx1151" + } elseif ($currentTarget -eq "gfx1150") { + $mapped_target = "gfx1150" + } elseif ($currentTarget -eq "gfx120X") { + $mapped_target = "gfx1200;gfx1201" + } else { + $mapped_target = $currentTarget + } + Write-Host "Mapped target: $mapped_target" + + # Set up environment variables and PATH + $env:HIP_PATH = "C:\opt\rocm" + $env:HIP_PLATFORM = "amd" + # Ensure bin comes before llvm\bin for consistency + $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" + + # Define CMake arguments + $cmakeArgs = @( + "-S", ".", + "-B", "build", + "-G", "Ninja Multi-Config", + "-DGPU_TARGETS=$mapped_target", + "-DGGML_HIP=ON", + "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", + "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_HIP_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", + "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", + "-DGGML_ROCM=1", + "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", + "-DBUILD_SHARED_LIBS=ON", + "-DWHISPER_SDL2=${{ 
matrix.sdl2 }}" + ) + # Run CMake + cmake @cmakeArgs + + - name: Build + shell: pwsh + run: | + cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS + + - name: Copy ROCm core DLLs to build directory + run: | + $rocmVersion = if ($env:DETECTED_ROCM_VERSION) { $env:DETECTED_ROCM_VERSION } else { $env:ROCM_VERSION } + $buildBinPath = "build/bin/${{ matrix.build }}" + $rocmBinPath = "C:\opt\rocm\bin" + + Write-Host "Copying ROCm core DLL files..." + + if (Test-Path $rocmBinPath) { + # Copy files matching patterns + $filesToCopy = @( + "amdhip64_*.dll", + "amd_comgr*.dll", + "libhipblas.dll", + "rocblas.dll", + "rocsolver.dll", + "hipblaslt.dll", + "libhipblaslt.dll", + "hipblas.dll" + ) + + foreach ($pattern in $filesToCopy) { + $matchingFiles = Get-ChildItem -Path $rocmBinPath -Name $pattern -ErrorAction SilentlyContinue + if ($matchingFiles) { + foreach ($file in $matchingFiles) { + Copy-Item (Join-Path $rocmBinPath $file) (Join-Path $buildBinPath $file) + Write-Host "Copied: $file" + } + } + } + + # Copy rocblas/library + $rocblasLibPath = Join-Path $rocmBinPath "rocblas\library" + if (Test-Path $rocblasLibPath) { + Copy-Item -Path $rocblasLibPath -Destination (Join-Path $buildBinPath "rocblas\library") -Recurse -Force + Write-Host "Copied: rocblas\library" + } + + # Copy hipblaslt/library + $hipblasltLibPath = Join-Path $rocmBinPath "hipblaslt\library" + if (Test-Path $hipblasltLibPath) { + Copy-Item -Path $hipblasltLibPath -Destination (Join-Path $buildBinPath "hipblaslt\library") -Recurse -Force + Write-Host "Copied: hipblaslt\library" + } + } + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" + + - name: Pack bin artifacts + shell: pwsh + run: | + # Create unique zip name with target suffix + 
$zipName = "whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip" + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + # Unique artifact name per matrix job + name: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip + path: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip emscripten: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -1245,6 +1814,8 @@ jobs: - windows - windows-blas - windows-cublas + - windows-rocm + - ubuntu-rocm steps: - name: Clone @@ -1558,3 +2129,114 @@ jobs: run: | vulkaninfo --summary GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + + # AMD ROCm GPU Testing (self-hosted runners) + test-rocm-linux: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Linux] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Linux] + # - gfx_target: gfx1200 + # runner: [rdna4, Linux] + # - gfx_target: gfx1150 + # runner: [rai300_400, Linux] + + concurrency: + group: rocm-test-linux-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-linux + + - name: Verify ROCm installation + run: | + echo "=== ROCm Environment ===" + rocm-smi || echo "rocm-smi not found" + rocminfo | head -40 || echo "rocminfo not found" + hipcc --version || echo "hipcc not found" + echo "=== GPU Info ===" + rocm-smi --showproductname 2>/dev/null || true + + - name: Test + id: ggml-ci + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash 
./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-linux + + test-rocm-windows: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Windows] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Windows] + # - gfx_target: gfx1200 + # runner: [rdna4, Windows] + # - gfx_target: gfx1150 + # runner: [rai300_400, Windows] + + concurrency: + group: rocm-test-windows-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-windows + + - name: Verify ROCm installation + shell: pwsh + run: | + Write-Host "=== ROCm Environment ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + & "$env:HIP_PATH\bin\hipcc.exe" --version 2>$null + + - name: Configure ROCm environment + shell: pwsh + run: | + $rocmPath = $env:HIP_PATH + if (-not $rocmPath) { $rocmPath = "C:\opt\rocm" } + echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "ROCM_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV + echo "$rocmPath\bin" >> $env:GITHUB_PATH + echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH + + - name: Test + id: ggml-ci + shell: bash + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-windows diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml new file mode 100644 index 00000000000..dff7d7f05a3 --- /dev/null +++ b/.github/workflows/runner_heartbeat.yml @@ -0,0 +1,59 @@ +name: 
Runner Heartbeat + +on: + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +jobs: + check-rocm-linux: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Linux] + name: rai300-400-linux + - runner: [stx-halo, Linux] + name: stx-halo-linux + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + run: | + echo "=== Runner Heartbeat: ${{ matrix.name }} ===" + echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "Hostname: $(hostname)" + echo "=== GPU Status ===" + rocm-smi 2>/dev/null || echo "rocm-smi not available" + echo "=== Disk Space ===" + df -h / /mnt 2>/dev/null || df -h / + echo "=== Memory ===" + free -h + echo "=== ROCm Version ===" + cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" + + check-rocm-windows: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Windows] + name: rai300-400-windows + - runner: [stx-halo, Windows] + name: stx-halo-windows + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + shell: pwsh + run: | + Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" + Write-Host "Timestamp: $(Get-Date -Format o)" + Write-Host "Hostname: $env:COMPUTERNAME" + Write-Host "=== GPU Status ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + Write-Host "=== Disk Space ===" + Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize + Write-Host "=== Memory ===" + $os = Get-CimInstance Win32_OperatingSystem + Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/ci/run.sh b/ci/run.sh index cbe28442e16..85ade166658 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -79,6 +79,13 @@ if [ ! 
-z ${GG_BUILD_ROCM} ]; then fi CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}" + + # Set HIP environment if not already set + export HIP_PLATFORM=${HIP_PLATFORM:-amd} + export ROCM_PATH=${ROCM_PATH:-/opt/rocm} + export HIP_PATH=${HIP_PATH:-/opt/rocm} + export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${LD_LIBRARY_PATH} + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi if [ ! -z ${GG_BUILD_SYCL} ]; then @@ -223,7 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time cmake --build . --config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 91fa40d96e74120f49f31b8d787831653cae0ab6 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Wed, 4 Mar 2026 11:49:12 -0800 Subject: [PATCH 09/22] I have removed duplicated steps, the stray echo, all dead code, and unused outputs (the should_build outputs, to be specific). I have removed outputs.rocm_version from both CI jobs and extracted resolve_rocm to a shared script for both jobs to use. Fixed the matrix and removed the GGML_ROCM=1 flag from both ubuntu-rocm and windows-rocm, which doesn't apply because it isn't a real flag. Also commented out heartbeat runners.
--- .github/workflows/build.yml | 236 +++---------------------- .github/workflows/runner_heartbeat.yml | 56 +++--- ci/map-gpu-target.sh | 28 +++ ci/resolve-rocm-version.sh | 96 ++++++++++ ci/run.sh | 1 + 5 files changed, 181 insertions(+), 236 deletions(-) create mode 100755 ci/map-gpu-target.sh create mode 100755 ci/resolve-rocm-version.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 347041a836d..1a8f5c9c80e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -127,17 +127,12 @@ jobs: outputs: windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} - should_build_windows: ${{ steps.set-matrix.outputs.should_build_windows }} - should_build_ubuntu: ${{ steps.set-matrix.outputs.should_build_ubuntu }} steps: - name: Set matrix id: set-matrix run: | targets="${{ env.GFX_TARGETS }}" - operating_systems="windows,ubuntu" - echo "Input targets: $targets" - echo "Input operating systems: $operating_systems" target_array=$(echo "$targets" \ | tr ',' '\n' \ @@ -149,31 +144,11 @@ jobs: windows_matrix=$(echo "$target_array" \ | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') - # 3. 
Create Ubuntu Matrix ubuntu_matrix=$(echo "$target_array" \ | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') - # Check which operating systems to build - should_build_windows="false" - should_build_ubuntu="false" - - if [[ "$operating_systems" == *"windows"* ]]; then - should_build_windows="true" - echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT - fi - - if [[ "$operating_systems" == *"ubuntu"* ]]; then - should_build_ubuntu="true" - echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT - fi - - echo "should_build_windows=$should_build_windows" >> $GITHUB_OUTPUT - echo "should_build_ubuntu=$should_build_ubuntu" >> $GITHUB_OUTPUT - - echo "Windows build: $should_build_windows" - echo "Ubuntu build: $should_build_ubuntu" - echo "Generated matrix: $matrix_targets" - #linux/amd64 + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT ubuntu-22: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -469,17 +444,20 @@ jobs: github.event.inputs.run_type == 'full-ci' }} runs-on: ubuntu-22.04 needs: [determine-tag, prepare-matrix] - # Check if we should run (based on the prepare-matrix output) strategy: - # Uses the matrix generated in prepare-matrix (gfx_target, arch, build, sdl2) matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} fail-fast: false - outputs: - rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} steps: - name: Free disk space - run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh | bash + run: | + echo "=== Disk usage before cleanup ===" + df -h / + sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \ + /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force 2>/dev/null || true + echo "=== Disk usage after cleanup ===" + df -h / - name: Checkout repository uses: 
actions/checkout@v4 @@ -491,86 +469,10 @@ jobs: - name: Download and extract ROCm directly to /opt/rocm run: | - # Determine ROCm version to use - rocm_version="${{ env.ROCM_VERSION }}" - current_target="${{ matrix.gfx_target }}" - - # Add appropriate suffixes for different GPU targets - s3_target="$current_target" - if [ "$current_target" = "gfx110X" ]; then - s3_target="${current_target}-dgpu" - echo "Using S3 target with -dgpu suffix: $s3_target" - elif [ "$current_target" = "gfx120X" ]; then - s3_target="${current_target}-all" - echo "Using S3 target with -all suffix: $s3_target" - fi - - if [ "$rocm_version" = "latest" ]; then - echo "Auto-detecting latest ROCm version for target: $current_target" - s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-linux-${s3_target}-7") - - # Extract all files - files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "therock-dist-linux-${s3_target}-") - - # Extract versions and sort them properly - latest_file="" - latest_major=0 - latest_minor=0 - latest_patch=0 - latest_rc=0 - latest_is_alpha=false - - while IFS= read -r file; do - if [[ "$file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then - version="${BASH_REMATCH[1]}" - major=$(echo "$version" | cut -d. -f1) - minor=$(echo "$version" | cut -d. -f2) - patch=$(echo "$version" | cut -d. 
-f3 | sed 's/\(a\|rc\).*//') - rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') - is_alpha=false - if [[ "$version" =~ a ]]; then is_alpha=true; fi - - is_newer=false - if [ "$major" -gt "$latest_major" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then - if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; - elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; - fi - fi - - if [ "$is_newer" = true ]; then - latest_file="$file" - latest_major="$major" - latest_minor="$minor" - latest_patch="$patch" - latest_rc="$rc" - latest_is_alpha="$is_alpha" - fi - fi - done <<< "$files" - - echo "Found latest file: $latest_file" - - if [[ "$latest_file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then - rocm_version="${BASH_REMATCH[1]}" - echo "Detected latest ROCm version: $rocm_version" - else - echo "Failed to extract ROCm version from latest file: $latest_file" - exit 1 - fi - - rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" - else - rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-${s3_target}-${rocm_version}.tar.gz" - fi - - echo "DETECTED_ROCM_VERSION=$rocm_version" >> $GITHUB_ENV - - # Create directory and stream extraction + source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV sudo mkdir -p /opt/rocm - curl -sL "$rocm_url" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + curl -sL "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C 
/opt/rocm --strip-components=1 - name: Set ROCm environment variables run: | @@ -595,23 +497,7 @@ jobs: - name: Configure CMake run: | - - # Map GPU targets - current_target="${{ matrix.gfx_target }}" - echo "Input target: $current_target" - - if [ "$current_target" = "gfx110X" ]; then - mapped_target="gfx1100;gfx1101;gfx1102" - elif [ "$current_target" = "gfx1151" ]; then - mapped_target="gfx1151" - elif [ "$current_target" = "gfx1150" ]; then - mapped_target="gfx1150" - elif [ "$current_target" = "gfx120X" ]; then - mapped_target="gfx1200;gfx1201" - else - mapped_target="$current_target" - fi - echo "Mapped target: $mapped_target" + source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ @@ -619,10 +505,9 @@ jobs: -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DGPU_TARGETS="$mapped_target" \ + -DGPU_TARGETS="$MAPPED_GPU_TARGET" \ -DBUILD_SHARED_LIBS=ON \ -DGGML_HIP=ON \ - -DGGML_ROCM=1 \ -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build @@ -1313,8 +1198,6 @@ jobs: strategy: matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} fail-fast: false - outputs: - rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} steps: - name: Clone @@ -1372,95 +1255,36 @@ jobs: } - name: Download ROCm nightly tarball + shell: bash run: | - # Determine ROCm version to use - $rocmVersion = "${{ env.ROCM_VERSION }}" - $currentTarget = "${{ matrix.gfx_target }}" - - $s3Target = $currentTarget - if ($currentTarget -eq "gfx110X") { - $s3Target = "$currentTarget-dgpu" - Write-Host "Using S3 target with -dgpu suffix: $s3Target" - } elseif ($currentTarget -eq "gfx120X") { - $s3Target = "$currentTarget-all" - Write-Host "Using S3 target with -all suffix: $s3Target" - } - - if ($rocmVersion -eq "latest") { - Write-Host "Auto-detecting latest ROCm version for target: 
$currentTarget" - $s3Response = (Invoke-WebRequest "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-windows-$s3Target-7").Content - $files = $s3Response -split '' | Where-Object {$_ -match ''} | ForEach-Object { ($_ -split '')[0] } - - $versionFiles = @() - foreach ($file in $files) { - if ($file -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { - $version = $matches[1] - $versionFiles += [PSCustomObject]@{ - File = $file - Version = $version - Major = [int]($version -split '\.')[0] - Minor = [int]($version -split '\.')[1] - Patch = [int](($version -split '\.')[2] -replace '(?:a|rc).*', '') - RC = [int]($version -replace '.*(?:a|rc)', '') - IsAlpha = $version -match 'a' - } - } - } - - $latestFile = ($versionFiles | Sort-Object Major, Minor, Patch, @{Expression={if($_.IsAlpha){1}else{0}}}, RC | Select-Object -Last 1).File - Write-Host "Found latest file: $latestFile" - - if ($latestFile -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { - $rocmVersion = $matches[1] - Write-Host "Detected latest ROCm version: $rocmVersion" - } else { - Write-Error "Failed to extract ROCm version from latest file: $latestFile" - exit 1 - } - $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" - } else { - $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-$s3Target-$rocmVersion.tar.gz" - } - - echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV - Invoke-WebRequest -Uri $rocmUrl -OutFile "rocm.tar.gz" + source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV + curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz - name: Extract ROCm to C:\opt\rocm run: | New-Item -ItemType Directory -Force -Path "C:\opt\rocm" tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + - name: Map GPU target + id: gpu-target + shell: bash + run: | + source 
ci/map-gpu-target.sh "${{ matrix.gfx_target }}" + echo "mapped=$MAPPED_GPU_TARGET" >> $GITHUB_OUTPUT + - name: Configure shell: pwsh run: | - $currentTarget = "${{ matrix.gfx_target }}" - Write-Host "Input target: $currentTarget" - - if ($currentTarget -eq "gfx110X") { - $mapped_target = "gfx1100;gfx1101;gfx1102" - } elseif ($currentTarget -eq "gfx1151") { - $mapped_target = "gfx1151" - } elseif ($currentTarget -eq "gfx1150") { - $mapped_target = "gfx1150" - } elseif ($currentTarget -eq "gfx120X") { - $mapped_target = "gfx1200;gfx1201" - } else { - $mapped_target = $currentTarget - } - Write-Host "Mapped target: $mapped_target" - - # Set up environment variables and PATH $env:HIP_PATH = "C:\opt\rocm" $env:HIP_PLATFORM = "amd" - # Ensure bin comes before llvm\bin for consistency $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" - # Define CMake arguments $cmakeArgs = @( "-S", ".", "-B", "build", "-G", "Ninja Multi-Config", - "-DGPU_TARGETS=$mapped_target", + "-DGPU_TARGETS=${{ steps.gpu-target.outputs.mapped }}", "-DGGML_HIP=ON", "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", @@ -1469,12 +1293,10 @@ jobs: "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", - "-DGGML_ROCM=1", "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", "-DBUILD_SHARED_LIBS=ON", "-DWHISPER_SDL2=${{ matrix.sdl2 }}" ) - # Run CMake cmake @cmakeArgs - name: Build @@ -1528,10 +1350,6 @@ jobs: } } - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} - - name: Copy SDL2.dll if: matrix.sdl2 == 'ON' run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml index dff7d7f05a3..de133378750 100644 --- 
a/.github/workflows/runner_heartbeat.yml +++ b/.github/workflows/runner_heartbeat.yml @@ -11,8 +11,9 @@ jobs: fail-fast: false matrix: include: - - runner: [rai300_400, Linux] - name: rai300-400-linux + # Uncomment when self-hosted runners are registered: + # - runner: [rai300_400, Linux] + # name: rai300-400-linux - runner: [stx-halo, Linux] name: stx-halo-linux runs-on: ${{ matrix.runner }} @@ -32,28 +33,29 @@ jobs: echo "=== ROCm Version ===" cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" - check-rocm-windows: - strategy: - fail-fast: false - matrix: - include: - - runner: [rai300_400, Windows] - name: rai300-400-windows - - runner: [stx-halo, Windows] - name: stx-halo-windows - runs-on: ${{ matrix.runner }} - timeout-minutes: 10 - steps: - - name: Heartbeat - shell: pwsh - run: | - Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" - Write-Host "Timestamp: $(Get-Date -Format o)" - Write-Host "Hostname: $env:COMPUTERNAME" - Write-Host "=== GPU Status ===" - & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null - Write-Host "=== Disk Space ===" - Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize - Write-Host "=== Memory ===" - $os = Get-CimInstance Win32_OperatingSystem - Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" + # Uncomment when Windows self-hosted runners are registered: + # check-rocm-windows: + # strategy: + # fail-fast: false + # matrix: + # include: + # - runner: [rai300_400, Windows] + # name: rai300-400-windows + # - runner: [stx-halo, Windows] + # name: stx-halo-windows + # runs-on: ${{ matrix.runner }} + # timeout-minutes: 10 + # steps: + # - name: Heartbeat + # shell: pwsh + # run: | + # Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" + # Write-Host "Timestamp: $(Get-Date -Format o)" + # Write-Host "Hostname: $env:COMPUTERNAME" + # Write-Host "=== GPU Status ===" + # & 
"$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + # Write-Host "=== Disk Space ===" + # Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize + # Write-Host "=== Memory ===" + # $os = Get-CimInstance Win32_OperatingSystem + # Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/ci/map-gpu-target.sh b/ci/map-gpu-target.sh new file mode 100755 index 00000000000..1e7de7c9fcf --- /dev/null +++ b/ci/map-gpu-target.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Map a GFX target shorthand to specific GPU architectures for CMake. +# +# Usage: +# source ci/map-gpu-target.sh +# +# Arguments: +# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X, or specific) +# +# Outputs (exported): +# MAPPED_GPU_TARGET - Semicolon-separated list of GPU architectures + +gfx_target="$1" + +if [ -z "$gfx_target" ]; then + echo "Usage: source ci/map-gpu-target.sh " + return 1 2>/dev/null || exit 1 +fi + +case "$gfx_target" in + gfx110X) MAPPED_GPU_TARGET="gfx1100;gfx1101;gfx1102" ;; + gfx120X) MAPPED_GPU_TARGET="gfx1200;gfx1201" ;; + *) MAPPED_GPU_TARGET="$gfx_target" ;; +esac + +export MAPPED_GPU_TARGET +echo "Mapped GPU target: $gfx_target -> $MAPPED_GPU_TARGET" diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh new file mode 100755 index 00000000000..7d9e1c60fc9 --- /dev/null +++ b/ci/resolve-rocm-version.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# +# Resolve the latest ROCm nightly tarball URL for a given GPU target and platform. +# +# Usage: +# source ci/resolve-rocm-version.sh +# +# Arguments: +# platform - "linux" or "windows" +# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X) +# rocm_version - Specific version (e.g. 
7.11.0a20251205) or "latest" +# +# Outputs (exported): +# ROCM_RESOLVED_VERSION - The resolved version string +# ROCM_TARBALL_URL - The full S3 URL to download + +platform="$1" +gfx_target="$2" +rocm_version="$3" + +if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then + echo "Usage: source ci/resolve-rocm-version.sh " + return 1 2>/dev/null || exit 1 +fi + +# Map GPU target to S3 naming convention +s3_target="$gfx_target" +if [ "$gfx_target" = "gfx110X" ]; then + s3_target="${gfx_target}-dgpu" +elif [ "$gfx_target" = "gfx120X" ]; then + s3_target="${gfx_target}-all" +fi + +dist_prefix="therock-dist-${platform}-${s3_target}" + +if [ "$rocm_version" = "latest" ]; then + echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." + s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") + + files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "${dist_prefix}-") + + latest_file="" + latest_major=0 + latest_minor=0 + latest_patch=0 + latest_rc=0 + latest_is_alpha=false + + while IFS= read -r file; do + if [[ "$file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + version="${BASH_REMATCH[1]}" + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + patch=$(echo "$version" | cut -d. 
-f3 | sed 's/\(a\|rc\).*//') + rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') + is_alpha=false + if [[ "$version" =~ a ]]; then is_alpha=true; fi + + is_newer=false + if [ "$major" -gt "$latest_major" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then + if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; + fi + fi + + if [ "$is_newer" = true ]; then + latest_file="$file" + latest_major="$major" + latest_minor="$minor" + latest_patch="$patch" + latest_rc="$rc" + latest_is_alpha="$is_alpha" + fi + fi + done <<< "$files" + + echo "Found latest file: $latest_file" + + if [[ "$latest_file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + rocm_version="${BASH_REMATCH[1]}" + echo "Detected latest ROCm version: $rocm_version" + else + echo "Failed to extract ROCm version from latest file: $latest_file" + return 1 2>/dev/null || exit 1 + fi + + export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" +else + export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/${dist_prefix}-${rocm_version}.tar.gz" +fi + +export ROCM_RESOLVED_VERSION="$rocm_version" +echo "ROCm URL: $ROCM_TARBALL_URL" diff --git a/ci/run.sh b/ci/run.sh index 85ade166658..a65928a1431 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -230,6 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + # Use cmake --build instead of make for cross-platform compatibility (Windows ROCm CI) (time cmake --build . 
--config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 265c868c6689d3404fbc076fdb5d2ac244d6f915 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Wed, 4 Mar 2026 13:49:46 -0800 Subject: [PATCH 10/22] Replaced grep -oP '(?<=)[^<]*' with portable sed -n 's/.*\([^<]*\)<\/Key>.*/\1/gp'. This works on both Linux and Windows Git Bash. --- ci/resolve-rocm-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 7d9e1c60fc9..6a49e401671 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -37,7 +37,7 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "${dist_prefix}-") + files=$(echo "$s3_response" | sed -n 's/.*\([^<]*\)<\/Key>.*/\1/gp' | grep "${dist_prefix}-") latest_file="" latest_major=0 From 2c853b7aabf0dbfe307b916280f496b552e05bcf Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Wed, 4 Mar 2026 15:27:42 -0800 Subject: [PATCH 11/22] Reverted the change using cmake instead of make adding mode. 
--- .github/workflows/build.yml | 9 ++++++--- ci/resolve-rocm-version.sh | 2 +- ci/run.sh | 3 +-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a8f5c9c80e..c5c9290ddce 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -413,12 +413,15 @@ jobs: strategy: fail-fast: false matrix: - arch: [linux/amd64] - build: [Release] + build: [Debug, Release] + #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] + # TODO: arm/v7 disabled due to clang bug + # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 + arch: [linux/amd64, linux/arm64, linux/ppc64le] steps: - name: Clone - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up QEMU uses: docker/setup-qemu-action@v3 diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 6a49e401671..eb2bcc58727 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -37,7 +37,7 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | sed -n 's/.*\([^<]*\)<\/Key>.*/\1/gp' | grep "${dist_prefix}-") + files=$(echo "$s3_response" | sed 's//\n/g' | sed -n 's/\([^<]*\)<\/Key>.*/\1/p' | grep "${dist_prefix}-") latest_file="" latest_major=0 diff --git a/ci/run.sh b/ci/run.sh index a65928a1431..9f6d73d9c04 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -230,8 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - # Use cmake --build instead of make for cross-platform compatibility (Windows ROCm CI) - (time cmake --build . 
--config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time make -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 2fd0fb78dee8e36c59ec1944c33cd51d9e99a5fe Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 13:43:18 -0700 Subject: [PATCH 12/22] make compatible with git bash --- ci/resolve-rocm-version.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index eb2bcc58727..e0fa5eba79e 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -24,10 +24,9 @@ if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then fi # Map GPU target to S3 naming convention +# Group targets (gfx110X, gfx120X) use "-all" suffix; individual targets have no suffix s3_target="$gfx_target" -if [ "$gfx_target" = "gfx110X" ]; then - s3_target="${gfx_target}-dgpu" -elif [ "$gfx_target" = "gfx120X" ]; then +if [ "$gfx_target" = "gfx110X" ] || [ "$gfx_target" = "gfx120X" ]; then s3_target="${gfx_target}-all" fi @@ -37,7 +36,7 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | sed 's//\n/g' | sed -n 's/\([^<]*\)<\/Key>.*/\1/p' | grep "${dist_prefix}-") + files=$(echo "$s3_response" | tr '<' '\n' | sed -n 's/^Key>\([^<]*\)/\1/p' | grep "${dist_prefix}-") latest_file="" latest_major=0 From 4d25bcfc083025a4cb6045e2f9a6f8ffb27f55cf Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 14:53:01 -0700 Subject: [PATCH 13/22] changing the approach to use awk instead, since prior attempt with tr, sed and grep didn't work. 
--- ci/resolve-rocm-version.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index e0fa5eba79e..6ed39691426 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -36,7 +36,11 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | tr '<' '\n' | sed -n 's/^Key>\([^<]*\)/\1/p' | grep "${dist_prefix}-") + # Use awk for XML parsing - portable across Linux and Windows Git Bash + files=$(echo "$s3_response" | awk -v prefix="${dist_prefix}-" ' + BEGIN { RS=""; FS="" } + NR>1 && $1 ~ prefix { print $1 } + ') latest_file="" latest_major=0 From f1287e3807b44f719c3d331d53ecf4330a915a7d Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 15:57:22 -0700 Subject: [PATCH 14/22] attempting grep now. --- ci/resolve-rocm-version.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 6ed39691426..b715c483993 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -36,11 +36,9 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." 
s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - # Use awk for XML parsing - portable across Linux and Windows Git Bash - files=$(echo "$s3_response" | awk -v prefix="${dist_prefix}-" ' - BEGIN { RS=""; FS="" } - NR>1 && $1 ~ prefix { print $1 } - ') + # Use grep -o (basic regex, no PCRE) + sed for XML parsing + # Works on full Git Bash AND MinGit/BusyBox variants + files=$(echo "$s3_response" | tr -d '\r' | grep -o '[^<]*' | sed 's///;s/<\/Key>//' | grep "^${dist_prefix}-") latest_file="" latest_major=0 From cd3b5fcee95a8536aef628a9bb6334e1f69cec54 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 16:27:02 -0700 Subject: [PATCH 15/22] Adding a PowerShell step for Windows instead of a bash step, because the shell's grep on Windows doesn't work very well. --- .github/workflows/build.yml | 64 ++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c5c9290ddce..d0a9625ebd8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1257,12 +1257,66 @@ jobs: exit 1 } - - name: Download ROCm nightly tarball - shell: bash + - name: Resolve ROCm version and download tarball + shell: pwsh run: | - source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" - echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV - curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz + $gfxTarget = "${{ matrix.gfx_target }}" + $rocmVersion = "${{ env.ROCM_VERSION }}" + + $s3Target = $gfxTarget + if ($gfxTarget -eq "gfx110X" -or $gfxTarget -eq "gfx120X") { + $s3Target = "$gfxTarget-all" + } + $distPrefix = "therock-dist-windows-$s3Target" + + if ($rocmVersion -eq "latest") { + Write-Host "Auto-detecting latest ROCm version for windows/$gfxTarget..."
+ $response = Invoke-RestMethod -Uri "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=$distPrefix-7" + $keys = $response.ListBucketResult.Contents.Key | Where-Object { $_ -match "^$([regex]::Escape($distPrefix))-" } + + $latestFile = $null + $latestMajor = 0; $latestMinor = 0; $latestPatch = 0 + $latestBuild = 0; $latestIsAlpha = $false + + foreach ($key in $keys) { + if ($key -match "$([regex]::Escape($distPrefix))-.*?(\d+)\.(\d+)\.(\d+)(a|rc)(\d+)\.tar\.gz$") { + $major = [int]$Matches[1]; $minor = [int]$Matches[2]; $patch = [int]$Matches[3] + $preType = $Matches[4]; $build = [int]$Matches[5] + $isAlpha = ($preType -eq "a") + + $isNewer = $false + if ($major -gt $latestMajor) { $isNewer = $true } + elseif ($major -eq $latestMajor -and $minor -gt $latestMinor) { $isNewer = $true } + elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -gt $latestPatch) { $isNewer = $true } + elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -eq $latestPatch) { + if ($isAlpha -and -not $latestIsAlpha) { $isNewer = $true } + elseif ($isAlpha -eq $latestIsAlpha -and $build -gt $latestBuild) { $isNewer = $true } + } + + if ($isNewer) { + $latestFile = $key + $latestMajor = $major; $latestMinor = $minor; $latestPatch = $patch + $latestBuild = $build; $latestIsAlpha = $isAlpha + } + } + } + + Write-Host "Found latest file: $latestFile" + if ($latestFile -match "(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz$") { + $rocmVersion = $Matches[1] + Write-Host "Detected latest ROCm version: $rocmVersion" + } else { + Write-Error "Failed to extract ROCm version from: $latestFile" + exit 1 + } + $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" + } else { + $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$distPrefix-$rocmVersion.tar.gz" + } + + Write-Host "ROCm URL: $tarballUrl" + echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV + Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz - name: Extract 
ROCm to C:\opt\rocm run: | From 73de19c4de130e52c8f70675a9807c79289c36f9 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 16 Apr 2026 15:34:32 -0700 Subject: [PATCH 16/22] fix: address PR review comments from ramkrishna2910 - Fix alpha/RC version ordering bug in resolve-rocm-version.sh and build.yml (alpha was incorrectly treated as newer than RC) - Fix NULL check bug on ndim validation in ruby_whisper_context.c (ndim check was incorrectly guarded by format != NULL) - Add ${{ }} wrapper on if: conditionals at lines 615 and 1422 in build.yml --- .github/workflows/build.yml | 6 +++--- bindings/ruby/ext/ruby_whisper_context.c | 2 +- ci/resolve-rocm-version.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d0a9625ebd8..94cbde537c1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -612,7 +612,7 @@ jobs: zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . 
- name: Upload binaries - if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v4 with: name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} @@ -1289,7 +1289,7 @@ jobs: elseif ($major -eq $latestMajor -and $minor -gt $latestMinor) { $isNewer = $true } elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -gt $latestPatch) { $isNewer = $true } elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -eq $latestPatch) { - if ($isAlpha -and -not $latestIsAlpha) { $isNewer = $true } + if (-not $isAlpha -and $latestIsAlpha) { $isNewer = $true } elseif ($isAlpha -eq $latestIsAlpha -and $build -gt $latestBuild) { $isNewer = $true } } @@ -1419,7 +1419,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName - name: Upload binaries - if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v4 with: # Unique artifact name per matrix job diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index c39d43bd76c..df48b8fc1d6 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -308,7 +308,7 @@ check_memory_view(rb_memory_view_t *memview) rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format); return false; } - if (memview->format != NULL && memview->ndim != 1) { + if (memview->ndim != 1) { rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim); return false; } diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index b715c483993..997b693d5a9 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -62,7 +62,7 @@ if [ "$rocm_version" = "latest" ]; 
then elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then - if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + if [ "$is_alpha" = false ] && [ "$latest_is_alpha" = true ]; then is_newer=true; elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; fi fi From 93276f0e02c6e06dd12a5091a44365fe20bd4776 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 16 Apr 2026 16:13:44 -0700 Subject: [PATCH 17/22] refactor: consolidate ROCm version resolution to shared bash script Replace duplicated ~55 lines of PowerShell version resolution logic in windows-rocm job with a call to ci/resolve-rocm-version.sh via Git Bash. This eliminates code duplication and ensures both Linux and Windows use the same version resolution logic. --- .github/workflows/build.yml | 62 +++---------------------------------- 1 file changed, 4 insertions(+), 58 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 94cbde537c1..3dd9fcb1c06 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1258,65 +1258,11 @@ jobs: } - name: Resolve ROCm version and download tarball - shell: pwsh + shell: bash run: | - $gfxTarget = "${{ matrix.gfx_target }}" - $rocmVersion = "${{ env.ROCM_VERSION }}" - - $s3Target = $gfxTarget - if ($gfxTarget -eq "gfx110X" -or $gfxTarget -eq "gfx120X") { - $s3Target = "$gfxTarget-all" - } - $distPrefix = "therock-dist-windows-$s3Target" - - if ($rocmVersion -eq "latest") { - Write-Host "Auto-detecting latest ROCm version for windows/$gfxTarget..." 
- $response = Invoke-RestMethod -Uri "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=$distPrefix-7" - $keys = $response.ListBucketResult.Contents.Key | Where-Object { $_ -match "^$([regex]::Escape($distPrefix))-" } - - $latestFile = $null - $latestMajor = 0; $latestMinor = 0; $latestPatch = 0 - $latestBuild = 0; $latestIsAlpha = $false - - foreach ($key in $keys) { - if ($key -match "$([regex]::Escape($distPrefix))-.*?(\d+)\.(\d+)\.(\d+)(a|rc)(\d+)\.tar\.gz$") { - $major = [int]$Matches[1]; $minor = [int]$Matches[2]; $patch = [int]$Matches[3] - $preType = $Matches[4]; $build = [int]$Matches[5] - $isAlpha = ($preType -eq "a") - - $isNewer = $false - if ($major -gt $latestMajor) { $isNewer = $true } - elseif ($major -eq $latestMajor -and $minor -gt $latestMinor) { $isNewer = $true } - elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -gt $latestPatch) { $isNewer = $true } - elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -eq $latestPatch) { - if (-not $isAlpha -and $latestIsAlpha) { $isNewer = $true } - elseif ($isAlpha -eq $latestIsAlpha -and $build -gt $latestBuild) { $isNewer = $true } - } - - if ($isNewer) { - $latestFile = $key - $latestMajor = $major; $latestMinor = $minor; $latestPatch = $patch - $latestBuild = $build; $latestIsAlpha = $isAlpha - } - } - } - - Write-Host "Found latest file: $latestFile" - if ($latestFile -match "(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz$") { - $rocmVersion = $Matches[1] - Write-Host "Detected latest ROCm version: $rocmVersion" - } else { - Write-Error "Failed to extract ROCm version from: $latestFile" - exit 1 - } - $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" - } else { - $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$distPrefix-$rocmVersion.tar.gz" - } - - Write-Host "ROCm URL: $tarballUrl" - echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV - Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz + source 
ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV + curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz - name: Extract ROCm to C:\opt\rocm run: | From 9a4b11062074b2250a4d6c441bee84eb2dc90a6a Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 07:12:09 -0700 Subject: [PATCH 18/22] fix(ci): fix Windows ROCm latest auto-detection regex compatibility Replace PCRE non-greedy .*? with ERE-compatible [^0-9]* in Bash regex patterns. Bash [[ =~ ]] uses POSIX ERE which does not support .*? non-greedy quantifier. On Windows Git Bash this fails strictly, leaving latest_file empty and causing 'Failed to extract ROCm version' error. Also adds: - File count validation with S3 response debug output - Empty latest_file check showing candidate files - Empty file line skip to prevent false regex matches --- ci/resolve-rocm-version.sh | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 997b693d5a9..0244c1a77cb 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -40,6 +40,15 @@ if [ "$rocm_version" = "latest" ]; then # Works on full Git Bash AND MinGit/BusyBox variants files=$(echo "$s3_response" | tr -d '\r' | grep -o '<Key>[^<]*</Key>' | sed 's/<Key>//;s/<\/Key>//' | grep "^${dist_prefix}-") + # Validate that we found any files at all + file_count=$(echo "$files" | grep -c '.' 
2>/dev/null || echo "0") + if [ "$file_count" -eq 0 ]; then + echo "ERROR: No ROCm tarball files found for prefix '${dist_prefix}-'" + echo "S3 response (first 500 chars): $(echo "$s3_response" | head -c 500)" + return 1 2>/dev/null || exit 1 + fi + echo "Found $file_count candidate files from S3" + latest_file="" latest_major=0 latest_minor=0 @@ -47,8 +56,17 @@ if [ "$rocm_version" = "latest" ]; then latest_rc=0 latest_is_alpha=false + # ERE-compatible regex pattern for version extraction. + # IMPORTANT: Bash [[ =~ ]] uses POSIX ERE, NOT PCRE. + # The PCRE non-greedy quantifier .*? is NOT supported in ERE. + # On Windows Git Bash, .*? is interpreted literally and fails to match. + # Fix: Use [^0-9]* (match non-digit chars) instead of .*? - this is ERE-compatible + # and works correctly since each filename contains exactly one version number. + version_regex="^${dist_prefix}-[^0-9]*([0-9]+\\.[0-9]+\\.[0-9]+(a|rc)[0-9]+)\\.tar\\.gz$" + while IFS= read -r file; do - if [[ "$file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + [ -z "$file" ] && continue + if [[ "$file" =~ $version_regex ]]; then version="${BASH_REMATCH[1]}" major=$(echo "$version" | cut -d. -f1) minor=$(echo "$version" | cut -d. 
-f2) @@ -80,7 +98,15 @@ if [ "$rocm_version" = "latest" ]; then echo "Found latest file: $latest_file" - if [[ "$latest_file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + if [ -z "$latest_file" ]; then + echo "ERROR: No valid ROCm tarball files matched the version pattern" + echo "Showing first 5 candidate files:" + echo "$files" | head -5 + return 1 2>/dev/null || exit 1 + fi + + # Extract version from the resolved file using the same ERE-compatible pattern + if [[ "$latest_file" =~ $version_regex ]]; then rocm_version="${BASH_REMATCH[1]}" echo "Detected latest ROCm version: $rocm_version" else From fbe51d99766ecbeda972ed9588a9ca17559cef69 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 07:46:43 -0700 Subject: [PATCH 19/22] Fix echo command for ROCm URL output --- ci/resolve-rocm-version.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 0244c1a77cb..c9974724c94 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -57,11 +57,6 @@ if [ "$rocm_version" = "latest" ]; then latest_is_alpha=false # ERE-compatible regex pattern for version extraction. - # IMPORTANT: Bash [[ =~ ]] uses POSIX ERE, NOT PCRE. - # The PCRE non-greedy quantifier .*? is NOT supported in ERE. - # On Windows Git Bash, .*? is interpreted literally and fails to match. - # Fix: Use [^0-9]* (match non-digit chars) instead of .*? - this is ERE-compatible - # and works correctly since each filename contains exactly one version number. version_regex="^${dist_prefix}-[^0-9]*([0-9]+\\.[0-9]+\\.[0-9]+(a|rc)[0-9]+)\\.tar\\.gz$" while IFS= read -r file; do From e07190559111a45305d940ca11230d2c93d24ca9 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 08:10:07 -0700 Subject: [PATCH 20/22] update version_regex to be windows and linux compatible. 
--- ci/resolve-rocm-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index c9974724c94..ee985dd47d7 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -57,7 +57,7 @@ if [ "$rocm_version" = "latest" ]; then latest_is_alpha=false # ERE-compatible regex pattern for version extraction. - version_regex="^${dist_prefix}-[^0-9]*([0-9]+\\.[0-9]+\\.[0-9]+(a|rc)[0-9]+)\\.tar\\.gz$" + version_regex="^${dist_prefix}-([0-9]+[.][0-9]+[.][0-9]+(a|rc)[0-9]+)[.]tar[.]gz$" while IFS= read -r file; do [ -z "$file" ] && continue From 62589cb3ea2e12b76b4e305b8b0b2a8ed011858f Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 08:23:55 -0700 Subject: [PATCH 21/22] refactor(ci): replace Amazon S3 nightly scanning with AMD official repo - Rewrite resolve-rocm-version.sh to use AMD's official tarball repo (repo.amd.com/rocm/tarball/) instead of scanning Amazon S3 - Remove 'latest' auto-detection logic which failed on Windows Git Bash due to PCRE vs ERE regex incompatibility - Add version format validation and clear error messages - Update build.yml workflow_dispatch to use concrete ROCm versions (7.12.0, 7.2.1) with choice options instead of 'latest' --- .github/workflows/build.yml | 10 ++- ci/resolve-rocm-version.sh | 121 +++++++++--------------------------- 2 files changed, 38 insertions(+), 93 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3dd9fcb1c06..16be67d5f25 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,9 +50,13 @@ on: required: false default: 'gfx1151,gfx1150,gfx120X,gfx110X' rocm_version: - description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' + description: 'ROCm version to use (e.g., 7.12.0, 7.2.1)' required: false - default: 'latest' + default: '7.12.0' + type: choice + options: + - 7.12.0 + - 7.2.1 concurrency: group: ${{ github.workflow 
}}-${{ github.head_ref && github.ref || github.run_id }} @@ -66,7 +70,7 @@ env: ubuntu_image: "ubuntu:22.04" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} - ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.12.0' }} jobs: determine-tag: diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index ee985dd47d7..fbfe5e68e1d 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -1,18 +1,21 @@ #!/bin/bash # -# Resolve the latest ROCm nightly tarball URL for a given GPU target and platform. +# Resolve the ROCm tarball URL for a given platform and version. +# +# Uses AMD's official repo tarball distribution: +# https://repo.amd.com/rocm/tarball/therock-dist-{platform}-{gfx_target}-{version}.tar.gz # # Usage: # source ci/resolve-rocm-version.sh <platform> <gfx_target> <rocm_version> # # Arguments: # platform - "linux" or "windows" -# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X) -# rocm_version - Specific version (e.g. 
7.12.0, 7.2.1) - required, no "latest" auto-detection # # Outputs (exported): # ROCM_RESOLVED_VERSION - The resolved version string -# ROCM_TARBALL_URL - The full S3 URL to download +# ROCM_TARBALL_URL - The full URL to download platform="$1" gfx_target="$2" @@ -23,96 +26,34 @@ if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then return 1 2>/dev/null || exit 1 fi -# Map GPU target to S3 naming convention -# Group targets (gfx110X, gfx120X) use "-all" suffix; individual targets have no suffix -s3_target="$gfx_target" -if [ "$gfx_target" = "gfx110X" ] || [ "$gfx_target" = "gfx120X" ]; then - s3_target="${gfx_target}-all" -fi - -dist_prefix="therock-dist-${platform}-${s3_target}" - +# Validate that a specific version was provided (no "latest" auto-detection) if [ "$rocm_version" = "latest" ]; then - echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." - s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - - # Use grep -o (basic regex, no PCRE) + sed for XML parsing - # Works on full Git Bash AND MinGit/BusyBox variants - files=$(echo "$s3_response" | tr -d '\r' | grep -o '<Key>[^<]*</Key>' | sed 's/<Key>//;s/<\/Key>//' | grep "^${dist_prefix}-") - - # Validate that we found any files at all - file_count=$(echo "$files" | grep -c '.' 2>/dev/null || echo "0") - if [ "$file_count" -eq 0 ]; then - echo "ERROR: No ROCm tarball files found for prefix '${dist_prefix}-'" - echo "S3 response (first 500 chars): $(echo "$s3_response" | head -c 500)" - return 1 2>/dev/null || exit 1 - fi - echo "Found $file_count candidate files from S3" - - latest_file="" - latest_major=0 - latest_minor=0 - latest_patch=0 - latest_rc=0 - latest_is_alpha=false - - # ERE-compatible regex pattern for version extraction. 
- version_regex="^${dist_prefix}-([0-9]+[.][0-9]+[.][0-9]+(a|rc)[0-9]+)[.]tar[.]gz$" - - while IFS= read -r file; do - [ -z "$file" ] && continue - if [[ "$file" =~ $version_regex ]]; then - version="${BASH_REMATCH[1]}" - major=$(echo "$version" | cut -d. -f1) - minor=$(echo "$version" | cut -d. -f2) - patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') - rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') - is_alpha=false - if [[ "$version" =~ a ]]; then is_alpha=true; fi - - is_newer=false - if [ "$major" -gt "$latest_major" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then - if [ "$is_alpha" = false ] && [ "$latest_is_alpha" = true ]; then is_newer=true; - elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; - fi - fi - - if [ "$is_newer" = true ]; then - latest_file="$file" - latest_major="$major" - latest_minor="$minor" - latest_patch="$patch" - latest_rc="$rc" - latest_is_alpha="$is_alpha" - fi - fi - done <<< "$files" - - echo "Found latest file: $latest_file" - - if [ -z "$latest_file" ]; then - echo "ERROR: No valid ROCm tarball files matched the version pattern" - echo "Showing first 5 candidate files:" - echo "$files" | head -5 - return 1 2>/dev/null || exit 1 - fi + echo "ERROR: 'latest' auto-detection is not supported." + echo "Please specify a concrete ROCm version (e.g., 7.12.0, 7.2.1)." 
+ echo "Available versions: https://repo.amd.com/rocm/tarball/" + return 1 2>/dev/null || exit 1 +fi - # Extract version from the resolved file using the same ERE-compatible pattern - if [[ "$latest_file" =~ $version_regex ]]; then - rocm_version="${BASH_REMATCH[1]}" - echo "Detected latest ROCm version: $rocm_version" - else - echo "Failed to extract ROCm version from latest file: $latest_file" - return 1 2>/dev/null || exit 1 - fi +# Validate version format (should be X.Y.Z or X.Y.ZaNNNNNNNN pattern) +if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then + echo "ERROR: Invalid ROCm version format: '$rocm_version'" + echo "Expected format: X.Y.Z (e.g., 7.12.0) or X.Y.ZaNNNNNNNN (e.g., 7.11.0a20251205)" + return 1 2>/dev/null || exit 1 +fi - export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" -else - export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/${dist_prefix}-${rocm_version}.tar.gz" +# For the AMD tarball distribution, use gfx1151 as the base target +# The tarball contains ROCm tools/libraries for all supported GPUs +# GPU targets are specified during build via GPU_TARGETS CMake variable +# Group targets (gfx110X, gfx120X) should use gfx1151 as the base +base_target="gfx1151" +if [ "$gfx_target" != "gfx110X" ] && [ "$gfx_target" != "gfx120X" ] && [ "$gfx_target" != "gfx1150" ] && [ "$gfx_target" != "gfx1100" ]; then + # Use the specific target if it's an individual target + base_target="$gfx_target" fi +# Construct the AMD official repo URL +ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${base_target}-${rocm_version}.tar.gz" + export ROCM_RESOLVED_VERSION="$rocm_version" +echo "ROCm version: $ROCM_RESOLVED_VERSION" echo "ROCm URL: $ROCM_TARBALL_URL" From 8ce7027abb69ab637bafce0defdde5db6def75c8 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 08:25:12 -0700 Subject: [PATCH 22/22] fix(ci): allow custom ROCm version input in 
workflow_dispatch Remove type:choice restriction so users can type any ROCm version while keeping 7.12.0 as default and linking to available versions --- .github/workflows/build.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 16be67d5f25..f70e7218ad3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,13 +50,9 @@ on: required: false default: 'gfx1151,gfx1150,gfx120X,gfx110X' rocm_version: - description: 'ROCm version to use (e.g., 7.12.0, 7.2.1)' + description: 'ROCm version to use (e.g., 7.12.0, 7.2.1). Available versions: https://repo.amd.com/rocm/tarball/' required: false default: '7.12.0' - type: choice - options: - - 7.12.0 - - 7.2.1 concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}