fix(ci): omit Hopper targets from CUDA 11.8 wheels

abetlen · abetlen · commit ab20c1929d39 · 2026-06-01T03:05:20.000-07:00
diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
@@ -199,10 +199,15 @@ jobs:
           }
           $cudaTagVersion = $nvccVersion.Replace('.','')
           $env:VERBOSE = '1'
+          $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual"
+          if ([version]$nvccVersion -lt [version]"12.0") {
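+            # Toolkits older than CUDA 12.0 (i.e. the 11.8 wheels) cannot compile
+            # llama.cpp's Hopper PDL device calls, so drop both sm_90 entries.
+            # Note: 90-virtual was the only PTX target, so these wheels ship
+            # cubins only and have no forward-compatible PTX fallback.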
+            $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real"
+          }
           # Build real cubins for the supported GPUs, including Pascal, and keep
           # one forward-compatible PTX target instead of embedding PTX for every
           # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
           $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           python -m build --wheel
           # Publish tags that reflect the actual installed toolkit version.
diff --git a/README.md b/README.md
@@ -126,7 +126,7 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 It is also possible to install a pre-built wheel with CUDA support. As long as your system meets some requirements:
 
 - CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4 or 12.5
-- NVIDIA GPU compute capability is 6.0 or newer
+- NVIDIA GPU compute capability is 6.0 or newer for CUDA 12.x wheels; CUDA 11.8 wheels support 6.0 through 8.9 only (no Hopper / 9.0)
 - Python Version is 3.10, 3.11 or 3.12
 
 ```bash