From 813626d46895e5b10315352e7f7f7fa12562e827 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Sun, 31 May 2026 23:25:59 -0700
Subject: [PATCH] fix(ci): add Pascal compute capability targets to CUDA wheel
 builds

---
 .github/workflows/build-wheels-cuda.yaml | 4 ++--
 CHANGELOG.md                             | 2 ++
 README.md                                | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index be55bf4834..87bd365957 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -169,10 +169,10 @@ jobs:
           }
           $cudaTagVersion = $nvccVersion.Replace('.','')
           $env:VERBOSE = '1'
-          # Build real cubins for the supported GPUs, including sm_70, and keep
+          # Build real cubins for the supported GPUs, including Pascal (sm_60/sm_61), and keep
           # one forward-compatible PTX target instead of embedding PTX for every
           # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
           $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           python -m build --wheel
           # Publish tags that reflect the actual installed toolkit version.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d02bb8e32..9144353983 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237
+
 ## [0.3.24]
 
 - docs: update ROCm install instructions by @agronholm in #1867
diff --git a/README.md b/README.md
index a64c7faa86..7e02b174d4 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,7 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 It is also possible to install a pre-built wheel with CUDA support. As long as your system meets some requirements:
 
 - CUDA Version is 12.1, 12.2, 12.3, 12.4 or 12.5
+- NVIDIA GPU compute capability is 6.0 (Pascal) or newer
 - Python Version is 3.10, 3.11 or 3.12
 
 ```bash