From 813626d46895e5b10315352e7f7f7fa12562e827 Mon Sep 17 00:00:00 2001
From: abetlen
Date: Sun, 31 May 2026 23:25:59 -0700
Subject: [PATCH] fix(ci): add Pascal compute capability targets to CUDA wheel builds

---
 .github/workflows/build-wheels-cuda.yaml | 4 ++--
 CHANGELOG.md                             | 2 ++
 README.md                                | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index be55bf4834..87bd365957 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -169,10 +169,10 @@ jobs:
           }
           $cudaTagVersion = $nvccVersion.Replace('.','')
           $env:VERBOSE = '1'
-          # Build real cubins for the supported GPUs, including sm_70, and keep
+          # Build real cubins for the supported GPUs, including Pascal, and keep
           # one forward-compatible PTX target instead of embedding PTX for every
           # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
           $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           python -m build --wheel
           # Publish tags that reflect the actual installed toolkit version.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d02bb8e32..9144353983 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237
+
 ## [0.3.24]
 
 - docs: update ROCm install instructions by @agronholm in #1867
diff --git a/README.md b/README.md
index a64c7faa86..7e02b174d4 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,7 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 It is also possible to install a pre-built wheel with CUDA support. As long as your system meets some requirements:
 
 - CUDA Version is 12.1, 12.2, 12.3, 12.4 or 12.5
+- NVIDIA GPU compute capability is 6.0 or newer
 - Python Version is 3.10, 3.11 or 3.12
 
 ```bash