From 23317a90a5972ca51ebd75907b632a1ac1448af6 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 1 Jun 2026 02:49:31 -0700 Subject: [PATCH] feat(ci): add CUDA 13 wheel builds --- .github/workflows/build-wheels-cuda.yaml | 11 +++++++---- CHANGELOG.md | 1 + README.md | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml index 723236ca82..f1b2b8b6b5 100644 --- a/.github/workflows/build-wheels-cuda.yaml +++ b/.github/workflows/build-wheels-cuda.yaml @@ -24,7 +24,7 @@ jobs: # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic, # so one builder per toolkit version is sufficient. 'pyver' = @("3.9") - 'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1") + 'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "13.0.2", "13.2.1") 'releasetag' = @("basic") 'exclude' = @( @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' }, @@ -115,8 +115,8 @@ jobs: } elseif ($IsLinux) { mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" } elseif ($IsWindows) { - if ($cudaVersion -like '12.5.*') { - # The Windows 12.5 toolkit meta-package pulls compiler activation + if ($cudaVersion -like '12.5.*' -or [version]$cudaVersion -ge [version]"13.0") { + # The Windows 12.5+ toolkit meta-package pulls compiler activation # scripts that overflow cmd.exe after MSVC is already initialized. 
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-libraries-dev=$cudaVersion" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev" } else { @@ -209,8 +209,11 @@ jobs: if ([version]$nvccVersion -lt [version]"12.0") { # CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls. $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real" + } elseif ([version]$nvccVersion -ge [version]"13.0") { + # CUDA 13 dropped offline compilation support for pre-Turing targets. + $cudaArchs = "75-real;80-real;86-real;89-real;90-real;90-virtual" } - # Build real cubins for the supported GPUs, including Pascal, and keep + # Build real cubins for the supported GPUs and keep # one forward-compatible PTX target instead of embedding PTX for every # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit. $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS" diff --git a/CHANGELOG.md b/CHANGELOG.md index e2fb8e951c..c80984ff68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat(ci): add CUDA 13.0 and 13.2 wheel builds by @abetlen in #2239 - feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238 - fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237 diff --git a/README.md b/README.md index 2b7a7d98c9..5de330af46 100644 --- a/README.md +++ b/README.md @@ -125,8 +125,8 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python It is also possible to install a pre-built wheel with CUDA support. 
As long as your system meets some requirements: -- CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4 or 12.5 -- NVIDIA GPU compute capability is 6.0 through 8.9 for CUDA 11.8 wheels, or 6.0 or newer for CUDA 12 wheels +- CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4, 12.5, 13.0 or 13.2 +- NVIDIA GPU compute capability is 6.0 through 8.9 for CUDA 11.8 wheels, 6.0 or newer for CUDA 12 wheels, or 7.5 or newer for CUDA 13 wheels - Python Version is 3.10, 3.11 or 3.12 ```bash @@ -141,6 +141,8 @@ Where `<cuda-version>` is one of the following: - `cu123`: CUDA 12.3 - `cu124`: CUDA 12.4 - `cu125`: CUDA 12.5 +- `cu130`: CUDA 13.0 +- `cu132`: CUDA 13.2 For example, to install the CUDA 12.1 wheel: