From ad4f275622d4e8e0f86041a00e5bc5811de695dc Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 02:41:56 -0700
Subject: [PATCH 1/6] feat(ci): add CUDA 11.8 wheel builds

---
 .github/workflows/build-wheels-cuda.yaml | 2 +-
 CHANGELOG.md                             | 1 +
 README.md                                | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 87bd365957..066a229960 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -24,7 +24,7 @@ jobs:
               # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
               # so one builder per toolkit version is sufficient.
               'pyver' = @("3.9")
-              'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
+              'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
               'releasetag' = @("basic")
               'exclude' = @(
                 @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9144353983..e2fb8e951c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238
 - fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237
 
 ## [0.3.24]
diff --git a/README.md b/README.md
index 7e02b174d4..c6a7b3ca45 100644
--- a/README.md
+++ b/README.md
@@ -125,7 +125,7 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 
 It is also possible to install a pre-built wheel with CUDA support. As long as your system meets some requirements:
 
-- CUDA Version is 12.1, 12.2, 12.3, 12.4 or 12.5
+- CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4 or 12.5
 - NVIDIA GPU compute capability is 6.0 or newer
 - Python Version is 3.10, 3.11 or 3.12
 
@@ -135,6 +135,7 @@ pip install llama-cpp-python \
 ```
 
 Where `<cuda-version>` is one of the following:
+- `cu118`: CUDA 11.8
 - `cu121`: CUDA 12.1
 - `cu122`: CUDA 12.2
 - `cu123`: CUDA 12.3

From d74eb9633bd414b36bf089fe6f439ac426c68441 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 02:58:53 -0700
Subject: [PATCH 2/6] fix(ci): make CUDA 11.8 wheel builds version-consistent

---
 .github/workflows/build-wheels-cuda.yaml | 36 ++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 066a229960..5ec31e7a89 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -20,16 +20,22 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-22.04', 'windows-2022')
+              'os' = @('ubuntu-22.04', 'windows-2022', 'windows-2019')
               # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
               # so one builder per toolkit version is sufficient.
               'pyver' = @("3.9")
               'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
               'releasetag' = @("basic")
               'exclude' = @(
+                @{ 'os' = 'windows-2022'; 'cuda' = '11.8.0' },
                 @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
                 @{ 'os' = 'windows-2022'; 'cuda' = '12.2.2' },
-                @{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' }
+                @{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' },
+                @{ 'os' = 'windows-2019'; 'cuda' = '12.1.1' },
+                @{ 'os' = 'windows-2019'; 'cuda' = '12.2.2' },
+                @{ 'os' = 'windows-2019'; 'cuda' = '12.3.2' },
+                @{ 'os' = 'windows-2019'; 'cuda' = '12.4.1' },
+                @{ 'os' = 'windows-2019'; 'cuda' = '12.5.1' }
               )
           }
 
@@ -81,7 +87,31 @@ jobs:
         run: |
           $cudaVersion = $env:CUDAVER
           $cudaChannel = "nvidia/label/cuda-$cudaVersion"
-          if ($IsLinux) {
+          if ($cudaVersion -eq '11.8.0') {
+            if ($IsLinux) {
+              $cudaPackages = @(
+                "${cudaChannel}::cuda-nvcc_linux-64=11.8.0",
+                "${cudaChannel}::cuda-cccl=11.8.89",
+                "${cudaChannel}::cuda-cudart=11.8.89",
+                "${cudaChannel}::cuda-cudart-dev=11.8.89",
+                "${cudaChannel}::cuda-driver-dev=11.8.89",
+                "${cudaChannel}::libcublas=11.11.3.6",
+                "${cudaChannel}::libcublas-dev=11.11.3.6"
+              )
+            } elseif ($IsWindows) {
+              $cudaPackages = @(
+                "${cudaChannel}::cuda-nvcc_win-64=11.8.0",
+                "${cudaChannel}::cuda-cccl=11.8.89",
+                "${cudaChannel}::cuda-cudart=11.8.89",
+                "${cudaChannel}::cuda-cudart-dev=11.8.89",
+                "${cudaChannel}::libcublas=11.11.3.6",
+                "${cudaChannel}::libcublas-dev=11.11.3.6"
+              )
+            } else {
+              throw 'Unsupported CUDA wheel build platform'
+            }
+            mamba install -y --channel-priority strict --override-channels -c $cudaChannel $cudaPackages
+          } elseif ($IsLinux) {
             mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
           } elseif ($IsWindows) {
             if ($cudaVersion -like '12.5.*') {

From 809be58805d88cb44d2454d7c4eb021f17874e5e Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 03:00:53 -0700
Subject: [PATCH 3/6] fix(ci): allow non-CUDA dependencies for CUDA 11.8 wheels

---
 .github/workflows/build-wheels-cuda.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 5ec31e7a89..524f4497ba 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -110,7 +110,7 @@ jobs:
             } else {
               throw 'Unsupported CUDA wheel build platform'
             }
-            mamba install -y --channel-priority strict --override-channels -c $cudaChannel $cudaPackages
+            mamba install -y --channel-priority flexible --override-channels -c $cudaChannel $cudaPackages
           } elseif ($IsLinux) {
             mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
           } elseif ($IsWindows) {

From 372045fa0d5f8cfa7050bd6df785755a134581f2 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 03:05:04 -0700
Subject: [PATCH 4/6] fix(ci): omit Hopper targets from CUDA 11.8 wheels

---
 .github/workflows/build-wheels-cuda.yaml | 7 ++++++-
 README.md                                | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 524f4497ba..5ca21b1577 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -199,10 +199,15 @@ jobs:
           }
           $cudaTagVersion = $nvccVersion.Replace('.','')
           $env:VERBOSE = '1'
+          $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual"
+          if ([version]$nvccVersion -lt [version]"12.0") {
+            # nvcc < 12.0 (CUDA 11.8) cannot compile llama.cpp's Hopper PDL device calls, so drop 90-real and 90-virtual; this list has no PTX target.
+            $cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real"
+          }
           # Build real cubins for the supported GPUs, including Pascal, and keep
           # one forward-compatible PTX target instead of embedding PTX for every
           # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
           $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           python -m build --wheel
           # Publish tags that reflect the actual installed toolkit version.
diff --git a/README.md b/README.md
index c6a7b3ca45..2b7a7d98c9 100644
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 It is also possible to install a pre-built wheel with CUDA support. As long as your system meets some requirements:
 
 - CUDA Version is 11.8, 12.1, 12.2, 12.3, 12.4 or 12.5
-- NVIDIA GPU compute capability is 6.0 or newer
+- NVIDIA GPU compute capability is 6.0 through 8.9 for CUDA 11.8 wheels, or 6.0 or newer for CUDA 12 wheels
 - Python Version is 3.10, 3.11 or 3.12
 
 ```bash

From d6eb46a62b3232b3972c394806e1c268edb27bae Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 03:13:35 -0700
Subject: [PATCH 5/6] fix(ci): use GCC 11 for CUDA 11.8 Linux wheels

---
 .github/workflows/build-wheels-cuda.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 5ca21b1577..83200ecdfc 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -152,7 +152,12 @@ jobs:
           $cudaRootCmake = $cudaRoot.Replace('\', '/')
           $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake"
           if ($IsLinux) {
-            if (Test-Path '/usr/bin/g++-12') {
+            if ([version]$env:CUDAVER -lt [version]"12.0" -and (Test-Path '/usr/bin/g++-11')) {
+              $env:CC = '/usr/bin/gcc-11'
+              $env:CXX = '/usr/bin/g++-11'
+              $env:CUDAHOSTCXX = '/usr/bin/g++-11'
+              $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
+            } elseif (Test-Path '/usr/bin/g++-12') {
               $env:CC = '/usr/bin/gcc-12'
               $env:CXX = '/usr/bin/g++-12'
               $env:CUDAHOSTCXX = '/usr/bin/g++-12'

From b7bae926058bd6e041034a54eff75339e3cabcf9 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 03:29:20 -0700
Subject: [PATCH 6/6] fix(ci): use MSVC 14.29 for CUDA 11.8 Windows wheels

---
 .github/workflows/build-wheels-cuda.yaml | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 83200ecdfc..723236ca82 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -20,22 +20,16 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-22.04', 'windows-2022', 'windows-2019')
+              'os' = @('ubuntu-22.04', 'windows-2022')
               # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
               # so one builder per toolkit version is sufficient.
               'pyver' = @("3.9")
               'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
               'releasetag' = @("basic")
               'exclude' = @(
-                @{ 'os' = 'windows-2022'; 'cuda' = '11.8.0' },
                 @{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
                 @{ 'os' = 'windows-2022'; 'cuda' = '12.2.2' },
-                @{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' },
-                @{ 'os' = 'windows-2019'; 'cuda' = '12.1.1' },
-                @{ 'os' = 'windows-2019'; 'cuda' = '12.2.2' },
-                @{ 'os' = 'windows-2019'; 'cuda' = '12.3.2' },
-                @{ 'os' = 'windows-2019'; 'cuda' = '12.4.1' },
-                @{ 'os' = 'windows-2019'; 'cuda' = '12.5.1' }
+                @{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' }
               )
           }
 
@@ -56,8 +50,15 @@ jobs:
       AVXVER: ${{ matrix.releasetag }}
 
     steps:
+      - name: Set up MSVC for CUDA 11.8
+        if: runner.os == 'Windows' && matrix.cuda == '11.8.0'  # only the CUDA 11.8 Windows job pins a toolset
+        uses: ilammy/msvc-dev-cmd@v1
+        with:
+          arch: x64
+          toolset: '14.29'  # quoted: a bare 14.29 is a YAML float, not a version string
+
       - name: Set up MSVC
-        if: runner.os == 'Windows'
+        if: runner.os == 'Windows' && matrix.cuda != '11.8.0'
         uses: ilammy/msvc-dev-cmd@v1
         with:
           arch: x64