Skip to content

Commit 655edba

Browse files
committed
Merge remote-tracking branch 'origin/main' into TNing/main
2 parents 73bbb75 + f1bfa11 commit 655edba

43 files changed

Lines changed: 3022 additions & 986 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.github/workflows/build-and-release.yaml

Lines changed: 77 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,24 @@ jobs:
4444
- name: Build wheels
4545
uses: pypa/cibuildwheel@v2.22.0
4646
env:
47-
# disable repair
47+
# Keep repair disabled by default for non-Linux platforms in this job.
4848
CIBW_REPAIR_WHEEL_COMMAND: ""
49+
# Linux needs auditwheel repair so manylinux and musllinux wheels are
50+
# published with distinct platform tags instead of generic linux tags.
51+
CIBW_REPAIR_WHEEL_COMMAND_LINUX: "LD_LIBRARY_PATH=/project/llama_cpp/lib auditwheel repair -w {dest_dir} {wheel}"
52+
# The release wheel is tagged py3-none, so one build per platform
53+
# covers all supported Python versions and avoids duplicate names.
54+
CIBW_BUILD_LINUX: "cp38-*"
55+
CIBW_BUILD_MACOS: "cp39-*"
56+
CIBW_BUILD_WINDOWS: "cp39-*"
57+
# Skip cibuildwheel's default i686 sidecar and keep Linux release
58+
# wheels on a portable x86_64 CPU baseline.
59+
CIBW_ARCHS_LINUX: "auto64"
60+
CIBW_ARCHS_WINDOWS: "AMD64"
61+
CIBW_ENVIRONMENT_LINUX: CMAKE_ARGS="-DGGML_NATIVE=off"
62+
# Keep macOS release wheels on a portable CPU baseline instead of
63+
# inheriting the hosted runner's native flags.
64+
CIBW_ENVIRONMENT_MACOS: CMAKE_ARGS="-DGGML_NATIVE=off"
4965
with:
5066
package-dir: .
5167
output-dir: wheelhouse
@@ -57,7 +73,55 @@ jobs:
5773

5874
build_wheels_arm64:
5975
name: Build arm64 wheels
76+
runs-on: ubuntu-24.04-arm
77+
steps:
78+
- uses: actions/checkout@v4
79+
with:
80+
submodules: "recursive"
81+
82+
- name: Build wheels
83+
uses: pypa/cibuildwheel@v2.22.0
84+
env:
85+
CIBW_SKIP: "pp*"
86+
CIBW_REPAIR_WHEEL_COMMAND: "LD_LIBRARY_PATH=$PWD/llama_cpp/lib auditwheel repair -w {dest_dir} {wheel}"
87+
CIBW_ARCHS: "aarch64"
88+
# Keep native arm64 builds on a portable CPU baseline instead of
89+
# tuning wheels to the hosted runner.
90+
CIBW_ENVIRONMENT: CMAKE_ARGS="-DGGML_NATIVE=off"
91+
# The release wheel is tagged py3-none, so one build covers all
92+
# supported Python versions and avoids duplicate wheel names.
93+
CIBW_BUILD: "cp38-*"
94+
with:
95+
output-dir: wheelhouse
96+
97+
- name: Upload wheels as artifacts
98+
uses: actions/upload-artifact@v4
99+
with:
100+
name: wheels_arm64
101+
path: ./wheelhouse/*.whl
102+
103+
build_wheels_riscv64:
104+
name: Build riscv64 wheels (${{ matrix.shard.name }})
60105
runs-on: ubuntu-latest
106+
strategy:
107+
fail-fast: false
108+
matrix:
109+
shard:
110+
- name: cp310
111+
build: "cp310-*"
112+
artifact: wheels_riscv64_cp310
113+
- name: cp311
114+
build: "cp311-*"
115+
artifact: wheels_riscv64_cp311
116+
- name: cp312
117+
build: "cp312-*"
118+
artifact: wheels_riscv64_cp312
119+
- name: cp313
120+
build: "cp313-*"
121+
artifact: wheels_riscv64_cp313
122+
- name: cp314
123+
build: "cp314-*"
124+
artifact: wheels_riscv64_cp314
61125
steps:
62126
- uses: actions/checkout@v4
63127
with:
@@ -66,23 +130,27 @@ jobs:
66130
- name: Set up QEMU
67131
uses: docker/setup-qemu-action@v3
68132
with:
69-
platforms: linux/arm64
133+
platforms: linux/riscv64
70134

71135
- name: Build wheels
72-
uses: pypa/cibuildwheel@v2.22.0
136+
uses: pypa/cibuildwheel@v3.1.2
73137
env:
74138
CIBW_SKIP: "*musllinux* pp*"
75139
CIBW_REPAIR_WHEEL_COMMAND: ""
76-
CIBW_ARCHS: "aarch64"
77-
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DCMAKE_CROSSCOMPILING=ON"
78-
CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
140+
CIBW_ARCHS: "riscv64"
141+
# Build riscv64 wheels against a conservative baseline instead of
142+
# enabling RVV-related extensions from the build container.
143+
CIBW_ENVIRONMENT: CMAKE_ARGS="-DGGML_NATIVE=off -DGGML_RVV=off -DGGML_RV_ZFH=off -DGGML_RV_ZVFH=off -DGGML_RV_ZICBOP=off -DGGML_RV_ZIHINTPAUSE=off"
144+
# Split the emulated riscv64 build into one Python version per job
145+
# to minimize wall-clock time without changing the release artifacts.
146+
CIBW_BUILD: ${{ matrix.shard.build }}
79147
with:
80148
output-dir: wheelhouse
81149

82150
- name: Upload wheels as artifacts
83151
uses: actions/upload-artifact@v4
84152
with:
85-
name: wheels_arm64
153+
name: ${{ matrix.shard.artifact }}
86154
path: ./wheelhouse/*.whl
87155

88156
build_sdist:
@@ -129,7 +197,8 @@ jobs:
129197

130198
release:
131199
name: Release
132-
needs: [build_wheels, build_wheels_arm64, build_sdist]
200+
needs: [build_wheels, build_wheels_arm64, build_wheels_riscv64, build_sdist]
201+
if: startsWith(github.ref, 'refs/tags/')
133202
runs-on: ubuntu-latest
134203

135204
steps:

.github/workflows/build-docker.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@ jobs:
1616
with:
1717
submodules: "recursive"
1818

19+
- name: Set image tag
20+
run: |
21+
if [[ "${GITHUB_REF_TYPE}" == "tag" ]]; then
22+
image_tag="${GITHUB_REF_NAME}"
23+
else
24+
image_tag="${GITHUB_REF_NAME//\//-}"
25+
fi
26+
echo "IMAGE_TAG=$image_tag" >> "$GITHUB_ENV"
27+
1928
- name: Set up QEMU
2029
uses: docker/setup-qemu-action@v3
2130

@@ -40,7 +49,7 @@ jobs:
4049
platforms: linux/amd64,linux/arm64
4150
tags: |
4251
ghcr.io/abetlen/llama-cpp-python:latest
43-
ghcr.io/abetlen/llama-cpp-python:${{ github.ref_name }}
52+
ghcr.io/abetlen/llama-cpp-python:${{ env.IMAGE_TAG }}
4453
build-args: |
4554
BUILDKIT_INLINE_CACHE=1
4655

.github/workflows/build-wheels-cuda.yaml

Lines changed: 148 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,17 @@ jobs:
2020
id: set-matrix
2121
run: |
2222
$matrix = @{
23-
'os' = @('ubuntu-22.04') #, 'windows-2022')
24-
'pyver' = @("3.9", "3.10", "3.11", "3.12")
25-
'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
23+
'os' = @('ubuntu-22.04', 'windows-2022')
24+
# wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
25+
# so one builder per toolkit version is sufficient.
26+
'pyver' = @("3.9")
27+
'cuda' = @("11.8.0", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "13.0.2", "13.2.1")
2628
'releasetag' = @("basic")
29+
'exclude' = @(
30+
@{ 'os' = 'windows-2022'; 'cuda' = '12.1.1' },
31+
@{ 'os' = 'windows-2022'; 'cuda' = '12.2.2' },
32+
@{ 'os' = 'windows-2022'; 'cuda' = '12.3.2' }
33+
)
2734
}
2835
2936
$matrixOut = ConvertTo-Json $matrix -Compress
@@ -43,11 +50,18 @@ jobs:
4350
AVXVER: ${{ matrix.releasetag }}
4451

4552
steps:
46-
- name: Add MSBuild to PATH
47-
if: runner.os == 'Windows'
48-
uses: microsoft/setup-msbuild@v2
53+
- name: Set up MSVC for CUDA 11.8
54+
if: runner.os == 'Windows' && matrix.cuda == '11.8.0'
55+
uses: ilammy/msvc-dev-cmd@v1
4956
with:
50-
vs-version: '[16.11,16.12)'
57+
arch: x64
58+
toolset: 14.29
59+
60+
- name: Set up MSVC
61+
if: runner.os == 'Windows' && matrix.cuda != '11.8.0'
62+
uses: ilammy/msvc-dev-cmd@v1
63+
with:
64+
arch: x64
5165

5266
- uses: actions/checkout@v4
5367
with:
@@ -67,67 +81,149 @@ jobs:
6781
add-pip-as-python-dependency: true
6882
auto-activate-base: false
6983

70-
- name: VS Integration Cache
71-
id: vs-integration-cache
72-
if: runner.os == 'Windows'
73-
uses: actions/cache@v4
74-
with:
75-
path: ./MSBuildExtensions
76-
key: cuda-${{ matrix.cuda }}-vs-integration
77-
78-
- name: Get Visual Studio Integration
79-
if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
80-
run: |
81-
if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
82-
$links = (Invoke-RestMethod 'https://raw.githubusercontent.com/Jimver/cuda-toolkit/master/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
83-
for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
84-
Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
85-
& 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
86-
Remove-Item 'cudainstaller.zip'
87-
88-
- name: Install Visual Studio Integration
89-
if: runner.os == 'Windows'
90-
run: |
91-
$y = (gi '.\MSBuildExtensions').fullname + '\*'
92-
(gi 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
93-
$cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
94-
echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
95-
9684
- name: Install Dependencies
9785
env:
9886
MAMBA_DOWNLOAD_FAILFAST: "0"
9987
MAMBA_NO_LOW_SPEED_LIMIT: "1"
10088
run: |
10189
$cudaVersion = $env:CUDAVER
102-
mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion
103-
python -m pip install build wheel
90+
$cudaChannel = "nvidia/label/cuda-$cudaVersion"
91+
if ($cudaVersion -eq '11.8.0') {
92+
if ($IsLinux) {
93+
$cudaPackages = @(
94+
"${cudaChannel}::cuda-nvcc_linux-64=11.8.0",
95+
"${cudaChannel}::cuda-cccl=11.8.89",
96+
"${cudaChannel}::cuda-cudart=11.8.89",
97+
"${cudaChannel}::cuda-cudart-dev=11.8.89",
98+
"${cudaChannel}::cuda-driver-dev=11.8.89",
99+
"${cudaChannel}::libcublas=11.11.3.6",
100+
"${cudaChannel}::libcublas-dev=11.11.3.6"
101+
)
102+
} elseif ($IsWindows) {
103+
$cudaPackages = @(
104+
"${cudaChannel}::cuda-nvcc_win-64=11.8.0",
105+
"${cudaChannel}::cuda-cccl=11.8.89",
106+
"${cudaChannel}::cuda-cudart=11.8.89",
107+
"${cudaChannel}::cuda-cudart-dev=11.8.89",
108+
"${cudaChannel}::libcublas=11.11.3.6",
109+
"${cudaChannel}::libcublas-dev=11.11.3.6"
110+
)
111+
} else {
112+
throw 'Unsupported CUDA wheel build platform'
113+
}
114+
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel $cudaPackages
115+
} elseif ($IsLinux) {
116+
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_linux-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
117+
} elseif ($IsWindows) {
118+
if ($cudaVersion -like '12.5.*' -or [version]$cudaVersion -ge [version]"13.0") {
119+
# The Windows toolkit meta-package for CUDA 12.5.x and 13.0+ pulls compiler activation
120+
# scripts that overflow cmd.exe after MSVC is already initialized.
121+
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-libraries-dev=$cudaVersion" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
122+
} else {
123+
mamba install -y --channel-priority flexible --override-channels -c $cudaChannel "${cudaChannel}::cuda-toolkit=$cudaVersion" "${cudaChannel}::cuda-nvcc_win-64" "${cudaChannel}::cuda-cccl" "${cudaChannel}::cuda-cudart" "${cudaChannel}::cuda-cudart-dev"
124+
}
125+
} else {
126+
throw 'Unsupported CUDA wheel build platform'
127+
}
128+
if ($LASTEXITCODE -ne 0) {
129+
exit $LASTEXITCODE
130+
}
131+
if ($IsWindows) {
132+
python -m pip install build wheel ninja
133+
} else {
134+
python -m pip install build wheel
135+
}
104136
105137
- name: Build Wheel
106138
run: |
107-
$cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
108-
$env:CUDA_PATH = $env:CONDA_PREFIX
109-
$env:CUDA_HOME = $env:CONDA_PREFIX
110-
$env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
139+
$pathSeparator = if ($IsWindows) { ';' } else { ':' }
140+
if ($IsWindows) {
141+
$cudaRoot = Join-Path $env:CONDA_PREFIX 'Library'
142+
} elseif (Test-Path (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/include/cuda_runtime.h')) {
143+
$cudaRoot = Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux'
144+
} else {
145+
$cudaRoot = $env:CONDA_PREFIX
146+
}
147+
148+
$env:CUDA_PATH = $cudaRoot
149+
$env:CUDA_HOME = $cudaRoot
150+
$env:CUDAToolkit_ROOT = $cudaRoot
151+
$env:CUDA_TOOLKIT_ROOT_DIR = $cudaRoot
152+
$cudaHostCompilerArg = ''
153+
$cudaRootCmake = $cudaRoot.Replace('\', '/')
154+
$env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake"
111155
if ($IsLinux) {
112-
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
156+
if ([version]$env:CUDAVER -lt [version]"12.0" -and (Test-Path '/usr/bin/g++-11')) {
157+
$env:CC = '/usr/bin/gcc-11'
158+
$env:CXX = '/usr/bin/g++-11'
159+
$env:CUDAHOSTCXX = '/usr/bin/g++-11'
160+
$cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
161+
} elseif (Test-Path '/usr/bin/g++-12') {
162+
$env:CC = '/usr/bin/gcc-12'
163+
$env:CXX = '/usr/bin/g++-12'
164+
$env:CUDAHOSTCXX = '/usr/bin/g++-12'
165+
$cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
166+
}
167+
$env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRoot -DCUDA_TOOLKIT_ROOT_DIR=$cudaRoot$cudaHostCompilerArg"
168+
$env:CPATH = "$cudaRoot/include$pathSeparator$env:CPATH"
169+
$env:CPLUS_INCLUDE_PATH = "$cudaRoot/include$pathSeparator$env:CPLUS_INCLUDE_PATH"
170+
$env:LIBRARY_PATH = "$cudaRoot/lib$pathSeparator$env:CONDA_PREFIX/lib$pathSeparator$env:LIBRARY_PATH"
171+
$env:LD_LIBRARY_PATH = "$cudaRoot/lib$pathSeparator$env:CONDA_PREFIX/lib$pathSeparator$env:LD_LIBRARY_PATH"
172+
} elseif ($IsWindows) {
173+
$ninjaPath = ((Get-Command ninja -ErrorAction Stop).Source).Replace('\', '/')
174+
$env:CMAKE_GENERATOR = 'Ninja'
175+
$env:CMAKE_MAKE_PROGRAM = $ninjaPath
176+
$env:PATH = "$(Join-Path $cudaRoot 'bin')$pathSeparator$env:PATH"
177+
}
178+
179+
if ($IsWindows) {
180+
$nvccCandidates = @(
181+
(Join-Path $cudaRoot 'bin\nvcc.exe'),
182+
(Join-Path $env:CONDA_PREFIX 'Library\bin\nvcc.exe'),
183+
(Join-Path $env:CONDA_PREFIX 'bin\nvcc.exe')
184+
)
185+
} else {
186+
$nvccCandidates = @(
187+
(Join-Path $env:CONDA_PREFIX 'bin/nvcc'),
188+
(Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/bin/nvcc')
189+
)
113190
}
191+
$nvccPath = $nvccCandidates | Where-Object { Test-Path $_ } | Select-Object -First 1
192+
if (-not $nvccPath) {
193+
throw 'Failed to find nvcc in the conda environment'
194+
}
195+
$env:CUDACXX = $nvccPath
196+
$env:PATH = "$(Split-Path $nvccPath)$pathSeparator$env:PATH"
197+
if ($IsWindows) {
198+
$nvccPathCmake = $nvccPath.Replace('\', '/')
199+
$env:CUDACXX = $nvccPathCmake
200+
$env:CMAKE_ARGS = "-DCMAKE_CUDA_COMPILER=$nvccPathCmake -DCMAKE_CUDA_COMPILER_ARG1=-allow-unsupported-compiler -DCMAKE_MAKE_PROGRAM=$env:CMAKE_MAKE_PROGRAM $env:CMAKE_ARGS"
201+
}
202+
$nvccVersion = ((& $nvccPath --version) | Select-String 'release ([0-9]+\.[0-9]+)').Matches[0].Groups[1].Value
203+
if (-not $nvccVersion) {
204+
throw 'Failed to detect the installed CUDA toolkit version'
205+
}
206+
$cudaTagVersion = $nvccVersion.Replace('.','')
114207
$env:VERBOSE = '1'
115-
$env:CMAKE_ARGS = '-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
116-
$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
117-
# if ($env:AVXVER -eq 'AVX') {
208+
$cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real;90-virtual"
209+
if ([version]$nvccVersion -lt [version]"12.0") {
210+
# CUDA 11.8 cannot compile llama.cpp's Hopper PDL device calls.
211+
$cudaArchs = "60-real;61-real;70-real;75-real;80-real;86-real;89-real"
212+
} elseif ([version]$nvccVersion -ge [version]"13.0") {
213+
# CUDA 13 dropped offline compilation support for pre-Turing targets.
214+
$cudaArchs = "75-real;80-real;86-real;89-real;90-real;90-virtual"
215+
}
216+
# Build real cubins for the supported GPUs and keep
217+
# one forward-compatible PTX target (CUDA 12+ only) instead of embedding PTX for every
218+
# SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
219+
$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=$cudaArchs -DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DCMAKE_CUDA_FLAGS_INIT=-allow-unsupported-compiler $env:CMAKE_ARGS"
118220
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
119-
# }
120-
# if ($env:AVXVER -eq 'AVX512') {
121-
# $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
122-
# }
123-
# if ($env:AVXVER -eq 'basic') {
124-
# $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
125-
# }
126221
python -m build --wheel
127-
# write the build tag to the output
128-
Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
222+
# Publish tags that reflect the actual installed toolkit version.
223+
Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV
129224
130225
- uses: softprops/action-gh-release@v2
226+
if: startsWith(github.ref, 'refs/tags/')
131227
with:
132228
files: dist/*
133229
# Set tag_name to <tag>-cu<cuda_version>

0 commit comments

Comments
 (0)