|
---
# Manually dispatched workflow: computes a build matrix in PowerShell, then
# builds CUDA-enabled llama-cpp-python wheels for every matrix entry and
# publishes them as release assets with a build-provenance attestation.
name: Build Release (CUDA)

on: workflow_dispatch

# Workflow-wide token scope; build_wheels declares its own wider set below.
permissions:
  contents: write

jobs:
  # Serialises the matrix to compact JSON and exposes it as a job output so
  # that build_wheels can expand it with fromJSON().
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: pwsh
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}

    steps:
      - id: set-matrix
        name: Define Job Output
        run: |
          $matrix = @{
            'os' = @('ubuntu-24.04')
            'pyver' = @("3.13", "3.14")
            'cuda' = @("13.1.1")
            'releasetag' = @("basic")
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # One job per (os, pyver, cuda, releasetag) combination from define_matrix.
  build_wheels:
    needs: define_matrix
    name: >-
      Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }}
      ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    permissions:
      # id-token and attestations are requested for the attestation step;
      # contents: write for the release upload.
      id-token: write
      contents: write
      attestations: write
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}
      # NOTE(review): AVXVER is not read by any step visible in this workflow.
      AVXVER: ${{ matrix.releasetag }}

    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
        with:
          submodules: recursive

      - name: Verify GLIBC version
        run: ldd --version

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7
        with:
          version: latest
          activate-environment: true

      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0
        with:
          miniforge-version: latest
          python-version: ${{ matrix.pyver }}
          activate-environment: build
          auto-activate-base: false
          add-pip-as-python-dependency: true

      # Installs the CUDA toolkit matching CUDAVER from the nvidia channel.
      - name: Install Dependencies
        env:
          MAMBA_DOWNLOAD_FAILFAST: "0"
          MAMBA_NO_LOW_SPEED_LIMIT: "1"
        run: |
          $cudaVersion = $env:CUDAVER
          mamba install -y "cuda-toolkit=$cudaVersion" -c nvidia

      # Points the CUDA_* variables at the conda prefix, stamps the package
      # version with a +cu<major><minor> local tag (13.1.1 -> cu131), builds
      # the wheel with uv, and exports CUDA_VERSION for later steps.
      - name: Build Wheel
        working-directory: vendor/llama-cpp-python
        run: |
          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
          $env:CUDA_PATH = $env:CONDA_PREFIX
          $env:CUDA_HOME = $env:CONDA_PREFIX
          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX

          if ($IsLinux) {
            $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
          }
          $env:VERBOSE = '1'
          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DLLAVA_BUILD=off -DCMAKE_CUDA_ARCHITECTURES=75;80;'
          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=OFF $env:CMAKE_ARGS"
          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'

          # Rename the release version with +cu{cuda_ver} build tag
          (Get-Content llama_cpp/__init__.py) -replace '__version__ = "([^"]*)"', ('__version__ = "' + '$1' + '+cu' + $cudaVersion + '"') | Set-Content llama_cpp/__init__.py

          #Build wheel
          uv -v build --no-create-gitignore --python cpython@${{ matrix.pyver }} --wheel

          # write the build tag to the output
          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV

      # Attaches every built wheel to the release for the triggering ref.
      - uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref_name }}
          files: vendor/llama-cpp-python/dist/*.whl

      - uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
        with:
          subject-path: vendor/llama-cpp-python/dist/*.whl
115 | | - |
116 | | - |
117 | | - |
118 | | - |
119 | | - |
120 | | - |
---
# Manually dispatched workflow: builds CUDA-enabled llama-cpp-python wheels
# inside the matching nvidia/cuda "devel" container image, uploads them as
# release assets, and attests their build provenance.
name: Build Release (CUDA)

on: workflow_dispatch

# Workflow-wide token scope; build_wheels declares its own wider set below.
permissions:
  contents: write

jobs:
  # One job per (cuda_config, pyver) combination.
  build_wheels:
    name: Build Wheel CUDA ${{ matrix.cuda_config.ver }} Py ${{ matrix.pyver }}
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:${{ matrix.cuda_config.ver }}-devel-ubuntu24.04
    permissions:
      # id-token and attestations are requested for the attestation step;
      # contents: write for the release upload.
      id-token: write
      contents: write
      attestations: write
    strategy:
      # Let the remaining combinations finish when one of them fails.
      fail-fast: false
      matrix:
        # Version-like values are quoted so they can never be retyped by a
        # YAML parser.
        cuda_config:
          # ver:   container image tag
          # short: local version tag appended to the wheel version
          # arch:  value passed to CMAKE_CUDA_ARCHITECTURES
          - ver: "13.1.1"
            short: cu131
            arch: "75;80"
        pyver: ["3.13", "3.14"]

    defaults:
      run:
        shell: bash

    steps:
      - name: Install system dependencies
        env:
          # Keep debconf from waiting on interactive prompts; the job has no
          # terminal to answer them.
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt-get update
          apt-get install -y software-properties-common git curl build-essential cmake libssl-dev

      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
        with:
          submodules: recursive

      # Only installs uv; the interpreter itself is provisioned by the next
      # step.
      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Install Python
        env:
          # Matrix values reach the script through the environment rather
          # than being spliced into the shell source.
          PYVER: ${{ matrix.pyver }}
        run: |
          uv venv --python "$PYVER"
          echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"

      # Purely informational; each probe falls back to a placeholder instead
      # of failing the job.
      - name: Environment Diagnostics
        run: |
          echo "========== SYSTEM DIAGNOSTICS =========="
          echo "GLIBC: $(ldd --version | head -n1)"
          echo "Python: $(python --version)"
          echo "uv: $(uv --version)"
          echo "CMake: $(cmake --version | head -n1)"
          echo "nvcc: $(nvcc --version 2>/dev/null | grep release || echo 'not found')"
          echo "Driver: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null || echo 'not available')"
          echo "========================================"

      - name: Build Wheel
        working-directory: vendor/llama-cpp-python
        env:
          VERBOSE: "1"
          WHEEL_CUDA_TAG: ${{ matrix.cuda_config.short }}
          WHEEL_PYVER: ${{ matrix.pyver }}
          # The last flag links executables against the libcuda stub under
          # /usr/local/cuda/lib64/stubs - presumably because the build
          # container has no real driver library; confirm before removing.
          CMAKE_ARGS: >-
            -DGGML_CUDA=on
            -DLLAVA_BUILD=off
            -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda_config.arch }}
            -DGGML_CUDA_FORCE_MMQ=OFF
            -DGGML_AVX2=off
            -DGGML_FMA=off
            -DGGML_F16C=off
            -DLLAMA_BUILD_EXAMPLES=OFF
            -DLLAMA_BUILD_TESTS=OFF
            -DLLAMA_BUILD_SERVER=OFF
            -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -lcuda"
        run: |
          # Rename the release version with +cu{cuda_ver} build tag
          sed -i "s/__version__ = \"\([^\"]*\)\"/__version__ = \"\1+${WHEEL_CUDA_TAG}\"/" llama_cpp/__init__.py

          # sed exits 0 even when nothing matched; fail here rather than
          # publish a wheel without the CUDA tag.
          if ! grep -q "__version__ = \".*+${WHEEL_CUDA_TAG}\"" llama_cpp/__init__.py; then
            echo "::error::__version__ in llama_cpp/__init__.py was not tagged with +${WHEEL_CUDA_TAG}"
            exit 1
          fi

          # Build wheel using uv
          uv -v build --no-create-gitignore --python "cpython@${WHEEL_PYVER}" --wheel

      # Fails the job when the build produced no wheel.
      - name: List built wheels
        run: ls -lh vendor/llama-cpp-python/dist/*.whl

      # NOTE(review): with workflow_dispatch, github.ref_name is the ref the
      # run was started from - confirm that is the intended release tag.
      - name: Upload Release Assets
        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
        with:
          files: vendor/llama-cpp-python/dist/*.whl
          tag_name: ${{ github.ref_name }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Attest Build Provenance
        uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
        with:
          subject-path: vendor/llama-cpp-python/dist/*.whl
0 commit comments