chamalgomes
diff --git a/.github/workflows/llama-build-cuda.yaml b/.github/workflows/llama-build-cuda.yaml
Lines changed: 100 additions & 120 deletions
diff --git a/.github/workflows/test-cuda.yaml b/.github/workflows/test-cuda.yaml
Lines changed: 0 additions & 100 deletions
@@ -1,120 +1,100 @@
-# Release workflow that builds CUDA wheels.
-name: Build Release (CUDA)
-
-# Runs only when started manually (workflow_dispatch); there is no push/PR trigger.
-on: workflow_dispatch
-
-# Workflow-level token scope. The build_wheels job declares its own
-# permissions block in addition to this one.
-permissions:
-  contents: write
-
-jobs:
-  # Produces the build matrix as a JSON job output; build_wheels reads it
-  # back with fromJSON(needs.define_matrix.outputs.matrix).
-  define_matrix:
-    name: Define Build Matrix
-    runs-on: ubuntu-latest
-    outputs:
-      # Value written by the set-matrix step below.
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    defaults:
-      run:
-        shell: pwsh
-
-    steps:
-      # Builds a PowerShell hashtable with the os / pyver / cuda / releasetag
-      # axes, serializes it to compact JSON and appends `matrix=<json>` to
-      # the file named by GITHUB_OUTPUT.
-      - name: Define Job Output
-        id: set-matrix
-        run: |
-          $matrix = @{
-              'os' = @('ubuntu-24.04')
-              'pyver' = @("3.13", "3.14")
-              'cuda' = @("13.1.1")
-              'releasetag' = @("basic")
-          }
-
-          $matrixOut = ConvertTo-Json $matrix -Compress
-          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
-
-  # Builds one wheel per matrix entry from vendor/llama-cpp-python, uploads
-  # the wheels to a GitHub release and attests their build provenance.
-  build_wheels:
-    # The last name segment shows 'AVX2' when releasetag is 'wheels',
-    # otherwise the releasetag value itself.
-    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
-    needs: define_matrix
-    runs-on: ${{ matrix.os }}
-    # presumably id-token/attestations serve the attestation step and
-    # contents: write serves the release upload - confirm against the actions' docs
-    permissions:
-      id-token: write
-      contents: write
-      attestations: write
-    strategy:
-      # Matrix axes come from the JSON emitted by the define_matrix job.
-      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
-    defaults:
-      run:
-        shell: pwsh
-    env:
-      # Full CUDA version (e.g. 13.1.1) read by the install and build steps.
-      CUDAVER: ${{ matrix.cuda }}
-      # NOTE(review): AVXVER is not read by any step visible in this job.
-      AVXVER: ${{ matrix.releasetag }}
-
-    steps:
-      # Check out the repository together with all nested submodules.
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
-        with:
-          submodules: "recursive"
-
-      # Print the runner's glibc version into the log.
-      - name: Verify GLIBC version
-        run: ldd --version
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7
-        with:
-          version: "latest"
-          activate-environment: true
-
-      # Miniforge-based conda environment named "build" with the matrix
-      # Python version; base auto-activation is turned off.
-      - name: Setup Mamba
-        uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0
-        with:
-          activate-environment: "build"
-          python-version: ${{ matrix.pyver }}
-          miniforge-version: latest
-          add-pip-as-python-dependency: true
-          auto-activate-base: false
-
-      # Installs the cuda-toolkit package pinned to CUDAVER from the nvidia
-      # channel.
-      - name: Install Dependencies
-        env:
-          MAMBA_DOWNLOAD_FAILFAST: "0"
-          MAMBA_NO_LOW_SPEED_LIMIT: "1"
-        run: |
-          $cudaVersion = $env:CUDAVER
-          mamba install -y "cuda-toolkit=$cudaVersion" -c nvidia
-
-      # The script below, in order:
-      #   1. derives a short CUDA tag by cutting CUDAVER at its last dot and
-      #      removing the remaining dots (13.1.1 -> 131);
-      #   2. points CUDA_PATH / CUDA_HOME / CUDA_TOOLKIT_ROOT_DIR at the conda
-      #      prefix and, on Linux, prepends its lib directory to
-      #      LD_LIBRARY_PATH;
-      #   3. assembles CMAKE_ARGS (CUDA on, architectures 75 and 80, MMQ
-      #      forcing off, AVX2/FMA/F16C off);
-      #   4. appends "+cu<tag>" to __version__ in llama_cpp/__init__.py;
-      #   5. builds the wheel with uv for the matrix Python version;
-      #   6. appends CUDA_VERSION=<tag> to the file named by GITHUB_ENV.
-      - name: Build Wheel
-        working-directory: vendor/llama-cpp-python
-        run: |
-          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
-          $env:CUDA_PATH = $env:CONDA_PREFIX
-          $env:CUDA_HOME = $env:CONDA_PREFIX
-          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
-
-          if ($IsLinux) {
-            $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
-          } 
-          $env:VERBOSE = '1'
-          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DLLAVA_BUILD=off -DCMAKE_CUDA_ARCHITECTURES=75;80;'
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=OFF $env:CMAKE_ARGS"
-          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
-
-          # Rename the release version with +cu{cuda_ver} build tag
-          (Get-Content llama_cpp/__init__.py) -replace '__version__ = "([^"]*)"', ('__version__ = "' + '$1' + '+cu' + $cudaVersion + '"') | Set-Content llama_cpp/__init__.py
-          
-          #Build wheel
-          uv -v build --no-create-gitignore --python cpython@${{ matrix.pyver }} --wheel
-          
-          # write the build tag to the output
-          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
-
-      # Attach every built wheel to the release whose tag equals the ref
-      # name this run was started from.
-      - uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
-        with:
-          files: vendor/llama-cpp-python/dist/*.whl
-          tag_name: ${{ github.ref_name }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      # Generate a build-provenance attestation covering the built wheels.
-      - uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
-        with:
-          subject-path: 'vendor/llama-cpp-python/dist/*.whl'
-
-
-
-
-
-
+# Manually dispatched release workflow that builds CUDA wheels.
+name: 'Build Release (CUDA)'
+
+# No automatic triggers: the workflow runs only when dispatched by hand.
+on:
+  workflow_dispatch:
+
+# Workflow-level token scope; the build_wheels job also declares its own.
+permissions:
+  contents: write
+
+jobs:
+  # Builds one CUDA-enabled wheel from vendor/llama-cpp-python per
+  # (CUDA config, Python version) pair inside the matching nvidia/cuda devel
+  # container, then uploads the wheels to the release and attests them.
+  build_wheels:
+    name: Build Wheel CUDA ${{ matrix.cuda_config.ver }} Py ${{ matrix.pyver }}
+    runs-on: ubuntu-latest
+    container:
+      image: nvidia/cuda:${{ matrix.cuda_config.ver }}-devel-ubuntu24.04
+    permissions:
+      id-token: write
+      contents: write
+      attestations: write
+    strategy:
+      # Let the remaining matrix entries finish when one build fails.
+      fail-fast: false
+      matrix:
+        # Each entry bundles the container tag version, the wheel version
+        # suffix and the CUDA architectures. Values are quoted so they stay
+        # strings regardless of YAML implicit typing.
+        cuda_config:
+          - ver: "13.1.1"
+            short: "cu131"
+            arch: "75;80"
+        pyver: ["3.13", "3.14"]
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Install system dependencies
+        env:
+          # Stop apt/debconf from waiting on interactive configuration
+          # prompts inside the container.
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          apt-get update
+          apt-get install -y software-properties-common git curl build-essential cmake libssl-dev
+
+      - name: Checkout repository
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        with:
+          submodules: recursive
+
+      # NOTE(review): this pipes the current installer script straight into
+      # sh without pinning a uv version - consider pinning.
+      - name: Install uv
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
+      # Creates .venv with the matrix Python version and puts its bin
+      # directory on PATH for the following steps.
+      - name: Create Python virtual environment
+        run: |
+          uv venv --python "${{ matrix.pyver }}"
+          echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"
+
+      # Log-only step: the nvcc and nvidia-smi probes fall back to a
+      # placeholder string instead of failing when the tool is missing.
+      - name: Environment Diagnostics
+        run: |
+          echo "========== SYSTEM DIAGNOSTICS =========="
+          echo "GLIBC:   $(ldd --version | head -n1)"
+          echo "Python:  $(python --version)"
+          echo "uv:      $(uv --version)"
+          echo "CMake:   $(cmake --version | head -n1)"
+          echo "nvcc:    $(nvcc --version 2>/dev/null | grep release || echo 'not found')"
+          echo "Driver:  $(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null || echo 'not available')"
+          echo "========================================"
+
+      - name: Build Wheel
+        working-directory: vendor/llama-cpp-python
+        env:
+          VERBOSE: "1"
+          # Folded into a single space-separated line.
+          # NOTE(review): the last flag links executables against the libcuda
+          # stub under /usr/local/cuda - presumably because the build
+          # container has no NVIDIA driver; confirm. Its embedded quotes also
+          # rely on the build backend splitting CMAKE_ARGS shell-style - verify.
+          CMAKE_ARGS: >-
+            -DGGML_CUDA=on
+            -DLLAVA_BUILD=off
+            -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda_config.arch }}
+            -DGGML_CUDA_FORCE_MMQ=OFF
+            -DGGML_AVX2=off
+            -DGGML_FMA=off
+            -DGGML_F16C=off
+            -DLLAMA_BUILD_EXAMPLES=OFF
+            -DLLAMA_BUILD_TESTS=OFF
+            -DLLAMA_BUILD_SERVER=OFF
+            -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -lcuda"
+        run: |
+          # Rename the release version with +cu{cuda_ver} build tag
+          sed -i 's/__version__ = "\([^"]*\)"/__version__ = "\1+${{ matrix.cuda_config.short }}"/' llama_cpp/__init__.py
+
+          # Build wheel using uv
+          uv -v build --no-create-gitignore --python "cpython@${{ matrix.pyver }}" --wheel
+
+      # `ls` exits non-zero when the glob matches nothing, so a build that
+      # produced no wheel stops here rather than at the upload.
+      - name: List built wheels
+        run: ls -lh vendor/llama-cpp-python/dist/*.whl
+
+      # Attaches the wheels to the release whose tag equals the ref name
+      # this run was dispatched from.
+      - name: Upload Release Assets
+        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
+        with:
+          files: vendor/llama-cpp-python/dist/*.whl
+          tag_name: ${{ github.ref_name }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Attest Build Provenance
+        uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
+        with:
+          subject-path: 'vendor/llama-cpp-python/dist/*.whl'