diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 684083edb1..42226d1535 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -98,7 +98,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch - matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-pytorch"))' test-pytorch-linux-aarch64: name: "Nightly PyTorch (linux-aarch64)" @@ -116,7 +116,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch - matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-pytorch"))' test-pytorch-windows: name: "Nightly PyTorch (win-64)" @@ -134,7 +134,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch - matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-pytorch"))' # ── numba-cuda tests ── @@ -154,7 +154,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda - matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' test-numba-cuda-linux-aarch64: name: "Nightly numba-cuda (linux-aarch64)" @@ -172,7 +172,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda - matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' test-numba-cuda-windows: name: "Nightly numba-cuda (win-64)" @@ -190,7 +190,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda - matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' # ── Standard tests on nightly-only runners ── @@ -210,7 +210,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: standard - matrix_filter: 'map(select(.MODE == "nightly-standard"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-standard"))' # ── Status check ── diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 72ee298f77..f72ebacb3e 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -100,7 +100,7 @@ jobs: echo "OLD_BRANCH=${OLD_BRANCH}" >> "$GITHUB_OUTPUT" test: - name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }}${{ matrix.FLAVOR && format(', {0}', matrix.FLAVOR) || '' }}${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} + name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }}${{ matrix.FLAVOR && format(', {0}', matrix.FLAVOR) || '' }}${{ matrix.ENV.TORCH_VER && format(', {0}+{1}', matrix.ENV.TORCH_VER, matrix.ENV.TORCH_CUDA) || '' }}${{ matrix.ENV.MODE == 'nightly-numba-cuda' && ', latest' || '' }} timeout-minutes: 60 needs: compute-matrix strategy: @@ -400,8 +400,6 @@ jobs: env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} - TORCH_VER: ${{ matrix.TORCH_VER }} - TORCH_CUDA: ${{ matrix.TORCH_CUDA }} run: run-tests nightly-pytorch - name: Install cuda-python wheels + numba-cuda diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 6db50b89da..bf50581527 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -90,7 +90,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} + name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.ENV.TORCH_VER && format(', {0}+{1}', matrix.ENV.TORCH_VER, matrix.ENV.TORCH_CUDA) || '' }}${{ matrix.ENV.MODE == 'nightly-numba-cuda' && ', latest' || '' }} timeout-minutes: 60 # The build stage could fail but we want the CI to keep moving. needs: compute-matrix @@ -377,8 +377,6 @@ jobs: env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} - TORCH_VER: ${{ matrix.TORCH_VER }} - TORCH_CUDA: ${{ matrix.TORCH_CUDA }} shell: bash --noprofile --norc -xeuo pipefail {0} run: run-tests nightly-pytorch diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index ad12bb243a..c9eafd4f52 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -27,8 +27,10 @@ # job. Every key/value pair is written to GITHUB_ENV after the standard # environment-variable setup step, so the variables are visible to all # subsequent steps (including the cuda.bindings and cuda.core test -# steps). Example: +# steps). Nightly rows also use ENV.MODE as a matrix-filter tag (see +# ci-nightly.yml). Examples: # ENV: { CUDA_PYTHON_PER_THREAD_DEFAULT_STREAM: '1' } +# ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } linux: pull-request: @@ -81,22 +83,25 @@ linux: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } nightly: # nightly-pytorch - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } # nightly-numba-cuda - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06' } - - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - # nightly-standard (arm64 l4×2 — nightly-only per runner team request) - - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } - - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + # nightly-standard (arm64 nightly-only runners — per runner team request) + # TODO: gh200 row disabled — currently hangs on stream-ordered memory + # allocator (cudaMallocAsync); runner pool needs fixing first. + # - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'gh200', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } windows: pull-request: @@ -124,10 +129,10 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # nightly-pytorch - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } # nightly-numba-cuda - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } }