From 6824e001b6362a0639e51ab9a7f2d678c603262c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 27 Jun 2026 03:57:51 +0000 Subject: [PATCH 1/5] CI: tidy test-matrix nightly; add arm64 gh200; bump torch to 2.12.1 - ci/test-matrix.yml: move per-row MODE/TORCH_VER/TORCH_CUDA into the ENV map (rides the existing matrix-env injection step). Add a nightly-standard arm64 gh200 row. Bump latest-PyTorch rows from 2.11.0 to 2.12.1; 2.9.1 rows untouched. - .github/workflows/ci-nightly.yml: matrix_filter selectors now key on .ENV.MODE. - .github/workflows/test-wheel-{linux,windows}.yml: job-name format strings read TORCH_VER/MODE from matrix.ENV; TORCH_CUDA also rendered in the name (e.g. ", 2.12.1+cu126"). Drop the now-redundant TORCH_VER/TORCH_CUDA lines from the pytorch step's env block. --- .github/workflows/ci-nightly.yml | 14 +++---- .github/workflows/test-wheel-linux.yml | 4 +- .github/workflows/test-wheel-windows.yml | 4 +- ci/test-matrix.yml | 47 +++++++++++++----------- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 684083edb13..42226d15352 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -98,7 +98,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch - matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-pytorch"))' test-pytorch-linux-aarch64: name: "Nightly PyTorch (linux-aarch64)" @@ -116,7 +116,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch - matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-pytorch"))' test-pytorch-windows: name: "Nightly PyTorch (win-64)" @@ -134,7 +134,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-pytorch - matrix_filter: 'map(select(.MODE == "nightly-pytorch"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-pytorch"))' # ── numba-cuda tests ── @@ -154,7 +154,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda - matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' test-numba-cuda-linux-aarch64: name: "Nightly numba-cuda (linux-aarch64)" @@ -172,7 +172,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda - matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' test-numba-cuda-windows: name: "Nightly numba-cuda (win-64)" @@ -190,7 +190,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: nightly-numba-cuda - matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' # ── Standard tests on nightly-only runners ── @@ -210,7 +210,7 @@ jobs: run-id: ${{ needs.find-wheels.outputs.RUN_ID }} sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} test-mode: standard - matrix_filter: 'map(select(.MODE == "nightly-standard"))' + matrix_filter: 'map(select(.ENV.MODE == "nightly-standard"))' # ── Status check ── diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 72ee298f77a..f72ebacb3ea 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -100,7 +100,7 @@ jobs: echo "OLD_BRANCH=${OLD_BRANCH}" >> "$GITHUB_OUTPUT" test: - name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }}${{ matrix.FLAVOR && format(', {0}', matrix.FLAVOR) || '' }}${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} + name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }}${{ matrix.FLAVOR && format(', {0}', matrix.FLAVOR) || '' }}${{ matrix.ENV.TORCH_VER && format(', {0}+{1}', matrix.ENV.TORCH_VER, matrix.ENV.TORCH_CUDA) || '' }}${{ matrix.ENV.MODE == 'nightly-numba-cuda' && ', latest' || '' }} timeout-minutes: 60 needs: compute-matrix strategy: @@ -400,8 +400,6 @@ jobs: env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} - TORCH_VER: ${{ matrix.TORCH_VER }} - TORCH_CUDA: ${{ matrix.TORCH_CUDA }} run: run-tests nightly-pytorch - name: Install cuda-python wheels + numba-cuda diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 6db50b89da7..bf505815276 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -90,7 +90,7 @@ jobs: echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" test: - name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }} + name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.ENV.TORCH_VER && format(', {0}+{1}', matrix.ENV.TORCH_VER, matrix.ENV.TORCH_CUDA) || '' }}${{ matrix.ENV.MODE == 'nightly-numba-cuda' && ', latest' || '' }} timeout-minutes: 60 # The build stage could fail but we want the CI to keep moving. needs: compute-matrix @@ -377,8 +377,6 @@ jobs: env: CUDA_VER: ${{ matrix.CUDA_VER }} LOCAL_CTK: ${{ matrix.LOCAL_CTK }} - TORCH_VER: ${{ matrix.TORCH_VER }} - TORCH_CUDA: ${{ matrix.TORCH_CUDA }} shell: bash --noprofile --norc -xeuo pipefail {0} run: run-tests nightly-pytorch diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index ad12bb243aa..839f3e80f08 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -27,8 +27,10 @@ # job. Every key/value pair is written to GITHUB_ENV after the standard # environment-variable setup step, so the variables are visible to all # subsequent steps (including the cuda.bindings and cuda.core test -# steps). Example: +# steps). Nightly rows also use ENV.MODE as a matrix-filter tag (see +# ci-nightly.yml). Examples: # ENV: { CUDA_PYTHON_PER_THREAD_DEFAULT_STREAM: '1' } +# ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } linux: pull-request: @@ -81,22 +83,23 @@ linux: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } nightly: # nightly-pytorch - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } # nightly-numba-cuda - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06' } - - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - # nightly-standard (arm64 l4×2 — nightly-only per runner team request) - - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } - - { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + # nightly-standard (arm64 nightly-only runners — per runner team request) + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'gh200', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } windows: pull-request: @@ -124,10 +127,10 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # nightly-pytorch - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu130' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } - - { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } # nightly-numba-cuda - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - - { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } From 97ee1cf6de50a66ef92bf4aa76754e4cfdb4c8fb Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 27 Jun 2026 04:18:34 +0000 Subject: [PATCH 2/5] ci/test-matrix.yml: align nightly columns with pull-request rows Pad PY_VER and GPU columns in the nightly section to match the widths used by the pull-request rows above (17-char PY_VER, 19-char GPU). Purely cosmetic; YAML parse and matrix expansion unchanged. --- ci/test-matrix.yml | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index 839f3e80f08..54b49a405e3 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -83,23 +83,23 @@ linux: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' } nightly: # nightly-pytorch - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } # nightly-numba-cuda - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06', ENV: { MODE: 'nightly-numba-cuda' } } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } # nightly-standard (arm64 nightly-only runners — per runner team request) - - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'gh200', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } - - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } - - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'gh200', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } windows: pull-request: @@ -127,10 +127,10 @@ windows: - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: # nightly-pytorch - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu130' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' } } # nightly-numba-cuda - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } From d29bc340f8bbe675098686265baad367ec051591 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 27 Jun 2026 04:18:38 +0000 Subject: [PATCH 3/5] Temporarily add push trigger to ci-nightly.yml for testing Remove before merging. --- .github/workflows/ci-nightly.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 42226d15352..90db74a387b 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -16,6 +16,10 @@ concurrency: cancel-in-progress: true on: + push: + branches: + - "main" + - "pull-request/[0-9]+" schedule: # 2:17 AM UTC daily, after the midnight main CI build finishes. # Avoid minute 0 because GitHub documents high scheduled-workflow load From 4c70cfac0557f9415ad840f8928e19026645b1ca Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 28 Jun 2026 02:55:03 +0000 Subject: [PATCH 4/5] ci/test-matrix.yml: temporarily comment out gh200 nightly row The gh200 runner currently hangs on stream-ordered memory allocator calls (cudaMallocAsync). Disabling until the runner-side issue is resolved. --- ci/test-matrix.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index 54b49a405e3..c9eafd4f521 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -97,7 +97,9 @@ linux: - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } # nightly-standard (arm64 nightly-only runners — per runner team request) - - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'gh200', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } + # TODO: gh200 row disabled — currently hangs on stream-ordered memory + # allocator (cudaMallocAsync); runner pool needs fixing first. + # - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'gh200', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest', ENV: { MODE: 'nightly-standard' } } From 7e002a618ed32fd6e60833fbe6770d574366314c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 28 Jun 2026 02:55:08 +0000 Subject: [PATCH 5/5] Revert "Temporarily add push trigger to ci-nightly.yml for testing" This reverts commit d29bc340f8bbe675098686265baad367ec051591. --- .github/workflows/ci-nightly.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 90db74a387b..42226d15352 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -16,10 +16,6 @@ concurrency: cancel-in-progress: true on: - push: - branches: - - "main" - - "pull-request/[0-9]+" schedule: # 2:17 AM UTC daily, after the midnight main CI build finishes. # Avoid minute 0 because GitHub documents high scheduled-workflow load