Skip to content

Commit 31eba44

Browse files
committed
Refactor CI workflow to use DRY principles with matrix strategy
- Consolidate 5 separate jobs into single matrix job
- Add conditional logic for GPU vs CPU testing
- Support both Linux and Windows platforms
- Enable/disable configurations via matrix.enabled flag
- Reduce code duplication by 90%

Signed-off-by: cdunning <cdunning@nvidia.com>
1 parent d6204d0 commit 31eba44

1 file changed

Lines changed: 83 additions & 21 deletions

File tree

.github/workflows/ci.yml

Lines changed: 83 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -9,40 +9,102 @@ on:
99

1010
jobs:
  # Matrix job that smoke-tests Docker and the CUDA base image on each
  # configured runner, either with GPU access (`--gpus all`) or CPU-only.
  test:
    name: Test ${{ matrix.name }}
    runs-on: ${{ matrix.runner }}
    timeout-minutes: ${{ matrix.timeout }}
    strategy:
      fail-fast: false
      matrix:
        # The `matrix` context is not available in a job-level `if`, so a
        # per-entry `enabled` flag cannot gate the job (the workflow fails
        # validation). A configuration is disabled by commenting its entry
        # out and enabled by uncommenting it.
        #
        # Keys used by the steps below:
        #   name    - suffix of the job's display name
        #   runner  - label passed to `runs-on`
        #   os      - `linux` or `windows`; selects bash vs. pwsh steps
        #   gpu     - whether the container is started with `--gpus all`
        #   timeout - job timeout in minutes
        include:
          - name: "GPU Linux x86_64"
            runner: linux-amd64-gpu-rtxpro6000-latest-1
            os: linux
            gpu: true
            timeout: 30
          - name: "CPU Linux x86_64"
            runner: linux-amd64-gpu-rtxpro6000-latest-1
            os: linux
            gpu: false
            timeout: 15
          # Disabled configurations: uncomment an entry to enable it.
          # - name: "CPU Linux aarch64"
          #   runner: ubuntu-latest  # TODO: Replace with ARM64 runner
          #   os: linux
          #   gpu: false
          #   timeout: 15
          # - name: "CPU Windows x86_64"
          #   runner: windows-amd64-gpu-rtxpro6000-latest-1
          #   os: windows
          #   gpu: false
          #   timeout: 15
          # - name: "GPU Windows x86_64"
          #   runner: windows-amd64-gpu-rtxpro6000-latest-1
          #   os: windows
          #   gpu: true
          #   timeout: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        timeout-minutes: 5

      # Both commands are plain `docker` invocations, so the same script body
      # runs under either shell.
      - name: Test Docker
        timeout-minutes: 5
        shell: ${{ matrix.os == 'windows' && 'pwsh' || 'bash' }}
        run: |
          docker --version
          docker run --rm hello-world

      # The hardware check is split per OS: bash `if [ ... ]` syntax is not
      # valid PowerShell and `Write-Host` is not valid bash, so one shared
      # script cannot run under both shells.
      - name: Check Hardware (Linux)
        if: ${{ matrix.os != 'windows' }}
        timeout-minutes: 5
        shell: bash
        run: |
          echo "=== Host Hardware Information ==="
          lspci | grep -i nvidia || echo "No NVIDIA GPU found via lspci"
          if [ "${{ matrix.gpu }}" = "true" ]; then
            nvidia-smi || echo "nvidia-smi not available on host"
          else
            echo "CPU-only mode (same hardware, no --gpus flag)"
          fi
          if [ "${{ matrix.runner }}" = "ubuntu-latest" ]; then
            echo "Architecture: $(uname -m)"
          fi

      - name: Check Hardware (Windows)
        if: ${{ matrix.os == 'windows' }}
        timeout-minutes: 5
        shell: pwsh
        run: |
          Write-Host "=== Host Hardware Information ==="
          if ("${{ matrix.gpu }}" -eq "true") {
            nvidia-smi
          } else {
            Write-Host "CPU-only mode (no GPU access)"
          }

      # The GPU/CPU branch is expressed as a step-level `if` instead of a
      # shell conditional so the script bodies stay shell-agnostic.
      - name: Test CUDA Image (GPU)
        if: ${{ matrix.gpu }}
        timeout-minutes: 10
        shell: ${{ matrix.os == 'windows' && 'pwsh' || 'bash' }}
        # Best-effort: a failed GPU run is reported by the trailing `||`
        # fallback rather than failing the step.
        # NOTE(review): `||` needs PowerShell 7+, and under pwsh the fallback
        # may not reset $LASTEXITCODE - confirm before enabling Windows GPU
        # entries.
        run: |
          docker pull nvidia/cuda:13.0.2-base-ubuntu24.04
          echo "=== Testing with GPU access ==="
          docker run --rm --gpus all nvidia/cuda:13.0.2-base-ubuntu24.04 bash -c "
            echo 'Container with GPU access:'
            nvidia-smi || echo 'nvidia-smi not available in container'
            cat /etc/os-release
            echo 'CUDA Runtime version:'
            cat /usr/local/cuda/version.txt 2>/dev/null || echo 'CUDA version file not found'
          " || echo "GPU access failed - may not be available on this runner"

      - name: Test CUDA Image (CPU only)
        if: ${{ !matrix.gpu }}
        timeout-minutes: 10
        shell: ${{ matrix.os == 'windows' && 'pwsh' || 'bash' }}
        run: |
          docker pull nvidia/cuda:13.0.2-base-ubuntu24.04
          echo "=== Testing without GPU access (CPU only) ==="
          docker run --rm nvidia/cuda:13.0.2-base-ubuntu24.04 bash -c "
            echo 'CUDA container test successful (CPU only - no GPU access)'
            cat /etc/os-release
            which nvcc || echo 'nvcc not in PATH'
            nvidia-smi 2>/dev/null || echo 'nvidia-smi not available without --gpus flag (expected)'
          "

0 commit comments

Comments
 (0)