Skip to content

Commit 1248fe2

Browse files
author
ssjia
committed
[ET-VK][CI] Add test-vulkan-genai job for Parakeet on NVIDIA GPU runner
Pull Request resolved: #18335 Add a new GitHub CI job that exports and runs the Parakeet TDT model with the Vulkan backend on an NVIDIA GPU runner. The Vulkan export and runner code already exists but had no CI coverage. - Add `--gpu` flag to `setup-vulkan-linux-deps.sh` to skip SwiftShader installation when running on machines with a real GPU driver - Add `vulkan` as a supported device in `export_model_artifact.sh` and `test_model_e2e.sh` - Add `test-vulkan-genai` job to `pull.yml` on `linux.g5.4xlarge.nvidia.gpu` ghstack-source-id: 354902046 @exported-using-ghexport Differential Revision: [D97344728](https://our.internmc.facebook.com/intern/diff/D97344728/)
1 parent 45b3e68 commit 1248fe2

4 files changed

Lines changed: 60 additions & 9 deletions

File tree

.ci/scripts/export_model_artifact.sh

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Usage: export_model_artifact.sh <device> <hf_model> [quant_name] [output_dir] [m
1414
Export a HuggingFace model to CUDA/Metal/Vulkan/XNNPACK format with optional quantization.
1515
1616
Arguments:
17-
device cuda, metal, or xnnpack (required)
17+
device cuda, metal, vulkan, or xnnpack (required)
1818
1919
hf_model HuggingFace model ID (required)
2020
Supported models:
@@ -49,6 +49,7 @@ Examples:
4949
export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
5050
export_model_artifact.sh cuda-windows "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./output"
5151
export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
52+
export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./output"
5253
export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output"
5354
export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output"
5455
export_model_artifact.sh xnnpack "mistralai/Voxtral-Mini-4B-Realtime-2602" "quantized-8da4w" "./output"
@@ -103,9 +104,11 @@ case "$DEVICE" in
103104
;;
104105
xnnpack)
105106
;;
107+
vulkan)
108+
;;
106109
*)
107110
echo "Error: Unsupported device '$DEVICE'"
108-
echo "Supported devices: cuda, cuda-windows, metal, xnnpack"
111+
echo "Supported devices: cuda, cuda-windows, metal, vulkan, xnnpack"
109112
exit 1
110113
;;
111114
esac
@@ -218,8 +221,8 @@ case "$QUANT_NAME" in
218221
EXTRA_ARGS="--qlinear fpa4w --qlinear_encoder fpa4w"
219222
;;
220223
quantized-8da4w)
221-
if [ "$DEVICE" != "xnnpack" ]; then
222-
echo "Error: quantized-8da4w is only supported with xnnpack device"
224+
if [ "$DEVICE" != "xnnpack" ] && [ "$DEVICE" != "vulkan" ]; then
225+
echo "Error: quantized-8da4w is only supported with xnnpack or vulkan device"
223226
exit 1
224227
fi
225228
EXTRA_ARGS="--qlinear 8da4w --qlinear_group_size 32 --qlinear_encoder 8da4w --qlinear_encoder_group_size 32"
@@ -242,9 +245,11 @@ pip list
242245
if [ "$MODEL_NAME" = "parakeet" ]; then
243246
pip install -r examples/models/parakeet/install_requirements.txt
244247

245-
# Set dtype based on backend (XNNPACK uses fp32, CUDA/Metal use bf16)
248+
# Set dtype based on backend (XNNPACK uses fp32, Vulkan uses fp16, CUDA/Metal use bf16)
246249
if [ "$DEVICE" = "xnnpack" ]; then
247250
DTYPE_ARG=""
251+
elif [ "$DEVICE" = "vulkan" ]; then
252+
DTYPE_ARG="--vulkan_force_fp16"
248253
else
249254
DTYPE_ARG="--dtype bf16"
250255
fi

.ci/scripts/setup-vulkan-linux-deps.sh

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,15 @@ install_vulkan_sdk() {
4545

4646
VULKAN_SDK_VERSION="1.4.321.1"
4747

48-
install_swiftshader
48+
# Parse arguments: --gpu skips SwiftShader (use NVIDIA driver's Vulkan ICD instead)
49+
USE_GPU=false
50+
for arg in "$@"; do
51+
case $arg in
52+
--gpu) USE_GPU=true ;;
53+
esac
54+
done
55+
56+
if [ "$USE_GPU" = false ]; then
57+
install_swiftshader
58+
fi
4959
install_vulkan_sdk "${VULKAN_SDK_VERSION}"

.ci/scripts/test_model_e2e.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Usage: test_model_e2e.sh <device> <hf_model> <quant_name> [model_dir] [mode]
1414
Build and run end-to-end tests for CUDA/Metal/Vulkan/XNNPACK models.
1515
1616
Arguments:
17-
device cuda, metal, or xnnpack (required)
17+
device cuda, metal, vulkan, or xnnpack (required)
1818
1919
hf_model HuggingFace model ID (required)
2020
Supported models:
@@ -47,6 +47,7 @@ Examples:
4747
test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
4848
test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
4949
test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output"
50+
test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./model_output"
5051
test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output"
5152
test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output"
5253
test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming"
@@ -262,8 +263,8 @@ echo "::endgroup::"
262263

263264
echo "::group::Build $MODEL_NAME Runner"
264265

265-
if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "xnnpack" ]; then
266-
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', or 'xnnpack'."
266+
if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "vulkan" ] && [ "$DEVICE" != "xnnpack" ]; then
267+
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', 'vulkan', or 'xnnpack'."
267268
exit 1
268269
fi
269270

.github/workflows/pull.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,41 @@ jobs:
13101310
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
13111311
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
13121312
1313+
test-vulkan-genai:
1314+
name: test-vulkan-genai
1315+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1316+
permissions:
1317+
id-token: write
1318+
contents: read
1319+
with:
1320+
runner: linux.g5.4xlarge.nvidia.gpu
1321+
docker-image: ci-image:executorch-ubuntu-22.04-clang12
1322+
gpu-arch-type: cuda
1323+
gpu-arch-version: "12.6"
1324+
submodules: 'recursive'
1325+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1326+
timeout: 90
1327+
script: |
1328+
set -eux
1329+
1330+
# The generic Linux job chooses to use base env, not the one setup by the image
1331+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1332+
conda activate "${CONDA_ENV}"
1333+
1334+
# Setup Vulkan SDK (no SwiftShader — use NVIDIA driver's Vulkan ICD)
1335+
source .ci/scripts/setup-vulkan-linux-deps.sh --gpu
1336+
1337+
# Setup ExecuTorch
1338+
PYTHON_EXECUTABLE=python \
1339+
CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
1340+
.ci/scripts/setup-linux.sh --build-tool "cmake"
1341+
1342+
# Export parakeet with Vulkan backend
1343+
bash .ci/scripts/export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "quantized-8da4w" "${RUNNER_ARTIFACT_DIR}"
1344+
1345+
# Build runner and test e2e
1346+
bash .ci/scripts/test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "quantized-8da4w" "${RUNNER_ARTIFACT_DIR}"
1347+
13131348
test-coreml-bc-macos:
13141349
name: test-coreml-bc-macos (${{ matrix.runner }})
13151350
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

0 commit comments

Comments
 (0)