From fa3cc1cc15311b66b96ac67331384db9c7cef2ad Mon Sep 17 00:00:00 2001 From: ssjia Date: Thu, 19 Mar 2026 12:15:10 -0700 Subject: [PATCH] [ET-VK][CI] Add test-vulkan-genai job for Parakeet on NVIDIA GPU runner Add a new GitHub CI job that exports and runs the Parakeet TDT model with the Vulkan backend on an NVIDIA GPU runner. The Vulkan export and runner code already exists but had no CI coverage. - Add `--gpu` flag to `setup-vulkan-linux-deps.sh` to skip SwiftShader installation when running on machines with a real GPU driver - Add `vulkan` as a supported device in `export_model_artifact.sh` and `test_model_e2e.sh` - Add `test-vulkan-genai` job to `pull.yml` on `linux.g5.4xlarge.nvidia.gpu` Differential Revision: [D97344728](https://our.internmc.facebook.com/intern/diff/D97344728/) [ghstack-poisoned] --- .ci/scripts/export_model_artifact.sh | 11 +++++--- .ci/scripts/setup-vulkan-linux-deps.sh | 12 ++++++++- .ci/scripts/test_model_e2e.sh | 7 +++--- .github/workflows/pull.yml | 35 ++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/.ci/scripts/export_model_artifact.sh b/.ci/scripts/export_model_artifact.sh index f8dc663ad04..df4a1e9e555 100755 --- a/.ci/scripts/export_model_artifact.sh +++ b/.ci/scripts/export_model_artifact.sh @@ -14,7 +14,7 @@ Usage: export_model_artifact.sh [quant_name] [output_dir] [m Export a HuggingFace model to CUDA/Metal/XNNPACK format with optional quantization. Arguments: - device cuda, metal, or xnnpack (required) + device cuda, metal, vulkan, or xnnpack (required) hf_model HuggingFace model ID (required) Supported models: @@ -49,6 +49,7 @@ Examples: export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" export_model_artifact.sh cuda-windows "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./output" export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output" + export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./output" export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output" export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output" export_model_artifact.sh xnnpack "mistralai/Voxtral-Mini-4B-Realtime-2602" "quantized-8da4w" "./output" @@ -103,9 +104,11 @@ case "$DEVICE" in ;; xnnpack) ;; + vulkan) + ;; *) echo "Error: Unsupported device '$DEVICE'" - echo "Supported devices: cuda, cuda-windows, metal, xnnpack" + echo "Supported devices: cuda, cuda-windows, metal, vulkan, xnnpack" exit 1 ;; esac @@ -242,9 +245,11 @@ pip list if [ "$MODEL_NAME" = "parakeet" ]; then pip install -r examples/models/parakeet/install_requirements.txt - # Set dtype based on backend (XNNPACK uses fp32, CUDA/Metal use bf16) + # Set dtype based on backend (XNNPACK uses fp32, Vulkan uses fp16, CUDA/Metal use bf16) if [ "$DEVICE" = "xnnpack" ]; then DTYPE_ARG="" + elif [ "$DEVICE" = "vulkan" ]; then + DTYPE_ARG="--vulkan_force_fp16" else DTYPE_ARG="--dtype bf16" fi diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh index cd99ff0d6ff..d7b8536167e 100755 --- a/.ci/scripts/setup-vulkan-linux-deps.sh +++ b/.ci/scripts/setup-vulkan-linux-deps.sh @@ -45,5 +45,15 @@ install_vulkan_sdk() { VULKAN_SDK_VERSION="1.4.321.1" -install_swiftshader +# Parse arguments: --gpu skips SwiftShader (use NVIDIA driver's Vulkan ICD instead) +USE_GPU=false +for arg in "$@"; do + case $arg in + --gpu) USE_GPU=true ;; + esac +done + +if [ "$USE_GPU" = false ]; then + install_swiftshader +fi install_vulkan_sdk "${VULKAN_SDK_VERSION}" diff --git a/.ci/scripts/test_model_e2e.sh b/.ci/scripts/test_model_e2e.sh index 7014b3caef6..8c574b7d29f 100755 --- a/.ci/scripts/test_model_e2e.sh +++ b/.ci/scripts/test_model_e2e.sh @@ -14,7 +14,7 @@ Usage: test_model_e2e.sh [model_dir] [mode] Build and run end-to-end tests for CUDA/Metal/XNNPACK models. Arguments: - device cuda, metal, or xnnpack (required) + device cuda, metal, vulkan, or xnnpack (required) hf_model HuggingFace model ID (required) Supported models: @@ -47,6 +47,7 @@ Examples: test_model_e2e.sh metal "openai/whisper-small" "non-quantized" test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output" test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output" + test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./model_output" test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output" test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output" test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming" @@ -262,8 +263,8 @@ echo "::endgroup::" echo "::group::Build $MODEL_NAME Runner" -if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "xnnpack" ]; then - echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', or 'xnnpack'." +if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "vulkan" ] && [ "$DEVICE" != "xnnpack" ]; then + echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', 'vulkan', or 'xnnpack'." exit 1 fi diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6c9f2c30148..3499efbb102 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1310,6 +1310,41 @@ jobs: python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*" python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*" + test-vulkan-genai: + name: test-vulkan-genai + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + runner: linux.g5.4xlarge.nvidia.gpu + docker-image: ci-image:executorch-ubuntu-22.04-clang12 + gpu-arch-type: cuda + gpu-arch-version: "12.6" + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + set -eux + + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + # Setup Vulkan SDK (no SwiftShader — use NVIDIA driver's Vulkan ICD) + source .ci/scripts/setup-vulkan-linux-deps.sh --gpu + + # Setup ExecuTorch + PYTHON_EXECUTABLE=python \ + CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \ + .ci/scripts/setup-linux.sh --build-tool "cmake" + + # Export parakeet with Vulkan backend + source .ci/scripts/export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "${RUNNER_ARTIFACT_DIR}" + + # Build runner and test e2e + source .ci/scripts/test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "${RUNNER_ARTIFACT_DIR}" + test-coreml-bc-macos: name: test-coreml-bc-macos (${{ matrix.runner }}) uses: pytorch/test-infra/.github/workflows/macos_job.yml@main