From fa3cc1cc15311b66b96ac67331384db9c7cef2ad Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devgpu053.atn3.facebook.com>
Date: Thu, 19 Mar 2026 12:15:10 -0700
Subject: [PATCH] [ET-VK][CI] Add test-vulkan-genai job for Parakeet on NVIDIA
 GPU runner

Add a new GitHub CI job that exports and runs the Parakeet TDT model with
the Vulkan backend on an NVIDIA GPU runner. The Vulkan export and runner
code already exists but previously had no CI coverage.

- Add `--gpu` flag to `setup-vulkan-linux-deps.sh` to skip SwiftShader
  installation when running on machines with a real GPU driver (the
  Vulkan SDK is still installed)
- Add `vulkan` as a supported device in `export_model_artifact.sh` and
  `test_model_e2e.sh`; the Parakeet export passes `--vulkan_force_fp16`
  for this device
- Add `test-vulkan-genai` job to `pull.yml` on `linux.g5.4xlarge.nvidia.gpu`

Differential Revision: [D97344728](https://our.internmc.facebook.com/intern/diff/D97344728/)

[ghstack-poisoned]
---
 .ci/scripts/export_model_artifact.sh   | 11 +++++---
 .ci/scripts/setup-vulkan-linux-deps.sh | 12 ++++++++-
 .ci/scripts/test_model_e2e.sh          |  7 +++---
 .github/workflows/pull.yml             | 35 ++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 7 deletions(-)
diff --git a/.ci/scripts/export_model_artifact.sh b/.ci/scripts/export_model_artifact.sh
index f8dc663ad04..df4a1e9e555 100755
--- a/.ci/scripts/export_model_artifact.sh
+++ b/.ci/scripts/export_model_artifact.sh
@@ -14,7 +14,7 @@ Usage: export_model_artifact.sh <device> <hf_model> [quant_name] [output_dir] [m
 Export a HuggingFace model to CUDA/Metal/XNNPACK format with optional quantization.
 
 Arguments:
-  device       cuda, metal, or xnnpack (required)
+  device       cuda, metal, vulkan, or xnnpack (required)
 
   hf_model     HuggingFace model ID (required)
                Supported models:
@@ -49,6 +49,7 @@ Examples:
   export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
   export_model_artifact.sh cuda-windows "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./output"
   export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
+  export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./output"
   export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output"
   export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output"
   export_model_artifact.sh xnnpack "mistralai/Voxtral-Mini-4B-Realtime-2602" "quantized-8da4w" "./output"
@@ -103,9 +104,11 @@ case "$DEVICE" in
     ;;
   xnnpack)
     ;;
+  vulkan)
+    ;;
   *)
     echo "Error: Unsupported device '$DEVICE'"
-    echo "Supported devices: cuda, cuda-windows, metal, xnnpack"
+    echo "Supported devices: cuda, cuda-windows, metal, vulkan, xnnpack"
     exit 1
     ;;
 esac
@@ -242,9 +245,11 @@ pip list
 if [ "$MODEL_NAME" = "parakeet" ]; then
   pip install -r examples/models/parakeet/install_requirements.txt
 
-  # Set dtype based on backend (XNNPACK uses fp32, CUDA/Metal use bf16)
+  # Set dtype based on backend (XNNPACK uses fp32, Vulkan uses fp16, CUDA/Metal use bf16)
   if [ "$DEVICE" = "xnnpack" ]; then
     DTYPE_ARG=""
+  elif [ "$DEVICE" = "vulkan" ]; then
+    DTYPE_ARG="--vulkan_force_fp16"
   else
     DTYPE_ARG="--dtype bf16"
   fi
diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh
index cd99ff0d6ff..d7b8536167e 100755
--- a/.ci/scripts/setup-vulkan-linux-deps.sh
+++ b/.ci/scripts/setup-vulkan-linux-deps.sh
@@ -45,5 +45,15 @@ install_vulkan_sdk() {
 
 VULKAN_SDK_VERSION="1.4.321.1"
 
-install_swiftshader
+# Parse arguments: --gpu skips SwiftShader so the GPU driver's own Vulkan ICD is used; other arguments are ignored
+USE_GPU=false
+for arg in "$@"; do
+  case $arg in
+    --gpu) USE_GPU=true ;;
+  esac
+done
+
+if [ "$USE_GPU" = false ]; then
+  install_swiftshader
+fi
 install_vulkan_sdk "${VULKAN_SDK_VERSION}"
diff --git a/.ci/scripts/test_model_e2e.sh b/.ci/scripts/test_model_e2e.sh
index 7014b3caef6..8c574b7d29f 100755
--- a/.ci/scripts/test_model_e2e.sh
+++ b/.ci/scripts/test_model_e2e.sh
@@ -14,7 +14,7 @@ Usage: test_model_e2e.sh <device> <hf_model> <quant_name> [model_dir] [mode]
 Build and run end-to-end tests for CUDA/Metal/XNNPACK models.
 
 Arguments:
-  device      cuda, metal, or xnnpack (required)
+  device      cuda, metal, vulkan, or xnnpack (required)
 
   hf_model    HuggingFace model ID (required)
               Supported models:
@@ -47,6 +47,7 @@ Examples:
   test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
   test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
   test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output"
+  test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./model_output"
   test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output"
   test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output"
   test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming"
@@ -262,8 +263,8 @@ echo "::endgroup::"
 
 echo "::group::Build $MODEL_NAME Runner"
 
-if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "xnnpack" ]; then
-  echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', or 'xnnpack'."
+if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "vulkan" ] && [ "$DEVICE" != "xnnpack" ]; then
+  echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', 'vulkan', or 'xnnpack'."
   exit 1
 fi
 
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 6c9f2c30148..3499efbb102 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -1310,6 +1310,41 @@ jobs:
         python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
         python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
 
+  test-vulkan-genai:
+    name: test-vulkan-genai
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.6"
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one set up by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Set up Vulkan SDK (no SwiftShader — use NVIDIA driver's Vulkan ICD)
+        source .ci/scripts/setup-vulkan-linux-deps.sh --gpu
+
+        # Setup ExecuTorch
+        PYTHON_EXECUTABLE=python \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
+        .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Export parakeet with Vulkan backend
+        source .ci/scripts/export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "${RUNNER_ARTIFACT_DIR}"
+
+        # Build runner and test e2e
+        source .ci/scripts/test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "${RUNNER_ARTIFACT_DIR}"
+
   test-coreml-bc-macos:
     name: test-coreml-bc-macos (${{ matrix.runner }})
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main