Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions .ci/scripts/export_model_artifact.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Usage: export_model_artifact.sh <device> <hf_model> [quant_name] [output_dir] [m
Export a HuggingFace model to CUDA/Metal/Vulkan/XNNPACK format with optional quantization.

Arguments:
device cuda, metal, or xnnpack (required)
device cuda, metal, vulkan, or xnnpack (required)

hf_model HuggingFace model ID (required)
Supported models:
Expand Down Expand Up @@ -49,6 +49,7 @@ Examples:
export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
export_model_artifact.sh cuda-windows "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./output"
export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./output"
export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output"
export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output"
export_model_artifact.sh xnnpack "mistralai/Voxtral-Mini-4B-Realtime-2602" "quantized-8da4w" "./output"
Expand Down Expand Up @@ -103,9 +104,11 @@ case "$DEVICE" in
;;
xnnpack)
;;
vulkan)
;;
*)
echo "Error: Unsupported device '$DEVICE'"
echo "Supported devices: cuda, cuda-windows, metal, xnnpack"
echo "Supported devices: cuda, cuda-windows, metal, vulkan, xnnpack"
exit 1
;;
esac
Expand Down Expand Up @@ -218,8 +221,8 @@ case "$QUANT_NAME" in
EXTRA_ARGS="--qlinear fpa4w --qlinear_encoder fpa4w"
;;
quantized-8da4w)
if [ "$DEVICE" != "xnnpack" ]; then
echo "Error: quantized-8da4w is only supported with xnnpack device"
if [ "$DEVICE" != "xnnpack" ] && [ "$DEVICE" != "vulkan" ]; then
echo "Error: quantized-8da4w is only supported with xnnpack or vulkan device"
exit 1
fi
EXTRA_ARGS="--qlinear 8da4w --qlinear_group_size 32 --qlinear_encoder 8da4w --qlinear_encoder_group_size 32"
Expand All @@ -242,9 +245,11 @@ pip list
if [ "$MODEL_NAME" = "parakeet" ]; then
pip install -r examples/models/parakeet/install_requirements.txt

# Set dtype based on backend (XNNPACK uses fp32, CUDA/Metal use bf16)
# Set dtype based on backend (XNNPACK uses fp32, Vulkan uses fp16, CUDA/Metal use bf16)
if [ "$DEVICE" = "xnnpack" ]; then
DTYPE_ARG=""
elif [ "$DEVICE" = "vulkan" ]; then
DTYPE_ARG="--vulkan_force_fp16"
else
DTYPE_ARG="--dtype bf16"
fi
Expand Down
12 changes: 11 additions & 1 deletion .ci/scripts/setup-vulkan-linux-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,15 @@ install_vulkan_sdk() {

VULKAN_SDK_VERSION="1.4.321.1"

install_swiftshader
# Scan the arguments for --gpu. When it is present SwiftShader is not
# installed (the NVIDIA driver's Vulkan ICD is used instead). Any other
# argument is ignored.
USE_GPU=false
for arg in "$@"; do
  if [ "$arg" = "--gpu" ]; then
    USE_GPU=true
  fi
done

# SwiftShader is only installed when --gpu was not requested.
if [ "$USE_GPU" != true ]; then
  install_swiftshader
fi
install_vulkan_sdk "${VULKAN_SDK_VERSION}"
7 changes: 4 additions & 3 deletions .ci/scripts/test_model_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Usage: test_model_e2e.sh <device> <hf_model> <quant_name> [model_dir] [mode]
Build and run end-to-end tests for CUDA/Metal/Vulkan/XNNPACK models.

Arguments:
device cuda, metal, or xnnpack (required)
device cuda, metal, vulkan, or xnnpack (required)

hf_model HuggingFace model ID (required)
Supported models:
Expand Down Expand Up @@ -47,6 +47,7 @@ Examples:
test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output"
test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./model_output"
test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output"
test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output"
test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming"
Expand Down Expand Up @@ -262,8 +263,8 @@ echo "::endgroup::"

echo "::group::Build $MODEL_NAME Runner"

if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "xnnpack" ]; then
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', or 'xnnpack'."
if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "vulkan" ] && [ "$DEVICE" != "xnnpack" ]; then
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', 'vulkan', or 'xnnpack'."
exit 1
fi

Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1340,6 +1340,41 @@ jobs:
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"

test-vulkan-genai:
  name: test-vulkan-genai
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  permissions:
    id-token: write
    contents: read
  with:
    runner: linux.g5.4xlarge.nvidia.gpu
    docker-image: ci-image:executorch-ubuntu-22.04-clang12
    gpu-arch-type: cuda
    gpu-arch-version: "12.6"
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      set -eux

      # Activate the last conda env reported by `conda env list`; the generic
      # Linux job uses the base env rather than the one set up by the image.
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"

      # Install the Vulkan SDK. --gpu skips SwiftShader so the NVIDIA driver's
      # Vulkan ICD is used.
      source .ci/scripts/setup-vulkan-linux-deps.sh --gpu

      # Set up ExecuTorch with the Vulkan backend enabled.
      PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" .ci/scripts/setup-linux.sh --build-tool "cmake"

      # Export parakeet (quantized-8da4w) for the Vulkan backend.
      bash .ci/scripts/export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "quantized-8da4w" "${RUNNER_ARTIFACT_DIR}"

      # Build the runner and run the end-to-end test on the exported artifact.
      bash .ci/scripts/test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "quantized-8da4w" "${RUNNER_ARTIFACT_DIR}"


test-coreml-bc-macos:
name: test-coreml-bc-macos (${{ matrix.runner }})
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
Expand Down
20 changes: 15 additions & 5 deletions examples/models/parakeet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,21 @@ if(TARGET optimized_native_cpu_ops_lib)
endif()
endif()

# CPU-only builds need quantized and custom ops
if(NOT EXECUTORCH_BUILD_CUDA)
list(APPEND link_libraries quantized_ops_lib custom_ops)
executorch_target_link_options_shared_lib(quantized_ops_lib)
executorch_target_link_options_shared_lib(custom_ops)
# Quantized and custom ops: link each library only when its target exists in
# this build. Every existing target is appended to link_libraries; targets
# that are not IMPORTED are additionally passed to
# executorch_target_link_options_shared_lib.
foreach(_ops_lib IN ITEMS quantized_ops_lib custom_ops)
  if(TARGET ${_ops_lib})
    list(APPEND link_libraries ${_ops_lib})
    get_target_property(_is_imported ${_ops_lib} IMPORTED)
    if(NOT _is_imported)
      executorch_target_link_options_shared_lib(${_ops_lib})
    endif()
  endif()
endforeach()

# XNNPACK
Expand Down
Loading