diff --git a/.ci/scripts/export_model_artifact.sh b/.ci/scripts/export_model_artifact.sh index f8dc663ad04..c80d955c6e5 100755 --- a/.ci/scripts/export_model_artifact.sh +++ b/.ci/scripts/export_model_artifact.sh @@ -14,7 +14,7 @@ Usage: export_model_artifact.sh [quant_name] [output_dir] [m Export a HuggingFace model to CUDA/Metal/XNNPACK format with optional quantization. Arguments: - device cuda, metal, or xnnpack (required) + device cuda, metal, vulkan, or xnnpack (required) hf_model HuggingFace model ID (required) Supported models: @@ -49,6 +49,7 @@ Examples: export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" export_model_artifact.sh cuda-windows "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./output" export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output" + export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./output" export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output" export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output" export_model_artifact.sh xnnpack "mistralai/Voxtral-Mini-4B-Realtime-2602" "quantized-8da4w" "./output" @@ -103,9 +104,11 @@ case "$DEVICE" in ;; xnnpack) ;; + vulkan) + ;; *) echo "Error: Unsupported device '$DEVICE'" - echo "Supported devices: cuda, cuda-windows, metal, xnnpack" + echo "Supported devices: cuda, cuda-windows, metal, vulkan, xnnpack" exit 1 ;; esac @@ -218,8 +221,8 @@ case "$QUANT_NAME" in EXTRA_ARGS="--qlinear fpa4w --qlinear_encoder fpa4w" ;; quantized-8da4w) - if [ "$DEVICE" != "xnnpack" ]; then - echo "Error: quantized-8da4w is only supported with xnnpack device" + if [ "$DEVICE" != "xnnpack" ] && [ "$DEVICE" != "vulkan" ]; then + echo "Error: quantized-8da4w is only supported with xnnpack or vulkan device" exit 1 fi EXTRA_ARGS="--qlinear 8da4w --qlinear_group_size 32 --qlinear_encoder 8da4w --qlinear_encoder_group_size 32" @@ -242,9 +245,11 @@ pip list if [ "$MODEL_NAME" = "parakeet" ]; then pip install -r examples/models/parakeet/install_requirements.txt - # Set dtype based on backend (XNNPACK uses fp32, CUDA/Metal use bf16) + # Set dtype based on backend (XNNPACK uses fp32, Vulkan uses fp16, CUDA/Metal use bf16) if [ "$DEVICE" = "xnnpack" ]; then DTYPE_ARG="" + elif [ "$DEVICE" = "vulkan" ]; then + DTYPE_ARG="--vulkan_force_fp16" else DTYPE_ARG="--dtype bf16" fi diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh index cd99ff0d6ff..d7b8536167e 100755 --- a/.ci/scripts/setup-vulkan-linux-deps.sh +++ b/.ci/scripts/setup-vulkan-linux-deps.sh @@ -45,5 +45,15 @@ install_vulkan_sdk() { VULKAN_SDK_VERSION="1.4.321.1" -install_swiftshader +# Parse arguments: --gpu skips SwiftShader (use NVIDIA driver's Vulkan ICD instead) +USE_GPU=false +for arg in "$@"; do + case $arg in + --gpu) USE_GPU=true ;; + esac +done + +if [ "$USE_GPU" = false ]; then + install_swiftshader +fi install_vulkan_sdk "${VULKAN_SDK_VERSION}" diff --git a/.ci/scripts/test_model_e2e.sh b/.ci/scripts/test_model_e2e.sh index 7014b3caef6..8c574b7d29f 100755 --- a/.ci/scripts/test_model_e2e.sh +++ b/.ci/scripts/test_model_e2e.sh @@ -14,7 +14,7 @@ Usage: test_model_e2e.sh [model_dir] [mode] Build and run end-to-end tests for CUDA/Metal/XNNPACK models. Arguments: - device cuda, metal, or xnnpack (required) + device cuda, metal, vulkan, or xnnpack (required) hf_model HuggingFace model ID (required) Supported models: @@ -47,6 +47,7 @@ Examples: test_model_e2e.sh metal "openai/whisper-small" "non-quantized" test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output" test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output" + test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "non-quantized" "./model_output" test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output" test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output" test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming" @@ -262,8 +263,8 @@ echo "::endgroup::" echo "::group::Build $MODEL_NAME Runner" -if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "xnnpack" ]; then - echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', or 'xnnpack'." +if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "vulkan" ] && [ "$DEVICE" != "xnnpack" ]; then + echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', 'vulkan', or 'xnnpack'." exit 1 fi diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 0062baf63e3..8c9aad082fd 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1340,6 +1340,41 @@ jobs: python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*" python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*" + test-vulkan-genai: + name: test-vulkan-genai + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + runner: linux.g5.4xlarge.nvidia.gpu + docker-image: ci-image:executorch-ubuntu-22.04-clang12 + gpu-arch-type: cuda + gpu-arch-version: "12.6" + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + set -eux + + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + # Setup Vulkan SDK (no SwiftShader — use NVIDIA driver's Vulkan ICD) + source .ci/scripts/setup-vulkan-linux-deps.sh --gpu + + # Setup ExecuTorch + PYTHON_EXECUTABLE=python \ + CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \ + .ci/scripts/setup-linux.sh --build-tool "cmake" + + # Export parakeet with Vulkan backend + bash .ci/scripts/export_model_artifact.sh vulkan "nvidia/parakeet-tdt" "quantized-8da4w" "${RUNNER_ARTIFACT_DIR}" + + # Build runner and test e2e + bash .ci/scripts/test_model_e2e.sh vulkan "nvidia/parakeet-tdt" "quantized-8da4w" "${RUNNER_ARTIFACT_DIR}" + test-coreml-bc-macos: name: test-coreml-bc-macos (${{ matrix.runner }}) uses: pytorch/test-infra/.github/workflows/macos_job.yml@main diff --git a/examples/models/parakeet/CMakeLists.txt b/examples/models/parakeet/CMakeLists.txt index 9354afe5f86..12319b2b76e 100644 --- a/examples/models/parakeet/CMakeLists.txt +++ b/examples/models/parakeet/CMakeLists.txt @@ -40,11 +40,21 @@ if(TARGET optimized_native_cpu_ops_lib) endif() endif() -# CPU-only builds need quantized and custom ops -if(NOT EXECUTORCH_BUILD_CUDA) - list(APPEND link_libraries quantized_ops_lib custom_ops) - executorch_target_link_options_shared_lib(quantized_ops_lib) - executorch_target_link_options_shared_lib(custom_ops) +# Quantized and custom ops +if(TARGET quantized_ops_lib) + list(APPEND link_libraries quantized_ops_lib) + get_target_property(_is_imported quantized_ops_lib IMPORTED) + if(NOT _is_imported) + executorch_target_link_options_shared_lib(quantized_ops_lib) + endif() +endif() + +if(TARGET custom_ops) + list(APPEND link_libraries custom_ops) + get_target_property(_is_imported custom_ops IMPORTED) + if(NOT _is_imported) + executorch_target_link_options_shared_lib(custom_ops) + endif() endif() # XNNPACK