diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh index a0dcb75ad4a..e088cc9442b 100755 --- a/.ci/scripts/setup-vulkan-linux-deps.sh +++ b/.ci/scripts/setup-vulkan-linux-deps.sh @@ -1,4 +1,3 @@ - #!/bin/bash # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. @@ -43,7 +42,35 @@ install_vulkan_sdk() { export PATH="${PATH}:${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64/bin/" } +setup_real_gpu_icd() { + # On a real-GPU runner the system Vulkan ICD is installed by the GPU driver. + # The loader searches both /etc/vulkan/icd.d and /usr/share/vulkan/icd.d, so + # test each directory on its own: one `ls` over both globs exits non-zero as + # soon as either is empty. If a system ICD is present, do NOT use SwiftShader + # so the real device (and its fp16/int16/dot-product shader variants) is + # exercised. Fall back to SwiftShader if no system ICD is found so the job + # stays green either way. + if compgen -G "/etc/vulkan/icd.d/*.json" >/dev/null \ + || compgen -G "/usr/share/vulkan/icd.d/*.json" >/dev/null; then + echo "System Vulkan ICD(s) detected:" + ls /etc/vulkan/icd.d/*.json /usr/share/vulkan/icd.d/*.json 2>/dev/null || true + unset ETVK_USING_SWIFTSHADER || true + else + echo "WARNING: no system Vulkan ICD found; using SwiftShader." + install_swiftshader + fi +} + VULKAN_SDK_VERSION="1.4.321.1" -install_swiftshader -install_vulkan_sdk "${VULKAN_SDK_VERSION}" +# The no-argument default installs SwiftShader so the existing CPU-runner CI is +# unchanged. Pass "real-gpu" to prefer a real system ICD when one is present. +case "${1:-swiftshader}" in + real-gpu) + install_vulkan_sdk "${VULKAN_SDK_VERSION}" + setup_real_gpu_icd + ;; + swiftshader | *) + install_swiftshader + install_vulkan_sdk "${VULKAN_SDK_VERSION}" + ;; +esac diff --git a/.ci/scripts/setup-vulkan-windows-deps.ps1 b/.ci/scripts/setup-vulkan-windows-deps.ps1 new file mode 100644 index 00000000000..335f457714f --- /dev/null +++ b/.ci/scripts/setup-vulkan-windows-deps.ps1 @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Install glslc (the Vulkan shader compiler) on Windows via conda-forge's +# shaderc package, and make sure it is on PATH. glslc is the only build-time +# Vulkan dependency -- the Vulkan headers and the volk loader come from the +# in-tree submodules -- so this avoids depending on the heavyweight LunarG SDK +# installer. Requires conda to be available (the callers create/activate an env). + +$ErrorActionPreference = "Stop" + +Write-Host "Installing shaderc (provides glslc) from conda-forge..." +conda install -y -c conda-forge shaderc +if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to install shaderc from conda-forge (exit ${LASTEXITCODE})" + exit 1 +} + +$glslc = Get-Command glslc -ErrorAction SilentlyContinue +if (-not $glslc) { + Write-Error "glslc not found on PATH after installing shaderc" + exit 1 +} + +# Expose glslc to the current process and, when running as a GitHub Actions step, +# to subsequent steps. +$glslcDir = Split-Path -Parent $glslc.Source +$env:PATH = "${glslcDir};${env:PATH}" +if ($env:GITHUB_PATH) { + Add-Content -Path $env:GITHUB_PATH -Value $glslcDir +} + +Write-Host "glslc available at $($glslc.Source)" +& glslc --version diff --git a/.ci/scripts/setup-windows-msvc-vulkan.ps1 b/.ci/scripts/setup-windows-msvc-vulkan.ps1 new file mode 100644 index 00000000000..7fa2006e83f --- /dev/null +++ b/.ci/scripts/setup-windows-msvc-vulkan.ps1 @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Build-validation for the Vulkan backend under MSVC on Windows. Mirrors +# setup-windows-msvc.ps1 but installs glslc (the Vulkan shader compiler) and +# configures/builds the vulkan_backend target. 
This is a bring-up job: it exists +# to surface MSVC portability issues in the Vulkan/volk/VMA code, so it may need +# iteration. + +conda create --yes --quiet -n et python=3.12 +conda activate et + +# Install cmake +conda install -y cmake + +# Activate the VS environment - this is required for MSVC to work. +& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64 + +# Install glslc (via conda-forge shaderc) and put it on PATH in this process. +.ci/scripts/setup-vulkan-windows-deps.ps1 + +# Install CI requirements +pip install -r .ci/docker/requirements-ci.txt + +$buildDir = "cmake-out-vulkan" +if (Test-Path -Path $buildDir) { + Remove-Item -Path $buildDir -Recurse -Force +} +New-Item -Path $buildDir -ItemType Directory + +cmake -S . -B $buildDir ` + -DCMAKE_BUILD_TYPE=Release ` + -DEXECUTORCH_BUILD_VULKAN=ON ` + -DPYTHON_EXECUTABLE=python + +if ($LASTEXITCODE -ne 0) { + Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE." + exit $LASTEXITCODE +} + +cmake --build $buildDir --config Release --target vulkan_backend -j16 + +if ($LASTEXITCODE -ne 0) { + Write-Host "Vulkan backend MSVC build failed. Exit code: $LASTEXITCODE." + exit $LASTEXITCODE +} + +Write-Host "Vulkan backend MSVC build completed successfully!" diff --git a/.ci/scripts/test_backend.sh b/.ci/scripts/test_backend.sh index fe9b564a18f..80352fe1393 100755 --- a/.ci/scripts/test_backend.sh +++ b/.ci/scripts/test_backend.sh @@ -51,8 +51,15 @@ if [[ "$FLOW" == *qnn* ]]; then fi if [[ "$FLOW" == *vulkan* ]]; then - # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate. - source .ci/scripts/setup-vulkan-linux-deps.sh + # Setup the Vulkan SDK and select an ICD: use the real system GPU ICD when one + # is present (real-GPU runner), otherwise fall back to SwiftShader (CPU + # runner). The Vulkan loader searches both standard ICD directories. 
+ if compgen -G "/etc/vulkan/icd.d/*.json" >/dev/null \ + || compgen -G "/usr/share/vulkan/icd.d/*.json" >/dev/null; then + source .ci/scripts/setup-vulkan-linux-deps.sh "real-gpu" + else + source .ci/scripts/setup-vulkan-linux-deps.sh "swiftshader" + fi EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON" fi diff --git a/.ci/scripts/wheel/pre_build_script.sh b/.ci/scripts/wheel/pre_build_script.sh index 365398d27a4..431b5c3fa90 100755 --- a/.ci/scripts/wheel/pre_build_script.sh +++ b/.ci/scripts/wheel/pre_build_script.sh @@ -69,3 +69,37 @@ if [[ "$(uname -s)" == "Linux" && "$(uname -m)" == "x86_64" ]]; then echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}" >> "${GITHUB_ENV}" echo "QNN SDK downloaded to ${QNN_SDK_ROOT}" fi + +# Provision the Vulkan SDK (glslc) and submodules ONLY when explicitly requested +# via EXECUTORCH_BUILD_VULKAN. The default wheel build leaves this unset, so it +# does no extra work (no submodule fetch, no SDK download) and is unaffected. +if [[ "${EXECUTORCH_BUILD_VULKAN:-0}" != "0" \ + && "${EXECUTORCH_BUILD_VULKAN:-OFF}" != "OFF" ]]; then + echo "Initializing Vulkan backend third-party submodules..." + VULKAN_SUBMODULES=( + backends/vulkan/third-party/Vulkan-Headers + backends/vulkan/third-party/volk + backends/vulkan/third-party/VulkanMemoryAllocator + ) + if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then + git -c http.sslBackend=openssl submodule update --init "${VULKAN_SUBMODULES[@]}" + echo "Installing Vulkan SDK for Windows wheel build..." + powershell -ExecutionPolicy Bypass -File .ci/scripts/setup-vulkan-windows-deps.ps1 + else + git submodule update --init "${VULKAN_SUBMODULES[@]}" + echo "Installing Vulkan SDK for Linux wheel build..." 
+ VULKAN_SDK_VERSION="1.4.341.1" + _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz" + _vulkan_sdk_dir="${HOME}/.vulkan-sdk/${VULKAN_SDK_VERSION}" + mkdir -p "${_vulkan_sdk_dir}" + curl --silent --show-error --location --fail --retry 3 --retry-all-errors \ + --output /tmp/vulkansdk.tar.xz "${_vulkan_sdk_url}" + tar -C "${_vulkan_sdk_dir}" -xJf /tmp/vulkansdk.tar.xz + VULKAN_SDK="${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64" + export VULKAN_SDK + export PATH="${VULKAN_SDK}/bin:${PATH}" + echo "VULKAN_SDK=${VULKAN_SDK}" >> "${GITHUB_ENV}" + echo "${VULKAN_SDK}/bin" >> "${GITHUB_PATH}" + echo "Vulkan SDK installed to ${VULKAN_SDK}" + fi +fi diff --git a/.ci/scripts/wheel/test_linux.py b/.ci/scripts/wheel/test_linux.py index c441bcec91f..7545b4c6f20 100644 --- a/.ci/scripts/wheel/test_linux.py +++ b/.ci/scripts/wheel/test_linux.py @@ -31,6 +31,13 @@ ), f"OpenvinoBackend not found in registered backends: {registered}" print("✓ OpenvinoBackend is registered") + # Vulkan backend is optional: only present when the wheel was built with + # EXECUTORCH_BUILD_VULKAN=1 and the Vulkan SDK (glslc) was available. + if "VulkanBackend" in registered: + print("✓ VulkanBackend is registered") + else: + print("⚠ VulkanBackend not registered (expected for the default wheel)") + test_base.run_tests( model_tests=[ test_base.ModelTest( diff --git a/.ci/scripts/wheel/test_windows.py b/.ci/scripts/wheel/test_windows.py index d2d8b29a534..ba141d4498c 100644 --- a/.ci/scripts/wheel/test_windows.py +++ b/.ci/scripts/wheel/test_windows.py @@ -5,6 +5,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import platform from typing import List import torch @@ -15,6 +16,7 @@ from executorch.examples.xnnpack.quantization.utils import quantize as quantize_xnn from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower from executorch.extension.pybindings.portable_lib import ( + _get_registered_backend_names, _load_for_executorch_from_buffer, ) from test_base import ModelTest @@ -63,6 +65,15 @@ def run_tests(model_tests: List[ModelTest]) -> None: if __name__ == "__main__": + if platform.system() == "Windows": + registered = _get_registered_backend_names() + # Vulkan backend is optional: only present when the wheel was built with + # EXECUTORCH_BUILD_VULKAN=1 and the Vulkan SDK (glslc) was available. + if "VulkanBackend" in registered: + print("✓ VulkanBackend is registered") + else: + print("⚠ VulkanBackend not registered (expected for the default wheel)") + run_tests( model_tests=[ ModelTest( diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml index 0461527b073..d8300b9c72a 100644 --- a/.github/workflows/test-backend-vulkan.yml +++ b/.github/workflows/test-backend-vulkan.yml @@ -17,6 +17,8 @@ concurrency: cancel-in-progress: true jobs: + # Default coverage: builds + runs on SwiftShader (software Vulkan) on CPU + # runners. Runs on every PR and nightly. test-vulkan: uses: ./.github/workflows/_test_backend.yml with: @@ -28,3 +30,47 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 120 run-linux: true + + # Real-GPU coverage on an NVIDIA runner (exercises the fp16/int16/dot-product + # shader variants that SwiftShader cannot). Standalone job (does not go through + # _test_backend.yml) so the GPU is attached via gpu-arch-type, mirroring + # cuda.yml. Not run on pull_request to avoid GPU cost and to keep it from + # blocking PR merges; runs on nightly/push/dispatch. 
+ test-vulkan-real-gpu: + if: ${{ github.event_name != 'pull_request' }} + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + timeout: 90 + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "12.6" + use-custom-docker-registry: false + submodules: recursive + ref: ${{ github.sha }} + script: | + set -eux + + # Install the Vulkan SDK (glslc) and select a real system ICD. The NVIDIA + # driver on this runner provides the ICD; install the loader as well. + # NOTE: first-run check - inspect the vulkaninfo output below to confirm a + # real NVIDIA device is selected (not llvmpipe/SwiftShader). If no system + # ICD is present, setup-vulkan-linux-deps.sh falls back to SwiftShader. + sudo apt-get update && sudo apt-get install -y libvulkan1 vulkan-tools || true + source .ci/scripts/setup-vulkan-linux-deps.sh real-gpu + vulkaninfo --summary || true + + PYTHON_EXECUTABLE=python ./install_executorch.sh + + cmake -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_VULKAN=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DPYTHON_EXECUTABLE=python \ + -Bcmake-out . + cmake --build cmake-out -j4 --target executor_runner + + # Export a model to the Vulkan backend and run it on the GPU. + python -m examples.vulkan.export -m mv2 -o . + ./cmake-out/executor_runner --model_path mv2.pte diff --git a/.github/workflows/vulkan-windows.yml b/.github/workflows/vulkan-windows.yml new file mode 100644 index 00000000000..2555e4289a8 --- /dev/null +++ b/.github/workflows/vulkan-windows.yml @@ -0,0 +1,48 @@ +name: Test Vulkan Backend Windows Build + +# Build-validation for the Vulkan backend under MSVC on Windows. This is a +# bring-up job (no GPU): it confirms the backend configures and compiles with +# MSVC. Real-GPU Windows E2E is a follow-up once a Windows Vulkan GPU runner is +# available. 
Path-filtered and not part of the required PR checks so it can be +# iterated on without blocking unrelated work. + +on: + push: + branches: + - main + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - backends/vulkan/** + - .ci/scripts/setup-vulkan-windows-deps.ps1 + - .ci/scripts/setup-windows-msvc-vulkan.ps1 + - .github/workflows/vulkan-windows.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + build-vulkan-windows-msvc: + name: build-vulkan-windows-msvc + uses: pytorch/test-infra/.github/workflows/windows_job.yml@main + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + git config --global http.sslBackend openssl + git submodule update --init backends/vulkan/third-party/Vulkan-Headers backends/vulkan/third-party/volk backends/vulkan/third-party/VulkanMemoryAllocator + git submodule update --init + conda init powershell + powershell -Command "& { + Set-PSDebug -Trace 1 + \$ErrorActionPreference = 'Stop' + \$PSNativeCommandUseErrorActionPreference = \$true + .ci/scripts/setup-windows-msvc-vulkan.ps1 + }" diff --git a/backends/vulkan/CMakeLists.txt b/backends/vulkan/CMakeLists.txt index d9acde79ecf..6945d67779c 100644 --- a/backends/vulkan/CMakeLists.txt +++ b/backends/vulkan/CMakeLists.txt @@ -41,6 +41,24 @@ set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers) set(VOLK_PATH ${VULKAN_THIRD_PARTY_PATH}/volk) set(VMA_PATH ${VULKAN_THIRD_PARTY_PATH}/VulkanMemoryAllocator) +# These third-party dependencies are git submodules. They are not part of the +# default submodule set checked out by install_executorch.py, so fail early with +# an actionable message rather than a confusing missing-header error. 
+if(NOT EXISTS "${VOLK_PATH}/volk.c" + OR NOT EXISTS "${VULKAN_HEADERS_PATH}/include/vulkan/vulkan.h" + OR NOT EXISTS "${VMA_PATH}/include/vk_mem_alloc.h" +) + message( + FATAL_ERROR + "The Vulkan backend third-party submodules are missing. " + "Run the following from the repository root:\n" + " git submodule update --init " + "backends/vulkan/third-party/Vulkan-Headers " + "backends/vulkan/third-party/volk " + "backends/vulkan/third-party/VulkanMemoryAllocator" + ) +endif() + set(COMMON_INCLUDES $ $ @@ -49,7 +67,11 @@ set(COMMON_INCLUDES # Compile settings -set(VULKAN_CXX_FLAGS "-fexceptions") +# Exceptions are required: the vk_api layer throws on Vulkan errors (see +# vk_api/Exception.h). MSVC does not understand -fexceptions and enables C++ +# exceptions via /EHsc instead, so select the flag per compiler. +set(VULKAN_CXX_FLAGS "$<$>:-fexceptions>") +list(APPEND VULKAN_CXX_FLAGS "$<$:/EHsc>") list(APPEND VULKAN_CXX_FLAGS "-DUSE_VULKAN_WRAPPER") list(APPEND VULKAN_CXX_FLAGS "-DUSE_VULKAN_VOLK") diff --git a/backends/vulkan/cmake/ShaderLibrary.cmake b/backends/vulkan/cmake/ShaderLibrary.cmake index e2045cbf7da..0fb99757b0c 100644 --- a/backends/vulkan/cmake/ShaderLibrary.cmake +++ b/backends/vulkan/cmake/ShaderLibrary.cmake @@ -24,17 +24,33 @@ if(NOT EXECUTORCH_ROOT) message("WARNING: EXECUTORCH_ROOT is not set! A failure is likely imminent.") endif() -find_program(GLSLC_PATH glslc PATHS $ENV{PATH}) +# find_program already searches the PATH environment variable and appends the +# platform executable suffix (.exe on Windows). Add the Vulkan SDK bin dir as a +# hint so glslc is found on Windows even when only VULKAN_SDK is set. +find_program(GLSLC_PATH glslc HINTS $ENV{VULKAN_SDK}/bin $ENV{VULKAN_SDK}/Bin) if(NOT GLSLC_PATH AND EXECUTORCH_BUILD_VULKAN) - message( - FATAL_ERROR - "glslc from the Vulkan SDK must be installed to build the Vulkan backend. 
" - "Please install the Vulkan SDK 1.4.341.1 or newer from " - "https://vulkan.lunarg.com/sdk/home and ensure that the glslc binary is in your PATH. " - "Note that the glslc distributed with the Android NDK is not compatible since it " - "does not support the GL_EXT_integer_dot_product extension. " - ) + if(EXECUTORCH_BUILD_WHEEL_DO_NOT_USE) + # In a wheel/pybind build, degrade gracefully so the wheel can still be + # produced without the Vulkan backend rather than failing the whole build. + message( + STATUS + "glslc not found; the Vulkan backend will not be included in the wheel." + ) + set(EXECUTORCH_BUILD_VULKAN + OFF + CACHE BOOL "" FORCE + ) + else() + message( + FATAL_ERROR + "glslc from the Vulkan SDK must be installed to build the Vulkan backend. " + "Please install the Vulkan SDK 1.4.341.1 or newer from " + "https://vulkan.lunarg.com/sdk/home and ensure that the glslc binary is in your PATH. " + "Note that the glslc distributed with the Android NDK is not compatible since it " + "does not support the GL_EXT_integer_dot_product extension. " + ) + endif() endif() # Required to enable linking with --whole-archive diff --git a/backends/vulkan/partitioner/vulkan_partitioner.py b/backends/vulkan/partitioner/vulkan_partitioner.py index 60b4c3346f3..fb51a0edfad 100644 --- a/backends/vulkan/partitioner/vulkan_partitioner.py +++ b/backends/vulkan/partitioner/vulkan_partitioner.py @@ -378,9 +378,15 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult: exported_program.graph_module ) - texture_limits: utils.ImageExtents = self.options.get( - "texture_limits", utils.DEFAULT_TEXTURE_LIMITS - ) + # small_texture_limits opts into the conservative 3D texture limit that is + # compatible with most desktop/laptop GPUs (the Vulkan spec only guarantees + # 2048). An explicit texture_limits always takes precedence. 
+ if "texture_limits" in self.options: + texture_limits: utils.ImageExtents = self.options["texture_limits"] + elif self.options.get("small_texture_limits", False): + texture_limits = utils.SMALL_TEXTURE_LIMITS + else: + texture_limits = utils.DEFAULT_TEXTURE_LIMITS buffer_limit: int = self.options.get("buffer_limit", utils.DEFAULT_BUFFER_LIMIT) capability_partitioner = CapabilityBasedPartitioner( exported_program.graph_module, diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp index d090a62f370..a2c69c45cd9 100644 --- a/backends/vulkan/runtime/api/Context.cpp +++ b/backends/vulkan/runtime/api/Context.cpp @@ -212,6 +212,14 @@ void Context::register_blit( vkapi::PipelineBarrier& pipeline_barrier, vkapi::VulkanImage& src, vkapi::VulkanImage& dst) { + // vkCmdBlitImage requires a queue with graphics or transfer capability. The + // queue is selected by compute capability only, so on desktop GPUs that + // expose compute-only queue families this could otherwise be invalid usage. + // On mobile the single universal queue always has these bits set. + VK_CHECK_COND( + queue_.capabilities & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT), + "The Vulkan queue selected for compute does not support blit operations " + "(neither VK_QUEUE_GRAPHICS_BIT nor VK_QUEUE_TRANSFER_BIT is set)."); cmd_.insert_barrier(pipeline_barrier); cmd_.blit(src, dst); } diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp index 47cefa1031a..cba78403358 100644 --- a/backends/vulkan/runtime/api/containers/Tensor.cpp +++ b/backends/vulkan/runtime/api/containers/Tensor.cpp @@ -680,7 +680,11 @@ vkapi::VulkanBuffer allocate_buffer( return vkapi::VulkanBuffer(); } - VK_CHECK_COND(numel <= context_ptr->adapter_ptr()->max_buffer_numel()); + // max_buffer_numel() returns maxStorageBufferRange, which is a size in bytes, + // so compare it against the buffer size in bytes (not the element count). 
+ VK_CHECK_COND( + element_size(dtype) * numel <= + context_ptr->adapter_ptr()->max_buffer_numel()); return adapter_ptr->vma().create_storage_buffer( element_size(dtype) * numel, allocate_memory); diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py index 93d6f9e41aa..69c87563bbd 100644 --- a/backends/vulkan/runtime/gen_vulkan_spv.py +++ b/backends/vulkan/runtime/gen_vulkan_spv.py @@ -1123,6 +1123,7 @@ def compile_spirv(shader_paths_pair) -> Tuple[str, str]: # Construct name of SPIR-V file to be compiled spv_out_path = os.path.join(output_dir, f"{src_file_name}.spv") + cached_spv_out_path = None if cache_dir is not None: # Construct the file names of cached SPIR-V file to check if they exist # in the cache. @@ -1160,7 +1161,9 @@ def compile_spirv(shader_paths_pair) -> Tuple[str, str]: subprocess.run(cmd_no_opt, check=True, capture_output=True) except subprocess.CalledProcessError as e_no_opt: # Delete any existing cached SPIR-V file if it exists - if os.path.exists(cached_spv_out_path): + if cached_spv_out_path is not None and os.path.exists( + cached_spv_out_path + ): os.remove(cached_spv_out_path) raise RuntimeError( @@ -1169,7 +1172,9 @@ def compile_spirv(shader_paths_pair) -> Tuple[str, str]: else: # Delete any existing cached SPIR-V file if it exists - if os.path.exists(cached_spv_out_path): + if cached_spv_out_path is not None and os.path.exists( + cached_spv_out_path + ): os.remove(cached_spv_out_path) raise RuntimeError(f"{err_msg_base} {e.stderr}") from e diff --git a/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml b/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml index bd5c2377cf6..05b26adfb24 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml @@ -12,6 +12,10 @@ coopmat_mm: parameter_names_with_default_values: + # GL_KHR_cooperative_matrix requires SPIR-V 1.6, so target Vulkan 1.3 when + # compiling this shader (the 
default target-env of 1.1 is too low). Other + # shaders are unaffected and keep the default. + VK_VERSION: '1.3' DTYPE: float PRECISION: highp WEIGHT_LAYOUT: row_major diff --git a/backends/vulkan/runtime/vk_api/Adapter.cpp b/backends/vulkan/runtime/vk_api/Adapter.cpp index b762c95205b..b28c7601687 100644 --- a/backends/vulkan/runtime/vk_api/Adapter.cpp +++ b/backends/vulkan/runtime/vk_api/Adapter.cpp @@ -140,6 +140,20 @@ VkDevice create_logical_device( enabled_device_extensions, requested_device_extensions); + // Enable the base device features that ExecuTorch shaders rely on, but only + // those that the physical device reports as supported. With pEnabledFeatures + // left null, all base features are disabled; using a shader that performs + // e.g. int16 arithmetic without enabling shaderInt16 is invalid usage and + // crashes on drivers that enforce it. Unsupported features stay VK_FALSE, so + // this is a no-op on devices that lack them. + VkPhysicalDeviceFeatures enabled_features{}; + enabled_features.shaderInt16 = + physical_device.supports_int16_shader_types ? VK_TRUE : VK_FALSE; + enabled_features.shaderInt64 = + physical_device.supports_int64_shader_types ? VK_TRUE : VK_FALSE; + enabled_features.shaderFloat64 = + physical_device.supports_float64_shader_types ? 
VK_TRUE : VK_FALSE; + VkDeviceCreateInfo device_create_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // sType nullptr, // pNext @@ -151,7 +165,7 @@ VkDevice create_logical_device( static_cast( enabled_device_extensions.size()), // enabledExtensionCount enabled_device_extensions.data(), // ppEnabledExtensionNames - nullptr, // pEnabledFeatures + &enabled_features, // pEnabledFeatures }; void* extension_list_top = nullptr; diff --git a/backends/vulkan/runtime/vk_api/Runtime.cpp b/backends/vulkan/runtime/vk_api/Runtime.cpp index 3d3a146d80d..86cf182ece0 100644 --- a/backends/vulkan/runtime/vk_api/Runtime.cpp +++ b/backends/vulkan/runtime/vk_api/Runtime.cpp @@ -10,6 +10,7 @@ #include +#include #include #include #include @@ -239,19 +240,62 @@ VkDebugReportCallbackEXT create_debug_report_callback( // Adapter selection methods // -uint32_t select_first(const std::vector& devices) { +// Ranks compute-capable devices so that a real GPU is preferred over a software +// rasterizer (e.g. SwiftShader/lavapipe, which report as CPU). On a single-GPU +// system (e.g. mobile) there is only one candidate, so the choice is unchanged. +int compute_device_priority(const PhysicalDevice& device) { + if (device.num_compute_queues == 0) { + return -1; // not compute-capable, never select + } + switch (device.properties.deviceType) { + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: + return 5; + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: + return 4; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: + return 3; + case VK_PHYSICAL_DEVICE_TYPE_CPU: + return 1; + default: + return 2; + } +} + +uint32_t select_compute_device( + const std::vector& devices) { + const uint32_t invalid = + devices.size() + 1; // out of range signals invalidity if (devices.empty()) { - return devices.size() + 1; // return out of range to signal invalidity + return invalid; + } + + // Allow overriding device selection via the ETVK_DEVICE_INDEX environment + // variable, which is useful on multi-GPU desktop systems. 
Invalid values fall + // through to automatic selection below. + const char* device_index_env = std::getenv("ETVK_DEVICE_INDEX"); + if (device_index_env != nullptr) { + char* end = nullptr; + const long idx = std::strtol(device_index_env, &end, 10); + if (end != device_index_env && *end == '\0' && idx >= 0 && + static_cast(idx) < devices.size() && + devices[static_cast(idx)].first.num_compute_queues > 0) { + return static_cast(idx); + } } - // Select the first adapter that has compute capability + // Otherwise pick the highest-priority compute-capable device, preferring the + // first one on ties (preserving the previous first-match behavior). + uint32_t best_i = invalid; + int best_priority = -1; for (size_t i = 0; i < devices.size(); ++i) { - if (devices[i].first.num_compute_queues > 0) { - return i; + const int priority = compute_device_priority(devices[i].first); + if (priority > best_priority) { + best_priority = priority; + best_i = static_cast(i); } } - return devices.size() + 1; + return best_i; } // @@ -312,7 +356,7 @@ Runtime::Runtime(const RuntimeConfig config) try { switch (config.default_selector) { case AdapterSelector::First: - default_adapter_i_ = create_adapter(select_first); + default_adapter_i_ = create_adapter(select_compute_device); } } catch (...) 
{ } diff --git a/backends/vulkan/runtime/vk_api/memory/vma_api.h b/backends/vulkan/runtime/vk_api/memory/vma_api.h index dc7abbf8b1e..cf267a27d11 100644 --- a/backends/vulkan/runtime/vk_api/memory/vma_api.h +++ b/backends/vulkan/runtime/vk_api/memory/vma_api.h @@ -25,17 +25,28 @@ #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 #define VMA_VULKAN_VERSION 1002000 -#ifdef __clang__ +#if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnullability-completeness" #pragma clang diagnostic ignored "-Wunused-variable" -#endif /* __clang__ */ +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4100 4101 4189) +#endif #include -#ifdef __clang__ +#if defined(__clang__) #pragma clang diagnostic pop -#endif /* __clang__ */ +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif #else // !ETVK_USE_META_VMA @@ -71,16 +82,27 @@ */ #endif /* VULKAN_DEBUG */ -#ifdef __clang__ +#if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnullability-completeness" #pragma clang diagnostic ignored "-Wunused-variable" -#endif /* __clang__ */ +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4100 4101 4189) +#endif #include -#ifdef __clang__ +#if defined(__clang__) #pragma clang diagnostic pop -#endif /* __clang__ */ +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif #endif // ETVK_USE_META_VMA diff --git a/backends/vulkan/test/test_vulkan_compile_options.py b/backends/vulkan/test/test_vulkan_compile_options.py new file mode 100644 index 00000000000..f44850d2915 --- 
/dev/null +++ b/backends/vulkan/test/test_vulkan_compile_options.py @@ -0,0 +1,48 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from typing import Any, Dict + +from executorch.backends.vulkan.partitioner.vulkan_partitioner import ( + parse_compile_options, +) +from executorch.backends.vulkan.vulkan_preprocess import parse_compile_spec + + +class TestVulkanCompileOptions(unittest.TestCase): + """Verify that compile options survive the partitioner -> backend round trip. + + The partitioner serializes the user-provided options into CompileSpecs + (parse_compile_options) and the backend deserializes them at preprocess time + (parse_compile_spec). Boolean options that are serialized but not handled on + the deserialization side are silently dropped, which is a class of bug that + previously hid the small_texture_limits desktop-compatibility option. 
+ """ + + def _round_trip(self, options: Dict[str, Any]) -> Dict[str, Any]: + return parse_compile_spec(parse_compile_options(options)) + + def test_small_texture_limits_round_trips(self) -> None: + round_tripped = self._round_trip({"small_texture_limits": True}) + self.assertTrue(round_tripped.get("small_texture_limits")) + + def test_skip_memory_planning_round_trips(self) -> None: + round_tripped = self._round_trip({"skip_memory_planning": True}) + self.assertTrue(round_tripped.get("skip_memory_planning")) + + def test_force_fp16_round_trips(self) -> None: + round_tripped = self._round_trip({"force_fp16": True}) + self.assertTrue(round_tripped.get("force_fp16")) + + def test_unset_options_are_absent(self) -> None: + round_tripped = self._round_trip({}) + self.assertNotIn("small_texture_limits", round_tripped) + self.assertNotIn("skip_memory_planning", round_tripped) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/vulkan/utils.py b/backends/vulkan/utils.py index 7febff260c6..b349fb51001 100644 --- a/backends/vulkan/utils.py +++ b/backends/vulkan/utils.py @@ -588,6 +588,10 @@ def node_has_target(node: Any, target: str): ImageExtents = Tuple[int, int, int] DEFAULT_TEXTURE_LIMITS = (16384, 16384, 2048) +# Conservative 3D texture limit compatible with most desktop/laptop GPUs. The +# Vulkan spec only guarantees maxImageDimension3D >= 2048, whereas mobile GPUs +# commonly support 16384. Used when the small_texture_limits option is set. 
+SMALL_TEXTURE_LIMITS = (2048, 2048, 2048) DEFAULT_BUFFER_LIMIT = 128 * (1024 * 1024) all_storage_types: Set[VkStorageType] = { diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py index e9d5613668a..53a81d1772e 100644 --- a/backends/vulkan/vulkan_preprocess.py +++ b/backends/vulkan/vulkan_preprocess.py @@ -6,7 +6,6 @@ # pyre-strict -import copy from functools import partial from typing import Any, Callable, Dict, final, List @@ -114,6 +113,12 @@ def parse_compile_spec(compile_specs: List[CompileSpec]) -> Dict[str, Any]: if spec.key == "force_fp16": options[spec.key] = bool.from_bytes(spec.value, byteorder="little") + if spec.key == "small_texture_limits": + options[spec.key] = bool.from_bytes(spec.value, byteorder="little") + + if spec.key == "skip_memory_planning": + options[spec.key] = bool.from_bytes(spec.value, byteorder="little") + # Unhandled options are ignored return options @@ -130,16 +135,15 @@ def preprocess( # noqa: C901 ) -> PreprocessResult: compile_options = parse_compile_spec(module_compile_spec) - default_texture_limits = copy.deepcopy(utils.DEFAULT_TEXTURE_LIMITS) # 2048 is the typical limit value for 3D textures, but mobile GPUs often support # 16384. Since the Vulkan delegate primarily targets mobile GPUs at the moment, - # 16394 is the default texture limit used. This option is provided as a - # convenient way to switch to using a limit of 2048 for image textures which - # will be compatible with most GPUs. + # 16384 is the default texture limit used. The small_texture_limits option is + # provided as a convenient way to switch to a limit of 2048 for image textures, + # which will be compatible with most desktop/laptop GPUs. 
if compile_options.get("small_texture_limits", False): - default_texture_limits[0] = 2048 - default_texture_limits[1] = 2048 - default_texture_limits[2] = 2048 + default_texture_limits = utils.SMALL_TEXTURE_LIMITS + else: + default_texture_limits = utils.DEFAULT_TEXTURE_LIMITS limits_x = compile_options.get("texture_limits_x", default_texture_limits[0]) limits_y = compile_options.get("texture_limits_y", default_texture_limits[1]) diff --git a/setup.py b/setup.py index 85228bd37ae..cd2bb5332cb 100644 --- a/setup.py +++ b/setup.py @@ -134,6 +134,7 @@ def _minimal_cmake_flags() -> List[str]: "-DEXECUTORCH_BUILD_PYBIND=OFF", "-DEXECUTORCH_BUILD_QNN=OFF", "-DEXECUTORCH_BUILD_TESTS=OFF", + "-DEXECUTORCH_BUILD_VULKAN=OFF", "-DEXECUTORCH_BUILD_XNNPACK=OFF", ] @@ -962,6 +963,9 @@ def run(self): # noqa C901 if cmake_cache.is_enabled("EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER"): cmake_build_args += ["--target", "_llm_runner"] + if cmake_cache.is_enabled("EXECUTORCH_BUILD_VULKAN"): + cmake_build_args += ["--target", "vulkan_backend"] + if cmake_cache.is_enabled("EXECUTORCH_BUILD_CUDA"): cmake_build_args += ["--target", "aoti_cuda_backend"] cmake_build_args += ["--target", "aoti_common_shims_slim"] diff --git a/tools/cmake/preset/pybind.cmake b/tools/cmake/preset/pybind.cmake index ecce850ab3c..9a17f561785 100644 --- a/tools/cmake/preset/pybind.cmake +++ b/tools/cmake/preset/pybind.cmake @@ -97,3 +97,31 @@ else() FATAL_ERROR "Unsupported CMAKE_SYSTEM_NAME for pybind: ${CMAKE_SYSTEM_NAME}" ) endif() + +# Opt-in Vulkan backend for Linux/Windows wheels. Enabled ONLY when the build +# requests it via the EXECUTORCH_BUILD_VULKAN env var AND glslc (Vulkan SDK) is +# available to compile the shaders. This keeps the default wheel (and +# macOS/Android) byte-for-byte unchanged: GPU backends are opt-in rather than +# bundled into the universal wheel. 
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux"
+   OR CMAKE_SYSTEM_NAME STREQUAL "Windows"
+   OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"
+)
+  # Normalize the request (trim + upper-case) so that every CMake false
+  # constant -- not just the exact strings "0" and "OFF" -- disables the
+  # backend. An unset or empty variable also counts as "not requested".
+  set(_et_vulkan_request "")
+  if(DEFINED ENV{EXECUTORCH_BUILD_VULKAN})
+    string(STRIP "$ENV{EXECUTORCH_BUILD_VULKAN}" _et_vulkan_request)
+    string(TOUPPER "${_et_vulkan_request}" _et_vulkan_request)
+  endif()
+
+  if(NOT _et_vulkan_request MATCHES "^(0|OFF|NO|FALSE|N|IGNORE|NOTFOUND)?$")
+    # Only hint at the SDK directories when VULKAN_SDK is actually set;
+    # otherwise the hints would degenerate to "/bin" and "/Bin".
+    set(_et_glslc_hints "")
+    if(DEFINED ENV{VULKAN_SDK} AND NOT "$ENV{VULKAN_SDK}" STREQUAL "")
+      list(APPEND _et_glslc_hints "$ENV{VULKAN_SDK}/bin" "$ENV{VULKAN_SDK}/Bin")
+    endif()
+    find_program(GLSLC_PATH glslc HINTS ${_et_glslc_hints})
+
+    if(GLSLC_PATH)
+      set_overridable_option(EXECUTORCH_BUILD_VULKAN ON)
+      message(STATUS "Enabling Vulkan backend for wheel; glslc: ${GLSLC_PATH}")
+    else()
+      # The backend was explicitly requested, so make its omission visible
+      # without failing the configure step.
+      message(
+        WARNING "EXECUTORCH_BUILD_VULKAN requested but glslc was not found; "
+                "the Vulkan backend will not be included."
+      )
+    endif()
+    unset(_et_glslc_hints)
+  endif()
+  unset(_et_vulkan_request)
+endif()