diff --git a/.ci/scripts/setup-vulkan-linux-deps.sh b/.ci/scripts/setup-vulkan-linux-deps.sh
index a0dcb75ad4a..e088cc9442b 100755
--- a/.ci/scripts/setup-vulkan-linux-deps.sh
+++ b/.ci/scripts/setup-vulkan-linux-deps.sh
@@ -1,4 +1,3 @@
-
 #!/bin/bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
@@ -43,7 +42,35 @@ install_vulkan_sdk() {
   export PATH="${PATH}:${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64/bin/"
 }
 
+setup_real_gpu_icd() {
+  # Prefer the driver-installed system Vulkan ICD so the real device (and its
+  # fp16/int16/dot-product shader variants) is exercised; otherwise fall back
+  # to SwiftShader so the job stays green. Each loader directory is globbed
+  # separately: a single `ls` over both fails when either has no match.
+  local _icds
+  _icds="$(compgen -G '/etc/vulkan/icd.d/*.json' || true
+    compgen -G '/usr/share/vulkan/icd.d/*.json' || true)"
+  if [[ -n "${_icds}" ]]; then
+    echo "System Vulkan ICD(s) detected:"
+    echo "${_icds}"
+    unset ETVK_USING_SWIFTSHADER || true
+  else
+    echo "WARNING: no system Vulkan ICD found; using SwiftShader."
+    install_swiftshader
+  fi
+}
+
 VULKAN_SDK_VERSION="1.4.321.1"
 
-install_swiftshader
-install_vulkan_sdk "${VULKAN_SDK_VERSION}"
+# The no-argument default installs SwiftShader so the existing CPU-runner CI is
+# unchanged. Pass "real-gpu" to prefer a real system ICD when one is present.
+case "${1:-swiftshader}" in
+  real-gpu)
+    install_vulkan_sdk "${VULKAN_SDK_VERSION}"
+    setup_real_gpu_icd
+    ;;
+  swiftshader | *)
+    install_swiftshader
+    install_vulkan_sdk "${VULKAN_SDK_VERSION}"
+    ;;
+esac
diff --git a/.ci/scripts/setup-vulkan-windows-deps.ps1 b/.ci/scripts/setup-vulkan-windows-deps.ps1
new file mode 100644
index 00000000000..335f457714f
--- /dev/null
+++ b/.ci/scripts/setup-vulkan-windows-deps.ps1
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Install glslc (the Vulkan shader compiler) on Windows via conda-forge's
+# shaderc package, and make sure it is on PATH. glslc is the only build-time
+# Vulkan dependency -- the Vulkan headers and the volk loader come from the
+# in-tree submodules -- so this avoids depending on the heavyweight LunarG SDK
+# installer. Requires conda to be available (the callers create/activate an env).
+
+$ErrorActionPreference = "Stop"
+
+Write-Host "Installing shaderc (provides glslc) from conda-forge..."
+conda install -y -c conda-forge shaderc
+if ($LASTEXITCODE -ne 0) {
+    Write-Error "Failed to install shaderc from conda-forge (exit ${LASTEXITCODE})"
+    exit 1
+}
+# Confirm the install actually exposed glslc; a lookup miss leaves $glslc empty.
+$glslc = Get-Command glslc -ErrorAction SilentlyContinue
+if (-not $glslc) {
+    Write-Error "glslc not found on PATH after installing shaderc"
+    exit 1
+}
+
+# Prepend the glslc directory to PATH for this process and, when GITHUB_PATH is
+# set (a GitHub Actions step), append it there for subsequent steps.
+$glslcDir = Split-Path -Parent $glslc.Source
+$env:PATH = "${glslcDir};${env:PATH}"
+if ($env:GITHUB_PATH) {
+    Add-Content -Path $env:GITHUB_PATH -Value $glslcDir
+}
+# Print the resolved location and version so the log records which glslc is used.
+Write-Host "glslc available at $($glslc.Source)"
+& glslc --version
diff --git a/.ci/scripts/setup-windows-msvc-vulkan.ps1 b/.ci/scripts/setup-windows-msvc-vulkan.ps1
new file mode 100644
index 00000000000..7fa2006e83f
--- /dev/null
+++ b/.ci/scripts/setup-windows-msvc-vulkan.ps1
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Build-validation for the Vulkan backend under MSVC on Windows. Mirrors
+# setup-windows-msvc.ps1 but installs glslc (the Vulkan shader compiler) and
+# configures/builds the vulkan_backend target. This is a bring-up job: it exists
+# to surface MSVC portability issues in the Vulkan/volk/VMA code, so it may need
+# iteration.
+
+conda create --yes --quiet -n et python=3.12
+conda activate et
+
+# Install cmake into the active conda environment.
+conda install -y cmake
+
+# Activate the VS environment - this is required for MSVC to work.
+& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64
+
+# Install glslc (via conda-forge shaderc) and put it on PATH in this process.
+.ci/scripts/setup-vulkan-windows-deps.ps1
+
+# Install the Python packages listed in the CI requirements file.
+pip install -r .ci/docker/requirements-ci.txt
+# Recreate the build directory so the configure step starts from a clean tree.
+$buildDir = "cmake-out-vulkan"
+if (Test-Path -Path $buildDir) {
+    Remove-Item -Path $buildDir -Recurse -Force
+}
+New-Item -Path $buildDir -ItemType Directory
+# Configure a Release build with the Vulkan backend enabled.
+cmake -S . -B $buildDir `
+    -DCMAKE_BUILD_TYPE=Release `
+    -DEXECUTORCH_BUILD_VULKAN=ON `
+    -DPYTHON_EXECUTABLE=python
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+# Build just the vulkan_backend target, the subject of this validation job.
+cmake --build $buildDir --config Release --target vulkan_backend -j16
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Vulkan backend MSVC build failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+Write-Host "Vulkan backend MSVC build completed successfully!"
diff --git a/.ci/scripts/test_backend.sh b/.ci/scripts/test_backend.sh
index fe9b564a18f..80352fe1393 100755
--- a/.ci/scripts/test_backend.sh
+++ b/.ci/scripts/test_backend.sh
@@ -51,8 +51,15 @@ if [[ "$FLOW" == *qnn* ]]; then
 fi
 
 if [[ "$FLOW" == *vulkan* ]]; then
-    # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate.
-    source .ci/scripts/setup-vulkan-linux-deps.sh
+    # Setup the Vulkan SDK and select an ICD: the real system GPU ICD when one is
+    # present, otherwise SwiftShader (CPU runner). Each loader directory is
+    # probed separately; one `ls` over both globs fails if either has no match.
+    if compgen -G "/etc/vulkan/icd.d/*.json" >/dev/null \
+        || compgen -G "/usr/share/vulkan/icd.d/*.json" >/dev/null; then
+        source .ci/scripts/setup-vulkan-linux-deps.sh "real-gpu"
+    else
+        source .ci/scripts/setup-vulkan-linux-deps.sh "swiftshader"
+    fi
 
     EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
 fi
diff --git a/.ci/scripts/wheel/pre_build_script.sh b/.ci/scripts/wheel/pre_build_script.sh
index 365398d27a4..431b5c3fa90 100755
--- a/.ci/scripts/wheel/pre_build_script.sh
+++ b/.ci/scripts/wheel/pre_build_script.sh
@@ -69,3 +69,37 @@ if [[ "$(uname -s)" == "Linux" && "$(uname -m)" == "x86_64" ]]; then
   echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}" >> "${GITHUB_ENV}"
   echo "QNN SDK downloaded to ${QNN_SDK_ROOT}"
 fi
+
+# Provision the Vulkan SDK (glslc) and submodules ONLY when explicitly requested
+# via EXECUTORCH_BUILD_VULKAN. The default wheel build leaves this unset, so it
+# does no extra work (no submodule fetch, no SDK download) and is unaffected.
+if [[ "${EXECUTORCH_BUILD_VULKAN:-0}" != "0" \
+      && "${EXECUTORCH_BUILD_VULKAN:-OFF}" != "OFF" ]]; then
+  echo "Initializing Vulkan backend third-party submodules..."
+  VULKAN_SUBMODULES=(
+    backends/vulkan/third-party/Vulkan-Headers
+    backends/vulkan/third-party/volk
+    backends/vulkan/third-party/VulkanMemoryAllocator
+  )
+  if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then
+    git -c http.sslBackend=openssl submodule update --init "${VULKAN_SUBMODULES[@]}"
+    echo "Installing Vulkan SDK for Windows wheel build..."
+    powershell -ExecutionPolicy Bypass -File .ci/scripts/setup-vulkan-windows-deps.ps1
+  else
+    git submodule update --init "${VULKAN_SUBMODULES[@]}"
+    echo "Installing Vulkan SDK for Linux wheel build..."
+    VULKAN_SDK_VERSION="1.4.341.1"
+    _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz"
+    _vulkan_sdk_dir="${HOME}/.vulkan-sdk/${VULKAN_SDK_VERSION}"
+    mkdir -p "${_vulkan_sdk_dir}"
+    curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
+      --output /tmp/vulkansdk.tar.xz "${_vulkan_sdk_url}"
+    tar -C "${_vulkan_sdk_dir}" -xJf /tmp/vulkansdk.tar.xz
+    VULKAN_SDK="${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64"
+    export VULKAN_SDK
+    export PATH="${VULKAN_SDK}/bin:${PATH}"
+    echo "VULKAN_SDK=${VULKAN_SDK}" >> "${GITHUB_ENV}"
+    echo "${VULKAN_SDK}/bin" >> "${GITHUB_PATH}"
+    echo "Vulkan SDK installed to ${VULKAN_SDK}"
+  fi
+fi
diff --git a/.ci/scripts/wheel/test_linux.py b/.ci/scripts/wheel/test_linux.py
index c441bcec91f..7545b4c6f20 100644
--- a/.ci/scripts/wheel/test_linux.py
+++ b/.ci/scripts/wheel/test_linux.py
@@ -31,6 +31,13 @@
         ), f"OpenvinoBackend not found in registered backends: {registered}"
         print("✓ OpenvinoBackend is registered")
 
+        # Vulkan backend is optional: only present when the wheel was built with
+        # EXECUTORCH_BUILD_VULKAN=1 and the Vulkan SDK (glslc) was available.
+        if "VulkanBackend" in registered:
+            print("✓ VulkanBackend is registered")
+        else:
+            print("⚠ VulkanBackend not registered (expected for the default wheel)")
+
     test_base.run_tests(
         model_tests=[
             test_base.ModelTest(
diff --git a/.ci/scripts/wheel/test_windows.py b/.ci/scripts/wheel/test_windows.py
index d2d8b29a534..ba141d4498c 100644
--- a/.ci/scripts/wheel/test_windows.py
+++ b/.ci/scripts/wheel/test_windows.py
@@ -5,6 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import platform
 from typing import List
 
 import torch
@@ -15,6 +16,7 @@
 from executorch.examples.xnnpack.quantization.utils import quantize as quantize_xnn
 from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
 from executorch.extension.pybindings.portable_lib import (
+    _get_registered_backend_names,
     _load_for_executorch_from_buffer,
 )
 from test_base import ModelTest
@@ -63,6 +65,15 @@ def run_tests(model_tests: List[ModelTest]) -> None:
 
 
 if __name__ == "__main__":
+    if platform.system() == "Windows":
+        registered = _get_registered_backend_names()
+        # Vulkan backend is optional: only present when the wheel was built with
+        # EXECUTORCH_BUILD_VULKAN=1 and the Vulkan SDK (glslc) was available.
+        if "VulkanBackend" in registered:
+            print("✓ VulkanBackend is registered")
+        else:
+            print("⚠ VulkanBackend not registered (expected for the default wheel)")
+
     run_tests(
         model_tests=[
             ModelTest(
diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml
index 0461527b073..d8300b9c72a 100644
--- a/.github/workflows/test-backend-vulkan.yml
+++ b/.github/workflows/test-backend-vulkan.yml
@@ -17,6 +17,8 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  # Default coverage: builds + runs on SwiftShader (software Vulkan) on CPU
+  # runners. Runs on every PR and nightly.
   test-vulkan:
     uses: ./.github/workflows/_test_backend.yml
     with:
@@ -28,3 +30,47 @@ jobs:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 120
       run-linux: true
+
+  # Real-GPU coverage on an NVIDIA runner (exercises the fp16/int16/dot-product
+  # shader variants that SwiftShader cannot). Standalone job (does not go through
+  # _test_backend.yml) so the GPU is attached via gpu-arch-type, mirroring
+  # cuda.yml. Not run on pull_request to avoid GPU cost and to keep it from
+  # blocking PR merges; runs on nightly/push/dispatch.
+  test-vulkan-real-gpu:
+    if: ${{ github.event_name != 'pull_request' }}
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.6"
+      use-custom-docker-registry: false
+      submodules: recursive
+      ref: ${{ github.sha }}
+      script: |
+        set -eux
+
+        # Install the Vulkan SDK (glslc) and select a real system ICD. The NVIDIA
+        # driver on this runner provides the ICD; install the loader as well.
+        # NOTE: first-run check - inspect the vulkaninfo output below to confirm a
+        # real NVIDIA device is selected (not llvmpipe/SwiftShader). If no system
+        # ICD is present, setup-vulkan-linux-deps.sh falls back to SwiftShader.
+        sudo apt-get update && sudo apt-get install -y libvulkan1 vulkan-tools || true
+        source .ci/scripts/setup-vulkan-linux-deps.sh real-gpu
+        vulkaninfo --summary || true
+
+        PYTHON_EXECUTABLE=python ./install_executorch.sh
+
+        cmake -DCMAKE_BUILD_TYPE=Release \
+            -DEXECUTORCH_BUILD_VULKAN=ON \
+            -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+            -DPYTHON_EXECUTABLE=python \
+            -Bcmake-out .
+        cmake --build cmake-out -j4 --target executor_runner
+
+        # Export a model to the Vulkan backend and run it on the GPU.
+        python -m examples.vulkan.export -m mv2 -o .
+        ./cmake-out/executor_runner --model_path mv2.pte
diff --git a/.github/workflows/vulkan-windows.yml b/.github/workflows/vulkan-windows.yml
new file mode 100644
index 00000000000..2555e4289a8
--- /dev/null
+++ b/.github/workflows/vulkan-windows.yml
@@ -0,0 +1,48 @@
+name: Test Vulkan Backend Windows Build
+
+# Build-validation for the Vulkan backend under MSVC on Windows. This is a
+# bring-up job (no GPU): it confirms the backend configures and compiles with
+# MSVC. Real-GPU Windows E2E is a follow-up once a Windows Vulkan GPU runner is
+# available. Path-filtered and not part of the required PR checks so it can be
+# iterated on without blocking unrelated work.
+
+on:
+  push:
+    branches:
+      - main
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - backends/vulkan/**
+      - .ci/scripts/setup-vulkan-windows-deps.ps1
+      - .ci/scripts/setup-windows-msvc-vulkan.ps1
+      - .github/workflows/vulkan-windows.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  build-vulkan-windows-msvc:
+    name: build-vulkan-windows-msvc
+    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
+    with:
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        git config --global http.sslBackend openssl
+        git submodule update --init backends/vulkan/third-party/Vulkan-Headers backends/vulkan/third-party/volk backends/vulkan/third-party/VulkanMemoryAllocator
+        git submodule update --init
+        conda init powershell
+        powershell -Command "& {
+          Set-PSDebug -Trace 1
+          \$ErrorActionPreference = 'Stop'
+          \$PSNativeCommandUseErrorActionPreference = \$true
+          .ci/scripts/setup-windows-msvc-vulkan.ps1
+        }"
diff --git a/backends/vulkan/CMakeLists.txt b/backends/vulkan/CMakeLists.txt
index d9acde79ecf..6945d67779c 100644
--- a/backends/vulkan/CMakeLists.txt
+++ b/backends/vulkan/CMakeLists.txt
@@ -41,6 +41,24 @@ set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers)
 set(VOLK_PATH ${VULKAN_THIRD_PARTY_PATH}/volk)
 set(VMA_PATH ${VULKAN_THIRD_PARTY_PATH}/VulkanMemoryAllocator)
 
+# Vulkan-Headers, volk and VulkanMemoryAllocator are git submodules that the
+# default checkout done by install_executorch.py does not fetch. Probe one
+# sentinel file in each and stop with an actionable message when any is absent.
+if(NOT (EXISTS "${VOLK_PATH}/volk.c"
+        AND EXISTS "${VULKAN_HEADERS_PATH}/include/vulkan/vulkan.h"
+        AND EXISTS "${VMA_PATH}/include/vk_mem_alloc.h")
+)
+  message(
+    FATAL_ERROR
+      "The Vulkan backend third-party submodules are missing. "
+      "Run the following from the repository root:\n"
+      "  git submodule update --init "
+      "backends/vulkan/third-party/Vulkan-Headers "
+      "backends/vulkan/third-party/volk "
+      "backends/vulkan/third-party/VulkanMemoryAllocator"
+  )
+endif()
+
 set(COMMON_INCLUDES
     $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
     $<BUILD_INTERFACE:${VULKAN_HEADERS_PATH}/include>
@@ -49,7 +67,11 @@ set(COMMON_INCLUDES
 
 # Compile settings
 
-set(VULKAN_CXX_FLAGS "-fexceptions")
+# C++ exceptions must be enabled because the vk_api layer throws on Vulkan
+# errors (see vk_api/Exception.h). MSVC spells the switch /EHsc and does not
+# understand -fexceptions, so one generator expression picks the flag that
+# matches the C++ compiler in use.
+set(VULKAN_CXX_FLAGS "$<IF:$<CXX_COMPILER_ID:MSVC>,/EHsc,-fexceptions>")
 list(APPEND VULKAN_CXX_FLAGS "-DUSE_VULKAN_WRAPPER")
 list(APPEND VULKAN_CXX_FLAGS "-DUSE_VULKAN_VOLK")
 
diff --git a/backends/vulkan/cmake/ShaderLibrary.cmake b/backends/vulkan/cmake/ShaderLibrary.cmake
index e2045cbf7da..0fb99757b0c 100644
--- a/backends/vulkan/cmake/ShaderLibrary.cmake
+++ b/backends/vulkan/cmake/ShaderLibrary.cmake
@@ -24,17 +24,33 @@ if(NOT EXECUTORCH_ROOT)
   message("WARNING: EXECUTORCH_ROOT is not set! A failure is likely imminent.")
 endif()
 
-find_program(GLSLC_PATH glslc PATHS $ENV{PATH})
+# find_program already searches PATH and appends the platform executable suffix
+# (.exe on Windows). Hint <VULKAN_SDK>/bin and <VULKAN_SDK>/Bin through the ENV
+# form so that an unset VULKAN_SDK adds no hint instead of collapsing to /bin.
+find_program(GLSLC_PATH glslc HINTS ENV VULKAN_SDK PATH_SUFFIXES bin Bin)
 
 if(NOT GLSLC_PATH AND EXECUTORCH_BUILD_VULKAN)
-  message(
-    FATAL_ERROR
-      "glslc from the Vulkan SDK must be installed to build the Vulkan backend. "
-      "Please install the Vulkan SDK 1.4.341.1 or newer from "
-      "https://vulkan.lunarg.com/sdk/home and ensure that the glslc binary is in your PATH. "
-      "Note that the glslc distributed with the Android NDK is not compatible since it "
-      "does not support the GL_EXT_integer_dot_product extension. "
-  )
+  if(EXECUTORCH_BUILD_WHEEL_DO_NOT_USE)
+    # Wheel/pybind build: drop the Vulkan backend so the wheel can still be built.
+    # NOTE(review): FORCE overwrites any user-set cache value and its help text.
+    message(
+      STATUS
+        "glslc not found; the Vulkan backend will not be included in the wheel."
+    )
+    set(EXECUTORCH_BUILD_VULKAN
+        OFF
+        CACHE BOOL "" FORCE
+    )
+  else()
+    message(
+      FATAL_ERROR
+        "glslc from the Vulkan SDK must be installed to build the Vulkan backend. "
+        "Please install the Vulkan SDK 1.4.341.1 or newer from "
+        "https://vulkan.lunarg.com/sdk/home and ensure that the glslc binary is in your PATH. "
+        "Note that the glslc distributed with the Android NDK is not compatible since it "
+        "does not support the GL_EXT_integer_dot_product extension. "
+    )
+  endif()
 endif()
 
 # Required to enable linking with --whole-archive
diff --git a/backends/vulkan/partitioner/vulkan_partitioner.py b/backends/vulkan/partitioner/vulkan_partitioner.py
index 60b4c3346f3..fb51a0edfad 100644
--- a/backends/vulkan/partitioner/vulkan_partitioner.py
+++ b/backends/vulkan/partitioner/vulkan_partitioner.py
@@ -378,9 +378,15 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
             exported_program.graph_module
         )
 
-        texture_limits: utils.ImageExtents = self.options.get(
-            "texture_limits", utils.DEFAULT_TEXTURE_LIMITS
-        )
+        # small_texture_limits opts into the conservative 3D texture limit that is
+        # compatible with most desktop/laptop GPUs (the Vulkan spec only guarantees
+        # 2048). An explicit texture_limits always takes precedence.
+        if "texture_limits" in self.options:
+            texture_limits: utils.ImageExtents = self.options["texture_limits"]
+        elif self.options.get("small_texture_limits", False):
+            texture_limits = utils.SMALL_TEXTURE_LIMITS
+        else:
+            texture_limits = utils.DEFAULT_TEXTURE_LIMITS
         buffer_limit: int = self.options.get("buffer_limit", utils.DEFAULT_BUFFER_LIMIT)
         capability_partitioner = CapabilityBasedPartitioner(
             exported_program.graph_module,
diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp
index d090a62f370..a2c69c45cd9 100644
--- a/backends/vulkan/runtime/api/Context.cpp
+++ b/backends/vulkan/runtime/api/Context.cpp
@@ -212,6 +212,14 @@ void Context::register_blit(
     vkapi::PipelineBarrier& pipeline_barrier,
     vkapi::VulkanImage& src,
     vkapi::VulkanImage& dst) {
+  // vkCmdBlitImage is only valid on a queue that supports graphics operations
+  // (VUID-vkCmdBlitImage-commandBuffer-cmdpool); transfer capability alone is
+  // not sufficient. The queue is selected by compute capability only, so guard
+  // against compute-only queue families exposed by some desktop GPUs.
+  VK_CHECK_COND(
+      queue_.capabilities & VK_QUEUE_GRAPHICS_BIT,
+      "The Vulkan queue selected for compute does not support blit operations "
+      "(VK_QUEUE_GRAPHICS_BIT is not set).");
   cmd_.insert_barrier(pipeline_barrier);
   cmd_.blit(src, dst);
 }
diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index 47cefa1031a..cba78403358 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -680,7 +680,11 @@ vkapi::VulkanBuffer allocate_buffer(
       return vkapi::VulkanBuffer();
   }
 
-  VK_CHECK_COND(numel <= context_ptr->adapter_ptr()->max_buffer_numel());
+  // max_buffer_numel() returns maxStorageBufferRange, which is a size in bytes,
+  // so compare it against the buffer size in bytes (not the element count).
+  VK_CHECK_COND(
+      element_size(dtype) * numel <=
+      context_ptr->adapter_ptr()->max_buffer_numel());
 
   return adapter_ptr->vma().create_storage_buffer(
       element_size(dtype) * numel, allocate_memory);
diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py
index 93d6f9e41aa..69c87563bbd 100644
--- a/backends/vulkan/runtime/gen_vulkan_spv.py
+++ b/backends/vulkan/runtime/gen_vulkan_spv.py
@@ -1123,6 +1123,7 @@ def compile_spirv(shader_paths_pair) -> Tuple[str, str]:
             # Construct name of SPIR-V file to be compiled
             spv_out_path = os.path.join(output_dir, f"{src_file_name}.spv")
 
+            cached_spv_out_path = None
             if cache_dir is not None:
                 # Construct the file names of cached SPIR-V file to check if they exist
                 # in the cache.
@@ -1160,7 +1161,9 @@ def compile_spirv(shader_paths_pair) -> Tuple[str, str]:
                             subprocess.run(cmd_no_opt, check=True, capture_output=True)
                         except subprocess.CalledProcessError as e_no_opt:
                             # Delete any existing cached SPIR-V file if it exists
-                            if os.path.exists(cached_spv_out_path):
+                            if cached_spv_out_path is not None and os.path.exists(
+                                cached_spv_out_path
+                            ):
                                 os.remove(cached_spv_out_path)
 
                             raise RuntimeError(
@@ -1169,7 +1172,9 @@ def compile_spirv(shader_paths_pair) -> Tuple[str, str]:
 
                     else:
                         # Delete any existing cached SPIR-V file if it exists
-                        if os.path.exists(cached_spv_out_path):
+                        if cached_spv_out_path is not None and os.path.exists(
+                            cached_spv_out_path
+                        ):
                             os.remove(cached_spv_out_path)
 
                         raise RuntimeError(f"{err_msg_base} {e.stderr}") from e
diff --git a/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml b/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml
index bd5c2377cf6..05b26adfb24 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml
+++ b/backends/vulkan/runtime/graph/ops/glsl/coopmat_mm.yaml
@@ -12,6 +12,10 @@
 
 coopmat_mm:
   parameter_names_with_default_values:
+    # GL_KHR_cooperative_matrix requires SPIR-V 1.6, so target Vulkan 1.3 when
+    # compiling this shader (the default target-env of 1.1 is too low). Other
+    # shaders are unaffected and keep the default.
+    VK_VERSION: '1.3'
     DTYPE: float
     PRECISION: highp
     WEIGHT_LAYOUT: row_major
diff --git a/backends/vulkan/runtime/vk_api/Adapter.cpp b/backends/vulkan/runtime/vk_api/Adapter.cpp
index b762c95205b..b28c7601687 100644
--- a/backends/vulkan/runtime/vk_api/Adapter.cpp
+++ b/backends/vulkan/runtime/vk_api/Adapter.cpp
@@ -140,6 +140,20 @@ VkDevice create_logical_device(
       enabled_device_extensions,
       requested_device_extensions);
 
+  // Enable the base device features that ExecuTorch shaders rely on, but only
+  // those that the physical device reports as supported. With pEnabledFeatures
+  // left null, all base features are disabled; using a shader that performs
+  // e.g. int16 arithmetic without enabling shaderInt16 is invalid usage and
+  // crashes on drivers that enforce it. Unsupported features stay VK_FALSE, so
+  // this is a no-op on devices that lack them.
+  VkPhysicalDeviceFeatures enabled_features{};
+  enabled_features.shaderInt16 =
+      physical_device.supports_int16_shader_types ? VK_TRUE : VK_FALSE;
+  enabled_features.shaderInt64 =
+      physical_device.supports_int64_shader_types ? VK_TRUE : VK_FALSE;
+  enabled_features.shaderFloat64 =
+      physical_device.supports_float64_shader_types ? VK_TRUE : VK_FALSE;
+
   VkDeviceCreateInfo device_create_info{
       VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // sType
       nullptr, // pNext
@@ -151,7 +165,7 @@ VkDevice create_logical_device(
       static_cast<uint32_t>(
           enabled_device_extensions.size()), // enabledExtensionCount
       enabled_device_extensions.data(), // ppEnabledExtensionNames
-      nullptr, // pEnabledFeatures
+      &enabled_features, // pEnabledFeatures
   };
 
   void* extension_list_top = nullptr;
diff --git a/backends/vulkan/runtime/vk_api/Runtime.cpp b/backends/vulkan/runtime/vk_api/Runtime.cpp
index 3d3a146d80d..86cf182ece0 100644
--- a/backends/vulkan/runtime/vk_api/Runtime.cpp
+++ b/backends/vulkan/runtime/vk_api/Runtime.cpp
@@ -10,6 +10,7 @@
 
 #include <executorch/backends/vulkan/runtime/vk_api/Adapter.h>
 
+#include <cstdlib>
 #include <cstring>
 #include <iostream>
 #include <sstream>
@@ -239,19 +240,62 @@ VkDebugReportCallbackEXT create_debug_report_callback(
 // Adapter selection methods
 //
 
-uint32_t select_first(const std::vector<Runtime::DeviceMapping>& devices) {
+// Scores a physical device for default-adapter selection: higher is better, and
+// -1 marks a device without compute queues. Hardware GPUs outrank software
+// rasterizers (e.g. SwiftShader/lavapipe), which report the CPU device type.
+int compute_device_priority(const PhysicalDevice& device) {
+  if (device.num_compute_queues == 0) {
+    return -1; // not compute-capable, never select
+  }
+  const auto type = device.properties.deviceType;
+  if (type == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) {
+    return 5;
+  }
+  if (type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) {
+    return 4;
+  }
+  if (type == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU) {
+    return 3;
+  }
+  // Software (CPU) implementations rank below every other, unlisted type.
+  return type == VK_PHYSICAL_DEVICE_TYPE_CPU ? 1 : 2;
+}
+
+uint32_t select_compute_device(
+    const std::vector<Runtime::DeviceMapping>& devices) {
+  const uint32_t invalid =
+      devices.size() + 1; // out of range signals invalidity
   if (devices.empty()) {
-    return devices.size() + 1; // return out of range to signal invalidity
+    return invalid;
+  }
+
+  // Allow overriding device selection via the ETVK_DEVICE_INDEX environment
+  // variable, which is useful on multi-GPU desktop systems. Invalid values fall
+  // through to automatic selection below.
+  const char* device_index_env = std::getenv("ETVK_DEVICE_INDEX");
+  if (device_index_env != nullptr) {
+    char* end = nullptr;
+    const long idx = std::strtol(device_index_env, &end, 10);
+    if (end != device_index_env && *end == '\0' && idx >= 0 &&
+        static_cast<size_t>(idx) < devices.size() &&
+        devices[static_cast<size_t>(idx)].first.num_compute_queues > 0) {
+      return static_cast<uint32_t>(idx);
+    }
   }
 
-  // Select the first adapter that has compute capability
+  // Otherwise pick the highest-priority compute-capable device, preferring the
+  // first one on ties (preserving the previous first-match behavior).
+  uint32_t best_i = invalid;
+  int best_priority = -1;
   for (size_t i = 0; i < devices.size(); ++i) {
-    if (devices[i].first.num_compute_queues > 0) {
-      return i;
+    const int priority = compute_device_priority(devices[i].first);
+    if (priority > best_priority) {
+      best_priority = priority;
+      best_i = static_cast<uint32_t>(i);
     }
   }
 
-  return devices.size() + 1;
+  return best_i;
 }
 
 //
@@ -312,7 +356,7 @@ Runtime::Runtime(const RuntimeConfig config)
     try {
       switch (config.default_selector) {
         case AdapterSelector::First:
-          default_adapter_i_ = create_adapter(select_first);
+          default_adapter_i_ = create_adapter(select_compute_device);
       }
     } catch (...) {
     }
diff --git a/backends/vulkan/runtime/vk_api/memory/vma_api.h b/backends/vulkan/runtime/vk_api/memory/vma_api.h
index dc7abbf8b1e..cf267a27d11 100644
--- a/backends/vulkan/runtime/vk_api/memory/vma_api.h
+++ b/backends/vulkan/runtime/vk_api/memory/vma_api.h
@@ -25,17 +25,28 @@
 #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
 #define VMA_VULKAN_VERSION 1002000
 
-#ifdef __clang__
+#if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wnullability-completeness"
 #pragma clang diagnostic ignored "-Wunused-variable"
-#endif /* __clang__ */
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4100 4101 4189)
+#endif
 
 #include <vk_mem_alloc.h>
 
-#ifdef __clang__
+#if defined(__clang__)
 #pragma clang diagnostic pop
-#endif /* __clang__ */
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
+#endif
 
 #else // !ETVK_USE_META_VMA
 
@@ -71,16 +82,27 @@
 */
 #endif /* VULKAN_DEBUG */
 
-#ifdef __clang__
+#if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wnullability-completeness"
 #pragma clang diagnostic ignored "-Wunused-variable"
-#endif /* __clang__ */
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4100 4101 4189)
+#endif
 
 #include <include/vk_mem_alloc.h>
 
-#ifdef __clang__
+#if defined(__clang__)
 #pragma clang diagnostic pop
-#endif /* __clang__ */
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
+#endif
 
 #endif // ETVK_USE_META_VMA
diff --git a/backends/vulkan/test/test_vulkan_compile_options.py b/backends/vulkan/test/test_vulkan_compile_options.py
new file mode 100644
index 00000000000..f44850d2915
--- /dev/null
+++ b/backends/vulkan/test/test_vulkan_compile_options.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Any, Dict
+
+from executorch.backends.vulkan.partitioner.vulkan_partitioner import (
+    parse_compile_options,
+)
+from executorch.backends.vulkan.vulkan_preprocess import parse_compile_spec
+
+
+class TestVulkanCompileOptions(unittest.TestCase):
+    """Round-trip checks for Vulkan compile options.
+
+    The partitioner encodes options into CompileSpecs (parse_compile_options)
+    and the backend decodes them at preprocess time (parse_compile_spec). A
+    boolean option that is encoded but never decoded is silently dropped; that
+    is how the small_texture_limits desktop-compatibility option went unnoticed.
+    """
+
+    def _round_trip(self, options: Dict[str, Any]) -> Dict[str, Any]:
+        compile_specs = parse_compile_options(options)
+        return parse_compile_spec(compile_specs)
+
+    def test_small_texture_limits_round_trips(self) -> None:
+        decoded = self._round_trip({"small_texture_limits": True})
+        self.assertTrue(decoded.get("small_texture_limits"))
+
+    def test_skip_memory_planning_round_trips(self) -> None:
+        decoded = self._round_trip({"skip_memory_planning": True})
+        self.assertTrue(decoded.get("skip_memory_planning"))
+
+    def test_force_fp16_round_trips(self) -> None:
+        decoded = self._round_trip({"force_fp16": True})
+        self.assertTrue(decoded.get("force_fp16"))
+
+    def test_unset_options_are_absent(self) -> None:
+        decoded = self._round_trip({})
+        for key in ("small_texture_limits", "skip_memory_planning"):
+            self.assertNotIn(key, decoded)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/backends/vulkan/utils.py b/backends/vulkan/utils.py
index 7febff260c6..b349fb51001 100644
--- a/backends/vulkan/utils.py
+++ b/backends/vulkan/utils.py
@@ -588,6 +588,10 @@ def node_has_target(node: Any, target: str):
 ImageExtents = Tuple[int, int, int]
 
 DEFAULT_TEXTURE_LIMITS = (16384, 16384, 2048)
+# Conservative 3D texture limit compatible with most desktop/laptop GPUs, which
+# typically report maxImageDimension3D of 2048 (mobile GPUs commonly support
+# 16384). Used when the small_texture_limits option is set.
+SMALL_TEXTURE_LIMITS = (2048, 2048, 2048)
 DEFAULT_BUFFER_LIMIT = 128 * (1024 * 1024)
 
 all_storage_types: Set[VkStorageType] = {
diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py
index e9d5613668a..53a81d1772e 100644
--- a/backends/vulkan/vulkan_preprocess.py
+++ b/backends/vulkan/vulkan_preprocess.py
@@ -6,7 +6,6 @@
 
 # pyre-strict
 
-import copy
 from functools import partial
 from typing import Any, Callable, Dict, final, List
 
@@ -114,6 +113,12 @@ def parse_compile_spec(compile_specs: List[CompileSpec]) -> Dict[str, Any]:
         if spec.key == "force_fp16":
             options[spec.key] = bool.from_bytes(spec.value, byteorder="little")
 
+        if spec.key == "small_texture_limits":
+            options[spec.key] = bool.from_bytes(spec.value, byteorder="little")
+
+        if spec.key == "skip_memory_planning":
+            options[spec.key] = bool.from_bytes(spec.value, byteorder="little")
+
         # Unhandled options are ignored
 
     return options
@@ -130,16 +135,15 @@ def preprocess(  # noqa: C901
     ) -> PreprocessResult:
         compile_options = parse_compile_spec(module_compile_spec)
 
-        default_texture_limits = copy.deepcopy(utils.DEFAULT_TEXTURE_LIMITS)
         # 2048 is the typical limit value for 3D textures, but mobile GPUs often support
         # 16384. Since the Vulkan delegate primarily targets mobile GPUs at the moment,
-        # 16394 is the default texture limit used. This option is provided as a
-        # convenient way to switch to using a limit of 2048 for image textures which
-        # will be compatible with most GPUs.
+        # 16384 is the default texture limit used. The small_texture_limits option is
+        # provided as a convenient way to switch to a limit of 2048 for image textures,
+        # which will be compatible with most desktop/laptop GPUs.
         if compile_options.get("small_texture_limits", False):
-            default_texture_limits[0] = 2048
-            default_texture_limits[1] = 2048
-            default_texture_limits[2] = 2048
+            default_texture_limits = utils.SMALL_TEXTURE_LIMITS
+        else:
+            default_texture_limits = utils.DEFAULT_TEXTURE_LIMITS
 
         limits_x = compile_options.get("texture_limits_x", default_texture_limits[0])
         limits_y = compile_options.get("texture_limits_y", default_texture_limits[1])
diff --git a/setup.py b/setup.py
index 85228bd37ae..cd2bb5332cb 100644
--- a/setup.py
+++ b/setup.py
@@ -134,6 +134,7 @@ def _minimal_cmake_flags() -> List[str]:
         "-DEXECUTORCH_BUILD_PYBIND=OFF",
         "-DEXECUTORCH_BUILD_QNN=OFF",
         "-DEXECUTORCH_BUILD_TESTS=OFF",
+        "-DEXECUTORCH_BUILD_VULKAN=OFF",
         "-DEXECUTORCH_BUILD_XNNPACK=OFF",
     ]
 
@@ -962,6 +963,9 @@ def run(self):  # noqa C901
             if cmake_cache.is_enabled("EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER"):
                 cmake_build_args += ["--target", "_llm_runner"]
 
+            if cmake_cache.is_enabled("EXECUTORCH_BUILD_VULKAN"):
+                cmake_build_args += ["--target", "vulkan_backend"]
+
             if cmake_cache.is_enabled("EXECUTORCH_BUILD_CUDA"):
                 cmake_build_args += ["--target", "aoti_cuda_backend"]
                 cmake_build_args += ["--target", "aoti_common_shims_slim"]
diff --git a/tools/cmake/preset/pybind.cmake b/tools/cmake/preset/pybind.cmake
index ecce850ab3c..9a17f561785 100644
--- a/tools/cmake/preset/pybind.cmake
+++ b/tools/cmake/preset/pybind.cmake
@@ -97,3 +97,34 @@ else()
     FATAL_ERROR "Unsupported CMAKE_SYSTEM_NAME for pybind: ${CMAKE_SYSTEM_NAME}"
   )
 endif()
+
+# Opt-in Vulkan backend for Linux/Windows wheels. Enabled ONLY when the build
+# requests it via the EXECUTORCH_BUILD_VULKAN env var AND glslc (Vulkan SDK) is
+# available to compile the shaders. This keeps the default wheel (and
+# macOS/Android) byte-for-byte unchanged: GPU backends are opt-in rather than
+# bundled into the universal wheel.
+#
+# The env var is read at configure time and interpreted with CMake's boolean
+# rules, so 0/OFF/NO/FALSE/N/IGNORE (any case) and the empty string all leave
+# the backend disabled.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux"
+   OR CMAKE_SYSTEM_NAME STREQUAL "Windows"
+   OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"
+)
+  set(_et_vulkan_requested "$ENV{EXECUTORCH_BUILD_VULKAN}")
+  if(_et_vulkan_requested)
+    find_program(
+      GLSLC_PATH glslc HINTS "$ENV{VULKAN_SDK}/bin" "$ENV{VULKAN_SDK}/Bin"
+    )
+    if(GLSLC_PATH)
+      set_overridable_option(EXECUTORCH_BUILD_VULKAN ON)
+      message(STATUS "Enabling Vulkan backend for wheel; glslc: ${GLSLC_PATH}")
+    else()
+      message(
+        STATUS "EXECUTORCH_BUILD_VULKAN requested but glslc was not found; "
+               "the Vulkan backend will not be included."
+      )
+    endif()
+  endif()
+  unset(_et_vulkan_requested)
+endif()