From a80d3d31a34dc34b8fddbf585786cd51edafa7ad Mon Sep 17 00:00:00 2001 From: Simao Gomes Viana Date: Wed, 25 Mar 2026 13:57:23 +0100 Subject: [PATCH 1/3] ggml-vulkan: probe vkGetBufferDeviceAddress before enabling BDA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Vulkan drivers (observed on Adreno, Qualcomm build 923a446bf8, driver date 09/05/24) report bufferDeviceAddress support in VkPhysicalDeviceVulkan12Features but crash with SIGSEGV when vkGetBufferDeviceAddress is actually called: Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0 #01 ggml_vk_create_buffer+4084 #02 ggml_vk_create_buffer_device+148 #03 ggml_backend_vk_buffer_type_alloc_buffer+240 #07 whisper_model_load+5996 The crash occurs inside ggml_vk_create_buffer when device->device.getBufferAddress() is called — the driver-internal function pointer dereferences null. After creating the logical device, verify that the function pointer resolves via vkGetDeviceProcAddr and that a test call returns a non-zero address. If either check fails, disable buffer_device_address so all guarded code paths skip BDA. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 7092361d2ea..50efff144ca 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5379,6 +5379,63 @@ static vk_device ggml_vk_get_device(size_t idx) { // Queues ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false); + // Verify vkGetBufferDeviceAddress actually works — some drivers + // (e.g. certain Adreno builds) report bufferDeviceAddress support + // in the feature bits but crash (SIGSEGV) when the function is + // actually called. Probe it here and disable if broken. 
+ if (device->buffer_device_address) { + PFN_vkGetBufferDeviceAddress pfn = (PFN_vkGetBufferDeviceAddress) + vkGetDeviceProcAddr(device->device, "vkGetBufferDeviceAddress"); + if (pfn == nullptr) { + GGML_LOG_WARN("ggml_vulkan: vkGetBufferDeviceAddress proc addr is null, disabling BDA\n"); + device->buffer_device_address = false; + } else { + // Create a tiny test buffer and verify the call returns non-zero + VkBufferCreateInfo test_bci = {}; + test_bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + test_bci.size = 256; + test_bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + VkBuffer test_buf = VK_NULL_HANDLE; + if (vkCreateBuffer(device->device, &test_bci, nullptr, &test_buf) == VK_SUCCESS) { + VkMemoryRequirements test_req; + vkGetBufferMemoryRequirements(device->device, test_buf, &test_req); + VkPhysicalDeviceMemoryProperties test_mem_props; + vkGetPhysicalDeviceMemoryProperties(device->physical_device, &test_mem_props); + uint32_t test_mtype = UINT32_MAX; + for (uint32_t j = 0; j < test_mem_props.memoryTypeCount; j++) { + if (test_req.memoryTypeBits & (1u << j)) { test_mtype = j; break; } + } + if (test_mtype != UINT32_MAX) { + VkMemoryAllocateFlagsInfo test_flags = {}; + test_flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; + test_flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + VkMemoryAllocateInfo test_alloc = {}; + test_alloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + test_alloc.pNext = &test_flags; + test_alloc.allocationSize = test_req.size; + test_alloc.memoryTypeIndex = test_mtype; + VkDeviceMemory test_mem = VK_NULL_HANDLE; + if (vkAllocateMemory(device->device, &test_alloc, nullptr, &test_mem) == VK_SUCCESS) { + vkBindBufferMemory(device->device, test_buf, test_mem, 0); + VkBufferDeviceAddressInfo test_addr = {}; + test_addr.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + test_addr.buffer = test_buf; + VkDeviceAddress addr = pfn(device->device, &test_addr); + if (addr == 0) 
{ + GGML_LOG_WARN("ggml_vulkan: vkGetBufferDeviceAddress returned 0, disabling BDA\n"); + device->buffer_device_address = false; + } + vkFreeMemory(device->device, test_mem, nullptr); + } else { + GGML_LOG_WARN("ggml_vulkan: BDA test alloc failed, disabling BDA\n"); + device->buffer_device_address = false; + } + } + vkDestroyBuffer(device->device, test_buf, nullptr); + } + } + } + // Shaders // Disable matmul tile sizes early if performance low or not supported for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) { From d28a49b8371a9669d1bf21eff8d10461e7958a7a Mon Sep 17 00:00:00 2001 From: Simao Gomes Viana Date: Wed, 25 Mar 2026 14:01:00 +0100 Subject: [PATCH 2/3] ggml-vulkan: graceful fallback on pipeline compilation failure Some Vulkan drivers (observed on Adreno, Qualcomm build 923a446bf8) fail to compile compute shaders at runtime, reporting "Failed to link shaders" and returning ErrorUnknown from createComputePipeline. Previously this threw a C++ exception that propagated as an uncaught abort, or the resulting null pipeline was dispatched causing SIGSEGV: AdrenoVK-0: Failed to link shaders. Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xe8 #01 ggml_vk_dispatch_pipeline+360 #02 ggml_vk_mul_mat_q_f16+6616 #03 ggml_backend_vk_graph_compute+41780 Three changes: 1. ggml_vk_create_pipeline_func: catch the exception, increment device->pipeline_failures, clean up the shader module, and return instead of rethrowing. Also handle null pipeline after creation. 2. ggml_vk_dispatch_pipeline: early-return if the pipeline is null or not compiled (safety net against dispatching broken pipelines). 3. ggml_backend_vk_device_supports_op: return false for all ops when pipeline_failures > 0, causing the backend scheduler to route everything to the CPU backend. The GPU is still used for buffer allocation but all compute runs on CPU. 
--- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 32 +++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 50efff144ca..8fa9d1da943 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -625,6 +625,11 @@ struct vk_device_struct { bool shader_64b_indexing; + // Number of compute pipelines that failed to compile. + // When > 0, supports_op returns false for all ops so the backend + // scheduler routes everything to the CPU backend. + std::atomic<uint32_t> pipeline_failures {}; + bool integer_dot_product; // 0: default, 1: force mmvq, -1: disable mmvq int32_t mmvq_mode; @@ -2192,9 +2197,19 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin try { pipeline->pipeline = device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value; } catch (const vk::SystemError& e) { - std::cerr << "ggml_vulkan: Compute pipeline creation failed for " << pipeline->name << std::endl; - std::cerr << "ggml_vulkan: " << e.what() << std::endl; - throw e; + GGML_LOG_WARN("ggml_vulkan: compute pipeline creation failed for %s: %s\n", + pipeline->name.c_str(), e.what()); + device->pipeline_failures.fetch_add(1, std::memory_order_relaxed); + device->device.destroyShaderModule(pipeline->shader_module); + pipeline->shader_module = VK_NULL_HANDLE; + return; + } + if (!pipeline->pipeline) { + GGML_LOG_WARN("ggml_vulkan: compute pipeline is null for %s\n", pipeline->name.c_str()); + device->pipeline_failures.fetch_add(1, std::memory_order_relaxed); + device->device.destroyShaderModule(pipeline->shader_module); + pipeline->shader_module = VK_NULL_HANDLE; + return; } pipeline->compiled = true; @@ -6495,6 +6510,9 @@ template <typename T> const T *push_constant_data(const std::array static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const&
descriptor_buffer_infos, const T &push_constants, std::array<uint32_t, 3> elements) { + if (!pipeline || !pipeline->compiled) { + return; + } const uint32_t wg0 = CEIL_DIV(elements[0], pipeline->wg_denoms[0]); const uint32_t wg1 = CEIL_DIV(elements[1], pipeline->wg_denoms[1]); const uint32_t wg2 = CEIL_DIV(elements[2], pipeline->wg_denoms[2]); @@ -15149,6 +15167,14 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; const vk_device& device = ggml_vk_get_device(ctx->device); + // If any compute pipelines failed to compile, the GPU driver is broken + // for these shaders. Return false for all ops so the backend scheduler + // routes everything to the CPU backend instead of dispatching to + // null pipelines. + if (device->pipeline_failures.load(std::memory_order_relaxed) > 0) { + return false; + } + const bool uses_bda = (op->op == GGML_OP_IM2COL || op->op == GGML_OP_IM2COL_3D) && device->shader_int64 && device->buffer_device_address; From fb934bf4a086970a0f6c0dc5533d7439bd65b36c Mon Sep 17 00:00:00 2001 From: Simao Gomes Viana Date: Wed, 25 Mar 2026 14:10:02 +0100 Subject: [PATCH 3/3] ggml-vulkan: fall back to CPU when 16-bit storage is unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, missing storageBuffer16BitAccess threw std::runtime_error("Unsupported device") which crashed the process on platforms where C++ exceptions propagate as uncaught aborts (Android). Some drivers also report the feature bit but don't enumerate VK_KHR_16bit_storage as a device extension — pushing it into device_extensions then causes vkCreateDevice to fail with ErrorExtensionNotPresent (another fatal abort on Android). Instead of throwing, set pipeline_failures so supports_op returns false for all ops and the backend scheduler routes everything to CPU.
Only push VK_KHR_16bit_storage when the extension is actually enumerated. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 8fa9d1da943..5689e73553f 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5253,12 +5253,18 @@ static vk_device ggml_vk_get_device(size_t idx) { #endif } - if (!vk11_features.storageBuffer16BitAccess) { - std::cerr << "ggml_vulkan: device " << GGML_VK_NAME << idx << " does not support 16-bit storage." << std::endl; - throw std::runtime_error("Unsupported device"); + if (!vk11_features.storageBuffer16BitAccess || !fp16_storage) { + GGML_LOG_WARN("ggml_vulkan: device %s%zu does not support 16-bit storage " + "(feature=%d, extension=%d), falling back to CPU\n", + GGML_VK_NAME, idx, + (int)vk11_features.storageBuffer16BitAccess, + (int)fp16_storage); + device->pipeline_failures.store(1, std::memory_order_relaxed); } - device_extensions.push_back("VK_KHR_16bit_storage"); + if (fp16_storage) { + device_extensions.push_back("VK_KHR_16bit_storage"); + } #ifdef GGML_VULKAN_VALIDATE device_extensions.push_back("VK_KHR_shader_non_semantic_info");