From a80d3d31a34dc34b8fddbf585786cd51edafa7ad Mon Sep 17 00:00:00 2001 From: Simao Gomes Viana Date: Wed, 25 Mar 2026 13:57:23 +0100 Subject: [PATCH 1/3] ggml-vulkan: probe vkGetBufferDeviceAddress before enabling BDA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Vulkan drivers (observed on Adreno, Qualcomm build 923a446bf8, driver date 09/05/24) report bufferDeviceAddress support in VkPhysicalDeviceVulkan12Features but crash with SIGSEGV when vkGetBufferDeviceAddress is actually called: Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0 #01 ggml_vk_create_buffer+4084 #02 ggml_vk_create_buffer_device+148 #03 ggml_backend_vk_buffer_type_alloc_buffer+240 #07 whisper_model_load+5996 The crash occurs inside ggml_vk_create_buffer when device->device.getBufferAddress() is called — the driver-internal function pointer dereferences null. After creating the logical device, verify that the function pointer resolves via vkGetDeviceProcAddr and that a test call returns a non-zero address. If either check fails, disable buffer_device_address so all guarded code paths skip BDA. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 7092361d2ea..50efff144ca 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5379,6 +5379,63 @@ static vk_device ggml_vk_get_device(size_t idx) { // Queues ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false); + // Verify vkGetBufferDeviceAddress actually works — some drivers + // (e.g. certain Adreno builds) report bufferDeviceAddress support + // in the feature bits but crash (SIGSEGV) when the function is + // actually called. Probe it here and disable if broken. 
+ if (device->buffer_device_address) { + PFN_vkGetBufferDeviceAddress pfn = (PFN_vkGetBufferDeviceAddress) + vkGetDeviceProcAddr(device->device, "vkGetBufferDeviceAddress"); + if (pfn == nullptr) { + GGML_LOG_WARN("ggml_vulkan: vkGetBufferDeviceAddress proc addr is null, disabling BDA\n"); + device->buffer_device_address = false; + } else { + // Create a tiny test buffer and verify the call returns non-zero + VkBufferCreateInfo test_bci = {}; + test_bci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + test_bci.size = 256; + test_bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + VkBuffer test_buf = VK_NULL_HANDLE; + if (vkCreateBuffer(device->device, &test_bci, nullptr, &test_buf) == VK_SUCCESS) { + VkMemoryRequirements test_req; + vkGetBufferMemoryRequirements(device->device, test_buf, &test_req); + VkPhysicalDeviceMemoryProperties test_mem_props; + vkGetPhysicalDeviceMemoryProperties(device->physical_device, &test_mem_props); + uint32_t test_mtype = UINT32_MAX; + for (uint32_t j = 0; j < test_mem_props.memoryTypeCount; j++) { + if (test_req.memoryTypeBits & (1u << j)) { test_mtype = j; break; } + } + if (test_mtype != UINT32_MAX) { + VkMemoryAllocateFlagsInfo test_flags = {}; + test_flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; + test_flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + VkMemoryAllocateInfo test_alloc = {}; + test_alloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + test_alloc.pNext = &test_flags; + test_alloc.allocationSize = test_req.size; + test_alloc.memoryTypeIndex = test_mtype; + VkDeviceMemory test_mem = VK_NULL_HANDLE; + if (vkAllocateMemory(device->device, &test_alloc, nullptr, &test_mem) == VK_SUCCESS) { + vkBindBufferMemory(device->device, test_buf, test_mem, 0); + VkBufferDeviceAddressInfo test_addr = {}; + test_addr.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + test_addr.buffer = test_buf; + VkDeviceAddress addr = pfn(device->device, &test_addr); + if (addr == 0) 
{ + GGML_LOG_WARN("ggml_vulkan: vkGetBufferDeviceAddress returned 0, disabling BDA\n"); + device->buffer_device_address = false; + } + vkFreeMemory(device->device, test_mem, nullptr); + } else { + GGML_LOG_WARN("ggml_vulkan: BDA test alloc failed, disabling BDA\n"); + device->buffer_device_address = false; + } + } + vkDestroyBuffer(device->device, test_buf, nullptr); + } + } + } + // Shaders // Disable matmul tile sizes early if performance low or not supported for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) { From d28a49b8371a9669d1bf21eff8d10461e7958a7a Mon Sep 17 00:00:00 2001 From: Simao Gomes Viana Date: Wed, 25 Mar 2026 14:01:00 +0100 Subject: [PATCH 2/3] ggml-vulkan: graceful fallback on pipeline compilation failure Some Vulkan drivers (observed on Adreno, Qualcomm build 923a446bf8) fail to compile compute shaders at runtime, reporting "Failed to link shaders" and returning ErrorUnknown from createComputePipeline. Previously this threw a C++ exception that propagated as an uncaught abort, or the resulting null pipeline was dispatched causing SIGSEGV: AdrenoVK-0: Failed to link shaders. Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xe8 #01 ggml_vk_dispatch_pipeline+360 #02 ggml_vk_mul_mat_q_f16+6616 #03 ggml_backend_vk_graph_compute+41780 Three changes: 1. ggml_vk_create_pipeline_func: catch the exception, increment device->pipeline_failures, clean up the shader module, and return instead of rethrowing. Also handle null pipeline after creation. 2. ggml_vk_dispatch_pipeline: early-return if the pipeline is null or not compiled (safety net against dispatching broken pipelines). 3. ggml_backend_vk_device_supports_op: return false for all ops when pipeline_failures > 0, causing the backend scheduler to route everything to the CPU backend. The GPU is still used for buffer allocation but all compute runs on CPU. 
--- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 32 +++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 50efff144ca..8fa9d1da943 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -625,6 +625,11 @@ struct vk_device_struct { bool shader_64b_indexing; + // Number of compute pipelines that failed to compile. + // When > 0, supports_op returns false for all ops so the backend + // scheduler routes everything to the CPU backend. + std::atomic<uint32_t> pipeline_failures {}; + bool integer_dot_product; // 0: default, 1: force mmvq, -1: disable mmvq int32_t mmvq_mode; @@ -2192,9 +2197,19 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin try { pipeline->pipeline = device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value; } catch (const vk::SystemError& e) { - std::cerr << "ggml_vulkan: Compute pipeline creation failed for " << pipeline->name << std::endl; - std::cerr << "ggml_vulkan: " << e.what() << std::endl; - throw e; + GGML_LOG_WARN("ggml_vulkan: compute pipeline creation failed for %s: %s\n", + pipeline->name.c_str(), e.what()); + device->pipeline_failures.fetch_add(1, std::memory_order_relaxed); + device->device.destroyShaderModule(pipeline->shader_module); + pipeline->shader_module = VK_NULL_HANDLE; + return; + } + if (!pipeline->pipeline) { + GGML_LOG_WARN("ggml_vulkan: compute pipeline is null for %s\n", pipeline->name.c_str()); + device->pipeline_failures.fetch_add(1, std::memory_order_relaxed); + device->device.destroyShaderModule(pipeline->shader_module); + pipeline->shader_module = VK_NULL_HANDLE; + return; } pipeline->compiled = true; @@ -6495,6 +6510,9 @@ template <typename T> const T *push_constant_data(const std::array static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const&
descriptor_buffer_infos, const T &push_constants, std::array<uint32_t, 3> elements) { + if (!pipeline || !pipeline->compiled) { + return; + } const uint32_t wg0 = CEIL_DIV(elements[0], pipeline->wg_denoms[0]); const uint32_t wg1 = CEIL_DIV(elements[1], pipeline->wg_denoms[1]); const uint32_t wg2 = CEIL_DIV(elements[2], pipeline->wg_denoms[2]); @@ -15149,6 +15167,14 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; const vk_device& device = ggml_vk_get_device(ctx->device); + // If any compute pipelines failed to compile, the GPU driver is broken + // for these shaders. Return false for all ops so the backend scheduler + // routes everything to the CPU backend instead of dispatching to + // null pipelines. + if (device->pipeline_failures.load(std::memory_order_relaxed) > 0) { + return false; + } + const bool uses_bda = (op->op == GGML_OP_IM2COL || op->op == GGML_OP_IM2COL_3D) && device->shader_int64 && device->buffer_device_address; From fb934bf4a086970a0f6c0dc5533d7439bd65b36c Mon Sep 17 00:00:00 2001 From: Simao Gomes Viana Date: Wed, 25 Mar 2026 14:10:02 +0100 Subject: [PATCH 3/3] ggml-vulkan: fall back to CPU when 16-bit storage is unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, missing storageBuffer16BitAccess threw std::runtime_error("Unsupported device") which crashed the process on platforms where C++ exceptions propagate as uncaught aborts (Android). Some drivers also report the feature bit but don't enumerate VK_KHR_16bit_storage as a device extension — pushing it into device_extensions then causes vkCreateDevice to fail with ErrorExtensionNotPresent (another fatal abort on Android). Instead of throwing, set pipeline_failures so supports_op returns false for all ops and the backend scheduler routes everything to CPU.
Only push VK_KHR_16bit_storage when the extension is actually enumerated. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 8fa9d1da943..5689e73553f 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5253,12 +5253,18 @@ static vk_device ggml_vk_get_device(size_t idx) { #endif } - if (!vk11_features.storageBuffer16BitAccess) { - std::cerr << "ggml_vulkan: device " << GGML_VK_NAME << idx << " does not support 16-bit storage." << std::endl; - throw std::runtime_error("Unsupported device"); + if (!vk11_features.storageBuffer16BitAccess || !fp16_storage) { + GGML_LOG_WARN("ggml_vulkan: device %s%zu does not support 16-bit storage " + "(feature=%d, extension=%d), falling back to CPU\n", + GGML_VK_NAME, idx, + (int)vk11_features.storageBuffer16BitAccess, + (int)fp16_storage); + device->pipeline_failures.store(1, std::memory_order_relaxed); } - device_extensions.push_back("VK_KHR_16bit_storage"); + if (fp16_storage) { + device_extensions.push_back("VK_KHR_16bit_storage"); + } #ifdef GGML_VULKAN_VALIDATE device_extensions.push_back("VK_KHR_shader_non_semantic_info");