rasbid · rasbid · Oct 13, 2025
diff --git a/docs/build.md b/docs/build.md
@@ -413,6 +413,12 @@ Finally, after finishing your build, you should be able to do something like thi
 # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
 ```
 
+> [!NOTE]
+> On AMD GCN GPUs (for example, the RX 580) the Vulkan backend now prefers allocating buffers from pure device-local VRAM.
+> Host-visible memory is no longer used as a fallback by default. If you need the previous behaviour, where host-visible
+> memory is allowed as a fallback, set `GGML_VK_ALLOW_SYSMEM_FALLBACK=1` before launching the application. With the fallback
+> enabled, a log message is printed whenever a device-local allocation fails and the buffer is placed in host-visible memory.
+
 ## CANN
 This provides NPU acceleration using the AI cores of your Ascend NPU. And [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical APIs to help you to quickly build AI applications and service based on Ascend NPU.
 

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -2025,20 +2025,33 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std
 
     vk::PhysicalDeviceMemoryProperties mem_props = device->physical_device.getMemoryProperties();
 
+    bool attempted_non_host_visible = false;
+    bool fallback_to_host_visible = false;
+
     for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
         const auto & req_flags = *it;
+        const bool current_host_visible = static_cast<bool>(req_flags & vk::MemoryPropertyFlagBits::eHostVisible);
 
         uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
 
         if (memory_type_index == UINT32_MAX) {
+            if (!current_host_visible) {
+                attempted_non_host_visible = true;
+            }
             continue;
         }
         buf->memory_property_flags = req_flags;
 
         try {
             buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
+            if (current_host_visible && attempted_non_host_visible) {
+                fallback_to_host_visible = true;
+            }
             break;
         } catch (const vk::SystemError& e) {
+            if (!current_host_visible) {
+                attempted_non_host_visible = true;
+            }
             // loop and retry
             // during last attempt throw the exception
             if (it + 1 == req_flags_list.end()) {
@@ -2053,6 +2066,15 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std
         throw vk::OutOfDeviceMemoryError("No suitable memory type found");
     }
 
+    if (fallback_to_host_visible) {
+        std::cerr << "ggml_vulkan: Falling back to host-visible memory for allocation of size "
+                  << size << " on " << device->name;
+        if (device->properties.deviceName[0] != '\0') {
+            std::cerr << " (" << device->properties.deviceName << ")";
+        }
+        std::cerr << "." << std::endl;
+    }
+
     buf->ptr = nullptr;
 
     if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
@@ -2098,6 +2120,14 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
             } else {
                 buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
             }
+        } else if (device->architecture == vk_device_architecture::AMD_GCN && !device->uma) {
+            if (device->allow_sysmem_fallback) {
+                buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
+                                                           vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+                                                           vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
+            } else {
+                buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
+            }
         } else {
             // use rebar if available, otherwise fallback to device only visible memory
             if (device->allow_sysmem_fallback) {