rasbid · rasbid · Oct 13, 2025
diff --git a/docs/build.md b/docs/build.md
@@ -413,6 +413,10 @@ Finally, after finishing your build, you should be able to do something like thi
 # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
 ```
 
+#### AMD GCN discrete GPUs
+
+On older discrete AMD GCN cards such as the RX 580, the Vulkan backend now prefers allocations from pure device-local memory by default, avoiding host-visible BAR usage that can exhaust limited VRAM. To restore the previous behavior (for example, to trade performance for a larger working set), set the environment variable `GGML_VK_GCN_HOST_VISIBLE` before launching the executable; only the presence of the variable is checked, so any value (e.g. `GGML_VK_GCN_HOST_VISIBLE=1`) enables the override. When this override is not set and the allocator has to fall back to host-visible memory, a warning is printed to help with troubleshooting.
+
 ## CANN
 This provides NPU acceleration using the AI cores of your Ascend NPU. And [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical APIs to help you to quickly build AI applications and service based on Ascend NPU.
 

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -597,6 +597,7 @@ struct vk_device_struct {
     bool disable_host_visible_vidmem;
     bool allow_sysmem_fallback;
     bool disable_graph_optimize;
+    bool gcn_host_visible_override;
 
 #ifdef GGML_VULKAN_MEMORY_DEBUG
     std::unique_ptr<vk_memory_logger> memory_logger;
@@ -2025,6 +2026,8 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std
 
     vk::PhysicalDeviceMemoryProperties mem_props = device->physical_device.getMemoryProperties();
 
+    const bool initial_request_host_visible = (*req_flags_list.begin() & vk::MemoryPropertyFlagBits::eHostVisible) == vk::MemoryPropertyFlagBits::eHostVisible;
+
     for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
         const auto & req_flags = *it;
 
@@ -2037,6 +2040,12 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std
 
         try {
             buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
+            if (!initial_request_host_visible &&
+                (req_flags & vk::MemoryPropertyFlagBits::eHostVisible) == vk::MemoryPropertyFlagBits::eHostVisible &&
+                it != req_flags_list.begin()) {
+                std::cerr << "ggml_vulkan: Falling back to host-visible memory for allocation of size "
+                          << mem_req.size << " on " << device->name << std::endl;
+            }
             break;
         } catch (const vk::SystemError& e) {
             // loop and retry
@@ -2083,6 +2092,8 @@ static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk:
 
 static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
     vk_buffer buf;
+    const bool prefer_device_local_only =
+        device->architecture == vk_device_architecture::AMD_GCN && !device->uma && !device->gcn_host_visible_override;
     try {
         if (device->prefer_host_memory) {
             buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
@@ -2099,14 +2110,23 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
                 buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
             }
         } else {
-            // use rebar if available, otherwise fallback to device only visible memory
-            if (device->allow_sysmem_fallback) {
-                buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
-                                                           vk::MemoryPropertyFlagBits::eDeviceLocal,
-                                                           vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
+            if (prefer_device_local_only) {
+                if (device->allow_sysmem_fallback) {
+                    buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
+                                                               vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
+                } else {
+                    buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
+                }
             } else {
-                buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
-                                                           vk::MemoryPropertyFlagBits::eDeviceLocal});
+                // use rebar if available, otherwise fallback to device only visible memory
+                if (device->allow_sysmem_fallback) {
+                    buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+                                                               vk::MemoryPropertyFlagBits::eDeviceLocal,
+                                                               vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
+                } else {
+                    buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+                                                               vk::MemoryPropertyFlagBits::eDeviceLocal});
+                }
             }
         }
     } catch (const vk::SystemError& e) {
@@ -3693,6 +3713,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
         const char* GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM = getenv("GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM");
         device->disable_host_visible_vidmem = GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM != nullptr;
 
+        const char* GGML_VK_GCN_HOST_VISIBLE = getenv("GGML_VK_GCN_HOST_VISIBLE");
+        device->gcn_host_visible_override = GGML_VK_GCN_HOST_VISIBLE != nullptr;
+
         const char* GGML_VK_ALLOW_SYSMEM_FALLBACK = getenv("GGML_VK_ALLOW_SYSMEM_FALLBACK");
         device->allow_sysmem_fallback = GGML_VK_ALLOW_SYSMEM_FALLBACK != nullptr;