diff --git a/docs/build.md b/docs/build.md index dcbcce7549a..fab47792029 100644 --- a/docs/build.md +++ b/docs/build.md @@ -413,6 +413,12 @@ Finally, after finishing your build, you should be able to do something like thi # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32 ``` +> [!NOTE] +> On AMD GCN GPUs (for example, the RX 580) the Vulkan backend now prefers allocating buffers from pure device-local VRAM. +> When VRAM is exhausted and host-visible fallback is enabled, a log message reports that the allocation fell back to host-visible memory; otherwise the allocation fails. If you need the +> previous behaviour where host-visible memory is allowed as a fallback by default, set `GGML_VK_ALLOW_SYSMEM_FALLBACK=1` before +> launching the application. + ## CANN This provides NPU acceleration using the AI cores of your Ascend NPU. And [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical APIs to help you to quickly build AI applications and service based on Ascend NPU. diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index ebbb412e55f..70728dd252f 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2025,20 +2025,33 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std vk::PhysicalDeviceMemoryProperties mem_props = device->physical_device.getMemoryProperties(); + bool attempted_non_host_visible = false; + bool fallback_to_host_visible = false; + for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) { const auto & req_flags = *it; + const bool current_host_visible = static_cast(req_flags & vk::MemoryPropertyFlagBits::eHostVisible); uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags); if (memory_type_index == UINT32_MAX) { + if (!current_host_visible) { + attempted_non_host_visible = true; + } continue; } buf->memory_property_flags = req_flags; try { buf->device_memory = device->device.allocateMemory({ mem_req.size, 
memory_type_index }); + if (current_host_visible && attempted_non_host_visible) { + fallback_to_host_visible = true; + } break; } catch (const vk::SystemError& e) { + if (!current_host_visible) { + attempted_non_host_visible = true; + } // loop and retry // during last attempt throw the exception if (it + 1 == req_flags_list.end()) { @@ -2053,6 +2066,15 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std throw vk::OutOfDeviceMemoryError("No suitable memory type found"); } + if (fallback_to_host_visible) { + std::cerr << "ggml_vulkan: Falling back to host-visible memory for allocation of size " + << size << " on " << device->name; + if (device->properties.deviceName[0] != '\0') { + std::cerr << " (" << device->properties.deviceName << ")"; + } + std::cerr << "." << std::endl; + } + buf->ptr = nullptr; if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { @@ -2098,6 +2120,14 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) { } else { buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal}); } + } else if (device->architecture == vk_device_architecture::AMD_GCN && !device->uma) { + if (device->allow_sysmem_fallback) { + buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal, + vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent}); + } else { + buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal}); + } } else { // use rebar if available, otherwise fallback to device only visible memory if (device->allow_sysmem_fallback) {