Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,12 @@ Finally, after finishing your build, you should be able to do something like thi
# ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
```

> [!NOTE]
> On AMD GCN GPUs (for example, the RX 580), the Vulkan backend now prefers allocating buffers from pure device-local VRAM.
> When VRAM is exhausted and an allocation falls back to host-visible memory, a log message reports the fallback. If you need the
> previous behaviour, where host-visible memory is allowed as a fallback by default, set `GGML_VK_ALLOW_SYSMEM_FALLBACK=1` before
> launching the application.

## CANN
This provides NPU acceleration using the AI cores of your Ascend NPU. [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical set of APIs that helps you quickly build AI applications and services based on Ascend NPUs.

Expand Down
30 changes: 30 additions & 0 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2025,20 +2025,33 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std

vk::PhysicalDeviceMemoryProperties mem_props = device->physical_device.getMemoryProperties();

bool attempted_non_host_visible = false;
bool fallback_to_host_visible = false;

for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
const auto & req_flags = *it;
const bool current_host_visible = static_cast<bool>(req_flags & vk::MemoryPropertyFlagBits::eHostVisible);

uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags);

if (memory_type_index == UINT32_MAX) {
if (!current_host_visible) {
attempted_non_host_visible = true;
}
continue;
}
buf->memory_property_flags = req_flags;

try {
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
if (current_host_visible && attempted_non_host_visible) {
fallback_to_host_visible = true;
}
break;
} catch (const vk::SystemError& e) {
if (!current_host_visible) {
attempted_non_host_visible = true;
}
// loop and retry
// during last attempt throw the exception
if (it + 1 == req_flags_list.end()) {
Expand All @@ -2053,6 +2066,15 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std
throw vk::OutOfDeviceMemoryError("No suitable memory type found");
}

if (fallback_to_host_visible) {
std::cerr << "ggml_vulkan: Falling back to host-visible memory for allocation of size "
<< size << " on " << device->name;
if (device->properties.deviceName[0] != '\0') {
std::cerr << " (" << device->properties.deviceName << ")";
}
std::cerr << "." << std::endl;
}

buf->ptr = nullptr;

if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
Expand Down Expand Up @@ -2098,6 +2120,14 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
} else {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
}
} else if (device->architecture == vk_device_architecture::AMD_GCN && !device->uma) {
if (device->allow_sysmem_fallback) {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
} else {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
}
} else {
// use rebar if available, otherwise fallback to device only visible memory
if (device->allow_sysmem_fallback) {
Expand Down