Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,10 @@ Finally, after finishing your build, you should be able to do something like thi
# ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
```

#### AMD GCN discrete GPUs

On older discrete AMD GCN cards such as the RX 580, the Vulkan backend now prefers allocations from pure device-local memory by default, avoiding host-visible BAR usage that can exhaust limited VRAM. To restore the previous behavior (for example, to trade performance for a larger working set), set the environment variable `GGML_VK_GCN_HOST_VISIBLE` before launching the executable, e.g. `GGML_VK_GCN_HOST_VISIBLE=1`; only the presence of the variable is checked, so any value enables the override. When the override is not set and the allocator has to fall back to host-visible memory (on this path the fallback is only attempted when `GGML_VK_ALLOW_SYSMEM_FALLBACK` is set), a warning is printed to help with troubleshooting.

## CANN
This provides NPU acceleration using the AI cores of your Ascend NPU. [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical set of APIs that helps you quickly build AI applications and services based on Ascend NPU.

Expand Down
37 changes: 30 additions & 7 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,7 @@ struct vk_device_struct {
bool disable_host_visible_vidmem;
bool allow_sysmem_fallback;
bool disable_graph_optimize;
bool gcn_host_visible_override;

#ifdef GGML_VULKAN_MEMORY_DEBUG
std::unique_ptr<vk_memory_logger> memory_logger;
Expand Down Expand Up @@ -2025,6 +2026,8 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std

vk::PhysicalDeviceMemoryProperties mem_props = device->physical_device.getMemoryProperties();

const bool initial_request_host_visible = (*req_flags_list.begin() & vk::MemoryPropertyFlagBits::eHostVisible) == vk::MemoryPropertyFlagBits::eHostVisible;

for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
const auto & req_flags = *it;

Expand All @@ -2037,6 +2040,12 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std

try {
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
if (!initial_request_host_visible &&
(req_flags & vk::MemoryPropertyFlagBits::eHostVisible) == vk::MemoryPropertyFlagBits::eHostVisible &&
it != req_flags_list.begin()) {
std::cerr << "ggml_vulkan: Falling back to host-visible memory for allocation of size "
<< mem_req.size << " on " << device->name << std::endl;
}
break;
} catch (const vk::SystemError& e) {
// loop and retry
Expand Down Expand Up @@ -2083,6 +2092,8 @@ static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk:

static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
vk_buffer buf;
const bool prefer_device_local_only =
device->architecture == vk_device_architecture::AMD_GCN && !device->uma && !device->gcn_host_visible_override;
try {
if (device->prefer_host_memory) {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
Expand All @@ -2099,14 +2110,23 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
}
} else {
// use rebar if available, otherwise fallback to device only visible memory
if (device->allow_sysmem_fallback) {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::MemoryPropertyFlagBits::eDeviceLocal,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
if (prefer_device_local_only) {
if (device->allow_sysmem_fallback) {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
} else {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
}
} else {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::MemoryPropertyFlagBits::eDeviceLocal});
// use rebar if available, otherwise fallback to device only visible memory
if (device->allow_sysmem_fallback) {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::MemoryPropertyFlagBits::eDeviceLocal,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
} else {
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::MemoryPropertyFlagBits::eDeviceLocal});
}
}
}
} catch (const vk::SystemError& e) {
Expand Down Expand Up @@ -3693,6 +3713,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
const char* GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM = getenv("GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM");
device->disable_host_visible_vidmem = GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM != nullptr;

const char* GGML_VK_GCN_HOST_VISIBLE = getenv("GGML_VK_GCN_HOST_VISIBLE");
device->gcn_host_visible_override = GGML_VK_GCN_HOST_VISIBLE != nullptr;

const char* GGML_VK_ALLOW_SYSMEM_FALLBACK = getenv("GGML_VK_ALLOW_SYSMEM_FALLBACK");
device->allow_sysmem_fallback = GGML_VK_ALLOW_SYSMEM_FALLBACK != nullptr;

Expand Down