Skip to content

Commit 740a447

Browse files
authored
vulkan: allow graphics queue only through env var (ggml-org#20599)
* vulkan: avoid graphics queue on non-RADV AMD drivers
* avoid graphics queues on small GPUs
* change to only use graphics queue if overridden with env var GGML_VK_ALLOW_GRAPHICS_QUEUE
* re-enable transfer queue if graphics queue is not used
1 parent b6c83aa commit 740a447

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4981,8 +4981,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
49814981
std::vector<vk::QueueFamilyProperties> queue_family_props = device->physical_device.getQueueFamilyProperties();
49824982

49834983
// Try to find a non-graphics compute queue and transfer-focused queues
4984-
// On AMD, the graphics queue seems to be faster, so don't avoid it
4985-
const vk::QueueFlagBits graphics_flag = device->vendor_id == VK_VENDOR_ID_AMD ? (vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics;
4984+
// The graphics queue is avoided by default; setting GGML_VK_ALLOW_GRAPHICS_QUEUE overrides this, because using it can increase performance on RADV
4985+
const bool allow_graphics_queue = (getenv("GGML_VK_ALLOW_GRAPHICS_QUEUE") != nullptr);
4986+
const vk::QueueFlagBits graphics_flag = allow_graphics_queue ? (vk::QueueFlagBits)0 : vk::QueueFlagBits::eGraphics;
49864987
const uint32_t compute_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eCompute, graphics_flag, -1, 1);
49874988
const uint32_t transfer_queue_family_index = ggml_vk_find_queue_family_index(queue_family_props, vk::QueueFlagBits::eTransfer, vk::QueueFlagBits::eCompute | graphics_flag, compute_queue_family_index, 1);
49884989

@@ -5443,11 +5444,14 @@ static vk_device ggml_vk_get_device(size_t idx) {
54435444

54445445
ggml_vk_load_shaders(device);
54455446

5447+
// Use the transfer queue by default only on AMD non-GCN, and only when the graphics queue is not allowed (GGML_VK_ASYNC_USE_TRANSFER_QUEUE can still enable it)
5448+
const bool prefers_transfer_queue = device->vendor_id == VK_VENDOR_ID_AMD && device->architecture != AMD_GCN && !allow_graphics_queue;
5449+
54465450
if (!device->single_queue) {
54475451
const uint32_t transfer_queue_index = compute_queue_family_index == transfer_queue_family_index ? 1 : 0;
54485452
ggml_vk_create_queue(device, device->transfer_queue, transfer_queue_family_index, transfer_queue_index, { vk::PipelineStageFlagBits::eTransfer }, true);
54495453

5450-
device->async_use_transfer_queue = (getenv("GGML_VK_ASYNC_USE_TRANSFER_QUEUE") != nullptr);
5454+
device->async_use_transfer_queue = prefers_transfer_queue || (getenv("GGML_VK_ASYNC_USE_TRANSFER_QUEUE") != nullptr);
54515455
} else {
54525456
// TODO: Use pointer or reference to avoid copy
54535457
device->transfer_queue.copyFrom(device->compute_queue);

0 commit comments

Comments
 (0)