From 6295c529479c1cd80e4551d7244bbdad4a27316e Mon Sep 17 00:00:00 2001
From: rasbid <104773487+rasbid@users.noreply.github.com>
Date: Mon, 13 Oct 2025 20:01:12 +0300
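Subject: [PATCH] Tighten Vulkan submit heuristics for AMD GCN

Use smaller submit thresholds in ggml_backend_vk_graph_compute when the
device architecture is vk_device_architecture::AMD_GCN:

- lower nodes_per_submit from 100 to 40;
- cap mul_mat_bytes_per_submit at 48 MB instead of 100 MB, computed as
  total_mat_mul_bytes / 64 with a 12 MB floor;
- grow the byte threshold by 3/2 after each of the first five submits
  instead of doubling it after each of the first three.

All other architectures keep the previous values (100 nodes, 100 MB cap,
total_mat_mul_bytes / 40, doubling for the first three submits). When
total_mat_mul_bytes is zero the byte threshold stays at zero on every
architecture, as before.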

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index ebbb412e55f..b4200995677 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -12019,11 +12019,25 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
     // Estimate the amount of matmul work by looking at the weight matrix size, and submit every 100MB
     // (and scaled down based on model size, so smaller models submit earlier).
     // Also submit at least every 100 nodes, in case there are workloads without as much matmul.
-    int nodes_per_submit = 100;
+    const bool is_amd_gcn = ctx->device->architecture == vk_device_architecture::AMD_GCN;
+    int nodes_per_submit = is_amd_gcn ? 40 : 100;
     int submitted_nodes = 0;
     int submit_count = 0;
     uint64_t mul_mat_bytes = 0;
-    uint64_t mul_mat_bytes_per_submit = std::min(uint64_t(100*1000*1000), total_mat_mul_bytes / 40u);
+    uint64_t mul_mat_bytes_per_submit = 0;
+    const uint64_t mul_mat_bytes_cap = is_amd_gcn ? uint64_t(48) * 1000 * 1000 : uint64_t(100) * 1000 * 1000;
+    if (total_mat_mul_bytes == 0) {
+        mul_mat_bytes_per_submit = 0;
+    } else if (is_amd_gcn) {
+        const uint64_t scaled_bytes = total_mat_mul_bytes / 64u;
+        const uint64_t minimum_bytes = uint64_t(12) * 1000 * 1000;
+        mul_mat_bytes_per_submit = std::min(mul_mat_bytes_cap, std::max(minimum_bytes, scaled_bytes));
+    } else {
+        mul_mat_bytes_per_submit = std::min(mul_mat_bytes_cap, total_mat_mul_bytes / 40u);
+    }
+    const int submit_growth_limit = is_amd_gcn ? 5 : 3;
+    const uint32_t submit_growth_num = is_amd_gcn ? 3 : 2;
+    const uint32_t submit_growth_den = is_amd_gcn ? 2 : 1;
     for (int i = 0; i < cgraph->n_nodes; i++) {
         if (first_node_in_batch) {
             submit_node_idx = i;
@@ -12079,8 +12093,8 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
             first_node_in_batch = true;
             submitted_nodes = 0;
             mul_mat_bytes = 0;
-            if (submit_count < 3) {
-                mul_mat_bytes_per_submit *= 2;
+            if (submit_count < submit_growth_limit) {
+                mul_mat_bytes_per_submit = mul_mat_bytes_per_submit * submit_growth_num / submit_growth_den;
             }
             submit_count++;
         }