From 285644355b1e4207a2ce0c13153ed604d8d603f8 Mon Sep 17 00:00:00 2001
From: Wagner Bruna <wbruna@yahoo.com>
Date: Wed, 17 Jun 2026 20:03:46 -0300
Subject: [PATCH] fix: workaround for Anima with Vulkan and Flash Attention

---
 src/model/diffusion/anima.hpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/model/diffusion/anima.hpp b/src/model/diffusion/anima.hpp
index 6042516a9..504904d41 100644
--- a/src/model/diffusion/anima.hpp
+++ b/src/model/diffusion/anima.hpp
@@ -227,6 +227,7 @@ namespace Anima {
             k4 = k_norm->forward(ctx, k4);
 
             ggml_tensor* attn_out = nullptr;
+            float scale           = (sd_backend_is(ctx->backend, "Vulkan") && ctx->flash_attn_enabled) ? 1.0f / 32.0f : 1.0f;
             if (pe_q != nullptr || pe_k != nullptr) {
                 if (pe_q == nullptr) {
                     pe_q = pe_k;
@@ -244,7 +245,8 @@ namespace Anima {
                                                      num_heads,
                                                      nullptr,
                                                      true,
-                                                     ctx->flash_attn_enabled);
+                                                     ctx->flash_attn_enabled,
+                                                     scale);
             } else {
                 auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N);
                 auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N);
@@ -256,7 +258,8 @@ namespace Anima {
                                                      num_heads,
                                                      nullptr,
                                                      false,
-                                                     ctx->flash_attn_enabled);
+                                                     ctx->flash_attn_enabled,
+                                                     scale);
             }
 
             return out_proj->forward(ctx, attn_out);