
Commit 2446419

models : fix graph splits (ggml-org#19866)
1 parent 47eb12b

4 files changed: 8 additions & 3 deletions
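The fix is identical in all four hybrid-model builders: right after the attention norm, the normalized tensor is expanded into the forward graph before the code branches on the layer type (recurrent vs. full attention). As a hedged illustration of the ggml call involved, here is a minimal, self-contained sketch using only core ggml APIs; the tensor names and the scheduling rationale in the comments are assumptions inferred from the commit title, not verbatim llama.cpp code.

#include "ggml.h"

int main(void) {
    // Small CPU-only context; the sizes are arbitrary for this illustration.
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Stand-ins for the layer input and the RMS-norm weight (hypothetical names).
    struct ggml_tensor * inpL = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * w    = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    // Rough equivalent of the builders' attn_norm output.
    struct ggml_tensor * cur = ggml_mul(ctx, ggml_rms_norm(ctx, inpL, 1e-6f), w);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);

    // The commit's pattern: expand `cur` (and everything it depends on) into
    // the graph here, before any layer-type-specific ops are added.
    // Presumably this anchors the shared prefix of each layer so the backend
    // scheduler produces consistent graph splits, per the commit title.
    ggml_build_forward_expand(gf, cur);

    ggml_free(ctx);
    return 0;
}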


src/models/kimi-linear.cpp

Lines changed: 2 additions & 0 deletions
@@ -116,6 +116,8 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll
         cur = build_norm(inpL, layer.attn_norm, NULL, LLM_NORM_RMS, il);
         cb(cur, "attn_norm", il);
 
+        ggml_build_forward_expand(gf, cur);
+
         // Check layer type by checking which tensors exist
         // KDA layers have ssm_a_log tensor, MLA layers have wkv_a_mqa tensor
         bool is_kda = (layer.ssm_a != nullptr);

src/models/qwen35.cpp

Lines changed: 2 additions & 1 deletion
@@ -29,6 +29,8 @@ llm_build_qwen35::llm_build_qwen35(const llama_model & model, const llm_graph_pa
         cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);
         cb(cur, "attn_norm", il);
 
+        ggml_build_forward_expand(gf, cur);
+
         // Determine layer type and build appropriate attention mechanism
         if (hparams.is_recurrent(il)) {
             // Linear attention layer (gated delta net)
@@ -269,7 +271,6 @@ ggml_tensor * llm_build_qwen35::build_layer_attn_linear(
     cb(state_update_target, "state_update_target", il);
 
     ggml_build_forward_expand(gf, ggml_cpy(ctx0, last_conv_states, state_update_target));
-    cb(conv_states_all, "conv_states_updated", il);
 
     ggml_tensor * state = build_rs(inp, ssm_states_all, hparams.n_embd_s(), n_seqs);
     state = ggml_reshape_4d(ctx0, state, head_v_dim, head_v_dim, num_v_heads, n_seqs);

src/models/qwen35moe.cpp

Lines changed: 2 additions & 1 deletion
@@ -29,6 +29,8 @@ llm_build_qwen35moe::llm_build_qwen35moe(const llama_model & model, const llm_gr
         cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);
         cb(cur, "attn_norm", il);
 
+        ggml_build_forward_expand(gf, cur);
+
         // Determine layer type and build appropriate attention mechanism
         if (hparams.is_recurrent(il)) {
             // Linear attention layer (gated delta net)
@@ -269,7 +271,6 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn_linear(
     cb(state_update_target, "state_update_target", il);
 
     ggml_build_forward_expand(gf, ggml_cpy(ctx0, last_conv_states, state_update_target));
-    cb(conv_states_all, "conv_states_updated", il);
 
    ggml_tensor * state = build_rs(inp, ssm_states_all, hparams.n_embd_s(), n_seqs);
    state = ggml_reshape_4d(ctx0, state, head_v_dim, head_v_dim, num_v_heads, n_seqs);

src/models/qwen3next.cpp

Lines changed: 2 additions & 1 deletion
@@ -21,6 +21,8 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr
         cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);
         cb(cur, "attn_norm", il);
 
+        ggml_build_forward_expand(gf, cur);
+
         // Determine layer type and build appropriate attention mechanism
         if (hparams.is_recurrent(il)) {
             // Linear attention layer (gated delta net)
@@ -354,7 +356,6 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn_linear(
     cb(state_update_target, "state_update_target", il);
 
     ggml_build_forward_expand(gf, ggml_cpy(ctx0, last_conv_states, state_update_target));
-    cb(conv_states_all, "conv_states_updated", il);
 
     ggml_tensor * state = build_rs(inp, ssm_states_all, hparams.n_embd_s(), n_seqs);
     state = ggml_reshape_4d(ctx0, state, head_v_dim, head_v_dim, num_v_heads, n_seqs);
