
Commit 8ba4d39

fix: Increase max nodes for models known to use mamba2

Branch: Mamba2SSD
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent: eb4e399

File tree: 1 file changed (+7 −0 lines)


src/llama-context.cpp

Lines changed: 7 additions & 0 deletions
@@ -1389,6 +1389,13 @@ void llama_context::output_reorder() {
 uint32_t llama_context::graph_max_nodes() const {
     if (model.arch == LLM_ARCH_QWEN3NEXT) {
         return std::max<uint32_t>(8192u, 32u*model.n_tensors());
+    } else if (
+        model.arch == LLM_ARCH_GRANITE_HYBRID ||
+        model.arch == LLM_ARCH_MAMBA2 ||
+        model.arch == LLM_ARCH_FALCON_H1 ||
+        model.arch == LLM_ARCH_NEMOTRON_H
+    ) {
+        return std::max<uint32_t>(1024u, 16u*model.n_tensors());
     }
     return std::max<uint32_t>(1024u, 8u*model.n_tensors());
 }
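
The change only adjusts the multiplier used to size the compute-graph node budget for the listed architectures. As a rough illustration of the arithmetic, the self-contained sketch below mirrors the branching in graph_max_nodes() with a hypothetical architecture enum and tensor count (the enum, the function name graph_max_nodes_sketch, and the n_tensors value are illustrative assumptions, not llama.cpp's API):

// Standalone sketch of the node-budget arithmetic; names and numbers are illustrative.
#include <algorithm>
#include <cstdint>
#include <cstdio>

enum class Arch { QWEN3NEXT, GRANITE_HYBRID, MAMBA2, FALCON_H1, NEMOTRON_H, OTHER };

static uint32_t graph_max_nodes_sketch(Arch arch, uint32_t n_tensors) {
    if (arch == Arch::QWEN3NEXT) {
        return std::max<uint32_t>(8192u, 32u * n_tensors);  // largest multiplier
    } else if (arch == Arch::GRANITE_HYBRID || arch == Arch::MAMBA2 ||
               arch == Arch::FALCON_H1     || arch == Arch::NEMOTRON_H) {
        return std::max<uint32_t>(1024u, 16u * n_tensors);  // branch added by this commit
    }
    return std::max<uint32_t>(1024u, 8u * n_tensors);       // default budget
}

int main() {
    const uint32_t n_tensors = 500;  // hypothetical tensor count, purely illustrative
    std::printf("mamba2-class budget: %u nodes\n", graph_max_nodes_sketch(Arch::MAMBA2, n_tensors)); // 8000
    std::printf("default budget:      %u nodes\n", graph_max_nodes_sketch(Arch::OTHER,  n_tensors)); // 4000
    return 0;
}

With these illustrative numbers, the mamba2-class branch allows 16 × 500 = 8000 nodes versus the default 8 × 500 = 4000, i.e. twice the default ceiling, which is what the commit title describes.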
