
Commit 8ba4d39

fix: Increase max nodes for models known to use mamba2

Branch: Mamba2SSD
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent: eb4e399

File tree: 1 file changed (+7 −0 lines)


src/llama-context.cpp

Lines changed: 7 additions & 0 deletions
@@ -1389,6 +1389,13 @@ void llama_context::output_reorder() {
 uint32_t llama_context::graph_max_nodes() const {
     if (model.arch == LLM_ARCH_QWEN3NEXT) {
         return std::max<uint32_t>(8192u, 32u*model.n_tensors());
+    } else if (
+        model.arch == LLM_ARCH_GRANITE_HYBRID ||
+        model.arch == LLM_ARCH_MAMBA2 ||
+        model.arch == LLM_ARCH_FALCON_H1 ||
+        model.arch == LLM_ARCH_NEMOTRON_H
+    ) {
+        return std::max<uint32_t>(1024u, 16u*model.n_tensors());
     }
     return std::max<uint32_t>(1024u, 8u*model.n_tensors());
 }
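
The change only adjusts the multiplier used to size the compute-graph node budget for the listed architectures. As a rough illustration of the arithmetic, the self-contained sketch below mirrors the branching in graph_max_nodes() with a hypothetical architecture enum and tensor count (the enum, the function name graph_max_nodes_sketch, and the n_tensors value are illustrative assumptions, not llama.cpp's API):

// Standalone sketch of the node-budget arithmetic; names and numbers are illustrative.
#include <algorithm>
#include <cstdint>
#include <cstdio>

enum class Arch { QWEN3NEXT, GRANITE_HYBRID, MAMBA2, FALCON_H1, NEMOTRON_H, OTHER };

static uint32_t graph_max_nodes_sketch(Arch arch, uint32_t n_tensors) {
    if (arch == Arch::QWEN3NEXT) {
        return std::max<uint32_t>(8192u, 32u * n_tensors);  // largest multiplier
    } else if (arch == Arch::GRANITE_HYBRID || arch == Arch::MAMBA2 ||
               arch == Arch::FALCON_H1     || arch == Arch::NEMOTRON_H) {
        return std::max<uint32_t>(1024u, 16u * n_tensors);  // branch added by this commit
    }
    return std::max<uint32_t>(1024u, 8u * n_tensors);       // default budget
}

int main() {
    const uint32_t n_tensors = 500;  // hypothetical tensor count, purely illustrative
    std::printf("mamba2-class budget: %u nodes\n", graph_max_nodes_sketch(Arch::MAMBA2, n_tensors)); // 8000
    std::printf("default budget:      %u nodes\n", graph_max_nodes_sketch(Arch::OTHER,  n_tensors)); // 4000
    return 0;
}

With these illustrative numbers, the mamba2-class branch allows 16 × 500 = 8000 nodes versus the default 8 × 500 = 4000, i.e. twice the default ceiling, which is what the commit title describes.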
