anomalyco · feanor5555 · May 16, 2026
diff --git a/packages/core/src/models.ts b/packages/core/src/models.ts
@@ -57,6 +57,25 @@ export const Model = Schema.Struct({
       }),
     ]),
   ),
+  // Whether the model's chat template accepts an assistant turn as the LAST
+  // message (a.k.a. "prefill" / "response continuation").
+  //
+  // Default (undefined): `true` for backwards compatibility, but `false` for openai-compatible providers with reasoning enabled.
+  //
+  // Set to `false` for thinking-on-by-default models whose chat template
+  // rejects trailing-assistant when thinking is enabled. Affected families
+  // (non-exhaustive, 2025-2026):
+  //   - Qwen3 hybrid (all sizes), Qwen3-Thinking-2507, Qwen3-VL,
+  //     Qwen3.5, Qwen3.6, QwQ-32B  ->  llama.cpp "Assistant response prefill
+  //     is incompatible with enable_thinking" (ggml-org/llama.cpp#20861,
+  //     #21889; mastra-ai/mastra#15234)
+  //   - DeepSeek-R1 / R1-0528 / V4  (vllm-project/vllm#12999)
+  //   - GLM-4.6 / 4.7 thinking      (ggml-org/llama.cpp#15401)
+  //   - Kimi-K2-Thinking, MiniMax-M2
+  //
+  // Qwen3-Coder, Qwen3-Instruct-2507, Qwen2.5 keep `true` — their templates
+  // do not branch on `enable_thinking`, so prefill is safe.
+  prefill: Schema.optional(Schema.Boolean),
   cost: Schema.optional(Cost),
   limit: Schema.Struct({
     context: Schema.Finite,

diff --git a/packages/opencode/src/config/provider.ts b/packages/opencode/src/config/provider.ts
@@ -19,6 +19,15 @@ export const Model = Schema.Struct({
       }),
     ]),
   ),
+  prefill: Schema.optional(Schema.Boolean).annotate({
+    description:
+      "Whether the model accepts an assistant turn as the last message. " +
+      "Set false for thinking-on-by-default templates whose chat template " +
+      "rejects trailing-assistant (Qwen3 hybrid/3.5/3.6, QwQ, DeepSeek-R1, " +
+      "GLM-4.6/4.7 thinking, Kimi-K2-Thinking, MiniMax-M2). Defaults to " +
+      "true for non-openai-compatible providers, false for openai-compatible " +
+      "with reasoning enabled.",
+  }),
   cost: Schema.optional(
     Schema.Struct({
       input: Schema.Finite,