diff --git a/packages/core/src/models.ts b/packages/core/src/models.ts index 4ee17b8e25eb..0c12de4d1af0 100644 --- a/packages/core/src/models.ts +++ b/packages/core/src/models.ts @@ -57,6 +57,25 @@ export const Model = Schema.Struct({ }), ]), ), + // Whether the model's chat template accepts an assistant turn as the LAST + // message (a.k.a. "prefill" / "response continuation"). + // + // Default (undefined) is treated as `true` for backwards compatibility. + // + // Set to `false` for thinking-on-by-default models whose chat template + // rejects trailing-assistant when thinking is enabled. Affected families + // (non-exhaustive, 2025-2026): + // - Qwen3 hybrid (all sizes), Qwen3-Thinking-2507, Qwen3-VL, + // Qwen3.5, Qwen3.6, QwQ-32B -> llama.cpp "Assistant response prefill + // is incompatible with enable_thinking" (ggml-org/llama.cpp#20861, + // #21889; mastra-ai/mastra#15234) + // - DeepSeek-R1 / R1-0528 / V4 (vllm-project/vllm#12999) + // - GLM-4.6 / 4.7 thinking (ggml-org/llama.cpp#15401) + // - Kimi-K2-Thinking, MiniMax-M2 + // + // Qwen3-Coder, Qwen3-Instruct-2507, Qwen2.5 keep `true` — their templates + // do not branch on `enable_thinking`, so prefill is safe. + prefill: Schema.optional(Schema.Boolean), cost: Schema.optional(Cost), limit: Schema.Struct({ context: Schema.Finite, diff --git a/packages/opencode/src/config/provider.ts b/packages/opencode/src/config/provider.ts index 5635512cedf9..7f742b3b5609 100644 --- a/packages/opencode/src/config/provider.ts +++ b/packages/opencode/src/config/provider.ts @@ -19,6 +19,15 @@ export const Model = Schema.Struct({ }), ]), ), + prefill: Schema.optional(Schema.Boolean).annotate({ description: "Whether the model accepts an assistant turn as the last message. " + "Set false for thinking-on-by-default templates whose chat template " + "rejects trailing-assistant (Qwen3 hybrid/3.5/3.6, QwQ, DeepSeek-R1, " + "GLM-4.6/4.7 thinking, Kimi-K2-Thinking, MiniMax-M2). Defaults to " + "true for non-openai-compatible providers, false for openai-compatible " + "with reasoning enabled.", }), cost: Schema.optional( Schema.Struct({ input: Schema.Finite,