From 30fc379d71c49b9bf6307934c2c5c206da407e83 Mon Sep 17 00:00:00 2001 From: Tai An Date: Wed, 1 Jul 2026 00:08:10 -0700 Subject: [PATCH] fix(reasoning): don't persist request-scoped reasoning_effort into model config When a model sets `reasoning_effort: none` (or any default) in its YAML without an explicit `reasoning.disable`, ApplyReasoningEffort resolves that default at request time and sets ReasoningConfig.DisableReasoning on the request-scoped config copy. The post-load thinking/marker probe then wrote that request-scoped value back into the loader's persistent config via UpdateModelConfig, making it look as though the operator had explicitly set reasoning.disable=true. From then on, per-request `reasoning_effort` overrides were silently ignored (an explicit operator disable wins over a request asking to think). DetectThinkingSupportFromBackend only fills reasoning slots that are still nil, so a slot already set here came from ApplyReasoningEffort, not the probe. Snapshot which slots were nil before the probe and only persist those, so the probe's genuine backend detection is still saved while request-time reasoning effort never leaks into the persistent config. Fixes #10622 Signed-off-by: Tai An --- core/backend/llm.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/core/backend/llm.go b/core/backend/llm.go index 053e984e8a77..0d46c3889cdc 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -110,11 +110,25 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima needsMarkerProbe := c.MediaMarker == "" if shouldProbeThinking || needsMarkerProbe { modelOpts := grpcModelOpts(*c, o.SystemState.Model.ModelsPath) + // DetectThinkingSupportFromBackend only fills reasoning slots that are + // still nil, so a slot that already carries a value here was populated by + // request-time ApplyReasoningEffort (e.g. a `reasoning_effort: none` + // default), not by backend detection. Persisting such a request-scoped + // value would masquerade as an operator's explicit reasoning.disable and + // permanently defeat future per-request reasoning_effort overrides + // (see #10622). Only persist the slots the probe is actually allowed to + // fill. + persistDisableReasoning := c.ReasoningConfig.DisableReasoning == nil + persistDisableTagPrefill := c.ReasoningConfig.DisableReasoningTagPrefill == nil config.DetectThinkingSupportFromBackend(ctx, c, inferenceModel, modelOpts) // Update the config in the loader so it persists for future requests cl.UpdateModelConfig(c.Name, func(cfg *config.ModelConfig) { - cfg.ReasoningConfig.DisableReasoning = c.ReasoningConfig.DisableReasoning - cfg.ReasoningConfig.DisableReasoningTagPrefill = c.ReasoningConfig.DisableReasoningTagPrefill + if persistDisableReasoning { + cfg.ReasoningConfig.DisableReasoning = c.ReasoningConfig.DisableReasoning + } + if persistDisableTagPrefill { + cfg.ReasoningConfig.DisableReasoningTagPrefill = c.ReasoningConfig.DisableReasoningTagPrefill + } if c.MediaMarker != "" { cfg.MediaMarker = c.MediaMarker }