From 30fc379d71c49b9bf6307934c2c5c206da407e83 Mon Sep 17 00:00:00 2001
From: Tai An <antai12232931@outlook.com>
Date: Wed, 1 Jul 2026 00:08:10 -0700
Subject: [PATCH] fix(reasoning): don't persist request-scoped reasoning_effort
 into model config

When a model sets `reasoning_effort: none` (or any default) in its YAML
without an explicit `reasoning.disable`, ApplyReasoningEffort resolves that
default at request time and sets ReasoningConfig.DisableReasoning on the
request-scoped config copy. The post-load thinking/marker probe then wrote
that request-scoped value back into the loader's persistent config via
UpdateModelConfig, making it look as though the operator had explicitly set
reasoning.disable=true. From then on, per-request `reasoning_effort` overrides
were silently ignored (an explicit operator disable wins over a request
asking to think).

DetectThinkingSupportFromBackend only fills reasoning slots that are still
nil, so a slot that is already set before the probe runs was not produced by
the probe; in the scenario above it came from ApplyReasoningEffort. Snapshot
which slots were nil before the probe and only persist those, so the probe's
genuine backend detection is still saved while request-time reasoning effort
never leaks into the persistent config.

Fixes #10622

Signed-off-by: Tai An <antai12232931@outlook.com>
---
 core/backend/llm.go | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/core/backend/llm.go b/core/backend/llm.go
index 053e984e8a77..0d46c3889cdc 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -110,11 +110,25 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
 	needsMarkerProbe := c.MediaMarker == ""
 	if shouldProbeThinking || needsMarkerProbe {
 		modelOpts := grpcModelOpts(*c, o.SystemState.Model.ModelsPath)
+		// DetectThinkingSupportFromBackend only fills reasoning slots that are
+		// still nil, so a slot that already carries a value here was populated by
+		// request-time ApplyReasoningEffort (e.g. a `reasoning_effort: none`
+		// default), not by backend detection. Persisting such a request-scoped
+		// value would masquerade as an operator's explicit reasoning.disable and
+		// permanently defeat future per-request reasoning_effort overrides
+		// (see #10622). Only persist the slots the probe is actually allowed to
+		// fill.
+		persistDisableReasoning := c.ReasoningConfig.DisableReasoning == nil
+		persistDisableTagPrefill := c.ReasoningConfig.DisableReasoningTagPrefill == nil
 		config.DetectThinkingSupportFromBackend(ctx, c, inferenceModel, modelOpts)
 		// Update the config in the loader so it persists for future requests
 		cl.UpdateModelConfig(c.Name, func(cfg *config.ModelConfig) {
-			cfg.ReasoningConfig.DisableReasoning = c.ReasoningConfig.DisableReasoning
-			cfg.ReasoningConfig.DisableReasoningTagPrefill = c.ReasoningConfig.DisableReasoningTagPrefill
+			if persistDisableReasoning {
+				cfg.ReasoningConfig.DisableReasoning = c.ReasoningConfig.DisableReasoning
+			}
+			if persistDisableTagPrefill {
+				cfg.ReasoningConfig.DisableReasoningTagPrefill = c.ReasoningConfig.DisableReasoningTagPrefill
+			}
 			if c.MediaMarker != "" {
 				cfg.MediaMarker = c.MediaMarker
 			}