From 98423ef8e8a4212967486782f167201c739eafc0 Mon Sep 17 00:00:00 2001
From: MagnusBuehler <111045718+PufferFishCode@users.noreply.github.com>
Date: Mon, 16 Feb 2026 10:23:17 +0100
Subject: [PATCH 1/2] Fix thinking tokens with kv-cache so they get added only
 during fit

---
 src/tabpfn/architectures/base/transformer.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/tabpfn/architectures/base/transformer.py b/src/tabpfn/architectures/base/transformer.py
index 632aba37a..0b825595b 100644
--- a/src/tabpfn/architectures/base/transformer.py
+++ b/src/tabpfn/architectures/base/transformer.py
@@ -523,7 +523,11 @@ def forward(  # noqa: PLR0912, C901
             )
         del embedded_y, embedded_x
 
-        if self.add_thinking_tokens is not None:
+        is_kv_cache_prediction = (
+            self.cache_trainset_representation and single_eval_pos == 0
+        )
+
+        if self.add_thinking_tokens is not None and not is_kv_cache_prediction:
             embedded_input, single_eval_pos = self.add_thinking_tokens(
                 embedded_input,
                 single_eval_pos,

From 8d155439dce582b74ec671516a33d25ca1da6a83 Mon Sep 17 00:00:00 2001
From: MagnusBuehler <111045718+PufferFishCode@users.noreply.github.com>
Date: Mon, 16 Feb 2026 10:49:13 +0100
Subject: [PATCH 2/2] added changelog file

---
 changelog/784.fixed.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/784.fixed.md

diff --git a/changelog/784.fixed.md b/changelog/784.fixed.md
new file mode 100644
index 000000000..ad2e5a0be
--- /dev/null
+++ b/changelog/784.fixed.md
@@ -0,0 +1 @@
+Fixed a bug where, with fit_mode="fit_with_cache" (i.e. the KV cache), the thinking tokens were added twice: once during the fit call and again during the predict call. They are now added only during fit.
\ No newline at end of file