diff --git a/changelog/784.fixed.md b/changelog/784.fixed.md
new file mode 100644
index 000000000..ad2e5a0be
--- /dev/null
+++ b/changelog/784.fixed.md
@@ -0,0 +1 @@
+Fixed a bug where, with `fit_mode="fit_with_cache"` (KV cache), the thinking tokens were added twice: once during the `fit` call and again during the `predict` call.
\ No newline at end of file
diff --git a/src/tabpfn/architectures/base/transformer.py b/src/tabpfn/architectures/base/transformer.py
index 632aba37a..0b825595b 100644
--- a/src/tabpfn/architectures/base/transformer.py
+++ b/src/tabpfn/architectures/base/transformer.py
@@ -523,7 +523,11 @@ def forward(  # noqa: PLR0912, C901
             )
         del embedded_y, embedded_x
 
-        if self.add_thinking_tokens is not None:
+        is_kv_cache_prediction = (
+            self.cache_trainset_representation and single_eval_pos == 0
+        )
+
+        if self.add_thinking_tokens is not None and not is_kv_cache_prediction:
             embedded_input, single_eval_pos = self.add_thinking_tokens(
                 embedded_input,
                 single_eval_pos,