diff --git a/changelog/784.fixed.md b/changelog/784.fixed.md new file mode 100644 index 000000000..ad2e5a0be --- /dev/null +++ b/changelog/784.fixed.md @@ -0,0 +1 @@ +Fixed a bug where, with fit_mode="fit_with_cache" (KV cache), the thinking tokens were added twice: once in the fit call and again in the predict call. \ No newline at end of file diff --git a/src/tabpfn/architectures/base/transformer.py b/src/tabpfn/architectures/base/transformer.py index 632aba37a..0b825595b 100644 --- a/src/tabpfn/architectures/base/transformer.py +++ b/src/tabpfn/architectures/base/transformer.py @@ -523,7 +523,11 @@ def forward( # noqa: PLR0912, C901 ) del embedded_y, embedded_x - if self.add_thinking_tokens is not None: + is_kv_cache_prediction = ( + self.cache_trainset_representation and single_eval_pos == 0 + ) + + if self.add_thinking_tokens is not None and not is_kv_cache_prediction: embedded_input, single_eval_pos = self.add_thinking_tokens( embedded_input, single_eval_pos,