Skip to content

Commit 493ce2e

Browse files
author
1bcMax
committed
fix: strip Kimi thinking tokens from model responses
Fixes #6 - Raw session tool outputs with Kimi thinking tokens were visible on Telegram/TUI despite verboseDefault being off.
- Add KIMI_BLOCK_RE to strip full thinking blocks
- Add KIMI_TOKEN_RE to strip standalone tokens like <|end▁of▁thinking|>
- Add THINKING_BLOCK_RE and THINKING_TAG_RE for standard <think> tags
- Apply stripThinkingTokens() to response content in SSE conversion
1 parent 12b649f commit 493ce2e

1 file changed

Lines changed: 36 additions & 1 deletion

File tree

src/proxy.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,39 @@ const HEARTBEAT_INTERVAL_MS = 2_000;
4747
const DEFAULT_REQUEST_TIMEOUT_MS = 180_000; // 3 minutes (allows for on-chain tx + LLM response)
4848
const DEFAULT_PORT = 8402;
4949

50+
// Kimi/Moonshot models use special Unicode tokens for thinking boundaries.
// Pattern: <|begin▁of▁thinking|>content<|end▁of▁thinking|>
// The bar delimiter is normally the fullwidth vertical bar (U+FF5C) and ▁ is
// the lower one-eighth block (U+2581). The patterns below spell the fullwidth
// bar as the escape \uFF5C so the source survives editors/tools that normalise
// it to ASCII, and they also accept the plain ASCII "|" variant.

// Match full Kimi thinking blocks: <|begin...|>content<|end...|>
// The body is matched lazily so two separate blocks are not merged into one.
const KIMI_BLOCK_RE =
  /<[|\uFF5C][^<>]*begin[^<>]*[|\uFF5C]>[\s\S]*?<[|\uFF5C][^<>]*end[^<>]*[|\uFF5C]>/gi;

// Match standalone Kimi tokens like <|end▁of▁thinking|>
const KIMI_TOKEN_RE = /<[|\uFF5C][^<>]*[|\uFF5C]>/g;

// Standard thinking tags that may leak through from various models
// (opening or closing; the \b keeps names such as <thinker> intact).
const THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;

// Full thinking blocks: <think>content</think>
const THINKING_BLOCK_RE =
  /<\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>[\s\S]*?<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;

/**
 * Strip thinking tokens and blocks from model response content.
 *
 * Handles both Kimi-style delimiter tokens (fullwidth or ASCII bar) and
 * standard XML-style tags. Complete blocks are removed together with their
 * content; an unmatched opening or closing marker is removed on its own and
 * the surrounding text is kept.
 *
 * Only the single string passed in is inspected, so a marker that is split
 * across two separate calls is not recognised.
 *
 * @param content - Response text to clean.
 * @returns The text with thinking markers removed. Empty input is returned
 *   unchanged.
 */
function stripThinkingTokens(content: string): string {
  // Defensive: the value may originate from untyped JSON; anything that is not
  // a non-empty string is handed back untouched instead of throwing on .replace.
  if (typeof content !== "string" || content.length === 0) return content;

  // Order matters: full blocks must go before the standalone-marker passes,
  // otherwise the markers vanish and the enclosed reasoning text is left behind.
  return content
    .replace(KIMI_BLOCK_RE, "")
    .replace(KIMI_TOKEN_RE, "")
    .replace(THINKING_BLOCK_RE, "")
    .replace(THINKING_TAG_RE, "");
}
82+
5083
/** Callback info for low balance warning */
5184
export type LowBalanceInfo = {
5285
balanceUSD: string;
@@ -597,7 +630,9 @@ async function proxyRequest(
597630
// Process each choice (usually just one)
598631
if (rsp.choices && Array.isArray(rsp.choices)) {
599632
for (const choice of rsp.choices) {
600-
const content = choice.message?.content ?? choice.delta?.content ?? "";
633+
// Strip thinking tokens (Kimi <|...|> and standard <think> tags)
634+
const rawContent = choice.message?.content ?? choice.delta?.content ?? "";
635+
const content = stripThinkingTokens(rawContent);
601636
const role = choice.message?.role ?? choice.delta?.role ?? "assistant";
602637
const index = choice.index ?? 0;
603638

0 commit comments

Comments
 (0)