Fix tool-call XML leakage in non-streamed responses

brandonkachen · codebuff-team · brandonkachen · commit 8f9a45b919d7 · 2025-10-16T17:01:54.000-07:00
Export processToolCallBuffer from SDK so CLI can filter <codebuff_tool_call>
XML before displaying streaming text. Previously, when responses arrived
without token streaming, tool-call payloads leaked into user output.

Added comprehensive tests for multi-chunk tool calls and updated CLI to
use the shared SDK helper instead of its local implementation.

🤖 Generated with Codebuff
Co-Authored-By: Codebuff <noreply@codebuff.com>
diff --git a/cli/src/components/message-block.tsx b/cli/src/components/message-block.tsx
@@ -265,7 +265,7 @@ export const MessageBlock = ({
         const rawNestedContent = isNestedStreamingText
           ? trimTrailingNewlines(nestedBlock.content)
           : nestedBlock.content.trim()
-        const renderKey = `${keyPrefix}-text-${nestedIdx}-${rawNestedContent.length}-${isNestedStreamingText ? 'stream' : 'final'}`
+        const renderKey = `${keyPrefix}-text-${nestedIdx}`
         const markdownOptionsForLevel = getAgentMarkdownOptions(indentLevel)
         const renderedContent = hasMarkdown(rawNestedContent)
           ? isNestedStreamingText
@@ -337,7 +337,7 @@ export const MessageBlock = ({
               const rawContent = isStreamingText
                 ? trimTrailingNewlines(block.content)
                 : block.content.trim()
-              const renderKey = `${messageId}-text-${idx}-${rawContent.length}-${isStreamingText ? 'stream' : 'final'}`
+              const renderKey = `${messageId}-text-${idx}`
               const renderedContent = hasMarkdown(rawContent)
                 ? isStreamingText
                   ? renderStreamingMarkdown(rawContent, markdownOptions)
@@ -391,7 +391,7 @@ export const MessageBlock = ({
             : normalizedContent
           return (
             <text
-              key={`message-content-${messageId}-${normalizedContent.length}-${isStreamingMessage ? 'stream' : 'final'}`}
+              key={`message-content-${messageId}`}
               wrap
               style={{ fg: textColor }}
             >
diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts
@@ -50,56 +50,6 @@ const updateBlocksRecursively = (
   })
 }
 
-// Helper function to process buffered text and filter out tool calls
-const processToolCallBuffer = (
-  bufferState: { buffer: string; insideToolCall: boolean },
-  onTextOutput: (text: string) => void,
-) => {
-  let processed = false
-
-  if (
-    !bufferState.insideToolCall &&
-    bufferState.buffer.includes('<codebuff_tool_call>')
-  ) {
-    const openTagIndex = bufferState.buffer.indexOf('<codebuff_tool_call>')
-    const text = bufferState.buffer.substring(0, openTagIndex)
-    if (text) {
-      onTextOutput(text)
-    }
-    bufferState.insideToolCall = true
-    bufferState.buffer = bufferState.buffer.substring(
-      openTagIndex + '<codebuff_tool_call>'.length,
-    )
-    processed = true
-  } else if (
-    bufferState.insideToolCall &&
-    bufferState.buffer.includes('</codebuff_tool_call>')
-  ) {
-    const closeTagIndex = bufferState.buffer.indexOf('</codebuff_tool_call>')
-    bufferState.insideToolCall = false
-    bufferState.buffer = bufferState.buffer.substring(
-      closeTagIndex + '</codebuff_tool_call>'.length,
-    )
-    processed = true
-  } else if (!bufferState.insideToolCall && bufferState.buffer.length > 25) {
-    // Output safe text, keeping last 25 chars in buffer (enough to buffer <codebuff_tool_call>)
-    const safeToOutput = bufferState.buffer.substring(
-      0,
-      bufferState.buffer.length - 25,
-    )
-    if (safeToOutput) {
-      onTextOutput(safeToOutput)
-    }
-    bufferState.buffer = bufferState.buffer.substring(
-      bufferState.buffer.length - 25,
-    )
-  }
-
-  if (processed) {
-    processToolCallBuffer(bufferState, onTextOutput)
-  }
-}
-
 const mergeTextSegments = (
   previous: string,
   incoming: string,
@@ -179,9 +129,6 @@ export const useSendMessage = ({
   const spawnAgentsMapRef = useRef<
     Map<string, { index: number; agentType: string }>
   >(new Map())
-  const subagentBuffersRef = useRef<
-    Map<string, { buffer: string; insideToolCall: boolean }>
-  >(new Map())
   const rootStreamBufferRef = useRef('')
   const agentStreamAccumulatorsRef = useRef<Map<string, string>>(new Map())
   const rootStreamSeenRef = useRef(false)
@@ -394,10 +341,6 @@ export const useSendMessage = ({
       rootStreamBufferRef.current = ''
       rootStreamSeenRef.current = false
       agentStreamAccumulatorsRef.current = new Map<string, string>()
-      subagentBuffersRef.current = new Map<
-        string,
-        { buffer: string; insideToolCall: boolean }
-      >()
 
       const updateAgentContent = (
         agentId: string,
@@ -625,34 +568,18 @@ export const useSendMessage = ({
             if (event.type === 'subagent-chunk') {
               const { agentId, chunk } = event
 
-              const bufferState = subagentBuffersRef.current.get(agentId) || {
-                buffer: '',
-                insideToolCall: false,
+              const previous =
+                agentStreamAccumulatorsRef.current.get(agentId) ?? ''
+              const { next, delta } = mergeTextSegments(previous, chunk)
+              if (!delta && next === previous) {
+                return
               }
-              subagentBuffersRef.current.set(agentId, bufferState)
-
-              bufferState.buffer += chunk
+              agentStreamAccumulatorsRef.current.set(agentId, next)
 
-              processToolCallBuffer(bufferState, (text) => {
-                if (!text) {
-                  return
-                }
-                const previous =
-                  agentStreamAccumulatorsRef.current.get(agentId) ?? ''
-                const { next, delta } = mergeTextSegments(previous, text)
-                if (!delta && next === previous) {
-                  return
-                }
-                agentStreamAccumulatorsRef.current.set(agentId, next)
-                if (delta) {
-                  updateAgentContent(agentId, { type: 'text', content: delta })
-                } else {
-                  updateAgentContent(agentId, {
-                    type: 'text',
-                    content: next,
-                    replace: true,
-                  })
-                }
+              updateAgentContent(agentId, {
+                type: 'text',
+                content: delta || next,
+                ...(delta ? {} : { replace: true }),
               })
               return
             }
@@ -674,7 +601,10 @@ export const useSendMessage = ({
                 })
                 const previous =
                   agentStreamAccumulatorsRef.current.get(event.agentId) ?? ''
-                const { next, delta } = mergeTextSegments(previous, text)
+                const { next, delta } = mergeTextSegments(
+                  previous,
+                  text,
+                )
                 if (!delta && next === previous) {
                   return
                 }
@@ -701,7 +631,10 @@ export const useSendMessage = ({
                   return
                 }
                 const previous = rootStreamBufferRef.current ?? ''
-                const { next, delta } = mergeTextSegments(previous, text)
+                const { next, delta } = mergeTextSegments(
+                  previous,
+                  text,
+                )
                 if (!delta && next === previous) {
                   return
                 }
diff --git a/sdk/CHANGELOG.md b/sdk/CHANGELOG.md
@@ -2,7 +2,15 @@
 
 All notable changes to the @codebuff/sdk package will be documented in this file.
 
-## [0.4.2] 
+## [0.4.3]
+
+### Added
+
+- Exported `processToolCallBuffer` and state helpers so SDK consumers can strip `<codebuff_tool_call>` segments mid-stream.
+- CLI now consumes the shared helper to avoid leaking XML when responses arrive without token streaming.
+- Extra regression tests covering multi-chunk tool-call payloads based on the CLI log case ("I'll help you commit").
+
+## [0.4.2]
 
 ### Added
 
diff --git a/sdk/package.json b/sdk/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@codebuff/sdk",
   "private": false,
-  "version": "0.4.1",
+  "version": "0.4.3",
   "description": "Official SDK for Codebuff — AI coding agent & framework",
   "license": "Apache-2.0",
   "type": "module",
diff --git a/sdk/src/__tests__/tool-xml-buffer.test.ts b/sdk/src/__tests__/tool-xml-buffer.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, test } from 'bun:test'
+
+import {
+  createToolCallBufferState,
+  processToolCallBuffer,
+  stripToolCallPayloads,
+} from '../tool-xml-buffer'
+
+const collect = (chunks: string[]): ((value: string) => void) =>
+  (value: string) => {
+    if (value) {
+      chunks.push(value)
+    }
+  }
+
+describe('processToolCallBuffer', () => {
+  test('emits text before tool call and skips payload', () => {
+    const state = createToolCallBufferState()
+    const out: string[] = []
+    processToolCallBuffer(state, 'Hello <codebuff_tool_call>{"a":1}</codebuff_tool_call> world', collect(out))
+    expect(out.join('')).toBe('Hello  world')
+  })
+
+  test('handles tool call split across chunks', () => {
+    const state = createToolCallBufferState()
+    const out: string[] = []
+
+    processToolCallBuffer(state, 'Hello <codebuff_tool_call>{"a"', collect(out))
+    expect(out.join('')).toBe('Hello ')
+    processToolCallBuffer(state, ':1}</codebuff_tool_call> world', collect(out))
+    expect(out.join('')).toBe('Hello  world')
+  })
+
+  test('limits buffer growth while waiting for close tag', () => {
+    const state = createToolCallBufferState()
+    const out: string[] = []
+
+    processToolCallBuffer(
+      state,
+      '<codebuff_tool_call>' + 'x'.repeat(200),
+      collect(out),
+    )
+    expect(out).toHaveLength(0)
+    expect(state.buffer.length).toBeLessThan(120)
+
+    processToolCallBuffer(state, '</codebuff_tool_call>tail', collect(out))
+    expect(out.join('')).toBe('tail')
+  })
+
+  test('handles multiline tool call split across many chunks (CLI log regression)', () => {
+    const state = createToolCallBufferState()
+    const out: string[] = []
+    const chunks = [
+      "I'll help you commit the SDK and CLI changes.\n\n<codebuff_tool_call",
+      '>',
+      '\n{\n  ',
+      '"cb_tool_name": "run_terminal_command",\n',
+      '"command": "git log --oneline -5",\n',
+      '"cb_easp": true\n}\n</codebuff_tool_call>\n\nNext steps.',
+    ]
+
+    for (const chunk of chunks) {
+      processToolCallBuffer(state, chunk, collect(out))
+    }
+
+    expect(out.join('')).toBe(
+      "I'll help you commit the SDK and CLI changes.\n\n\n\nNext steps.",
+    )
+  })
+
+  test('stripToolCallPayloads removes tool call payloads inline', () => {
+    expect(
+      stripToolCallPayloads(
+        'Hello<codebuff_tool_call>{"a":1}</codebuff_tool_call>World',
+      ),
+    ).toBe('HelloWorld')
+  })
+})
diff --git a/sdk/src/index.ts b/sdk/src/index.ts
@@ -19,6 +19,8 @@ export * from './client'
 export * from './custom-tool'
 export * from './native/ripgrep'
 export * from './run-state'
+export * from './tool-xml-filter'
+export * from './tool-xml-buffer'
 export { ToolHelpers } from './tools'
 export * from './websocket-client'
 export { formatState } from '../../common/src/websockets/websocket-client'
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
@@ -15,6 +15,7 @@ import {
   filterToolXmlFromText,
   type ToolXmlFilterState,
 } from './tool-xml-filter'
+import { stripToolCallPayloads } from './tool-xml-buffer'
 import { PromptResponseSchema } from '../../common/src/actions'
 import { MAX_AGENT_STEPS_DEFAULT } from '../../common/src/constants/agents'
 import { toolNames } from '../../common/src/tools/constants'
@@ -194,8 +195,10 @@ export async function run({
       next = previous + incoming
     }
 
-    textAccumulator.set(agentKey, next)
-    return next
+    const sanitizedNext = stripToolCallPayloads(next)
+
+    textAccumulator.set(agentKey, sanitizedNext)
+    return sanitizedNext
   }
 
   const emitStreamDelta = async (
@@ -276,7 +279,7 @@ export async function run({
               agentId: eventAgentId,
             } as PrintModeEvent)
 
-      if (eventAgentId && eventPayload.agentId == null) {
+      if (eventAgentId && 'agentId' in eventPayload && eventPayload.agentId == null) {
         eventPayload.agentId = eventAgentId
       }
 
@@ -398,7 +401,7 @@ export async function run({
           await flushTextState(ROOT_AGENT_KEY)
 
           const finishAgentKey =
-            (chunk as typeof chunk & { agentId?: string }).agentId
+            'agentId' in chunk ? chunk.agentId : undefined
           if (finishAgentKey && finishAgentKey !== ROOT_AGENT_KEY) {
             await flushTextState(finishAgentKey, finishAgentKey)
             await flushSubagentState(
@@ -410,7 +413,7 @@ export async function run({
           chunkType === 'subagent_finish' ||
           chunkType === 'subagent-finish'
         ) {
-          const subagentId = (chunk as { agentId?: string }).agentId
+          const subagentId = 'agentId' in chunk ? chunk.agentId : undefined
           if (subagentId) {
             await flushTextState(subagentId, subagentId)
             await flushSubagentState(
diff --git a/sdk/src/tool-xml-buffer.ts b/sdk/src/tool-xml-buffer.ts
diff --git a/sdk/src/tool-xml-filter.ts b/sdk/src/tool-xml-filter.ts
diff --git a/sdk/test/tree-sitter-queries/package-lock.json b/sdk/test/tree-sitter-queries/package-lock.json

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,7 +1,7 @@` |
| `1` | `1` | `{` |
| `2` | `2` | `"name": "@codebuff/sdk",` |
| `3` | `3` | `"private": false,` |
| `4` | | `- "version": "0.4.1",` |
| | `4` | `+ "version": "0.4.3",` |
| `5` | `5` | `"description": "Official SDK for Codebuff — AI coding agent & framework",` |
| `6` | `6` | `"license": "Apache-2.0",` |
| `7` | `7` | `"type": "module",` |