Skip to content

Commit 8f9a45b

Browse files
Fix tool-call XML leakage in non-streamed responses
Export processToolCallBuffer from the SDK so the CLI can filter <codebuff_tool_call> XML before displaying streaming text. Previously, when responses arrived without token streaming, tool-call payloads leaked into user output. Added comprehensive tests for multi-chunk tool calls and updated the CLI to use the shared SDK helper instead of its local implementation.

🤖 Generated with Codebuff

Co-Authored-By: Codebuff <noreply@codebuff.com>
1 parent a512ec8 commit 8f9a45b

File tree

10 files changed

+248
-99
lines changed

10 files changed

+248
-99
lines changed

cli/src/components/message-block.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ export const MessageBlock = ({
265265
const rawNestedContent = isNestedStreamingText
266266
? trimTrailingNewlines(nestedBlock.content)
267267
: nestedBlock.content.trim()
268-
const renderKey = `${keyPrefix}-text-${nestedIdx}-${rawNestedContent.length}-${isNestedStreamingText ? 'stream' : 'final'}`
268+
const renderKey = `${keyPrefix}-text-${nestedIdx}`
269269
const markdownOptionsForLevel = getAgentMarkdownOptions(indentLevel)
270270
const renderedContent = hasMarkdown(rawNestedContent)
271271
? isNestedStreamingText
@@ -337,7 +337,7 @@ export const MessageBlock = ({
337337
const rawContent = isStreamingText
338338
? trimTrailingNewlines(block.content)
339339
: block.content.trim()
340-
const renderKey = `${messageId}-text-${idx}-${rawContent.length}-${isStreamingText ? 'stream' : 'final'}`
340+
const renderKey = `${messageId}-text-${idx}`
341341
const renderedContent = hasMarkdown(rawContent)
342342
? isStreamingText
343343
? renderStreamingMarkdown(rawContent, markdownOptions)
@@ -391,7 +391,7 @@ export const MessageBlock = ({
391391
: normalizedContent
392392
return (
393393
<text
394-
key={`message-content-${messageId}-${normalizedContent.length}-${isStreamingMessage ? 'stream' : 'final'}`}
394+
key={`message-content-${messageId}`}
395395
wrap
396396
style={{ fg: textColor }}
397397
>

cli/src/hooks/use-send-message.ts

Lines changed: 18 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -50,56 +50,6 @@ const updateBlocksRecursively = (
5050
})
5151
}
5252

53-
// Helper function to process buffered text and filter out tool calls
54-
const processToolCallBuffer = (
55-
bufferState: { buffer: string; insideToolCall: boolean },
56-
onTextOutput: (text: string) => void,
57-
) => {
58-
let processed = false
59-
60-
if (
61-
!bufferState.insideToolCall &&
62-
bufferState.buffer.includes('<codebuff_tool_call>')
63-
) {
64-
const openTagIndex = bufferState.buffer.indexOf('<codebuff_tool_call>')
65-
const text = bufferState.buffer.substring(0, openTagIndex)
66-
if (text) {
67-
onTextOutput(text)
68-
}
69-
bufferState.insideToolCall = true
70-
bufferState.buffer = bufferState.buffer.substring(
71-
openTagIndex + '<codebuff_tool_call>'.length,
72-
)
73-
processed = true
74-
} else if (
75-
bufferState.insideToolCall &&
76-
bufferState.buffer.includes('</codebuff_tool_call>')
77-
) {
78-
const closeTagIndex = bufferState.buffer.indexOf('</codebuff_tool_call>')
79-
bufferState.insideToolCall = false
80-
bufferState.buffer = bufferState.buffer.substring(
81-
closeTagIndex + '</codebuff_tool_call>'.length,
82-
)
83-
processed = true
84-
} else if (!bufferState.insideToolCall && bufferState.buffer.length > 25) {
85-
// Output safe text, keeping last 25 chars in buffer (enough to buffer <codebuff_tool_call>)
86-
const safeToOutput = bufferState.buffer.substring(
87-
0,
88-
bufferState.buffer.length - 25,
89-
)
90-
if (safeToOutput) {
91-
onTextOutput(safeToOutput)
92-
}
93-
bufferState.buffer = bufferState.buffer.substring(
94-
bufferState.buffer.length - 25,
95-
)
96-
}
97-
98-
if (processed) {
99-
processToolCallBuffer(bufferState, onTextOutput)
100-
}
101-
}
102-
10353
const mergeTextSegments = (
10454
previous: string,
10555
incoming: string,
@@ -179,9 +129,6 @@ export const useSendMessage = ({
179129
const spawnAgentsMapRef = useRef<
180130
Map<string, { index: number; agentType: string }>
181131
>(new Map())
182-
const subagentBuffersRef = useRef<
183-
Map<string, { buffer: string; insideToolCall: boolean }>
184-
>(new Map())
185132
const rootStreamBufferRef = useRef('')
186133
const agentStreamAccumulatorsRef = useRef<Map<string, string>>(new Map())
187134
const rootStreamSeenRef = useRef(false)
@@ -394,10 +341,6 @@ export const useSendMessage = ({
394341
rootStreamBufferRef.current = ''
395342
rootStreamSeenRef.current = false
396343
agentStreamAccumulatorsRef.current = new Map<string, string>()
397-
subagentBuffersRef.current = new Map<
398-
string,
399-
{ buffer: string; insideToolCall: boolean }
400-
>()
401344

402345
const updateAgentContent = (
403346
agentId: string,
@@ -625,34 +568,18 @@ export const useSendMessage = ({
625568
if (event.type === 'subagent-chunk') {
626569
const { agentId, chunk } = event
627570

628-
const bufferState = subagentBuffersRef.current.get(agentId) || {
629-
buffer: '',
630-
insideToolCall: false,
571+
const previous =
572+
agentStreamAccumulatorsRef.current.get(agentId) ?? ''
573+
const { next, delta } = mergeTextSegments(previous, chunk)
574+
if (!delta && next === previous) {
575+
return
631576
}
632-
subagentBuffersRef.current.set(agentId, bufferState)
633-
634-
bufferState.buffer += chunk
577+
agentStreamAccumulatorsRef.current.set(agentId, next)
635578

636-
processToolCallBuffer(bufferState, (text) => {
637-
if (!text) {
638-
return
639-
}
640-
const previous =
641-
agentStreamAccumulatorsRef.current.get(agentId) ?? ''
642-
const { next, delta } = mergeTextSegments(previous, text)
643-
if (!delta && next === previous) {
644-
return
645-
}
646-
agentStreamAccumulatorsRef.current.set(agentId, next)
647-
if (delta) {
648-
updateAgentContent(agentId, { type: 'text', content: delta })
649-
} else {
650-
updateAgentContent(agentId, {
651-
type: 'text',
652-
content: next,
653-
replace: true,
654-
})
655-
}
579+
updateAgentContent(agentId, {
580+
type: 'text',
581+
content: delta || next,
582+
...(delta ? {} : { replace: true }),
656583
})
657584
return
658585
}
@@ -674,7 +601,10 @@ export const useSendMessage = ({
674601
})
675602
const previous =
676603
agentStreamAccumulatorsRef.current.get(event.agentId) ?? ''
677-
const { next, delta } = mergeTextSegments(previous, text)
604+
const { next, delta } = mergeTextSegments(
605+
previous,
606+
text,
607+
)
678608
if (!delta && next === previous) {
679609
return
680610
}
@@ -701,7 +631,10 @@ export const useSendMessage = ({
701631
return
702632
}
703633
const previous = rootStreamBufferRef.current ?? ''
704-
const { next, delta } = mergeTextSegments(previous, text)
634+
const { next, delta } = mergeTextSegments(
635+
previous,
636+
text,
637+
)
705638
if (!delta && next === previous) {
706639
return
707640
}

sdk/CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,15 @@
22

33
All notable changes to the @codebuff/sdk package will be documented in this file.
44

5-
## [0.4.2]
5+
## [0.4.3]
6+
7+
### Added
8+
9+
- Exported `processToolCallBuffer` and state helpers so SDK consumers can strip `<codebuff_tool_call>` segments mid-stream.
10+
- CLI now consumes the shared helper to avoid leaking XML when responses arrive without token streaming.
11+
- Regression tests covering tool-call payloads split across multiple chunks, based on the CLI log case ("I'll help you commit").
12+
13+
## [0.4.2]
614

715
### Added
816

sdk/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@codebuff/sdk",
33
"private": false,
4-
"version": "0.4.1",
4+
"version": "0.4.3",
55
"description": "Official SDK for Codebuff — AI coding agent & framework",
66
"license": "Apache-2.0",
77
"type": "module",
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import { describe, expect, test } from 'bun:test'
2+
3+
import {
4+
createToolCallBufferState,
5+
processToolCallBuffer,
6+
stripToolCallPayloads,
7+
} from '../tool-xml-buffer'
8+
9+
const collect = (chunks: string[]): ((value: string) => void) =>
10+
(value: string) => {
11+
if (value) {
12+
chunks.push(value)
13+
}
14+
}
15+
16+
describe('processToolCallBuffer', () => {
17+
test('emits text before tool call and skips payload', () => {
18+
const state = createToolCallBufferState()
19+
const out: string[] = []
20+
processToolCallBuffer(state, 'Hello <codebuff_tool_call>{"a":1}</codebuff_tool_call> world', collect(out))
21+
expect(out.join('')).toBe('Hello world')
22+
})
23+
24+
test('handles tool call split across chunks', () => {
25+
const state = createToolCallBufferState()
26+
const out: string[] = []
27+
28+
processToolCallBuffer(state, 'Hello <codebuff_tool_call>{"a"', collect(out))
29+
expect(out.join('')).toBe('Hello ')
30+
processToolCallBuffer(state, ':1}</codebuff_tool_call> world', collect(out))
31+
expect(out.join('')).toBe('Hello world')
32+
})
33+
34+
test('limits buffer growth while waiting for close tag', () => {
35+
const state = createToolCallBufferState()
36+
const out: string[] = []
37+
38+
processToolCallBuffer(
39+
state,
40+
'<codebuff_tool_call>' + 'x'.repeat(200),
41+
collect(out),
42+
)
43+
expect(out).toHaveLength(0)
44+
expect(state.buffer.length).toBeLessThan(120)
45+
46+
processToolCallBuffer(state, '</codebuff_tool_call>tail', collect(out))
47+
expect(out.join('')).toBe('tail')
48+
})
49+
50+
test('handles multiline tool call split across many chunks (CLI log regression)', () => {
51+
const state = createToolCallBufferState()
52+
const out: string[] = []
53+
const chunks = [
54+
"I'll help you commit the SDK and CLI changes.\n\n<codebuff_tool_call",
55+
'>',
56+
'\n{\n ',
57+
'"cb_tool_name": "run_terminal_command",\n',
58+
'"command": "git log --oneline -5",\n',
59+
'"cb_easp": true\n}\n</codebuff_tool_call>\n\nNext steps.',
60+
]
61+
62+
for (const chunk of chunks) {
63+
processToolCallBuffer(state, chunk, collect(out))
64+
}
65+
66+
expect(out.join('')).toBe(
67+
"I'll help you commit the SDK and CLI changes.\n\n\n\nNext steps.",
68+
)
69+
})
70+
71+
test('stripToolCallPayloads removes tool call payloads inline', () => {
72+
expect(
73+
stripToolCallPayloads(
74+
'Hello<codebuff_tool_call>{"a":1}</codebuff_tool_call>World',
75+
),
76+
).toBe('HelloWorld')
77+
})
78+
})

sdk/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ export * from './client'
1919
export * from './custom-tool'
2020
export * from './native/ripgrep'
2121
export * from './run-state'
22+
export * from './tool-xml-filter'
23+
export * from './tool-xml-buffer'
2224
export { ToolHelpers } from './tools'
2325
export * from './websocket-client'
2426
export { formatState } from '../../common/src/websockets/websocket-client'

sdk/src/run.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
filterToolXmlFromText,
1616
type ToolXmlFilterState,
1717
} from './tool-xml-filter'
18+
import { stripToolCallPayloads } from './tool-xml-buffer'
1819
import { PromptResponseSchema } from '../../common/src/actions'
1920
import { MAX_AGENT_STEPS_DEFAULT } from '../../common/src/constants/agents'
2021
import { toolNames } from '../../common/src/tools/constants'
@@ -194,8 +195,10 @@ export async function run({
194195
next = previous + incoming
195196
}
196197

197-
textAccumulator.set(agentKey, next)
198-
return next
198+
const sanitizedNext = stripToolCallPayloads(next)
199+
200+
textAccumulator.set(agentKey, sanitizedNext)
201+
return sanitizedNext
199202
}
200203

201204
const emitStreamDelta = async (
@@ -276,7 +279,7 @@ export async function run({
276279
agentId: eventAgentId,
277280
} as PrintModeEvent)
278281

279-
if (eventAgentId && eventPayload.agentId == null) {
282+
if (eventAgentId && 'agentId' in eventPayload && eventPayload.agentId == null) {
280283
eventPayload.agentId = eventAgentId
281284
}
282285

@@ -398,7 +401,7 @@ export async function run({
398401
await flushTextState(ROOT_AGENT_KEY)
399402

400403
const finishAgentKey =
401-
(chunk as typeof chunk & { agentId?: string }).agentId
404+
'agentId' in chunk ? chunk.agentId : undefined
402405
if (finishAgentKey && finishAgentKey !== ROOT_AGENT_KEY) {
403406
await flushTextState(finishAgentKey, finishAgentKey)
404407
await flushSubagentState(
@@ -410,7 +413,7 @@ export async function run({
410413
chunkType === 'subagent_finish' ||
411414
chunkType === 'subagent-finish'
412415
) {
413-
const subagentId = (chunk as { agentId?: string }).agentId
416+
const subagentId = 'agentId' in chunk ? chunk.agentId : undefined
414417
if (subagentId) {
415418
await flushTextState(subagentId, subagentId)
416419
await flushSubagentState(

0 commit comments

Comments
 (0)