From 5c01ba61d9afc71272774072eda1851b827eae5e Mon Sep 17 00:00:00 2001 From: Yuliia Kovalova Date: Mon, 8 Jun 2026 16:59:43 +0200 Subject: [PATCH] Chat: finalize gracefully at tool-call cap + stop redundant loops (v0.10.25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The @binlog chat participant hit a hardcoded round cap (depth > 10) and dead-ended with "Too many tool calls — stopping here." producing no answer, even after dozens of successful tool calls. The model also looped, re-issuing near-identical perf-tool calls (expensive_targets/tasks/projects with only cosmetic arg changes) until it exhausted the budget. - Graceful finalization: at the cap, make one final tool-free request so the model synthesizes a response from the data already gathered (reuses the existing no-tools fallback pattern). - Repeat-call short-circuit: byte-identical tool calls (name + args) reuse the earlier result instead of re-querying the MCP server, so commands like /summary converge within budget. Errored calls are not memoized, so transient failures remain retryable. - System-prompt tool-use efficiency rule: instruct the model to call each tool at most once per distinct args and stop once it can answer. Bumps version to 0.10.25 and updates CHANGELOG. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 9 +++++++ package-lock.json | 4 +-- package.json | 2 +- src/chatParticipant.ts | 58 +++++++++++++++++++++++++++++++++++------- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ffb3f0..7ad77eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 0.10.25 (Preview) + +### Fixed +- **Chat agent no longer dead-ends on long investigations** — when the `@binlog` chat participant reached its tool-call round limit it previously stopped with `⚠️ Too many tool calls — stopping here.` and produced no answer. It now makes a final tool-free request so the model synthesizes a response from the data already gathered. +- **Stopped redundant tool-call loops** — byte-identical tool calls (same tool + same arguments) are now short-circuited and reuse the earlier result instead of re-querying the MCP server, so commands such as `/summary` converge well within the round budget. Failed calls are still retryable. + +### Changed +- **Added a tool-use efficiency instruction** to the chat system prompt so the model avoids re-running the same analysis with only cosmetic argument changes and stops once it has enough data to answer. + ## 0.10.24 (Preview) ### Fixed diff --git a/package-lock.json b/package-lock.json index 9fae88b..8a82c6b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "binlog-analyzer", - "version": "0.10.23", + "version": "0.10.25", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "binlog-analyzer", - "version": "0.10.23", + "version": "0.10.25", "license": "MIT", "dependencies": { "@vscode/extension-telemetry": "^0.9.0" diff --git a/package.json b/package.json index 1b8b2ff..be36829 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "binlog-analyzer", "displayName": "MSBuild Binlog Analyzer", "description": "Analyze MSBuild binary logs with Copilot Chat and MCP tools", - "version": "0.10.24", + "version": "0.10.25", "preview": true, "publisher": "dotutils", "license": "MIT", diff --git a/src/chatParticipant.ts b/src/chatParticipant.ts index 4f5f9e3..cd3996d 100644 --- a/src/chatParticipant.ts +++ b/src/chatParticipant.ts @@ -158,7 +158,7 @@ export class BinlogChatParticipant { vscode.LanguageModelChatMessage.User(userMessage), ]; - const state = { hadOutput: false, toolCallCount: 0 }; + const state = { hadOutput: false, toolCallCount: 0, seen: new Set() }; try { const chatRequest = await model.sendRequest( messages, @@ -235,6 +235,15 @@ export class BinlogChatParticipant { parts.push(`# Playbook: ${playbookKey}\n${this.playbooks.get(playbookKey)}`); } + parts.push( + '# Tool-use efficiency\n' + + 'Call each tool at most once per distinct set of arguments. Never re-issue an ' + + 'identical tool call, and do not re-run the same analysis with only cosmetic ' + + 'argument changes (e.g. a different top/limit) unless the earlier result was ' + + 'explicitly truncated and you genuinely need more rows. As soon as you have the ' + + 'data needed to answer, stop calling tools and write the response.', + ); + const body = parts.filter(Boolean).join('\n\n'); return [ 'The following block contains your operating instructions. ' + @@ -333,11 +342,31 @@ export class BinlogChatParticipant { stream: vscode.ChatResponseStream, token: vscode.CancellationToken, depth: number = 0, - state: { hadOutput: boolean; toolCallCount: number } = { hadOutput: false, toolCallCount: 0 }, - ): Promise<{ hadOutput: boolean; toolCallCount: number }> { + state: { hadOutput: boolean; toolCallCount: number; seen: Set } = + { hadOutput: false, toolCallCount: 0, seen: new Set() }, + ): Promise<{ hadOutput: boolean; toolCallCount: number; seen: Set }> { if (depth > 10) { - stream.markdown('\n\n⚠️ Too many tool calls — stopping here.\n'); - state.hadOutput = true; + // Round budget exhausted. Rather than dead-ending with no answer, + // make one final request with NO tools so the model is forced to + // synthesize a response from everything gathered so far (same + // no-tools pattern used by the 400-error fallback below). + stream.progress('Reached the tool-call limit — summarizing findings so far…'); + try { + const finalRequest = await model.sendRequest(messages, {}, token); + for await (const part of finalRequest.stream) { + if (part instanceof vscode.LanguageModelTextPart) { + stream.markdown(part.value); + if (part.value.trim()) state.hadOutput = true; + } + } + } catch (err) { + telemetry.trackError('processResponseFinalize', err); + stream.markdown( + '\n\n⚠️ Reached the tool-call limit before reaching a complete answer. ' + + 'Try narrowing the question.\n', + ); + state.hadOutput = true; + } return state; } @@ -354,13 +383,23 @@ export class BinlogChatParticipant { if (toolCalls.length === 0) return state; + const toolResultTexts: string[] = []; for (const call of toolCalls) { + // Collapse byte-identical repeat calls: on a static binlog the same + // tool + input always returns the same data, so re-invoking only + // burns the round budget and pushes the conversation toward the cap. + // Errored calls are intentionally NOT remembered, so a transient + // failure can still be retried. + const key = `${call.name}:${JSON.stringify(call.input)}`; + if (state.seen.has(key)) { + toolResultTexts.push( + `(already returned above — reuse the earlier result)`, + ); + continue; + } + stream.progress(`Calling ${call.name}…`); state.toolCallCount++; - } - - const toolResultTexts: string[] = []; - for (const call of toolCalls) { try { const result = await vscode.lm.invokeTool( call.name, @@ -371,6 +410,7 @@ export class BinlogChatParticipant { .filter((p): p is vscode.LanguageModelTextPart => p instanceof vscode.LanguageModelTextPart) .map(p => p.value) .join('\n'); + state.seen.add(key); toolResultTexts.push(`\n${text || '(empty)'}\n`); } catch (err) { const m = err instanceof Error ? err.message : String(err);