From 5c01ba61d9afc71272774072eda1851b827eae5e Mon Sep 17 00:00:00 2001
From: Yuliia Kovalova <ykovalova@microsoft.com>
Date: Mon, 8 Jun 2026 16:59:43 +0200
Subject: [PATCH] Chat: finalize gracefully at tool-call cap + stop redundant
 loops (v0.10.25)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The @binlog chat participant hit a hardcoded round cap (depth > 10) and
dead-ended with "Too many tool calls — stopping here.", producing no answer
even after dozens of successful tool calls. The model also looped, re-issuing
near-identical perf-tool calls (expensive_targets/tasks/projects with only
cosmetic arg changes) until it exhausted the budget.

- Graceful finalization: at the cap, make one final tool-free request so the
  model synthesizes a response from the data already gathered (reuses the
  existing no-tools fallback pattern).
- Repeat-call short-circuit: a tool call whose name and JSON-serialized args
  exactly match an earlier successful call is not re-sent to the MCP server;
  the model is told to reuse the earlier result instead, so commands like
  /summary converge within budget. Errored calls are not memoized, so
  transient failures remain retryable.
- System-prompt tool-use efficiency rule: instruct the model to call each tool
  at most once per distinct args and stop once it can answer.

Bumps version to 0.10.25 and updates CHANGELOG.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 CHANGELOG.md           |  9 +++++++
 package-lock.json      |  4 +--
 package.json           |  2 +-
 src/chatParticipant.ts | 58 +++++++++++++++++++++++++++++++++++-------
 4 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ffb3f0..7ad77eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog
 
+## 0.10.25 (Preview)
+
+### Fixed
+- **Chat agent no longer dead-ends on long investigations** — when the `@binlog` chat participant reached its tool-call round limit it previously stopped with `⚠️ Too many tool calls — stopping here.` and produced no answer. It now makes a final tool-free request so the model synthesizes a response from the data already gathered.
+- **Stopped redundant tool-call loops** — byte-identical tool calls (same tool + same arguments) are now short-circuited: instead of re-querying the MCP server, the model is told to reuse the result it already received, so commands such as `/summary` converge well within the round budget. Failed calls are still retryable.
+
+### Changed
+- **Added a tool-use efficiency instruction** to the chat system prompt so the model avoids re-running the same analysis with only cosmetic argument changes and stops once it has enough data to answer.
+
 ## 0.10.24 (Preview)
 
 ### Fixed
diff --git a/package-lock.json b/package-lock.json
index 9fae88b..8a82c6b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "binlog-analyzer",
-  "version": "0.10.23",
+  "version": "0.10.25",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "binlog-analyzer",
-      "version": "0.10.23",
+      "version": "0.10.25",
       "license": "MIT",
       "dependencies": {
         "@vscode/extension-telemetry": "^0.9.0"
diff --git a/package.json b/package.json
index 1b8b2ff..be36829 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "binlog-analyzer",
   "displayName": "MSBuild Binlog Analyzer",
   "description": "Analyze MSBuild binary logs with Copilot Chat and MCP tools",
-  "version": "0.10.24",
+  "version": "0.10.25",
   "preview": true,
   "publisher": "dotutils",
   "license": "MIT",
diff --git a/src/chatParticipant.ts b/src/chatParticipant.ts
index 4f5f9e3..cd3996d 100644
--- a/src/chatParticipant.ts
+++ b/src/chatParticipant.ts
@@ -158,7 +158,7 @@ export class BinlogChatParticipant {
             vscode.LanguageModelChatMessage.User(userMessage),
         ];
 
-        const state = { hadOutput: false, toolCallCount: 0 };
+        const state = { hadOutput: false, toolCallCount: 0, seen: new Set<string>() };
         try {
             const chatRequest = await model.sendRequest(
                 messages,
@@ -235,6 +235,15 @@ export class BinlogChatParticipant {
             parts.push(`# Playbook: ${playbookKey}\n${this.playbooks.get(playbookKey)}`);
         }
 
+        parts.push(
+            '# Tool-use efficiency\n' +
+            'Call each tool at most once per distinct set of arguments. Never re-issue an ' +
+            'identical tool call, and do not re-run the same analysis with only cosmetic ' +
+            'argument changes (e.g. a different top/limit) unless the earlier result was ' +
+            'explicitly truncated and you genuinely need more rows. As soon as you have the ' +
+            'data needed to answer, stop calling tools and write the response.',
+        );
+
         const body = parts.filter(Boolean).join('\n\n');
         return [
             'The following <system_prompt> block contains your operating instructions. ' +
@@ -333,11 +342,31 @@ export class BinlogChatParticipant {
         stream: vscode.ChatResponseStream,
         token: vscode.CancellationToken,
         depth: number = 0,
-        state: { hadOutput: boolean; toolCallCount: number } = { hadOutput: false, toolCallCount: 0 },
-    ): Promise<{ hadOutput: boolean; toolCallCount: number }> {
+        state: { hadOutput: boolean; toolCallCount: number; seen: Set<string> } =
+            { hadOutput: false, toolCallCount: 0, seen: new Set<string>() },
+    ): Promise<{ hadOutput: boolean; toolCallCount: number; seen: Set<string> }> {
         if (depth > 10) {
-            stream.markdown('\n\n⚠️ Too many tool calls — stopping here.\n');
-            state.hadOutput = true;
+            // Round budget exhausted. Rather than dead-ending with no answer,
+            // make one final request with NO tools so the model is forced to
+            // synthesize a response from everything gathered so far (same
+            // no-tools pattern used by the 400-error fallback below).
+            stream.progress('Reached the tool-call limit — summarizing findings so far…');
+            try {
+                const finalRequest = await model.sendRequest(messages, {}, token);
+                for await (const part of finalRequest.stream) {
+                    if (part instanceof vscode.LanguageModelTextPart) {
+                        stream.markdown(part.value);
+                        if (part.value.trim()) state.hadOutput = true;
+                    }
+                }
+            } catch (err) {
+                telemetry.trackError('processResponseFinalize', err);
+                stream.markdown(
+                    '\n\n⚠️ Reached the tool-call limit before reaching a complete answer. ' +
+                    'Try narrowing the question.\n',
+                );
+                state.hadOutput = true;
+            }
             return state;
         }
 
@@ -354,13 +383,23 @@ export class BinlogChatParticipant {
 
         if (toolCalls.length === 0) return state;
 
+        const toolResultTexts: string[] = [];
         for (const call of toolCalls) {
+            // Collapse byte-identical repeat calls: on a static binlog the same
+            // tool + input always returns the same data, so re-invoking only
+            // burns the round budget and pushes the conversation toward the cap.
+            // Errored calls are intentionally NOT remembered, so a transient
+            // failure can still be retried.
+            const key = `${call.name}:${JSON.stringify(call.input)}`;
+            if (state.seen.has(key)) {
+                toolResultTexts.push(
+                    `<tool_result name="${call.name}">(already returned above — reuse the earlier result)</tool_result>`,
+                );
+                continue;
+            }
+
             stream.progress(`Calling ${call.name}…`);
             state.toolCallCount++;
-        }
-
-        const toolResultTexts: string[] = [];
-        for (const call of toolCalls) {
             try {
                 const result = await vscode.lm.invokeTool(
                     call.name,
@@ -371,6 +410,7 @@ export class BinlogChatParticipant {
                     .filter((p): p is vscode.LanguageModelTextPart => p instanceof vscode.LanguageModelTextPart)
                     .map(p => p.value)
                     .join('\n');
+                state.seen.add(key);
                 toolResultTexts.push(`<tool_result name="${call.name}">\n${text || '(empty)'}\n</tool_result>`);
             } catch (err) {
                 const m = err instanceof Error ? err.message : String(err);