dotutils · YuliiaKovalova · Jun 8, 2026 · Jun 8, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog
 
+## 0.10.25 (Preview)
+
+### Fixed
+- **Chat agent no longer dead-ends on long investigations** — when the `@binlog` chat participant reached its tool-call round limit, it previously stopped with `⚠️ Too many tool calls — stopping here.` and produced no answer. It now makes a final tool-free request so the model synthesizes a response from the data already gathered.
+- **Stopped redundant tool-call loops** — byte-identical tool calls (same tool + same arguments) are now short-circuited: instead of re-querying the MCP server, the model is told to reuse the result it already received, so commands such as `/summary` converge well within the round budget. Only successful calls are remembered, so a failed call can still be retried.
+
+### Changed
+- **Added a tool-use efficiency instruction** to the chat system prompt so the model avoids re-running the same analysis with only cosmetic argument changes and stops once it has enough data to answer.
+
 ## 0.10.24 (Preview)
 
 ### Fixed

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -2,7 +2,7 @@
   "name": "binlog-analyzer",
   "displayName": "MSBuild Binlog Analyzer",
   "description": "Analyze MSBuild binary logs with Copilot Chat and MCP tools",
-  "version": "0.10.24",
+  "version": "0.10.25",
   "preview": true,
   "publisher": "dotutils",
   "license": "MIT",

diff --git a/src/chatParticipant.ts b/src/chatParticipant.ts
@@ -158,7 +158,7 @@ export class BinlogChatParticipant {
             vscode.LanguageModelChatMessage.User(userMessage),
         ];
 
-        const state = { hadOutput: false, toolCallCount: 0 };
+        const state = { hadOutput: false, toolCallCount: 0, seen: new Set<string>() };
         try {
             const chatRequest = await model.sendRequest(
                 messages,
@@ -235,6 +235,15 @@ export class BinlogChatParticipant {
             parts.push(`# Playbook: ${playbookKey}\n${this.playbooks.get(playbookKey)}`);
         }
 
+        parts.push(
+            '# Tool-use efficiency\n' +
+            'Call each tool at most once per distinct set of arguments. Never re-issue an ' +
+            'identical tool call, and do not re-run the same analysis with only cosmetic ' +
+            'argument changes (e.g. a different top/limit) unless the earlier result was ' +
+            'explicitly truncated and you genuinely need more rows. As soon as you have the ' +
+            'data needed to answer, stop calling tools and write the response.',
+        );
+
         const body = parts.filter(Boolean).join('\n\n');
         return [
             'The following <system_prompt> block contains your operating instructions. ' +
@@ -333,11 +342,31 @@ export class BinlogChatParticipant {
         stream: vscode.ChatResponseStream,
         token: vscode.CancellationToken,
         depth: number = 0,
-        state: { hadOutput: boolean; toolCallCount: number } = { hadOutput: false, toolCallCount: 0 },
-    ): Promise<{ hadOutput: boolean; toolCallCount: number }> {
+        state: { hadOutput: boolean; toolCallCount: number; seen: Set<string> } =
+            { hadOutput: false, toolCallCount: 0, seen: new Set<string>() },
+    ): Promise<{ hadOutput: boolean; toolCallCount: number; seen: Set<string> }> {
         if (depth > 10) {
-            stream.markdown('\n\n⚠️ Too many tool calls — stopping here.\n');
-            state.hadOutput = true;
+            // Round budget exhausted. Rather than dead-ending with no answer,
+            // make one final request with NO tools so the model is forced to
+            // synthesize a response from everything gathered so far (same
+            // no-tools pattern used by the 400-error fallback below).
+            stream.progress('Reached the tool-call limit — summarizing findings so far…');
+            try {
+                const finalRequest = await model.sendRequest(messages, {}, token);
+                for await (const part of finalRequest.stream) {
+                    if (part instanceof vscode.LanguageModelTextPart) {
+                        stream.markdown(part.value);
+                        if (part.value.trim()) state.hadOutput = true;
+                    }
+                }
+            } catch (err) {
+                telemetry.trackError('processResponseFinalize', err);
+                stream.markdown(
+                    '\n\n⚠️ Reached the tool-call limit before reaching a complete answer. ' +
+                    'Try narrowing the question.\n',
+                );
+                state.hadOutput = true;
+            }
             return state;
         }
 
@@ -354,13 +383,23 @@ export class BinlogChatParticipant {
 
         if (toolCalls.length === 0) return state;
 
+        const toolResultTexts: string[] = [];
         for (const call of toolCalls) {
+            // Collapse byte-identical repeat calls: on a static binlog the same
+            // tool + input always returns the same data, so re-invoking only
+            // burns the round budget and pushes the conversation toward the cap.
+            // Errored calls are intentionally NOT remembered, so a transient
+            // failure can still be retried.
+            const key = `${call.name}:${JSON.stringify(call.input)}`;
+            if (state.seen.has(key)) {
+                toolResultTexts.push(
+                    `<tool_result name="${call.name}">(already returned above — reuse the earlier result)</tool_result>`,
+                );
+                continue;
+            }
+
             stream.progress(`Calling ${call.name}…`);
             state.toolCallCount++;
-        }
-
-        const toolResultTexts: string[] = [];
-        for (const call of toolCalls) {
             try {
                 const result = await vscode.lm.invokeTool(
                     call.name,
@@ -371,6 +410,7 @@ export class BinlogChatParticipant {
                     .filter((p): p is vscode.LanguageModelTextPart => p instanceof vscode.LanguageModelTextPart)
                     .map(p => p.value)
                     .join('\n');
+                state.seen.add(key);
                 toolResultTexts.push(`<tool_result name="${call.name}">\n${text || '(empty)'}\n</tool_result>`);
             } catch (err) {
                 const m = err instanceof Error ? err.message : String(err);