From 8e0b4bcf35a01e0b9a55ce0f30db51a957bf916b Mon Sep 17 00:00:00 2001
From: yusufaytas <yusufaytas@gmail.com>
Date: Sun, 15 Mar 2026 22:28:24 +0000
Subject: [PATCH 1/2] Tightened enginee follow-up, scope inference, and
 reference/cache handling

Enabled first-iteration follow-ups to run cleanly and persist their entities, pass resolved questions plus
conversation history into synthesis, tighten active-status detection, avoid incorrect log references and scope
narrowing, and dedupe fuzzy cache entries. Includes regression tests for follow-ups, long-history synthesis, scope
inference, cache behavior, and reference handling.
---
 src/engine/answerGenerator.ts         |  11 +-
 src/engine/contextManager.ts          |  27 ++-
 src/engine/copilotEngine.ts           |  59 ++++++-
 src/engine/followUpEngine.ts          |  23 +--
 src/engine/referenceBuilder.ts        |  94 +++++------
 src/engine/referenceResolver.ts       |   6 +-
 src/engine/resultCache.ts             |  13 +-
 src/engine/scopeInferer.ts            |  76 +++------
 tests/contextManager.test.ts          |  47 ++++++
 tests/copilotEngine.followups.test.ts | 228 ++++++++++++++++++++++++++
 tests/engine/answerGenerator.test.ts  |  63 ++++++-
 tests/engine/referenceBuilder.test.ts |  34 ++++
 tests/engine/scopeInferer.test.ts     |  99 +++++++++++
 tests/followUpEngine.test.ts          |  56 +++++++
 tests/referenceResolver.test.ts       |  27 +++
 tests/resultCache.test.ts             |  40 +++++
 16 files changed, 751 insertions(+), 152 deletions(-)
diff --git a/src/engine/answerGenerator.ts b/src/engine/answerGenerator.ts
index 705edeb..6177fae 100644
--- a/src/engine/answerGenerator.ts
+++ b/src/engine/answerGenerator.ts
@@ -1,4 +1,4 @@
-import { CopilotAnswer, CopilotAction, CopilotReferences, ToolResult, LlmClient, Correlation, Anomaly, MetricReference, LogReference, MetricExpression, LogExpression, QueryScope } from "../types.js";
+import { CopilotAnswer, CopilotAction, CopilotReferences, ToolResult, LlmClient, LlmMessage, Correlation, Anomaly, MetricReference, LogReference, MetricExpression, LogExpression, QueryScope } from "../types.js";
 import { buildFinalAnswerPrompt } from "../prompts.js";
 import { HandlerUtils } from "./handlers/utils.js";
 import { CorrelationDetector } from "./correlationDetector.js";
@@ -17,6 +17,7 @@ export async function synthesizeCopilotAnswer(
   results: ToolResult[],
   chatId: string,
   llm: LlmClient,
+  conversationHistory: LlmMessage[] = [],
 ): Promise<CopilotAnswer> {
   const fallback = createFallbackAnswer(question, results, chatId);
   if (!results.length) return fallback;
@@ -32,8 +33,12 @@ export async function synthesizeCopilotAnswer(
     // Create a comprehensive prompt for the LLM, including insights
     const prompt = createSynthesisPrompt(question, results, correlations, anomalies);
 
-    // Get LLM analysis
-    const response = await llm.chat([{ role: "user", content: prompt }], []);
+    // Get LLM analysis (include conversation history for multi-turn context)
+    const messages: LlmMessage[] = [
+      ...conversationHistory,
+      { role: "user", content: prompt },
+    ];
+    const response = await llm.chat(messages, []);
 
     if (!response || !response.content) {
       console.warn(`[Copilot][${chatId}] LLM synthesis failed, using fallback`);
diff --git a/src/engine/contextManager.ts b/src/engine/contextManager.ts
index d983076..97d9f14 100644
--- a/src/engine/contextManager.ts
+++ b/src/engine/contextManager.ts
@@ -90,40 +90,35 @@ export function fitMessagesInContext(
     return messages; // Fits, no truncation needed
   }
 
-  // Separate messages by type
+  // Separate system messages (always kept, highest priority)
   const systemMessages = messages.filter((m) => m.role === "system");
-  const userMessages = messages.filter((m) => m.role === "user");
+  const nonSystemMessages = messages.filter((m) => m.role !== "system");
 
-  // Always keep system messages (highest priority)
+  // Always keep system messages
   const result: LlmMessage[] = [...systemMessages];
   let budget =
     config.maxContextTokens -
     systemMessages.reduce((sum, msg) => sum + estimateTokens(msg.content), 0);
 
-  // Keep most recent user message (critical for context)
-  const lastUserMessage = userMessages[userMessages.length - 1];
-  if (lastUserMessage) {
-    result.push(lastUserMessage);
-    budget -= estimateTokens(lastUserMessage.content);
-  }
-
-  // Add recent messages with remaining budget
-  const recentMessages = messages
-    .slice(-3)
-    .filter((m) => m.role !== "system" && m !== lastUserMessage);
+  // Keep most recent messages first (regardless of role: user, assistant, tool)
+  // Work backwards from most recent to fill budget
+  const reversedNonSystem = [...nonSystemMessages].reverse();
 
-  for (const msg of recentMessages) {
+  for (const msg of reversedNonSystem) {
     const tokens = estimateTokens(msg.content);
     if (tokens <= budget) {
       result.push(msg);
       budget -= tokens;
     } else if (budget > 100) {
-      // Truncate this message to fit
+      // Truncate this message to fit remaining budget
       const truncated = {
         ...msg,
         content: truncateToTokens(msg.content, budget),
       };
       result.push(truncated);
+      budget = 0;
+      break;
+    } else {
       break;
     }
   }
diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts
index 02ac108..ba828a8 100644
--- a/src/engine/copilotEngine.ts
+++ b/src/engine/copilotEngine.ts
@@ -417,12 +417,16 @@ export class CopilotEngine {
       }
 
       // Validate and fix calls (ensures required args like start/end/step are present for metrics)
-      plannedCalls = await this.planRefiner.refineCalls(
-        plannedCalls,
-        this.mcp.getTools(),
-        conversationTurns,
-        allResults,
-      );
+      // Note: On first iteration, refineCalls is already called inside applyHeuristics,
+      // so we only need to call it explicitly on follow-up iterations.
+      if (!isFirstIteration) {
+        plannedCalls = await this.planRefiner.refineCalls(
+          plannedCalls,
+          this.mcp.getTools(),
+          conversationTurns,
+          allResults,
+        );
+      }
 
       // Limit calls
       plannedCalls = this.limitToolCalls(plannedCalls);
@@ -461,11 +465,47 @@ export class CopilotEngine {
 
       // D. Accumulate
       allResults.push(...results);
+
+      // D2. Run follow-up heuristics after first iteration results too
+      // (Previously follow-ups only ran on iteration ≥2, missing orchestration plan lookups
+      // when problems were detected on the first tool execution)
+      let iterationResultsForEntityExtraction = results;
+
+      if (isFirstIteration && results.length > 0) {
+        const formattedForFollowUp = this.formatResultsForPlanning(allResults);
+        const availableToolNames = new Set(this.mcp.getTools().map((t) => t.name));
+        const firstIterationFollowUps = (await this.followUpEngine.applyFollowUps(
+          formattedForFollowUp,
+          chatId,
+          conversationTurns,
+          questionForPlanning,
+          plannedCalls,
+        )).filter((c) => availableToolNames.has(c.name));
+
+        if (firstIterationFollowUps.length > 0) {
+          console.log(
+            `[Copilot][${chatId}] First-iteration follow-ups: ${firstIterationFollowUps.length} call(s): ${firstIterationFollowUps.map((c) => c.name).join(", ")}`,
+          );
+
+          const followUpResults = await this.runToolCallsWithCache(
+            this.limitToolCalls(firstIterationFollowUps),
+            chatId,
+            this.mcp.getTools(),
+            trace,
+          );
+          allResults.push(...followUpResults);
+          iterationResultsForEntityExtraction = [
+            ...results,
+            ...followUpResults,
+          ];
+        }
+      }
+
       isFirstIteration = false;
 
       // E. Extract entities from results
       const extractedEntities = await this.entityExtractor.extractFromResults(
-        results,
+        iterationResultsForEntityExtraction,
         chatId,
         conversationTurns,
       );
@@ -537,12 +577,13 @@ export class CopilotEngine {
       );
     }
 
-    // Step 5: Synthesize final answer
+    // Step 5: Synthesize final answer (use resolved question and conversation history for context)
     const answer = await synthesizeCopilotAnswer(
-      question,
+      questionForPlanning,
       allResults,
       chatId,
       this.config.llm,
+      conversationHistory,
     );
 
     // If calls were planned but no results were collected, tool calls were likely
diff --git a/src/engine/followUpEngine.ts b/src/engine/followUpEngine.ts
index ad72a7c..b821675 100644
--- a/src/engine/followUpEngine.ts
+++ b/src/engine/followUpEngine.ts
@@ -221,24 +221,15 @@ export class FollowUpEngine {
       "active",
       "investigating",
     ]);
-    const resolvedStatuses = new Set([
-      "resolved",
-      "closed",
-      "mitigated",
-      "remediated",
-    ]);
 
     for (const record of this.collectRecords(result)) {
       const status = record.status;
       if (typeof status === "string") {
         const normalized = status.toLowerCase();
         if (activeStatuses.has(normalized)) return true;
-        if (resolvedStatuses.has(normalized)) continue;
-        return true;
-      }
-      if (record.id) {
-        return true;
+        // Unknown or resolved statuses are not considered active
       }
+      // Records without a status are not considered active
     }
 
     return false;
@@ -246,7 +237,6 @@ export class FollowUpEngine {
 
   private hasActiveAlert(result: JsonValue): boolean {
     const activeStatuses = new Set(["firing", "acknowledged", "triggered", "open"]);
-    const resolvedStatuses = new Set(["resolved", "closed", "cleared"]);
     for (const record of this.collectRecords(result)) {
       const status = record.status;
       if (typeof status === "string") {
@@ -254,14 +244,9 @@ export class FollowUpEngine {
         if (activeStatuses.has(normalized)) {
           return true;
         }
-        if (resolvedStatuses.has(normalized)) {
-          continue;
-        }
-        return true;
-      }
-      if (record.id) {
-        return true;
+        // Unknown or resolved statuses are not considered active
       }
+      // Records without a status are not considered active
     }
     return false;
   }
diff --git a/src/engine/referenceBuilder.ts b/src/engine/referenceBuilder.ts
index eb269bd..4518e26 100644
--- a/src/engine/referenceBuilder.ts
+++ b/src/engine/referenceBuilder.ts
@@ -393,61 +393,53 @@ export function buildReferences(
       }
     }
 
-    let query: string | undefined;
-
-    // MCP logQuerySchema: expression is object with search field
-    if (args.expression && typeof args.expression === "object" && !Array.isArray(args.expression)) {
-      const expr = args.expression as JsonObject;
-      // MCP logExpressionSchema: search is z.string().optional()
-      if (typeof expr.search === "string") query = expr.search;
-
-      // If we have filters but no search string, use "Filtered Logs" or similar placeholder
-      // or just allow empty search. The Reference structure allows undefined search if we change types, 
-      // but let's check strict types. LogReference.expression.search is optional in types.ts?
-      // Let's check types again. 
-      // types.ts: export interface LogExpression { search?: string; ... }
-      // So we can create a LogReference without search string if filters exist.
-    }
-
-    // Check if we have enough to make a reference (search OR filters)
-    const hasSearch = typeof query === "string";
-    const exprObj = (args.expression as JsonObject) || {};
-    const hasFilters = Array.isArray(exprObj.filters) && exprObj.filters.length > 0;
-
-    if (hasSearch || hasFilters) {
-      // Construct a simple LogReference
-      const log: LogReference = {
-        expression: {},
-      };
-      if (hasSearch) log.expression.search = query;
-
-      // Copy filters if present
-      if (hasFilters) {
-        // We can blindly copy filters if structure matches, or leave emptiness.
-        // Ideally we copy them to be accurate.
-        // defined in types as: filters?: { field: string; operator: string; value: string }[];
-        // Let's copy them correctly.
-        log.expression.filters = exprObj.filters as { field: string; operator: string; value: string }[];
+    // Only extract log references from log tools
+    if (capabilityType === "log") {
+      let query: string | undefined;
+
+      // MCP logQuerySchema: expression is object with search field
+      if (args.expression && typeof args.expression === "object" && !Array.isArray(args.expression)) {
+        const expr = args.expression as JsonObject;
+        // MCP logExpressionSchema: search is z.string().optional()
+        if (typeof expr.search === "string") query = expr.search;
       }
 
-      if (typeof args.start === "string" && args.start.trim())
-        log.start = args.start.trim();
-      if (typeof args.end === "string" && args.end.trim())
-        log.end = args.end.trim();
-      if (typeof args.service === "string" && args.service.trim())
-        log.scope = { service: args.service.trim() };
+      // Check if we have enough to make a reference (search OR filters)
+      const hasSearch = typeof query === "string";
+      const exprObj = (args.expression as JsonObject) || {};
+      const hasFilters = Array.isArray(exprObj.filters) && exprObj.filters.length > 0;
 
-      // Handle object scope (MCP uses scope object)
-      if (
-        args.scope &&
-        typeof args.scope === "object" &&
-        !Array.isArray(args.scope) &&
-        (args.scope as JsonObject).service
-      ) {
-        log.scope = log.scope ?? { service: (args.scope as JsonObject).service as string };
-      }
+      if (hasSearch || hasFilters) {
+        // Construct a simple LogReference
+        const log: LogReference = {
+          expression: {},
+        };
+        if (hasSearch) log.expression.search = query;
+
+        // Copy filters if present
+        if (hasFilters) {
+          log.expression.filters = exprObj.filters as { field: string; operator: string; value: string }[];
+        }
+
+        if (typeof args.start === "string" && args.start.trim())
+          log.start = args.start.trim();
+        if (typeof args.end === "string" && args.end.trim())
+          log.end = args.end.trim();
+        if (typeof args.service === "string" && args.service.trim())
+          log.scope = { service: args.service.trim() };
+
+        // Handle object scope (MCP uses scope object)
+        if (
+          args.scope &&
+          typeof args.scope === "object" &&
+          !Array.isArray(args.scope) &&
+          (args.scope as JsonObject).service
+        ) {
+          log.scope = log.scope ?? { service: (args.scope as JsonObject).service as string };
+        }
 
-      logs.push(log);
+        logs.push(log);
+      }
     }
   }
 
diff --git a/src/engine/referenceResolver.ts b/src/engine/referenceResolver.ts
index 1747529..0403af7 100644
--- a/src/engine/referenceResolver.ts
+++ b/src/engine/referenceResolver.ts
@@ -24,10 +24,10 @@ export class ReferenceResolver {
     conversationHistory?: ConversationTurn[],
   ): Promise<Map<string, string>> {
     const resolutions = new Map<string, string>();
-    const normalized = question.toLowerCase();
 
-    // Extract potential reference patterns from the question
-    const referencePatterns = this.extractReferencePatterns(normalized);
+    // Extract potential reference patterns from the original question
+    // (handlers receive original casing; replacement uses case-insensitive regex)
+    const referencePatterns = this.extractReferencePatterns(question);
 
     for (const { text, entityType } of referencePatterns) {
       if (this.referenceRegistry.hasHandlers(entityType)) {
diff --git a/src/engine/resultCache.ts b/src/engine/resultCache.ts
index a8d950e..9d2da36 100644
--- a/src/engine/resultCache.ts
+++ b/src/engine/resultCache.ts
@@ -166,9 +166,20 @@ export class ResultCache {
   set(call: ToolCall, result: ToolResult, scopeKey: string = "global"): void {
     const key = createScopedCacheKey(scopeKey, call);
 
-    // Remove existing entry if present
+    // Remove existing exact-key entry if present
     if (this.cache.has(key)) {
       this.accessOrder = this.accessOrder.filter((k) => k !== key);
+    } else {
+      // Also remove any fuzzy-matched entry under a different key to prevent duplicates
+      for (const [existingKey, candidate] of this.cache.entries()) {
+        if (candidate.scopeKey !== scopeKey) continue;
+        if (candidate.call.name !== call.name) continue;
+        if (isFuzzyEqual(call.arguments || {}, candidate.call.arguments || {})) {
+          this.cache.delete(existingKey);
+          this.accessOrder = this.accessOrder.filter((k) => k !== existingKey);
+          break;
+        }
+      }
     }
 
     // Evict oldest entry if cache is full
diff --git a/src/engine/scopeInferer.ts b/src/engine/scopeInferer.ts
index 3d805f8..8b17826 100644
--- a/src/engine/scopeInferer.ts
+++ b/src/engine/scopeInferer.ts
@@ -178,56 +178,34 @@ export class ScopeInferer {
         return scope.service;
       }
 
-      // Service from first item in arrays using MCP schema fields
+      // Service from items in arrays using MCP schema fields
+      // Only infer if ALL items share the same service (avoids narrowing multi-service results)
       // incidents, alerts, logs use: service field
       // services use: name field
-      if (Array.isArray(obj.incidents) && obj.incidents.length > 0) {
-        const first = obj.incidents[0];
-        if (first && typeof first === "object" && !Array.isArray(first)) {
-          const firstObj = first as JsonObject;
-          if (firstObj.service && typeof firstObj.service === "string") {
-            return firstObj.service;
-          }
-        }
-      }
-
-      if (Array.isArray(obj.alerts) && obj.alerts.length > 0) {
-        const first = obj.alerts[0];
-        if (first && typeof first === "object" && !Array.isArray(first)) {
-          const firstObj = first as JsonObject;
-          if (firstObj.service && typeof firstObj.service === "string") {
-            return firstObj.service;
-          }
-        }
-      }
-
-      if (Array.isArray(obj.logs) && obj.logs.length > 0) {
-        const first = obj.logs[0];
-        if (first && typeof first === "object" && !Array.isArray(first)) {
-          const firstObj = first as JsonObject;
-          if (firstObj.service && typeof firstObj.service === "string") {
-            return firstObj.service;
-          }
-        }
-      }
+      const arrayFields: Array<{ key: string; serviceField: string }> = [
+        { key: "incidents", serviceField: "service" },
+        { key: "alerts", serviceField: "service" },
+        { key: "logs", serviceField: "service" },
+        { key: "services", serviceField: "name" },
+        { key: "metrics", serviceField: "service" },
+      ];
 
-      // Services are extracted by name field (MCP serviceSchema)
-      if (Array.isArray(obj.services) && obj.services.length > 0) {
-        const first = obj.services[0];
-        if (first && typeof first === "object" && !Array.isArray(first)) {
-          const firstObj = first as JsonObject;
-          if (firstObj.name && typeof firstObj.name === "string") {
-            return firstObj.name;
+      for (const { key, serviceField } of arrayFields) {
+        const arr = obj[key];
+        if (Array.isArray(arr) && arr.length > 0) {
+          const services = new Set<string>();
+          for (const item of arr) {
+            if (item && typeof item === "object" && !Array.isArray(item)) {
+              const itemObj = item as JsonObject;
+              const svc = itemObj[serviceField];
+              if (typeof svc === "string") {
+                services.add(svc);
+              }
+            }
           }
-        }
-      }
-
-      if (Array.isArray(obj.metrics) && obj.metrics.length > 0) {
-        const first = obj.metrics[0];
-        if (first && typeof first === "object" && !Array.isArray(first)) {
-          const firstObj = first as JsonObject;
-          if (firstObj.service && typeof firstObj.service === "string") {
-            return firstObj.service;
+          // Only infer scope if exactly one service is present across all items
+          if (services.size === 1) {
+            return [...services][0];
           }
         }
       }
@@ -326,13 +304,13 @@ export class ScopeInferer {
   applyScope(calls: ToolCall[], inference: ScopeInference): ToolCall[] {
     return calls.map((call) => {
       // Only apply scope to tools that commonly use scope
-      const scopeTools = [
+      const scopeTools = new Set([
         "query-logs",
         "query-metrics",
         "query-incidents",
         "query-alerts",
-      ];
-      if (!scopeTools.some((tool) => call.name.includes(tool.split("-")[1]))) {
+      ]);
+      if (!scopeTools.has(call.name)) {
         return call;
       }
 
diff --git a/tests/contextManager.test.ts b/tests/contextManager.test.ts
index 98d48fc..afcdeb9 100644
--- a/tests/contextManager.test.ts
+++ b/tests/contextManager.test.ts
@@ -119,3 +119,50 @@ test('summarizes conversation for logging', () => {
     assert.ok(summary.includes('2 user messages'));
     assert.ok(summary.includes('1 assistant'));
 });
+
+test('fitMessagesInContext retains tool messages when within budget', () => {
+    const messages: LlmMessage[] = [
+        { role: 'system', content: 'You are a helper.' },
+        { role: 'user', content: 'question' },
+        { role: 'tool', content: 'tool result data' },
+        { role: 'assistant', content: 'here is my analysis' },
+        { role: 'user', content: 'follow up question' },
+    ];
+
+    const fitted = fitMessagesInContext(messages, {
+        maxContextTokens: 10000,
+        systemPriority: 1.0,
+        recentPriority: 0.8,
+        olderPriority: 0.3,
+    });
+
+    assert.equal(fitted.length, 5, 'All messages should fit');
+    assert.ok(fitted.some(m => m.role === 'tool'), 'Tool messages should be retained');
+    assert.ok(fitted.some(m => m.role === 'assistant'), 'Assistant messages should be retained');
+});
+
+test('fitMessagesInContext keeps recent tool/assistant messages under tight budget', () => {
+    const messages: LlmMessage[] = [
+        { role: 'system', content: 'sys' },
+        { role: 'user', content: 'old question ' + 'x'.repeat(500) },
+        { role: 'tool', content: 'old tool result ' + 'y'.repeat(500) },
+        { role: 'assistant', content: 'old answer ' + 'z'.repeat(500) },
+        { role: 'user', content: 'recent question' },
+        { role: 'tool', content: 'recent tool result' },
+        { role: 'assistant', content: 'recent answer' },
+    ];
+
+    const fitted = fitMessagesInContext(messages, {
+        maxContextTokens: 60,
+        systemPriority: 1.0,
+        recentPriority: 0.8,
+        olderPriority: 0.3,
+    });
+
+    assert.ok(fitted.some(m => m.role === 'system'));
+    const hasRecentTool = fitted.some(m => m.content.includes('recent tool'));
+    const hasRecentAnswer = fitted.some(m => m.content.includes('recent answer'));
+    const hasRecentQuestion = fitted.some(m => m.content.includes('recent question'));
+    assert.ok(hasRecentQuestion || hasRecentTool || hasRecentAnswer,
+        'At least some recent messages should be kept');
+});
diff --git a/tests/copilotEngine.followups.test.ts b/tests/copilotEngine.followups.test.ts
index 331e2ef..815382a 100644
--- a/tests/copilotEngine.followups.test.ts
+++ b/tests/copilotEngine.followups.test.ts
@@ -334,3 +334,231 @@ test('caches dependency-resolved detail calls by executed arguments, not unresol
     'Timeline calls should use each resolved incident id instead of reusing a stale no-id cache entry',
   );
 });
+
+test('first iteration follow-ups suggest orchestration plans for active incidents', async () => {
+  let plannerCalls = 0;
+  const llm: LlmClient = {
+    async chat(_messages: LlmMessage[], tools: Tool[]) {
+      if (tools.length) {
+        plannerCalls++;
+        if (plannerCalls === 1) {
+          return {
+            content: 'plan',
+            toolCalls: [{ name: 'query-incidents', arguments: {} }],
+          };
+        }
+        return { content: 'stop', toolCalls: [] };
+      }
+      return {
+        content: JSON.stringify({ conclusion: 'done', evidence: [] }),
+        toolCalls: [],
+      };
+    },
+  };
+
+  const calls: ToolCall[] = [];
+  const mcp: StubMcp = {
+    async listTools() {
+      return [
+        { name: 'query-incidents' } as Tool,
+        { name: 'query-orchestration-plans' } as Tool,
+      ];
+    },
+    async callTool(call): Promise<ToolResult> {
+      calls.push(call);
+      if (call.name === 'query-incidents') {
+        return {
+          name: call.name,
+          result: [
+            { id: 'INC-001', service: 'payments', status: 'active' },
+          ],
+        };
+      }
+      if (call.name === 'query-orchestration-plans') {
+        return { name: call.name, result: [] };
+      }
+      return { name: call.name, result: {} };
+    },
+  };
+
+  const engine = makeEngine(llm, mcp);
+  await engine.answer('investigate the payments incident');
+
+  const callNames = calls.map(c => c.name);
+  assert.ok(
+    callNames.includes('query-orchestration-plans'),
+    'First iteration should trigger orchestration plan follow-up for active incidents. Got: ' + callNames.join(', '),
+  );
+});
+
+test('first iteration follow-ups do not add unavailable tools', async () => {
+  const llm: LlmClient = {
+    async chat(_messages: LlmMessage[], tools: Tool[]) {
+      if (tools.length) {
+        return {
+          content: 'plan',
+          toolCalls: [{ name: 'query-incidents', arguments: {} }],
+        };
+      }
+      return {
+        content: JSON.stringify({ conclusion: 'done', evidence: [] }),
+        toolCalls: [],
+      };
+    },
+  };
+
+  const calls: ToolCall[] = [];
+  const mcp: StubMcp = {
+    async listTools() {
+      // Crucially: query-orchestration-plans is NOT in the tool list
+      return [{ name: 'query-incidents' } as Tool];
+    },
+    async callTool(call): Promise<ToolResult> {
+      calls.push(call);
+      return {
+        name: call.name,
+        result: [{ id: 'INC-001', service: 'payments', status: 'active' }],
+      };
+    },
+  };
+
+  const engine = makeEngine(llm, mcp);
+  await engine.answer('investigate payments incident');
+
+  const callNames = calls.map(c => c.name);
+  assert.ok(
+    !callNames.includes('query-orchestration-plans'),
+    'Should not attempt unavailable tools. Got: ' + callNames.join(', '),
+  );
+});
+
+test('first iteration follow-up results contribute entities to conversation state', async () => {
+  const llm: LlmClient = {
+    async chat(_messages: LlmMessage[], tools: Tool[]) {
+      if (tools.length) {
+        return {
+          content: 'plan',
+          toolCalls: [{ name: 'query-incidents', arguments: {} }],
+        };
+      }
+      return {
+        content: JSON.stringify({ conclusion: 'done', evidence: [] }),
+        toolCalls: [],
+      };
+    },
+  };
+
+  const mcp: StubMcp = {
+    async listTools() {
+      return [
+        { name: 'query-incidents' } as Tool,
+        { name: 'query-orchestration-plans' } as Tool,
+      ];
+    },
+    async callTool(call): Promise<ToolResult> {
+      if (call.name === 'query-incidents') {
+        return {
+          name: call.name,
+          result: [{ id: 'INC-001', service: 'payments', status: 'active' }],
+        };
+      }
+      if (call.name === 'query-orchestration-plans') {
+        return {
+          name: call.name,
+          result: [{ id: 'plan-42', name: 'payments recovery' }],
+        };
+      }
+      return { name: call.name, result: {} };
+    },
+  };
+
+  const engine = makeEngine(llm, mcp);
+  const answer = await engine.answer('investigate the payments incident');
+
+  const conversation = await engine.getConversationManager().peekConversation(answer.chatId);
+  const turnEntities = conversation?.turns[0]?.entities ?? [];
+  const planEntity = turnEntities.find((entity) => entity.type === 'orchestration_plan');
+
+  assert.ok(planEntity, 'Expected orchestration plan entity from first-iteration follow-up results');
+  assert.equal(planEntity?.value, 'plan-42');
+});
+
+test('answer synthesizer receives resolved question with entity references', async () => {
+  let synthesisPrompt = '';
+  const llm: LlmClient = {
+    async chat(messages: LlmMessage[], tools: Tool[]) {
+      if (tools.length) {
+        return {
+          content: 'plan',
+          toolCalls: [{ name: 'query-incidents', arguments: {} }],
+        };
+      }
+      // Capture the synthesis prompt
+      const lastMsg = messages[messages.length - 1];
+      synthesisPrompt = lastMsg?.content ?? '';
+      return {
+        content: JSON.stringify({ conclusion: 'done', evidence: [] }),
+        toolCalls: [],
+      };
+    },
+  };
+
+  const mcp: StubMcp = {
+    async listTools() {
+      return [{ name: 'query-incidents' } as Tool];
+    },
+    async callTool(call): Promise<ToolResult> {
+      return {
+        name: call.name,
+        result: [{ id: 'INC-001', service: 'payments', status: 'active' }],
+      };
+    },
+  };
+
+  const engine = makeEngine(llm, mcp);
+  // Ask using a plain question (no references to resolve)
+  await engine.answer('Show me active incidents');
+
+  // The synthesis prompt should contain the question text
+  assert.ok(
+    synthesisPrompt.includes('active incidents'),
+    'Synthesis prompt should include the question text',
+  );
+});
+
+test('first iteration does not double-validate through refineCalls', async () => {
+  // This is a structural test: we verify the engine doesn't crash and the
+  // plan goes through validation only once by checking the tool calls executed.
+  const llm: LlmClient = {
+    async chat(_messages: LlmMessage[], tools: Tool[]) {
+      if (tools.length) {
+        return {
+          content: 'plan',
+          toolCalls: [{ name: 'query-services', arguments: {} }],
+        };
+      }
+      return {
+        content: JSON.stringify({ conclusion: 'services ok', evidence: [] }),
+        toolCalls: [],
+      };
+    },
+  };
+
+  const calls: ToolCall[] = [];
+  const mcp: StubMcp = {
+    async listTools() {
+      return [{ name: 'query-services' } as Tool];
+    },
+    async callTool(call): Promise<ToolResult> {
+      calls.push(call);
+      return { name: call.name, result: { services: ['svc-a'] } };
+    },
+  };
+
+  const engine = makeEngine(llm, mcp);
+  await engine.answer('list services');
+
+  // The tool should be called exactly once (no double execution from double validation)
+  assert.equal(calls.length, 1, 'Tool should execute exactly once');
+  assert.equal(calls[0].name, 'query-services');
+});
diff --git a/tests/engine/answerGenerator.test.ts b/tests/engine/answerGenerator.test.ts
index 6c8ea0d..df0a5e3 100644
--- a/tests/engine/answerGenerator.test.ts
+++ b/tests/engine/answerGenerator.test.ts
@@ -155,6 +155,68 @@ describe('synthesizeCopilotAnswer', () => {
         assert.strictEqual(answer.chatId, 'chat-error');
     });
 
+    test('passes very long multi-turn conversation history alongside the final synthesis prompt', async () => {
+        let capturedMessages: LlmMessage[] = [];
+
+        const llm: LlmClient = {
+            async chat(messages: LlmMessage[], _tools: Tool[]) {
+                capturedMessages = messages;
+                return {
+                    content: JSON.stringify({
+                        conclusion: 'Long-context synthesis completed.',
+                        confidence: 0.82,
+                    }),
+                    toolCalls: [],
+                };
+            }
+        };
+
+        const conversationHistory: LlmMessage[] = [
+            { role: 'user', content: 'Initial incident report ' + 'a'.repeat(4000) },
+            { role: 'tool', content: 'query-incidents: ' + 'b'.repeat(3500) },
+            { role: 'assistant', content: 'Preliminary analysis ' + 'c'.repeat(4200) },
+            { role: 'user', content: 'Can you correlate this with logs and metrics? ' + 'd'.repeat(3800) },
+            { role: 'tool', content: 'query-logs: ' + 'e'.repeat(3600) },
+            { role: 'assistant', content: 'I found several suspicious signals ' + 'f'.repeat(4100) },
+        ];
+
+        const results: ToolResult[] = [
+            {
+                name: 'query-incidents',
+                arguments: {},
+                result: {
+                    incidents: [{ id: 'inc-900', title: 'Major outage', service: 'payments' }]
+                }
+            }
+        ];
+
+        const answer = await synthesizeCopilotAnswer(
+            'Summarize the current status',
+            results,
+            'chat-long-history',
+            llm,
+            conversationHistory,
+        );
+
+        assert.strictEqual(answer.chatId, 'chat-long-history');
+        assert.strictEqual(capturedMessages.length, conversationHistory.length + 1,
+            'Synthesis should append exactly one final prompt after the supplied history');
+        assert.deepStrictEqual(
+            capturedMessages.slice(0, conversationHistory.length),
+            conversationHistory,
+            'Conversation history should be forwarded unchanged and in order',
+        );
+        assert.strictEqual(
+            capturedMessages[capturedMessages.length - 1]?.role,
+            'user',
+            'Final synthesis prompt should be appended as the last user message',
+        );
+        assert.ok(
+            capturedMessages[capturedMessages.length - 1]?.content.includes('Summarize the current status'),
+            'Final synthesis prompt should still include the current question',
+        );
+    });
+
     test('includes correlations and anomalies in fallback on LLM failure', async () => {
         const llm: LlmClient = {
             async chat(_messages: LlmMessage[], _tools: Tool[]) {
@@ -522,4 +584,3 @@ describe('synthesizeCopilotAnswer', () => {
     });
 
 });
-
diff --git a/tests/engine/referenceBuilder.test.ts b/tests/engine/referenceBuilder.test.ts
index 5e7c7d0..bd285b1 100644
--- a/tests/engine/referenceBuilder.test.ts
+++ b/tests/engine/referenceBuilder.test.ts
@@ -412,6 +412,40 @@ test('referenceBuilder', async (t) => {
         assert.ok(refs.teams.includes('Velocity Team'), 'Should include team name');
     });
 
+    await t.test('does not create log references for metric tools with expression.search', () => {
+        const results: ToolResult[] = [
+            {
+                name: 'query-metrics',
+                arguments: {
+                    expression: { metricName: 'cpu_usage', search: 'something' },
+                    start: '2024-01-01T00:00:00Z',
+                    end: '2024-01-01T01:00:00Z',
+                    step: 60,
+                },
+                result: [],
+            },
+        ];
+
+        const refs = buildReferences(results);
+        assert.ok(!refs?.logs?.length, 'Metric tools should not produce log references');
+    });
 
+    await t.test('creates log references for actual log tools', () => {
+        const results: ToolResult[] = [
+            {
+                name: 'query-logs',
+                arguments: {
+                    expression: { search: 'error 504' },
+                    start: '2024-01-01T00:00:00Z',
+                    end: '2024-01-01T01:00:00Z',
+                },
+                result: { logs: [] },
+            },
+        ];
+
+        const refs = buildReferences(results);
+        assert.ok(refs?.logs?.length, 'Log tools should produce log references');
+        assert.strictEqual(refs!.logs![0].expression.search, 'error 504');
+    });
 
 });
diff --git a/tests/engine/scopeInferer.test.ts b/tests/engine/scopeInferer.test.ts
index 99f9392..0bd8caf 100644
--- a/tests/engine/scopeInferer.test.ts
+++ b/tests/engine/scopeInferer.test.ts
@@ -326,3 +326,102 @@ test('ScopeInferer: detects multiple team references', async () => {
   }
 });
 
+test('ScopeInferer: applyScope only matches exact query tool names', () => {
+  const inferer = new ScopeInferer();
+  const inference = {
+    scope: { service: 'payment-api' },
+    confidence: 0.8,
+    source: 'incident' as const,
+    reason: 'test',
+  };
+
+  const calls = [
+    { name: 'query-logs', arguments: {} },
+    { name: 'query-metrics', arguments: {} },
+    { name: 'query-incidents', arguments: {} },
+    { name: 'query-alerts', arguments: {} },
+    { name: 'get-incident', arguments: {} },
+    { name: 'get-incident-timeline', arguments: {} },
+    { name: 'query-tickets', arguments: {} },
+    { name: 'get-service', arguments: {} },
+  ];
+
+  const updated = inferer.applyScope(calls, inference);
+
+  // First 4 should have scope injected
+  for (const call of updated.slice(0, 4)) {
+    const scope = (call.arguments as JsonObject)?.scope as JsonObject | undefined;
+    assert.equal(scope?.service, 'payment-api', `${call.name} should get scope`);
+  }
+
+  // Last 4 should NOT have scope injected
+  for (const call of updated.slice(4)) {
+    const scope = (call.arguments as JsonObject)?.scope as JsonObject | undefined;
+    assert.equal(scope, undefined, `${call.name} should NOT get scope`);
+  }
+});
+
+test('ScopeInferer: does not narrow scope when result items have different services', () => {
+  const inferer = new ScopeInferer();
+
+  const calls = [{ name: 'query-logs', arguments: {} }];
+
+  // When inferFromResults returns no service (multi-service), applyScope should not inject one
+  const noServiceInference = {
+    scope: {} as { service?: string },
+    confidence: 0.75,
+    source: 'previous_query' as const,
+    reason: 'test',
+  };
+
+  const updated = inferer.applyScope(calls, noServiceInference);
+  const scope = (updated[0].arguments as JsonObject)?.scope as JsonObject | undefined;
+  assert.equal(scope, undefined, 'No scope should be applied when no service was inferred');
+});
+
+test('ScopeInferer: infers scope when all log items share same service', async () => {
+  const inferer = new ScopeInferer();
+
+  const results: ToolResult[] = [
+    {
+      name: 'query-logs',
+      result: {
+        logs: [
+          { id: 'log-1', service: 'payment-api' },
+          { id: 'log-2', service: 'payment-api' },
+        ],
+      },
+    },
+  ];
+
+  const inference = await inferer.inferScope('what happened?', results);
+
+  assert.ok(inference, 'Should infer scope when all items share the same service');
+  assert.equal(inference.scope.service, 'payment-api');
+});
+
+test('ScopeInferer: does not narrow multi-service log results', async () => {
+  const inferer = new ScopeInferer();
+
+  const results: ToolResult[] = [
+    {
+      name: 'query-logs',
+      result: {
+        logs: [
+          { id: 'log-1', service: 'api-gateway' },
+          { id: 'log-2', service: 'auth-service' },
+        ],
+      },
+    },
+  ];
+
+  const inference = await inferer.inferScope('show me logs', results);
+
+  if (inference?.scope.service) {
+    assert.ok(
+      inference.scope.service !== 'api-gateway' && inference.scope.service !== 'auth-service',
+      `Should not pick one service from multi-service results, got: ${inference.scope.service}`,
+    );
+  }
+});
+
diff --git a/tests/followUpEngine.test.ts b/tests/followUpEngine.test.ts
index e5aea2d..606d1d4 100644
--- a/tests/followUpEngine.test.ts
+++ b/tests/followUpEngine.test.ts
@@ -159,4 +159,60 @@ test('FollowUpEngine', async (t) => {
 
         assert.strictEqual(toolServiceKeys.length, uniqueKeys.size, 'Should have no duplicate tool+service combinations');
     });
+
+    await t.test('resolved incidents do not trigger orchestration plan suggestions', async () => {
+        const results: ToolResult[] = [
+            {
+                name: 'query-incidents',
+                result: [{ id: 'INC-001', service: 'payments', status: 'resolved' }],
+                arguments: {},
+            },
+        ];
+
+        const suggestions = await engine.applyFollowUps(results, 'test', [], 'show me services');
+        const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans');
+        assert.strictEqual(orchestrationCalls.length, 0, 'Resolved incidents should not trigger orchestration plans');
+    });
+
+    await t.test('incidents with unknown status do not trigger problem detection', async () => {
+        const results: ToolResult[] = [
+            {
+                name: 'query-incidents',
+                result: [{ id: 'INC-002', service: 'payments', status: 'postmortem' }],
+                arguments: {},
+            },
+        ];
+
+        const suggestions = await engine.applyFollowUps(results, 'test', [], 'list recent entries');
+        const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans');
+        assert.strictEqual(orchestrationCalls.length, 0, 'Unknown statuses should not trigger orchestration lookups');
+    });
+
+    await t.test('explicitly active incidents still trigger problem detection', async () => {
+        const results: ToolResult[] = [
+            {
+                name: 'query-incidents',
+                result: [{ id: 'INC-003', service: 'payments', status: 'triggered' }],
+                arguments: {},
+            },
+        ];
+
+        const suggestions = await engine.applyFollowUps(results, 'test', [], 'what is happening?');
+        const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans');
+        assert.ok(orchestrationCalls.length > 0, 'Explicitly active incidents should still trigger orchestration plans');
+    });
+
+    await t.test('cleared alerts do not trigger problem detection', async () => {
+        const results: ToolResult[] = [
+            {
+                name: 'query-alerts',
+                result: [{ id: 'ALT-001', service: 'api', status: 'cleared' }],
+                arguments: {},
+            },
+        ];
+
+        const suggestions = await engine.applyFollowUps(results, 'test', [], 'show me the dashboard');
+        const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans');
+        assert.strictEqual(orchestrationCalls.length, 0, 'Cleared alerts should not trigger orchestration plans');
+    });
 });
diff --git a/tests/referenceResolver.test.ts b/tests/referenceResolver.test.ts
index 1213d93..f793279 100644
--- a/tests/referenceResolver.test.ts
+++ b/tests/referenceResolver.test.ts
@@ -310,3 +310,30 @@ test('ReferenceResolver: uses prominence as tiebreaker when timestamps are equal
   const resolved = resolutions.get('that incident');
   assert.equal(resolved, 'inc-005');
 });
+
+test('ReferenceResolver: extracts patterns from original-case question', async () => {
+  const resolver = new ReferenceResolver(referenceRegistry);
+  const context: ConversationContext = {
+    chatId: 'test-chat',
+    entities: new Map(),
+  };
+
+  // The question has mixed casing — extraction should work without crashing
+  const resolutions = await resolver.resolveReferences(
+    'What about That Incident?',
+    context,
+    [],
+  );
+
+  assert.ok(resolutions instanceof Map);
+});
+
+test('ReferenceResolver: applyResolutions works case-insensitively on original text', () => {
+  const resolver = new ReferenceResolver(referenceRegistry);
+  const resolutions = new Map<string, string>();
+  resolutions.set('That Incident', 'INC-1234');
+
+  const result = resolver.applyResolutions('Tell me about That Incident please', resolutions);
+  assert.ok(result.includes('INC-1234'));
+  assert.ok(!result.includes('That Incident'));
+});
diff --git a/tests/resultCache.test.ts b/tests/resultCache.test.ts
index cb92e18..2948b60 100644
--- a/tests/resultCache.test.ts
+++ b/tests/resultCache.test.ts
@@ -305,3 +305,43 @@ test('expired entries count as misses', async () => {
     assert.equal(stats.hits, 1);
     assert.equal(stats.misses, 1);
 });
+
+test('cache.set removes existing fuzzy-matched entry to prevent duplicates', () => {
+    const cache = new ResultCache({ maxSize: 10, ttlMs: 60000 });
+
+    const call1: ToolCall = {
+        name: 'query-logs',
+        arguments: {
+            start: '2025-01-01T10:00:15.000Z',
+            end: '2025-01-01T10:30:00.000Z',
+        },
+    };
+    cache.set(call1, { name: 'query-logs', result: 'first' });
+
+    const call2: ToolCall = {
+        name: 'query-logs',
+        arguments: {
+            start: '2025-01-01T10:00:45.000Z', // +30s
+            end: '2025-01-01T10:30:00.000Z',
+        },
+    };
+    cache.set(call2, { name: 'query-logs', result: 'second' });
+
+    assert.equal(cache.stats().size, 1, 'Fuzzy-matched entries should replace, not accumulate');
+    assert.deepEqual(cache.get(call2), { name: 'query-logs', result: 'second' });
+});
+
+test('cache.set does not remove entries with different tool names', () => {
+    const cache = new ResultCache({ maxSize: 10, ttlMs: 60000 });
+
+    cache.set(
+        { name: 'query-logs', arguments: { start: '2025-01-01T10:00:00Z' } },
+        { name: 'query-logs', result: 'logs' },
+    );
+    cache.set(
+        { name: 'query-metrics', arguments: { start: '2025-01-01T10:00:00Z' } },
+        { name: 'query-metrics', result: 'metrics' },
+    );
+
+    assert.equal(cache.stats().size, 2, 'Different tool names should not be deduped');
+});

From 03202785f10ea34b7ce4933f4c12ba881d5ea575 Mon Sep 17 00:00:00 2001
From: yusufaytas <yusufaytas@gmail.com>
Date: Sun, 15 Mar 2026 22:45:30 +0000
Subject: [PATCH 2/2] Applied per iteration safeguard to follow ups and
 refiniing

---
 src/engine/copilotEngine.ts | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts
index ba828a8..2a8aa4b 100644
--- a/src/engine/copilotEngine.ts
+++ b/src/engine/copilotEngine.ts
@@ -482,13 +482,25 @@ export class CopilotEngine {
           plannedCalls,
         )).filter((c) => availableToolNames.has(c.name));
 
-        if (firstIterationFollowUps.length > 0) {
+        // 1. Refine the follow-ups to fill missing/default arguments (like start/end times)
+        const refinedFollowUps = await this.planRefiner.refineCalls(
+          firstIterationFollowUps,
+          this.mcp.getTools(),
+          conversationTurns,
+          allResults,
+        );
+
+        // 2. Ensure total executed calls do not exceed the per-iteration limit
+        const remainingCapacity = Math.max(0, MAX_TOOL_CALLS_PER_ITERATION - plannedCalls.length);
+        const limitedFollowUps = refinedFollowUps.slice(0, remainingCapacity);
+
+        if (limitedFollowUps.length > 0) {
           console.log(
-            `[Copilot][${chatId}] First-iteration follow-ups: ${firstIterationFollowUps.length} call(s): ${firstIterationFollowUps.map((c) => c.name).join(", ")}`,
+            `[Copilot][${chatId}] First-iteration follow-ups (refined/limited): ${limitedFollowUps.length} call(s): ${limitedFollowUps.map((c) => c.name).join(", ")}`,
           );
 
           const followUpResults = await this.runToolCallsWithCache(
-            this.limitToolCalls(firstIterationFollowUps),
+            limitedFollowUps,
             chatId,
             this.mcp.getTools(),
             trace,