From e91972dcdc9ed6300773f2489fe0eb8370eac063 Mon Sep 17 00:00:00 2001 From: yusufaytas Date: Sun, 15 Mar 2026 17:31:46 +0000 Subject: [PATCH 1/2] Fixed the runtime inconsistencies in the copilot loop with tool result passing The main changes are in copilotEngine.ts, conversationManager.ts, planner.ts, and parallelToolRunner.ts. Cross-turn reference resolution now receives real conversation history, prior tool outputs are stored with each turn and replayed as tool messages for follow-up planning, JSON fallback planning keeps the same history/context as the primary planner, and dependent detail calls like get-incident-timeline can inherit IDs from earlier query results before execution. I also fixed service extraction around wrapped query-services payloads in service/entityHandler.ts and made planRefiner.ts stop suggesting replacement tools that are not actually available. The tests in conversationHistory.test.ts, conversationManager.test.ts, and parallelToolRunner.test.ts now assert real state instead of relying on swallowed mock assertions. --- package.json | 2 +- src/engine/chatNamer.ts | 31 +++++- src/engine/conversationManager.ts | 33 +++++- src/engine/copilotEngine.ts | 7 +- src/engine/executionTracer.ts | 10 ++ src/engine/handlers/service/entityHandler.ts | 9 ++ .../handlers/service/referenceHandler.ts | 9 ++ src/engine/parallelToolRunner.ts | 88 ++++++++++++++- src/engine/planRefiner.ts | 11 +- src/engine/planner.ts | 22 +++- src/engine/resultCache.ts | 30 ++++-- src/engine/retryStrategy.ts | 7 +- src/engine/scopeInferer.ts | 8 ++ src/engine/toolRunner.ts | 9 +- src/llmFactory.ts | 21 ++-- src/llms/anthropic.ts | 12 ++- src/llms/gemini.ts | 18 ++-- src/llms/openai.ts | 35 ++---- src/server.ts | 6 +- src/types.ts | 1 + tests/chatNamer.test.ts | 12 +++ tests/conversationHistory.test.ts | 101 +++++++----------- tests/conversationManager.test.ts | 17 ++- tests/executionTracer.test.ts | 16 +++ tests/gemini.test.ts | 49 ++++----- tests/llmFactory.test.ts | 41 ++++--- tests/parallelToolRunner.test.ts | 8 ++ tests/resultCache.test.ts | 13 ++- 28 files changed, 426 insertions(+), 200 deletions(-) diff --git a/package.json b/package.json index fbe9f1a..89ac562 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,7 @@ "lint": "eslint .", "lint:fix": "eslint . 
--fix", "type-check": "tsc --noEmit", - "test": "NODE_ENV=test node --test --loader ts-node/esm", + "test": "NODE_ENV=test node --test --loader ts-node/esm tests/*.test.ts tests/**/*.test.ts", "start": "node --loader ts-node/esm src/server.ts", "seed": "node --loader ts-node/esm scripts/seedDatabase.ts" }, diff --git a/src/engine/chatNamer.ts b/src/engine/chatNamer.ts index 1df884b..aec857a 100644 --- a/src/engine/chatNamer.ts +++ b/src/engine/chatNamer.ts @@ -73,6 +73,9 @@ export class ChatNamer { // Determine intent from user message const intent = this.determineIntent(userMessage); + // Avoid redundant topic/service naming like "Payment Payment Issues" + this.deduplicateSemanticOverlap(mergedEntities); + // Try to synthesize a name let name = this.synthesizeName(mergedEntities, intent, userMessage); @@ -96,6 +99,30 @@ export class ChatNamer { return sanitizedName; } + private deduplicateSemanticOverlap(entities: ExtractedEntities): void { + if (entities.services.length === 0 || entities.topics.length === 0) { + return; + } + + const topicSet = new Set(entities.topics.map((topic) => topic.toLowerCase())); + entities.services = entities.services.filter((service) => { + const normalizedService = service.toLowerCase(); + for (const topic of topicSet) { + if ( + normalizedService === topic || + normalizedService.startsWith(`${topic}-`) || + normalizedService.endsWith(`-${topic}`) || + normalizedService.includes(`${topic}-service`) || + normalizedService.includes(`${topic}-svc`) || + normalizedService.includes(`${topic}-api`) + ) { + return false; + } + } + return true; + }); + } + /** * Extract key entities from the LLM response (incidents, services, metrics). */ @@ -359,10 +386,10 @@ export class ChatNamer { return `${topic} ${metric} Issues`; } - // Priority 6: Topic + Problems/Issues (e.g., "Payment Service Issues") + // Priority 6: Topic + Problems/Issues (e.g., "Payment Issues") if (topics.length > 0 && /problem|issue|error|fail/i.test(userMessage)) { const topic = this.formatTopicName(topics[0]); - return `${topic} Service Issues`; + return `${topic} Issues`; } // Priority 7: Topic + Intent (e.g., "Payment Investigation") diff --git a/src/engine/conversationManager.ts b/src/engine/conversationManager.ts index 51525e6..7c553df 100644 --- a/src/engine/conversationManager.ts +++ b/src/engine/conversationManager.ts @@ -11,6 +11,7 @@ import { Entity, ConversationContext, TurnExecutionTrace, + ToolResult, } from "../types.js"; import { ConversationStore } from "../conversationStore.js"; import { createConversationStore } from "../storeFactory.js"; @@ -99,6 +100,7 @@ export class ConversationManager { assistantResponse?: string, entities?: Entity[], executionTrace?: TurnExecutionTrace, + toolResults?: ToolResult[], ): Promise { let conversation = await this.store.get(chatId); @@ -119,6 +121,7 @@ export class ConversationManager { assistantResponse, timestamp: Date.now(), entities, + toolResults, executionTrace, }); @@ -135,8 +138,8 @@ export class ConversationManager { /** * Build LLM message history from conversation turns. - * Note: Tool results are no longer stored in turns (replaced by executionTrace). - * The message history now only includes user messages and assistant responses. + * Tool results are stored alongside turns so follow-up planning can reuse + * concrete outputs instead of relying only on assistant summaries. 
*/ async buildMessageHistory(chatId: string): Promise { const conversation = await this.getConversation(chatId); @@ -153,6 +156,14 @@ export class ConversationManager { content: turn.userMessage, }); + for (const toolResult of turn.toolResults ?? []) { + messages.push({ + role: "tool", + toolName: toolResult.name, + content: summarizeToolResult(toolResult), + }); + } + // Add assistant response if any if (turn.assistantResponse) { messages.push({ @@ -294,3 +305,21 @@ export class ConversationManager { return this.store; } } + +function summarizeToolResult(toolResult: ToolResult): string { + const content = safeStringify(toolResult.result); + const suffix = content.length > 1000 ? `${content.slice(0, 1000)}...` : content; + return `${toolResult.name}: ${suffix}`; +} + +function safeStringify(value: unknown): string { + if (typeof value === "string") { + return value; + } + + try { + return JSON.stringify(value); + } catch { + return String(value); + } +} diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts index 53cdb0e..48fd6a9 100644 --- a/src/engine/copilotEngine.ts +++ b/src/engine/copilotEngine.ts @@ -155,7 +155,7 @@ export class CopilotEngine { // Step 1: Check cache for each call for (const call of calls) { - const cached = this.resultCache.get(call); + const cached = this.resultCache.get(call, chatId); if (cached) { console.log( `[Copilot][${chatId}] Using cached result for ${call.name}`, @@ -233,7 +233,7 @@ export class CopilotEngine { // Only cache if not an error if (!isError) { - this.resultCache.set(callsToExecute[i], result); + this.resultCache.set(callsToExecute[i], result, chatId); } // Record tool execution in trace @@ -304,6 +304,7 @@ export class CopilotEngine { const resolutions = await this.referenceResolver.resolveReferences( question, entityContext, + conversationTurns, ); const resolvedQuestion = this.referenceResolver.applyResolutions( question, @@ -421,6 +422,7 @@ export class CopilotEngine { // Limit calls plannedCalls = this.limitToolCalls(plannedCalls); + this.executionTracer.updateIterationPlan(trace, plannedCalls); // Track if any calls were ever planned (before toolRunner filtering) if (plannedCalls.length > 0) { @@ -563,6 +565,7 @@ export class CopilotEngine { answer.conclusion, allExtractedEntities, turnTrace, + allResults, ); // Step 8: Generate conversation name for new conversations only diff --git a/src/engine/executionTracer.ts b/src/engine/executionTracer.ts index 01640b7..d1393e0 100644 --- a/src/engine/executionTracer.ts +++ b/src/engine/executionTracer.ts @@ -39,6 +39,16 @@ export class ExecutionTracer { trace.iterations.push(iteration); } + /** + * Update the current iteration plan after heuristics/refinement. 
+ */ + updateIterationPlan(trace: ExecutionTrace, plannedTools: ToolCall[]): void { + const currentIteration = this.getCurrentIteration(trace); + if (currentIteration) { + currentIteration.plannedTools = [...plannedTools]; + } + } + /** * Record a heuristic modification */ diff --git a/src/engine/handlers/service/entityHandler.ts b/src/engine/handlers/service/entityHandler.ts index 1075dab..aeaa9d1 100644 --- a/src/engine/handlers/service/entityHandler.ts +++ b/src/engine/handlers/service/entityHandler.ts @@ -26,6 +26,15 @@ export const serviceEntityHandler: EntityHandler = async ( // Handle array of services (query-services returns z.array(serviceSchema)) if (Array.isArray(toolResult.result)) { services = toolResult.result as JsonObject[]; + } else if ( + typeof toolResult.result === "object" && + toolResult.result !== null && + Array.isArray((toolResult.result as JsonObject).services) + ) { + services = ((toolResult.result as JsonObject).services as Array).map( + (service) => + typeof service === "string" ? ({ name: service } as JsonObject) : service, + ); } else { // Single service services = [toolResult.result as JsonObject]; diff --git a/src/engine/handlers/service/referenceHandler.ts b/src/engine/handlers/service/referenceHandler.ts index 284ed03..fd58976 100644 --- a/src/engine/handlers/service/referenceHandler.ts +++ b/src/engine/handlers/service/referenceHandler.ts @@ -42,6 +42,15 @@ export const serviceReferenceHandler: ReferenceHandler = async ( let servicesArray: Array> = []; if (Array.isArray(content)) { servicesArray = content as Array>; + } else if ( + typeof content === "object" && + content !== null && + Array.isArray((content as Record).services) + ) { + servicesArray = (content as { services: Array | string> }).services + .map((service) => + typeof service === "string" ? 
{ name: service } : service, + ); } else if (typeof content === "object" && content !== null) { servicesArray = [content as Record]; } diff --git a/src/engine/parallelToolRunner.ts b/src/engine/parallelToolRunner.ts index c830e58..7065067 100644 --- a/src/engine/parallelToolRunner.ts +++ b/src/engine/parallelToolRunner.ts @@ -74,7 +74,7 @@ export class ParallelToolRunner { ); for (let i = 0; i < batches.length; i++) { - const batch = batches[i]; + const batch = this.resolveBatchDependencies(batches[i], results); console.log( `[ParallelToolRunner][${logId}] Batch ${i + 1}/${batches.length}: ${batch.map((c) => c.name).join(", ")}`, ); @@ -87,6 +87,92 @@ export class ParallelToolRunner { return results; } + private resolveBatchDependencies( + batch: ToolCall[], + previousResults: ToolResult[], + ): ToolCall[] { + return batch.map((call) => { + if (call.name === "get-incident-timeline" && !call.arguments?.id) { + const incidentId = this.findFirstId(previousResults, "query-incidents"); + if (incidentId) { + return { + ...call, + arguments: { + ...call.arguments, + id: incidentId, + }, + }; + } + } + + if (call.name === "get-ticket" && !call.arguments?.id) { + const ticketId = this.findFirstId(previousResults, "query-tickets"); + if (ticketId) { + return { + ...call, + arguments: { + ...call.arguments, + id: ticketId, + }, + }; + } + } + + return call; + }); + } + + private findFirstId( + results: ToolResult[], + sourceToolName: string, + ): string | undefined { + for (const result of results) { + if (result.name !== sourceToolName) continue; + + const extracted = this.extractIdFromResult(result.result); + if (extracted) { + return extracted; + } + } + + return undefined; + } + + private extractIdFromResult(result: ToolResult["result"]): string | undefined { + if (Array.isArray(result)) { + for (const item of result) { + if (item && typeof item === "object" && !Array.isArray(item)) { + const maybeId = (item as Record).id; + if (typeof maybeId === "string" && maybeId.trim()) { + return maybeId; + } + } + } + return undefined; + } + + if (result && typeof result === "object") { + const record = result as Record; + if (typeof record.id === "string" && record.id.trim()) { + return record.id; + } + + const nestedArrays = Object.values(record).filter(Array.isArray) as unknown[][]; + for (const arrayValue of nestedArrays) { + for (const item of arrayValue) { + if (item && typeof item === "object" && !Array.isArray(item)) { + const maybeId = (item as Record).id; + if (typeof maybeId === "string" && maybeId.trim()) { + return maybeId; + } + } + } + } + } + + return undefined; + } + /** * Check if tools can be executed in parallel (no dependencies) */ diff --git a/src/engine/planRefiner.ts b/src/engine/planRefiner.ts index cf3955f..29204f7 100644 --- a/src/engine/planRefiner.ts +++ b/src/engine/planRefiner.ts @@ -87,8 +87,14 @@ export class PlanRefiner { } } else if (validation.replacementCall) { // Validation failed but a replacement was suggested - console.log(`[PlanRefiner] Replacing ${call.name} with ${validation.replacementCall.name}`); - replacementCalls.push(validation.replacementCall); + if (tools.some((candidate) => candidate.name === validation.replacementCall?.name)) { + console.log(`[PlanRefiner] Replacing ${call.name} with ${validation.replacementCall.name}`); + replacementCalls.push(validation.replacementCall); + } else { + console.log( + `[PlanRefiner] Skipping replacement ${validation.replacementCall.name} for ${call.name} because the tool is unavailable`, + ); + } validatedCalls.push({ 
call, valid: false }); } else { // Validation failed with no replacement @@ -183,4 +189,3 @@ export class PlanRefiner { return augmented; } } - diff --git a/src/engine/planner.ts b/src/engine/planner.ts index 2012d54..84689fe 100644 --- a/src/engine/planner.ts +++ b/src/engine/planner.ts @@ -11,6 +11,10 @@ import { PlannerResponse } from "../types.js"; const MAX_PLANNER_CALLS = 3; +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + export async function requestInitialPlan( question: string, llm: LlmClient, @@ -48,9 +52,11 @@ export async function requestInitialPlan( `[Planner] Initial plan: No tool calls from LLM, falling back to JSON plan`, ); // If the model didn't use tools directly, fall back to JSON-planned tool calls - return await requestJsonPlan(question, llm, tools); + return await requestJsonPlan(question, llm, tools, history, anchorTime); } catch (err) { - console.warn("LLM planning failed, falling back to heuristics:", err); + console.warn( + `LLM planning failed, falling back to heuristics: ${formatErrorMessage(err)}`, + ); return { toolCalls: inferPlan(question), }; @@ -113,7 +119,9 @@ export async function requestFollowUpPlan( toolCalls: limitCalls(reply.toolCalls), }; } catch (err) { - console.error("LLM refinement failed; skipping follow-up plan:", err); + console.error( + `LLM refinement failed; skipping follow-up plan: ${formatErrorMessage(err)}`, + ); return { toolCalls: [] }; } } @@ -122,10 +130,16 @@ async function requestJsonPlan( question: string, llm: LlmClient, tools: Tool[], + history: LlmMessage[] = [], + anchorTime?: string, ): Promise { const toolList = tools.map((t) => `- ${t.name}`).join("\n") || "No tools."; const messages: LlmMessage[] = [ { role: "system", content: buildJsonPlannerPrompt(toolList) }, + ...(anchorTime + ? [{ role: "system", content: `Current Time: ${anchorTime}` } as LlmMessage] + : []), + ...history, { role: "user", content: `User request: ${question}\nReturn only JSON.` }, ]; @@ -139,7 +153,7 @@ async function requestJsonPlan( } return { toolCalls: [] }; } catch (err) { - console.warn("LLM JSON planner failed:", err); + console.warn(`LLM JSON planner failed: ${formatErrorMessage(err)}`); } return { diff --git a/src/engine/resultCache.ts b/src/engine/resultCache.ts index 4caca32..a8d950e 100644 --- a/src/engine/resultCache.ts +++ b/src/engine/resultCache.ts @@ -10,6 +10,7 @@ export const DEFAULT_CACHE_CONFIG: CacheConfig = { }; type CacheEntry = { + scopeKey: string; call: ToolCall; result: ToolResult; timestamp: number; @@ -96,6 +97,10 @@ function createExactCacheKey(call: ToolCall): string { return `${call.name}:${normalizeForHash(call.arguments || {})}`; } +function createScopedCacheKey(scopeKey: string, call: ToolCall): string { + return `${scopeKey}::${createExactCacheKey(call)}`; +} + /** * LRU cache implementation for tool results. */ @@ -111,9 +116,9 @@ export class ResultCache { * Get a cached result if available and not expired. * Performs fuzzy matching on timestamps (window: 60s). */ - get(call: ToolCall): ToolResult | null { + get(call: ToolCall, scopeKey: string = "global"): ToolResult | null { // 1. 
Try exact exact match first (fastest) - const exactKey = createExactCacheKey(call); + const exactKey = createScopedCacheKey(scopeKey, call); let entry = this.cache.get(exactKey); let matchedKey = exactKey; @@ -121,6 +126,7 @@ export class ResultCache { if (!entry) { // This linear scan is acceptable because maxSize is small (default 100) for (const [key, candidate] of this.cache.entries()) { + if (candidate.scopeKey !== scopeKey) continue; if (candidate.call.name !== call.name) continue; // Deep comparison with fuzzy timestamps @@ -157,8 +163,8 @@ export class ResultCache { /** * Store a result in the cache. */ - set(call: ToolCall, result: ToolResult): void { - const key = createExactCacheKey(call); + set(call: ToolCall, result: ToolResult, scopeKey: string = "global"): void { + const key = createScopedCacheKey(scopeKey, call); // Remove existing entry if present if (this.cache.has(key)) { @@ -174,6 +180,7 @@ export class ResultCache { } this.cache.set(key, { + scopeKey, call, result, timestamp: Date.now(), @@ -184,8 +191,8 @@ export class ResultCache { /** * Check if a call result is cached and valid. */ - has(call: ToolCall): boolean { - return this.get(call) !== null; + has(call: ToolCall, scopeKey: string = "global"): boolean { + return this.get(call, scopeKey) !== null; } /** @@ -233,10 +240,13 @@ export class ResultCache { /** * Invalidate cache entries matching a tool name. */ - invalidateByToolName(toolName: string): void { - const keysToRemove = Array.from(this.cache.keys()).filter((key) => - key.startsWith(`${toolName}:`), - ); + invalidateByToolName(toolName: string, scopeKey?: string): void { + const keysToRemove = Array.from(this.cache.entries()) + .filter(([, entry]) => + entry.call.name === toolName && + (scopeKey === undefined || entry.scopeKey === scopeKey), + ) + .map(([key]) => key); for (const key of keysToRemove) { this.cache.delete(key); diff --git a/src/engine/retryStrategy.ts b/src/engine/retryStrategy.ts index 5d840bb..a0c99ec 100644 --- a/src/engine/retryStrategy.ts +++ b/src/engine/retryStrategy.ts @@ -28,6 +28,10 @@ export class CircuitBreakerError extends Error { } } +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + /** * Determines if an error is transient and should be retried. */ @@ -182,8 +186,7 @@ export async function withRetry( if (!isTransientError(error)) { console.warn( - `[Retry]${context ? ` [${context}]` : ""} Non-transient error, not retrying:`, - error, + `[Retry]${context ? ` [${context}]` : ""} Non-transient error, not retrying: ${formatErrorMessage(error)}`, ); throw error; } diff --git a/src/engine/scopeInferer.ts b/src/engine/scopeInferer.ts index 8c8f665..3d805f8 100644 --- a/src/engine/scopeInferer.ts +++ b/src/engine/scopeInferer.ts @@ -300,6 +300,14 @@ export class ScopeInferer { hasScope = true; } + const serviceMatch = question.match( + /\b((?:svc-[a-z0-9-]+)|(?:[a-z0-9][a-z0-9-]+-(?:service|svc|api)))\b/i, + ); + if (serviceMatch) { + scope.service = serviceMatch[1].toLowerCase(); + hasScope = true; + } + if (hasScope) { return { scope, diff --git a/src/engine/toolRunner.ts b/src/engine/toolRunner.ts index 24ae25e..7085ad0 100644 --- a/src/engine/toolRunner.ts +++ b/src/engine/toolRunner.ts @@ -5,6 +5,10 @@ import { withRetry } from "./retryStrategy.js"; import { validateToolCall } from "./toolsSchema.js"; import { TimeWindowExpander } from "./timeWindowExpander.js"; +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + export async function runToolCalls( calls: ToolCall[], mcp: McpClient, @@ -116,13 +120,12 @@ export async function runToolCalls( return toolResult; } catch (err) { console.error( - `[Copilot][${logId}] Tool ${call.name} failed with error:`, - err, + `[Copilot][${logId}] Tool ${call.name} failed with error: ${formatErrorMessage(err)}`, ); // Return error as a result instead of throwing, for partial success handling return { name: call.name, - result: { error: err instanceof Error ? err.message : String(err) }, + result: { error: formatErrorMessage(err) }, arguments: sanitizedArgs, callId: call.callId, } satisfies ToolResult; diff --git a/src/llmFactory.ts b/src/llmFactory.ts index fd12158..60879ce 100644 --- a/src/llmFactory.ts +++ b/src/llmFactory.ts @@ -3,19 +3,15 @@ import { MockLlm } from "./llms/mock.js"; import { OpenAiLlm } from "./llms/openai.js"; import { AnthropicLlm } from "./llms/anthropic.js"; import { GeminiLlm } from "./llms/gemini.js"; -import { NullLlm } from "./llms/null.js"; export function createLlmFromEnv(): LlmClient { - const provider = (process.env.LLM_PROVIDER || "mock").toLowerCase(); + const provider = (process.env.LLM_PROVIDER || "openai").toLowerCase(); console.log(`[LlmFactory] Initializing LLM provider: ${provider}`); if (provider === "openai") { const key = process.env.OPENAI_API_KEY || ""; if (!key) { - console.warn( - "OPENAI_API_KEY missing; using mock LLM. Set LLM_PROVIDER=openai and OPENAI_API_KEY to enable OpenAI.", - ); - return new MockLlm(); + throw new Error("OPENAI_API_KEY is required when LLM_PROVIDER=openai"); } console.log(`[LlmFactory] Using OpenAI LLM`); return new OpenAiLlm(key); @@ -23,10 +19,9 @@ export function createLlmFromEnv(): LlmClient { if (provider === "anthropic") { const key = process.env.ANTHROPIC_API_KEY; if (!key) { - console.warn( - "ANTHROPIC_API_KEY missing; using mock LLM. Set ANTHROPIC_API_KEY to enable Anthropic.", + throw new Error( + "ANTHROPIC_API_KEY is required when LLM_PROVIDER=anthropic", ); - return new MockLlm(); } console.log(`[LlmFactory] Using Anthropic LLM`); return new AnthropicLlm(key); @@ -34,10 +29,7 @@ export function createLlmFromEnv(): LlmClient { if (provider === "gemini") { const key = process.env.GEMINI_API_KEY; if (!key) { - console.warn( - "GEMINI_API_KEY missing; using mock LLM. 
Set GEMINI_API_KEY to enable Gemini.", - ); - return new MockLlm(); + throw new Error("GEMINI_API_KEY is required when LLM_PROVIDER=gemini"); } console.log(`[LlmFactory] Using Gemini LLM`); return new GeminiLlm(key); @@ -46,6 +38,5 @@ export function createLlmFromEnv(): LlmClient { console.log(`[LlmFactory] Using Mock LLM`); return new MockLlm(); } - console.log(`[LlmFactory] Unknown provider "${provider}", using Null LLM`); - return new NullLlm(); + throw new Error(`Unsupported LLM_PROVIDER "${provider}"`); } diff --git a/src/llms/anthropic.ts b/src/llms/anthropic.ts index 259a605..6098d58 100644 --- a/src/llms/anthropic.ts +++ b/src/llms/anthropic.ts @@ -47,20 +47,23 @@ function mapMessagesForAnthropic(messages: LlmMessage[]): { messages: Array<{ role: string; content: string }>; } { const systemMessages = messages.filter((m) => m.role === "system"); - const nonSystemMessages = messages.filter( - (m) => m.role !== "system" && m.role !== "tool", - ); + const nonSystemMessages = messages.filter((m) => m.role !== "system"); const system = systemMessages.map((m) => m.content).join("\n\n") || undefined; const anthropicMessages = nonSystemMessages.map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", - content: m.content, + content: m.role === "tool" ? formatToolMessage(m) : m.content, })); return { system, messages: anthropicMessages }; } +function formatToolMessage(message: LlmMessage): string { + const toolName = message.toolName || "unknown-tool"; + return `Tool result from ${toolName}:\n${message.content}`; +} + /** * Anthropic LLM client implementation. */ @@ -128,4 +131,3 @@ export class AnthropicLlm implements LlmClient { }; } } - diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index 1a77279..528df78 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -95,21 +95,21 @@ function mapMessagesForGemini(messages: LlmMessage[]): Array<{ role: string; par continue; } - if (msg.role === "tool") { - // Skip tool messages - Gemini handles these differently - continue; - } - const role = msg.role === "assistant" ? "model" : "user"; geminiMessages.push({ role, - parts: [{ text: msg.content }], + parts: [{ text: msg.role === "tool" ? formatToolMessage(msg) : msg.content }], }); } return geminiMessages; } +function formatToolMessage(message: LlmMessage): string { + const toolName = message.toolName || "unknown-tool"; + return `Tool result from ${toolName}:\n${message.content}`; +} + /** * Extract system message from messages array. */ @@ -227,11 +227,7 @@ export class GeminiLlm implements LlmClient { }; } catch (error) { console.error("[Gemini] Request failed:", error); - console.warn("[Gemini] Returning empty response due to error"); - return { - content: "", - toolCalls: [], - }; + throw error; } } } diff --git a/src/llms/openai.ts b/src/llms/openai.ts index 7261362..88b3001 100644 --- a/src/llms/openai.ts +++ b/src/llms/openai.ts @@ -24,12 +24,10 @@ const ANY_JSON_SCHEMA: JsonObject = { function mapMessages( messages: LlmMessage[], ): Array<{ role: string; content: JsonValue }> { - return messages - .filter((m) => m.role !== "tool") // tool replies are not carried across - .map((m) => ({ - role: m.role, - content: m.content, - })); + return messages.map((m) => ({ + role: m.role === "tool" ? "user" : m.role, + content: m.role === "tool" ? 
formatToolMessage(m) : m.content, + })); } /** @@ -64,6 +62,11 @@ function buildInputForResponses( return [...systemLike, lastTurn]; } +function formatToolMessage(message: LlmMessage): string { + const toolName = message.toolName || "unknown-tool"; + return `Tool result from ${toolName}:\n${message.content}`; +} + /** * Map internal Tool definitions to Responses API tools format. */ @@ -326,10 +329,7 @@ export class OpenAiLlm implements LlmClient { } // Don't retry for 4xx errors (except 429) - console.warn( - "[OpenAI] Non-retryable error, returning empty response", - ); - return null; // Signal non-retryable error + throw new Error(`OpenAI API error ${response.status}: ${text}`); } return response; @@ -338,14 +338,6 @@ export class OpenAiLlm implements LlmClient { "llm-openai", ); - // Handle non-retryable error - if (res === null) { - return { - content: "", - toolCalls: [], - }; - } - const data = (await res.json()) as JsonObject; console.log("[OpenAI] raw response:", JSON.stringify(data)); @@ -487,13 +479,8 @@ export class OpenAiLlm implements LlmClient { toolCalls, }; } catch (error) { - // Catch ANY error (network, parsing, etc.) and return empty response console.error("[OpenAI] Request failed:", error); - console.warn("[OpenAI] Returning empty response due to error"); - return { - content: "", - toolCalls: [], - }; + throw error; } } } diff --git a/src/server.ts b/src/server.ts index f7d9fab..d9e1a13 100644 --- a/src/server.ts +++ b/src/server.ts @@ -3,6 +3,10 @@ import { CopilotEngine } from "./engine/copilotEngine.js"; import { createLlmFromEnv } from "./llmFactory.js"; import { Conversation } from "./types.js"; +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + /** * Build a preview string for a conversation. * Prioritizes the most recent assistant response over user messages. 
@@ -39,7 +43,7 @@ export function buildPreview(conversation: Conversation): string { console.error( "Error building preview for conversation", conversation.chatId, - error, + formatErrorMessage(error), ); return "Preview unavailable"; } diff --git a/src/types.ts b/src/types.ts index 4731014..42c28cb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -121,6 +121,7 @@ export type ConversationTurn = { assistantResponse?: string; timestamp: number; entities?: Entity[]; + toolResults?: ToolResult[]; executionTrace?: TurnExecutionTrace; // Full execution trace for auditability }; diff --git a/tests/chatNamer.test.ts b/tests/chatNamer.test.ts index d6a2ed9..11f1d9f 100644 --- a/tests/chatNamer.test.ts +++ b/tests/chatNamer.test.ts @@ -239,6 +239,18 @@ describe('ChatNamer', () => { assert.ok(name.toLowerCase().includes('latency')); }); + it('should avoid duplicate topic and service naming', () => { + const namer = new ChatNamer(); + const name = namer.generateName( + 'Show me payment-service issues', + 'Payment-service is degraded', + Date.now(), + [{ type: 'service', value: 'payment-service', extractedAt: Date.now(), source: 'query-services' }] + ); + + assert.equal(name, 'Payment Issues'); + }); + it('should synthesize metric correlation name', () => { const namer = new ChatNamer(); const name = namer.generateName( diff --git a/tests/conversationHistory.test.ts b/tests/conversationHistory.test.ts index 96e6f4c..0696499 100644 --- a/tests/conversationHistory.test.ts +++ b/tests/conversationHistory.test.ts @@ -5,34 +5,21 @@ import { makeEngine, StubMcp } from './helpers/copilotTestUtils.js'; test('passes conversation history to LLM for contextual follow-up questions', async () => { const receivedMessages: LlmMessage[][] = []; + const toolCalls: Array<{ name: string; arguments?: JsonObject }> = []; const llm: LlmClient = { async chat(messages: LlmMessage[], tools: Tool[]) { receivedMessages.push([...messages]); + const latestUserMessage = [...messages].reverse().find((message) => message.role === 'user')?.content ?? ''; - // First turn: "what services are running?" - if (receivedMessages.length === 1 && tools.length > 0) { + if (tools.length > 0 && latestUserMessage === 'what services are running?') { return { content: 'plan', toolCalls: [{ name: 'query-services', arguments: {} }], }; } - // First turn synthesis - if (receivedMessages.length === 2 && tools.length === 0) { - return { - content: JSON.stringify({ - conclusion: 'The payment-service is running.', - evidence: ['Found payment-service in service list'], - confidence: 0.95 - }), - }; - } - - // Second turn: "are there any incidents related to this service" - // Should have history of previous conversation - if (receivedMessages.length === 3 && tools.length > 0) { - // Verify history is present + if (tools.length > 0 && latestUserMessage === 'are there any incidents related to this service') { const hasHistory = messages.some(m => m.role === 'user' && m.content.includes('what services are running') ); @@ -49,12 +36,13 @@ test('passes conversation history to LLM for contextual follow-up questions', as }; } - // Second turn synthesis return { content: JSON.stringify({ - conclusion: 'No incidents for payment-service', + conclusion: latestUserMessage.includes('what services are running') + ? 'The payment-service is running.' 
+ : 'No incidents for payment-service', evidence: [], - confidence: 0.9 + confidence: 0.9, }), }; }, @@ -68,16 +56,11 @@ test('passes conversation history to LLM for contextual follow-up questions', as ]; }, async callTool(call) { + toolCalls.push({ name: call.name, arguments: call.arguments as JsonObject }); if (call.name === 'query-services') { return { name: 'query-services', result: { services: ['payment-service'] } }; } if (call.name === 'query-incidents') { - // Verify the LLM resolved "this service" correctly - assert.deepEqual( - (call.arguments as JsonObject)?.scope, - { service: 'payment-service' }, - 'LLM should resolve "this service" to "payment-service" using history' - ); return { name: 'query-incidents', result: [] }; } return { name: call.name, result: null }; @@ -96,6 +79,13 @@ test('passes conversation history to LLM for contextual follow-up questions', as // Verify we got both answers assert.ok(answer1.conclusion.includes('payment-service')); assert.ok(answer2.conclusion.length > 0); + const incidentCall = toolCalls.find((call) => call.name === 'query-incidents'); + assert.ok(incidentCall, 'Expected query-incidents to run on follow-up'); + assert.deepEqual( + incidentCall?.arguments?.scope, + { service: 'payment-service' }, + 'Follow-up incident query should inherit the referenced service' + ); assert.ok(receivedMessages.length >= 3, 'Should have made multiple LLM calls with history'); }); @@ -106,50 +96,24 @@ test('passes tool results from previous turns to LLM', async () => { const llm: LlmClient = { async chat(messages: LlmMessage[], _tools: Tool[]) { receivedMessages.push([...messages]); - const callIndex = receivedMessages.length; + const latestUserMessage = [...messages].reverse().find((message) => message.role === 'user')?.content ?? ''; - // First turn: "check metrics" - if (callIndex === 1) { + if (latestUserMessage === 'check services') { return { - content: 'Checking metrics...', - toolCalls: [{ name: 'query-metrics', arguments: { metric: 'cpu' } }], + content: 'Checking services...', + toolCalls: [{ name: 'query-services', arguments: {} }], }; } - // First turn synthesis - if (callIndex === 2) { + if (latestUserMessage === 'what did it return?') { return { - content: JSON.stringify({ - conclusion: 'CPU is high (95%).', - evidence: ['CPU metrics show 95% utilization.'], - confidence: 1.0 - }), + content: 'No additional tool needed.', }; } - // Second turn: "why is it high?" - if (receivedMessages.length === 3) { // Note: In our mock counting, this might be index 3 or 4 depending on test run, - // but let's stick to the logic that worked or simplify. - // Actually, checking content is safer. - - const toolMessage = messages.find(m => m.role === 'tool'); - - // Only assert if we are in the Planning call (which has tool messages) - // The Planning call has History. The Analysis call has "You are OpsOrch..." 
- if (messages[0]?.role === 'system') { // Heuristic to identify Plan call - assert.ok(toolMessage, 'History should contain tool messages'); - assert.ok(toolMessage.content.includes('95'), 'History should contain tool result content'); - - return { - content: 'Analyzing logs...', - toolCalls: [{ name: 'query-logs', arguments: {} }] - }; - } - } - return { content: JSON.stringify({ - conclusion: 'Logs show infinite loop.', + conclusion: 'The payment-service is active.', evidence: [], confidence: 0.9 }), @@ -158,10 +122,10 @@ test('passes tool results from previous turns to LLM', async () => { }; const mcp: StubMcp = { - async listTools() { return [{ name: 'query-metrics' }, { name: 'query-logs' }] as Tool[]; }, + async listTools() { return [{ name: 'query-services' }] as Tool[]; }, async callTool(call) { - if (call.name === 'query-metrics') { - return { name: 'query-metrics', result: { value: 95, unit: '%' } }; + if (call.name === 'query-services') { + return { name: 'query-services', result: { services: ['payment-service'] } }; } return { name: call.name, result: null }; } @@ -170,10 +134,17 @@ test('passes tool results from previous turns to LLM', async () => { const engine = makeEngine(llm, mcp); // Turn 1 - const ans1 = await engine.answer('check metrics'); + const ans1 = await engine.answer('check services'); // Turn 2 - await engine.answer('why is it high?', { chatId: ans1.chatId }); - + await engine.answer('what did it return?', { chatId: ans1.chatId }); + + const followUpPlanningMessages = receivedMessages.find((messages) => + messages.some((message) => message.role === 'user' && message.content === 'what did it return?') + ); + assert.ok(followUpPlanningMessages, 'Expected a follow-up planning call'); + const toolMessage = followUpPlanningMessages.find(m => m.role === 'tool'); + assert.ok(toolMessage, 'History should contain tool messages'); + assert.ok(toolMessage.content.includes('payment-service'), 'History should contain tool result content'); assert.ok(receivedMessages.length >= 3); }); diff --git a/tests/conversationManager.test.ts b/tests/conversationManager.test.ts index 4b2b3c8..74c0596 100644 --- a/tests/conversationManager.test.ts +++ b/tests/conversationManager.test.ts @@ -2,7 +2,7 @@ import assert from 'node:assert/strict'; import { test, beforeEach } from 'node:test'; import { ConversationManager, DEFAULT_CONVERSATION_CONFIG } from '../src/engine/conversationManager.js'; import { InMemoryConversationStore } from '../src/stores/inMemoryConversationStore.js'; -import { Entity } from '../src/types.js'; +import { Entity, ToolResult } from '../src/types.js'; test('ConversationManager', async (t) => { let manager: ConversationManager; @@ -109,6 +109,21 @@ test('ConversationManager', async (t) => { assert.strictEqual(messages[1].role, 'assistant'); }); + await t.test('buildMessageHistory includes tool messages from prior turns', async () => { + const toolResults: ToolResult[] = [ + { name: 'query-metrics', result: { value: 95, unit: '%' } }, + ]; + + await manager.addTurn('chat-1', 'Check CPU', 'CPU is high', undefined, undefined, toolResults); + + const messages = await manager.buildMessageHistory('chat-1'); + assert.strictEqual(messages.length, 3); + assert.strictEqual(messages[1].role, 'tool'); + assert.strictEqual(messages[1].toolName, 'query-metrics'); + assert.ok(messages[1].content.includes('95')); + assert.strictEqual(messages[2].role, 'assistant'); + }); + await t.test('deleteConversation removes conversation', async () => { await manager.addTurn('chat-1', 'Hello'); 
await manager.deleteConversation('chat-1'); diff --git a/tests/executionTracer.test.ts b/tests/executionTracer.test.ts index 81f8aa5..87dfaa0 100644 --- a/tests/executionTracer.test.ts +++ b/tests/executionTracer.test.ts @@ -68,6 +68,22 @@ test('ExecutionTracer: records heuristic modifications', () => { assert.deepEqual(trace.iterations[0].heuristicModifications[0], modification); }); +test('ExecutionTracer: updates current iteration plan', () => { + const tracer = new ExecutionTracer(); + const trace = tracer.startTrace('chat-1'); + + tracer.startIteration(trace, [{ name: 'query-incidents', arguments: {} }]); + tracer.updateIterationPlan(trace, [ + { name: 'query-incidents', arguments: { limit: 10 } }, + { name: 'query-logs', arguments: { scope: { service: 'payments' } } }, + ]); + + assert.deepEqual(trace.iterations[0].plannedTools, [ + { name: 'query-incidents', arguments: { limit: 10 } }, + { name: 'query-logs', arguments: { scope: { service: 'payments' } } }, + ]); +}); + test('ExecutionTracer: records tool executions', () => { const tracer = new ExecutionTracer(); const trace = tracer.startTrace('chat-1'); diff --git a/tests/gemini.test.ts b/tests/gemini.test.ts index 4348fa2..3c4cfdb 100644 --- a/tests/gemini.test.ts +++ b/tests/gemini.test.ts @@ -27,7 +27,7 @@ test("gemini", async (t) => { assert.ok(llm, "Should create GeminiLlm instance"); }); - await t.test("should handle errors gracefully and return empty response", async () => { + await t.test("should surface provider errors", async () => { const llm = new GeminiLlm("invalid-key-that-will-fail"); const logs: string[] = []; @@ -46,21 +46,11 @@ test("gemini", async (t) => { }; try { - const response = await llm.chat( - [{ role: "user" as const, content: "Test message" }], - [], - ); - - // Should return empty response on error - assert.strictEqual( - response.content, - "", - "Should return empty content on error", - ); - assert.strictEqual( - response.toolCalls?.length || 0, - 0, - "Should have no tool calls on error", + await assert.rejects( + llm.chat( + [{ role: "user" as const, content: "Test message" }], + [], + ), ); // Should log error details @@ -68,11 +58,6 @@ test("gemini", async (t) => { log.includes("[Gemini] Request failed"), ); assert.ok(hasErrorLog, "Should log request failure"); - - const hasWarningLog = logs.some((log) => - log.includes("[Gemini] Returning empty response due to error"), - ); - assert.ok(hasWarningLog, "Should log empty response warning"); } finally { console.log = originalLog; console.warn = originalWarn; @@ -91,16 +76,18 @@ test("gemini", async (t) => { }; try { - await llm.chat( - [ - { role: "user" as const, content: "First" }, - { role: "assistant" as const, content: "Response" }, - { role: "user" as const, content: "Second" }, - ], - [ - { name: "tool1", description: "Test tool 1" }, - { name: "tool2", description: "Test tool 2" }, - ], + await assert.rejects( + llm.chat( + [ + { role: "user" as const, content: "First" }, + { role: "assistant" as const, content: "Response" }, + { role: "user" as const, content: "Second" }, + ], + [ + { name: "tool1", description: "Test tool 1" }, + { name: "tool2", description: "Test tool 2" }, + ], + ), ); // Should log request metadata diff --git a/tests/llmFactory.test.ts b/tests/llmFactory.test.ts index fd909f9..238161c 100644 --- a/tests/llmFactory.test.ts +++ b/tests/llmFactory.test.ts @@ -5,7 +5,6 @@ import { OpenAiLlm } from '../src/llms/openai.js'; import { AnthropicLlm } from '../src/llms/anthropic.js'; import { GeminiLlm } from 
'../src/llms/gemini.js'; import { MockLlm } from '../src/llms/mock.js'; -import { NullLlm } from '../src/llms/null.js'; test('llmFactory', async (t) => { const originalEnv = { ...process.env }; @@ -23,9 +22,11 @@ test('llmFactory', async (t) => { process.env = originalEnv; }); - await t.test('defaults to openai (mock) if no provider set and no key', () => { - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm, 'Should fallback to MockLlm when key is missing for default provider'); + await t.test('defaults to openai and throws if key is missing', () => { + assert.throws( + () => createLlmFromEnv(), + /OPENAI_API_KEY is required when LLM_PROVIDER=openai/, + ); }); await t.test('creates OpenAiLlm when provider is openai and key is present', () => { @@ -35,11 +36,13 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof OpenAiLlm); }); - await t.test('creates MockLlm when provider is openai but key is missing', () => { + await t.test('throws when provider is openai but key is missing', () => { process.env.LLM_PROVIDER = 'openai'; delete process.env.OPENAI_API_KEY; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm); + assert.throws( + () => createLlmFromEnv(), + /OPENAI_API_KEY is required when LLM_PROVIDER=openai/, + ); }); await t.test('creates AnthropicLlm when provider is anthropic and key is present', () => { @@ -49,11 +52,13 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof AnthropicLlm); }); - await t.test('creates MockLlm when provider is anthropic but key is missing', () => { + await t.test('throws when provider is anthropic but key is missing', () => { process.env.LLM_PROVIDER = 'anthropic'; delete process.env.ANTHROPIC_API_KEY; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm); + assert.throws( + () => createLlmFromEnv(), + /ANTHROPIC_API_KEY is required when LLM_PROVIDER=anthropic/, + ); }); await t.test('creates MockLlm when provider is mock', () => { @@ -62,10 +67,12 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof MockLlm); }); - await t.test('creates NullLlm for unknown provider', () => { + await t.test('throws for unknown provider', () => { process.env.LLM_PROVIDER = 'unknown-provider'; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof NullLlm); + assert.throws( + () => createLlmFromEnv(), + /Unsupported LLM_PROVIDER "unknown-provider"/, + ); }); await t.test('is case insensitive for provider name', () => { @@ -81,10 +88,12 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof GeminiLlm); }); - await t.test('creates MockLlm when provider is gemini but key is missing', () => { + await t.test('throws when provider is gemini but key is missing', () => { process.env.LLM_PROVIDER = 'gemini'; delete process.env.GEMINI_API_KEY; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm); + assert.throws( + () => createLlmFromEnv(), + /GEMINI_API_KEY is required when LLM_PROVIDER=gemini/, + ); }); }); diff --git a/tests/parallelToolRunner.test.ts b/tests/parallelToolRunner.test.ts index e7f0f2d..a77ed88 100644 --- a/tests/parallelToolRunner.test.ts +++ b/tests/parallelToolRunner.test.ts @@ -156,6 +156,7 @@ test('ParallelToolRunner: executes independent tools in parallel', async () => { test('ParallelToolRunner: executes dependent tools in correct order', async () => { const runner = new ParallelToolRunner(); const executionOrder: string[] = []; + let timelineArgs: ToolCall['arguments'] | undefined; const mcp = new MockMcp( async () => [ @@ -164,6 +165,12 @@ 
test('ParallelToolRunner: executes dependent tools in correct order', async () = ], async (call) => { executionOrder.push(call.name); + if (call.name === 'query-incidents') { + return { name: call.name, result: [{ id: 'INC-123' }] }; + } + if (call.name === 'get-incident-timeline') { + timelineArgs = call.arguments; + } return { name: call.name, result: { ok: true } }; } ); @@ -187,6 +194,7 @@ test('ParallelToolRunner: executes dependent tools in correct order', async () = // query-incidents must execute before get-incident-timeline assert.equal(executionOrder[0], 'query-incidents'); assert.equal(executionOrder[1], 'get-incident-timeline'); + assert.deepEqual(timelineArgs, { id: 'INC-123' }); }); test('ParallelToolRunner: handles mixed parallel and sequential execution', async () => { diff --git a/tests/resultCache.test.ts b/tests/resultCache.test.ts index aa806b0..cb92e18 100644 --- a/tests/resultCache.test.ts +++ b/tests/resultCache.test.ts @@ -110,6 +110,18 @@ test('invalidates by tool name', () => { assert.deepEqual(cache.get({ name: 'query-logs', arguments: { query: 'error' } }), { name: 'query-logs', result: 'data2' }); }); +test('cache entries are scoped by namespace', () => { + const cache = new ResultCache({ maxSize: 10, ttlMs: 60000 }); + const call: ToolCall = { name: 'query-incidents', arguments: { limit: 5 } }; + + cache.set(call, { name: 'query-incidents', result: 'chat-a' }, 'chat-a'); + cache.set(call, { name: 'query-incidents', result: 'chat-b' }, 'chat-b'); + + assert.deepEqual(cache.get(call, 'chat-a'), { name: 'query-incidents', result: 'chat-a' }); + assert.deepEqual(cache.get(call, 'chat-b'), { name: 'query-incidents', result: 'chat-b' }); + assert.equal(cache.get(call, 'chat-c'), null); +}); + test('LRU: recently accessed items are not evicted', () => { const cache = new ResultCache({ maxSize: 3, ttlMs: 60000 }); @@ -293,4 +305,3 @@ test('expired entries count as misses', async () => { assert.equal(stats.hits, 1); assert.equal(stats.misses, 1); }); - From 62f4ec3702ef144083a113af0a035e73c26a67ae Mon Sep 17 00:00:00 2001 From: yusufaytas Date: Sun, 15 Mar 2026 18:01:05 +0000 Subject: [PATCH 2/2] The cache write path now builds the cache key from the actual executed call shape --- src/engine/copilotEngine.ts | 10 +++- tests/copilotEngine.followups.test.ts | 83 +++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts index 48fd6a9..02ac108 100644 --- a/src/engine/copilotEngine.ts +++ b/src/engine/copilotEngine.ts @@ -225,6 +225,10 @@ export class CopilotEngine { for (let i = 0; i < callsToExecute.length; i++) { const result = freshResults[i]; if (!result) continue; // Skip if somehow undefined + const executedCall: ToolCall = { + ...callsToExecute[i], + arguments: result.arguments ?? 
callsToExecute[i].arguments, + }; const isError = typeof result.result === "object" && @@ -233,14 +237,14 @@ export class CopilotEngine { // Only cache if not an error if (!isError) { - this.resultCache.set(callsToExecute[i], result, chatId); + this.resultCache.set(executedCall, result, chatId); } // Record tool execution in trace if (trace) { this.executionTracer.recordToolExecution(trace, { - toolName: callsToExecute[i].name, - arguments: callsToExecute[i].arguments, + toolName: executedCall.name, + arguments: executedCall.arguments, cacheHit: false, executionTimeMs: Math.round(executionTime / callsToExecute.length), // Approximate per-tool time success: !isError, diff --git a/tests/copilotEngine.followups.test.ts b/tests/copilotEngine.followups.test.ts index f88e2a9..331e2ef 100644 --- a/tests/copilotEngine.followups.test.ts +++ b/tests/copilotEngine.followups.test.ts @@ -251,3 +251,86 @@ test('drills into incident timelines/logs/metrics when user asks for root cause' assert.ok(callNames.includes('query-metrics'), 'Should query metrics'); assert.ok(callNames.includes('query-logs'), 'Should query logs'); }); + +test('caches dependency-resolved detail calls by executed arguments, not unresolved plan args', async () => { + let latestIncidentId = 'INC-100'; + const timelineCalls: string[] = []; + + const llm: LlmClient = { + async chat(messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + const latestUserMessage = + [...messages].reverse().find((message) => message.role === 'user')?.content ?? ''; + + if (latestUserMessage === 'summarize first incident') { + return { + content: 'plan-first', + toolCalls: [ + { name: 'query-incidents', arguments: { query: 'first' } as JsonObject }, + { name: 'get-incident-timeline', arguments: {} as JsonObject }, + ], + }; + } + + if (latestUserMessage === 'summarize second incident') { + return { + content: 'plan-second', + toolCalls: [ + { name: 'query-incidents', arguments: { query: 'second' } as JsonObject }, + { name: 'get-incident-timeline', arguments: {} as JsonObject }, + ], + }; + } + + return { content: 'stop', toolCalls: [] }; + } + + return { + content: JSON.stringify({ conclusion: `timeline for ${latestIncidentId}`, evidence: [] }), + toolCalls: [], + }; + }, + }; + + const mcp: StubMcp = { + async listTools() { + return [ + { name: 'query-incidents' } as Tool, + { name: 'get-incident-timeline' } as Tool, + ]; + }, + async callTool(call): Promise { + if (call.name === 'query-incidents') { + latestIncidentId = (call.arguments.query === 'first') ? 'INC-100' : 'INC-200'; + return { + name: call.name, + arguments: call.arguments, + result: [{ id: latestIncidentId }], + }; + } + + if (call.name === 'get-incident-timeline') { + const incidentId = String(call.arguments.id); + timelineCalls.push(incidentId); + return { + name: call.name, + arguments: call.arguments, + result: [{ incidentId, at: '2024-01-01T00:00:00Z', kind: 'event' }], + }; + } + + throw new Error(`unexpected call ${call.name}`); + }, + }; + + const engine = makeEngine(llm, mcp); + + const first = await engine.answer('summarize first incident'); + await engine.answer('summarize second incident', { chatId: first.chatId }); + + assert.deepEqual( + timelineCalls, + ['INC-100', 'INC-200'], + 'Timeline calls should use each resolved incident id instead of reusing a stale no-id cache entry', + ); +});
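
For reference, a minimal usage sketch (not part of the patch) of the chat-scoped cache and the executed-call keying that the second commit describes. It assumes the import paths and the ToolCall/ToolResult shapes shown in resultCache.ts and types.ts above; cacheExecutedResult and lookup are illustrative helper names, not code from this change.

    import { ResultCache } from "./src/engine/resultCache.js";
    import type { ToolCall, ToolResult } from "./src/types.js";

    // Same config shape the tests use: bounded size plus a TTL.
    const cache = new ResultCache({ maxSize: 100, ttlMs: 60_000 });

    function cacheExecutedResult(chatId: string, planned: ToolCall, result: ToolResult): void {
      // Key by what actually ran: dependency resolution may have filled in an id
      // (e.g. get-incident-timeline inheriting the incident id from query-incidents),
      // so the unresolved plan arguments must not shadow the resolved call.
      const executed: ToolCall = { ...planned, arguments: result.arguments ?? planned.arguments };
      cache.set(executed, result, chatId);
    }

    function lookup(chatId: string, call: ToolCall): ToolResult | null {
      // Scoping by chatId keeps one conversation's results from leaking into another;
      // omitting the scope falls back to the "global" namespace.
      return cache.get(call, chatId);
    }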