From e91972dcdc9ed6300773f2489fe0eb8370eac063 Mon Sep 17 00:00:00 2001 From: yusufaytas Date: Sun, 15 Mar 2026 17:31:46 +0000 Subject: [PATCH 1/2] Fixed the runtime inconsistencies in the copilot loop with tool result passing The main changes are in copilotEngine.ts, conversationManager.ts, planner.ts, and parallelToolRunner.ts. Cross-turn reference resolution now receives real conversation history, prior tool outputs are stored with each turn and replayed as tool messages for follow-up planning, JSON fallback planning keeps the same history/context as the primary planner, and dependent detail calls like get-incident-timeline can inherit IDs from earlier query results before execution. I also fixed service extraction around wrapped query-services payloads in service/entityHandler.ts and made planRefiner.ts stop suggesting replacement tools that are not actually available. The tests in conversationHistory.test.ts, conversationManager.test.ts, and parallelToolRunner.test.ts now assert real state instead of relying on swallowed mock assertions. --- package.json | 2 +- src/engine/chatNamer.ts | 31 +++++- src/engine/conversationManager.ts | 33 +++++- src/engine/copilotEngine.ts | 7 +- src/engine/executionTracer.ts | 10 ++ src/engine/handlers/service/entityHandler.ts | 9 ++ .../handlers/service/referenceHandler.ts | 9 ++ src/engine/parallelToolRunner.ts | 88 ++++++++++++++- src/engine/planRefiner.ts | 11 +- src/engine/planner.ts | 22 +++- src/engine/resultCache.ts | 30 ++++-- src/engine/retryStrategy.ts | 7 +- src/engine/scopeInferer.ts | 8 ++ src/engine/toolRunner.ts | 9 +- src/llmFactory.ts | 21 ++-- src/llms/anthropic.ts | 12 ++- src/llms/gemini.ts | 18 ++-- src/llms/openai.ts | 35 ++---- src/server.ts | 6 +- src/types.ts | 1 + tests/chatNamer.test.ts | 12 +++ tests/conversationHistory.test.ts | 101 +++++++----------- tests/conversationManager.test.ts | 17 ++- tests/executionTracer.test.ts | 16 +++ tests/gemini.test.ts | 49 ++++----- tests/llmFactory.test.ts | 41 ++++--- tests/parallelToolRunner.test.ts | 8 ++ tests/resultCache.test.ts | 13 ++- 28 files changed, 426 insertions(+), 200 deletions(-) diff --git a/package.json b/package.json index fbe9f1a..89ac562 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,7 @@ "lint": "eslint .", "lint:fix": "eslint . 
--fix", "type-check": "tsc --noEmit", - "test": "NODE_ENV=test node --test --loader ts-node/esm", + "test": "NODE_ENV=test node --test --loader ts-node/esm tests/*.test.ts tests/**/*.test.ts", "start": "node --loader ts-node/esm src/server.ts", "seed": "node --loader ts-node/esm scripts/seedDatabase.ts" }, diff --git a/src/engine/chatNamer.ts b/src/engine/chatNamer.ts index 1df884b..aec857a 100644 --- a/src/engine/chatNamer.ts +++ b/src/engine/chatNamer.ts @@ -73,6 +73,9 @@ export class ChatNamer { // Determine intent from user message const intent = this.determineIntent(userMessage); + // Avoid redundant topic/service naming like "Payment Payment Issues" + this.deduplicateSemanticOverlap(mergedEntities); + // Try to synthesize a name let name = this.synthesizeName(mergedEntities, intent, userMessage); @@ -96,6 +99,30 @@ export class ChatNamer { return sanitizedName; } + private deduplicateSemanticOverlap(entities: ExtractedEntities): void { + if (entities.services.length === 0 || entities.topics.length === 0) { + return; + } + + const topicSet = new Set(entities.topics.map((topic) => topic.toLowerCase())); + entities.services = entities.services.filter((service) => { + const normalizedService = service.toLowerCase(); + for (const topic of topicSet) { + if ( + normalizedService === topic || + normalizedService.startsWith(`${topic}-`) || + normalizedService.endsWith(`-${topic}`) || + normalizedService.includes(`${topic}-service`) || + normalizedService.includes(`${topic}-svc`) || + normalizedService.includes(`${topic}-api`) + ) { + return false; + } + } + return true; + }); + } + /** * Extract key entities from the LLM response (incidents, services, metrics). */ @@ -359,10 +386,10 @@ export class ChatNamer { return `${topic} ${metric} Issues`; } - // Priority 6: Topic + Problems/Issues (e.g., "Payment Service Issues") + // Priority 6: Topic + Problems/Issues (e.g., "Payment Issues") if (topics.length > 0 && /problem|issue|error|fail/i.test(userMessage)) { const topic = this.formatTopicName(topics[0]); - return `${topic} Service Issues`; + return `${topic} Issues`; } // Priority 7: Topic + Intent (e.g., "Payment Investigation") diff --git a/src/engine/conversationManager.ts b/src/engine/conversationManager.ts index 51525e6..7c553df 100644 --- a/src/engine/conversationManager.ts +++ b/src/engine/conversationManager.ts @@ -11,6 +11,7 @@ import { Entity, ConversationContext, TurnExecutionTrace, + ToolResult, } from "../types.js"; import { ConversationStore } from "../conversationStore.js"; import { createConversationStore } from "../storeFactory.js"; @@ -99,6 +100,7 @@ export class ConversationManager { assistantResponse?: string, entities?: Entity[], executionTrace?: TurnExecutionTrace, + toolResults?: ToolResult[], ): Promise { let conversation = await this.store.get(chatId); @@ -119,6 +121,7 @@ export class ConversationManager { assistantResponse, timestamp: Date.now(), entities, + toolResults, executionTrace, }); @@ -135,8 +138,8 @@ export class ConversationManager { /** * Build LLM message history from conversation turns. - * Note: Tool results are no longer stored in turns (replaced by executionTrace). - * The message history now only includes user messages and assistant responses. + * Tool results are stored alongside turns so follow-up planning can reuse + * concrete outputs instead of relying only on assistant summaries. 
*/ async buildMessageHistory(chatId: string): Promise { const conversation = await this.getConversation(chatId); @@ -153,6 +156,14 @@ export class ConversationManager { content: turn.userMessage, }); + for (const toolResult of turn.toolResults ?? []) { + messages.push({ + role: "tool", + toolName: toolResult.name, + content: summarizeToolResult(toolResult), + }); + } + // Add assistant response if any if (turn.assistantResponse) { messages.push({ @@ -294,3 +305,21 @@ export class ConversationManager { return this.store; } } + +function summarizeToolResult(toolResult: ToolResult): string { + const content = safeStringify(toolResult.result); + const suffix = content.length > 1000 ? `${content.slice(0, 1000)}...` : content; + return `${toolResult.name}: ${suffix}`; +} + +function safeStringify(value: unknown): string { + if (typeof value === "string") { + return value; + } + + try { + return JSON.stringify(value); + } catch { + return String(value); + } +} diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts index 53cdb0e..48fd6a9 100644 --- a/src/engine/copilotEngine.ts +++ b/src/engine/copilotEngine.ts @@ -155,7 +155,7 @@ export class CopilotEngine { // Step 1: Check cache for each call for (const call of calls) { - const cached = this.resultCache.get(call); + const cached = this.resultCache.get(call, chatId); if (cached) { console.log( `[Copilot][${chatId}] Using cached result for ${call.name}`, @@ -233,7 +233,7 @@ export class CopilotEngine { // Only cache if not an error if (!isError) { - this.resultCache.set(callsToExecute[i], result); + this.resultCache.set(callsToExecute[i], result, chatId); } // Record tool execution in trace @@ -304,6 +304,7 @@ export class CopilotEngine { const resolutions = await this.referenceResolver.resolveReferences( question, entityContext, + conversationTurns, ); const resolvedQuestion = this.referenceResolver.applyResolutions( question, @@ -421,6 +422,7 @@ export class CopilotEngine { // Limit calls plannedCalls = this.limitToolCalls(plannedCalls); + this.executionTracer.updateIterationPlan(trace, plannedCalls); // Track if any calls were ever planned (before toolRunner filtering) if (plannedCalls.length > 0) { @@ -563,6 +565,7 @@ export class CopilotEngine { answer.conclusion, allExtractedEntities, turnTrace, + allResults, ); // Step 8: Generate conversation name for new conversations only diff --git a/src/engine/executionTracer.ts b/src/engine/executionTracer.ts index 01640b7..d1393e0 100644 --- a/src/engine/executionTracer.ts +++ b/src/engine/executionTracer.ts @@ -39,6 +39,16 @@ export class ExecutionTracer { trace.iterations.push(iteration); } + /** + * Update the current iteration plan after heuristics/refinement. 
+ */ + updateIterationPlan(trace: ExecutionTrace, plannedTools: ToolCall[]): void { + const currentIteration = this.getCurrentIteration(trace); + if (currentIteration) { + currentIteration.plannedTools = [...plannedTools]; + } + } + /** * Record a heuristic modification */ diff --git a/src/engine/handlers/service/entityHandler.ts b/src/engine/handlers/service/entityHandler.ts index 1075dab..aeaa9d1 100644 --- a/src/engine/handlers/service/entityHandler.ts +++ b/src/engine/handlers/service/entityHandler.ts @@ -26,6 +26,15 @@ export const serviceEntityHandler: EntityHandler = async ( // Handle array of services (query-services returns z.array(serviceSchema)) if (Array.isArray(toolResult.result)) { services = toolResult.result as JsonObject[]; + } else if ( + typeof toolResult.result === "object" && + toolResult.result !== null && + Array.isArray((toolResult.result as JsonObject).services) + ) { + services = ((toolResult.result as JsonObject).services as Array).map( + (service) => + typeof service === "string" ? ({ name: service } as JsonObject) : service, + ); } else { // Single service services = [toolResult.result as JsonObject]; diff --git a/src/engine/handlers/service/referenceHandler.ts b/src/engine/handlers/service/referenceHandler.ts index 284ed03..fd58976 100644 --- a/src/engine/handlers/service/referenceHandler.ts +++ b/src/engine/handlers/service/referenceHandler.ts @@ -42,6 +42,15 @@ export const serviceReferenceHandler: ReferenceHandler = async ( let servicesArray: Array> = []; if (Array.isArray(content)) { servicesArray = content as Array>; + } else if ( + typeof content === "object" && + content !== null && + Array.isArray((content as Record).services) + ) { + servicesArray = (content as { services: Array | string> }).services + .map((service) => + typeof service === "string" ? 
{ name: service } : service, + ); } else if (typeof content === "object" && content !== null) { servicesArray = [content as Record]; } diff --git a/src/engine/parallelToolRunner.ts b/src/engine/parallelToolRunner.ts index c830e58..7065067 100644 --- a/src/engine/parallelToolRunner.ts +++ b/src/engine/parallelToolRunner.ts @@ -74,7 +74,7 @@ export class ParallelToolRunner { ); for (let i = 0; i < batches.length; i++) { - const batch = batches[i]; + const batch = this.resolveBatchDependencies(batches[i], results); console.log( `[ParallelToolRunner][${logId}] Batch ${i + 1}/${batches.length}: ${batch.map((c) => c.name).join(", ")}`, ); @@ -87,6 +87,92 @@ export class ParallelToolRunner { return results; } + private resolveBatchDependencies( + batch: ToolCall[], + previousResults: ToolResult[], + ): ToolCall[] { + return batch.map((call) => { + if (call.name === "get-incident-timeline" && !call.arguments?.id) { + const incidentId = this.findFirstId(previousResults, "query-incidents"); + if (incidentId) { + return { + ...call, + arguments: { + ...call.arguments, + id: incidentId, + }, + }; + } + } + + if (call.name === "get-ticket" && !call.arguments?.id) { + const ticketId = this.findFirstId(previousResults, "query-tickets"); + if (ticketId) { + return { + ...call, + arguments: { + ...call.arguments, + id: ticketId, + }, + }; + } + } + + return call; + }); + } + + private findFirstId( + results: ToolResult[], + sourceToolName: string, + ): string | undefined { + for (const result of results) { + if (result.name !== sourceToolName) continue; + + const extracted = this.extractIdFromResult(result.result); + if (extracted) { + return extracted; + } + } + + return undefined; + } + + private extractIdFromResult(result: ToolResult["result"]): string | undefined { + if (Array.isArray(result)) { + for (const item of result) { + if (item && typeof item === "object" && !Array.isArray(item)) { + const maybeId = (item as Record).id; + if (typeof maybeId === "string" && maybeId.trim()) { + return maybeId; + } + } + } + return undefined; + } + + if (result && typeof result === "object") { + const record = result as Record; + if (typeof record.id === "string" && record.id.trim()) { + return record.id; + } + + const nestedArrays = Object.values(record).filter(Array.isArray) as unknown[][]; + for (const arrayValue of nestedArrays) { + for (const item of arrayValue) { + if (item && typeof item === "object" && !Array.isArray(item)) { + const maybeId = (item as Record).id; + if (typeof maybeId === "string" && maybeId.trim()) { + return maybeId; + } + } + } + } + } + + return undefined; + } + /** * Check if tools can be executed in parallel (no dependencies) */ diff --git a/src/engine/planRefiner.ts b/src/engine/planRefiner.ts index cf3955f..29204f7 100644 --- a/src/engine/planRefiner.ts +++ b/src/engine/planRefiner.ts @@ -87,8 +87,14 @@ export class PlanRefiner { } } else if (validation.replacementCall) { // Validation failed but a replacement was suggested - console.log(`[PlanRefiner] Replacing ${call.name} with ${validation.replacementCall.name}`); - replacementCalls.push(validation.replacementCall); + if (tools.some((candidate) => candidate.name === validation.replacementCall?.name)) { + console.log(`[PlanRefiner] Replacing ${call.name} with ${validation.replacementCall.name}`); + replacementCalls.push(validation.replacementCall); + } else { + console.log( + `[PlanRefiner] Skipping replacement ${validation.replacementCall.name} for ${call.name} because the tool is unavailable`, + ); + } validatedCalls.push({ 
call, valid: false }); } else { // Validation failed with no replacement @@ -183,4 +189,3 @@ export class PlanRefiner { return augmented; } } - diff --git a/src/engine/planner.ts b/src/engine/planner.ts index 2012d54..84689fe 100644 --- a/src/engine/planner.ts +++ b/src/engine/planner.ts @@ -11,6 +11,10 @@ import { PlannerResponse } from "../types.js"; const MAX_PLANNER_CALLS = 3; +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + export async function requestInitialPlan( question: string, llm: LlmClient, @@ -48,9 +52,11 @@ export async function requestInitialPlan( `[Planner] Initial plan: No tool calls from LLM, falling back to JSON plan`, ); // If the model didn't use tools directly, fall back to JSON-planned tool calls - return await requestJsonPlan(question, llm, tools); + return await requestJsonPlan(question, llm, tools, history, anchorTime); } catch (err) { - console.warn("LLM planning failed, falling back to heuristics:", err); + console.warn( + `LLM planning failed, falling back to heuristics: ${formatErrorMessage(err)}`, + ); return { toolCalls: inferPlan(question), }; @@ -113,7 +119,9 @@ export async function requestFollowUpPlan( toolCalls: limitCalls(reply.toolCalls), }; } catch (err) { - console.error("LLM refinement failed; skipping follow-up plan:", err); + console.error( + `LLM refinement failed; skipping follow-up plan: ${formatErrorMessage(err)}`, + ); return { toolCalls: [] }; } } @@ -122,10 +130,16 @@ async function requestJsonPlan( question: string, llm: LlmClient, tools: Tool[], + history: LlmMessage[] = [], + anchorTime?: string, ): Promise { const toolList = tools.map((t) => `- ${t.name}`).join("\n") || "No tools."; const messages: LlmMessage[] = [ { role: "system", content: buildJsonPlannerPrompt(toolList) }, + ...(anchorTime + ? [{ role: "system", content: `Current Time: ${anchorTime}` } as LlmMessage] + : []), + ...history, { role: "user", content: `User request: ${question}\nReturn only JSON.` }, ]; @@ -139,7 +153,7 @@ async function requestJsonPlan( } return { toolCalls: [] }; } catch (err) { - console.warn("LLM JSON planner failed:", err); + console.warn(`LLM JSON planner failed: ${formatErrorMessage(err)}`); } return { diff --git a/src/engine/resultCache.ts b/src/engine/resultCache.ts index 4caca32..a8d950e 100644 --- a/src/engine/resultCache.ts +++ b/src/engine/resultCache.ts @@ -10,6 +10,7 @@ export const DEFAULT_CACHE_CONFIG: CacheConfig = { }; type CacheEntry = { + scopeKey: string; call: ToolCall; result: ToolResult; timestamp: number; @@ -96,6 +97,10 @@ function createExactCacheKey(call: ToolCall): string { return `${call.name}:${normalizeForHash(call.arguments || {})}`; } +function createScopedCacheKey(scopeKey: string, call: ToolCall): string { + return `${scopeKey}::${createExactCacheKey(call)}`; +} + /** * LRU cache implementation for tool results. */ @@ -111,9 +116,9 @@ export class ResultCache { * Get a cached result if available and not expired. * Performs fuzzy matching on timestamps (window: 60s). */ - get(call: ToolCall): ToolResult | null { + get(call: ToolCall, scopeKey: string = "global"): ToolResult | null { // 1. 
Try exact exact match first (fastest) - const exactKey = createExactCacheKey(call); + const exactKey = createScopedCacheKey(scopeKey, call); let entry = this.cache.get(exactKey); let matchedKey = exactKey; @@ -121,6 +126,7 @@ export class ResultCache { if (!entry) { // This linear scan is acceptable because maxSize is small (default 100) for (const [key, candidate] of this.cache.entries()) { + if (candidate.scopeKey !== scopeKey) continue; if (candidate.call.name !== call.name) continue; // Deep comparison with fuzzy timestamps @@ -157,8 +163,8 @@ export class ResultCache { /** * Store a result in the cache. */ - set(call: ToolCall, result: ToolResult): void { - const key = createExactCacheKey(call); + set(call: ToolCall, result: ToolResult, scopeKey: string = "global"): void { + const key = createScopedCacheKey(scopeKey, call); // Remove existing entry if present if (this.cache.has(key)) { @@ -174,6 +180,7 @@ export class ResultCache { } this.cache.set(key, { + scopeKey, call, result, timestamp: Date.now(), @@ -184,8 +191,8 @@ export class ResultCache { /** * Check if a call result is cached and valid. */ - has(call: ToolCall): boolean { - return this.get(call) !== null; + has(call: ToolCall, scopeKey: string = "global"): boolean { + return this.get(call, scopeKey) !== null; } /** @@ -233,10 +240,13 @@ export class ResultCache { /** * Invalidate cache entries matching a tool name. */ - invalidateByToolName(toolName: string): void { - const keysToRemove = Array.from(this.cache.keys()).filter((key) => - key.startsWith(`${toolName}:`), - ); + invalidateByToolName(toolName: string, scopeKey?: string): void { + const keysToRemove = Array.from(this.cache.entries()) + .filter(([, entry]) => + entry.call.name === toolName && + (scopeKey === undefined || entry.scopeKey === scopeKey), + ) + .map(([key]) => key); for (const key of keysToRemove) { this.cache.delete(key); diff --git a/src/engine/retryStrategy.ts b/src/engine/retryStrategy.ts index 5d840bb..a0c99ec 100644 --- a/src/engine/retryStrategy.ts +++ b/src/engine/retryStrategy.ts @@ -28,6 +28,10 @@ export class CircuitBreakerError extends Error { } } +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + /** * Determines if an error is transient and should be retried. */ @@ -182,8 +186,7 @@ export async function withRetry( if (!isTransientError(error)) { console.warn( - `[Retry]${context ? ` [${context}]` : ""} Non-transient error, not retrying:`, - error, + `[Retry]${context ? ` [${context}]` : ""} Non-transient error, not retrying: ${formatErrorMessage(error)}`, ); throw error; } diff --git a/src/engine/scopeInferer.ts b/src/engine/scopeInferer.ts index 8c8f665..3d805f8 100644 --- a/src/engine/scopeInferer.ts +++ b/src/engine/scopeInferer.ts @@ -300,6 +300,14 @@ export class ScopeInferer { hasScope = true; } + const serviceMatch = question.match( + /\b((?:svc-[a-z0-9-]+)|(?:[a-z0-9][a-z0-9-]+-(?:service|svc|api)))\b/i, + ); + if (serviceMatch) { + scope.service = serviceMatch[1].toLowerCase(); + hasScope = true; + } + if (hasScope) { return { scope, diff --git a/src/engine/toolRunner.ts b/src/engine/toolRunner.ts index 24ae25e..7085ad0 100644 --- a/src/engine/toolRunner.ts +++ b/src/engine/toolRunner.ts @@ -5,6 +5,10 @@ import { withRetry } from "./retryStrategy.js"; import { validateToolCall } from "./toolsSchema.js"; import { TimeWindowExpander } from "./timeWindowExpander.js"; +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + export async function runToolCalls( calls: ToolCall[], mcp: McpClient, @@ -116,13 +120,12 @@ export async function runToolCalls( return toolResult; } catch (err) { console.error( - `[Copilot][${logId}] Tool ${call.name} failed with error:`, - err, + `[Copilot][${logId}] Tool ${call.name} failed with error: ${formatErrorMessage(err)}`, ); // Return error as a result instead of throwing, for partial success handling return { name: call.name, - result: { error: err instanceof Error ? err.message : String(err) }, + result: { error: formatErrorMessage(err) }, arguments: sanitizedArgs, callId: call.callId, } satisfies ToolResult; diff --git a/src/llmFactory.ts b/src/llmFactory.ts index fd12158..60879ce 100644 --- a/src/llmFactory.ts +++ b/src/llmFactory.ts @@ -3,19 +3,15 @@ import { MockLlm } from "./llms/mock.js"; import { OpenAiLlm } from "./llms/openai.js"; import { AnthropicLlm } from "./llms/anthropic.js"; import { GeminiLlm } from "./llms/gemini.js"; -import { NullLlm } from "./llms/null.js"; export function createLlmFromEnv(): LlmClient { - const provider = (process.env.LLM_PROVIDER || "mock").toLowerCase(); + const provider = (process.env.LLM_PROVIDER || "openai").toLowerCase(); console.log(`[LlmFactory] Initializing LLM provider: ${provider}`); if (provider === "openai") { const key = process.env.OPENAI_API_KEY || ""; if (!key) { - console.warn( - "OPENAI_API_KEY missing; using mock LLM. Set LLM_PROVIDER=openai and OPENAI_API_KEY to enable OpenAI.", - ); - return new MockLlm(); + throw new Error("OPENAI_API_KEY is required when LLM_PROVIDER=openai"); } console.log(`[LlmFactory] Using OpenAI LLM`); return new OpenAiLlm(key); @@ -23,10 +19,9 @@ export function createLlmFromEnv(): LlmClient { if (provider === "anthropic") { const key = process.env.ANTHROPIC_API_KEY; if (!key) { - console.warn( - "ANTHROPIC_API_KEY missing; using mock LLM. Set ANTHROPIC_API_KEY to enable Anthropic.", + throw new Error( + "ANTHROPIC_API_KEY is required when LLM_PROVIDER=anthropic", ); - return new MockLlm(); } console.log(`[LlmFactory] Using Anthropic LLM`); return new AnthropicLlm(key); @@ -34,10 +29,7 @@ export function createLlmFromEnv(): LlmClient { if (provider === "gemini") { const key = process.env.GEMINI_API_KEY; if (!key) { - console.warn( - "GEMINI_API_KEY missing; using mock LLM. 
Set GEMINI_API_KEY to enable Gemini.", - ); - return new MockLlm(); + throw new Error("GEMINI_API_KEY is required when LLM_PROVIDER=gemini"); } console.log(`[LlmFactory] Using Gemini LLM`); return new GeminiLlm(key); @@ -46,6 +38,5 @@ export function createLlmFromEnv(): LlmClient { console.log(`[LlmFactory] Using Mock LLM`); return new MockLlm(); } - console.log(`[LlmFactory] Unknown provider "${provider}", using Null LLM`); - return new NullLlm(); + throw new Error(`Unsupported LLM_PROVIDER "${provider}"`); } diff --git a/src/llms/anthropic.ts b/src/llms/anthropic.ts index 259a605..6098d58 100644 --- a/src/llms/anthropic.ts +++ b/src/llms/anthropic.ts @@ -47,20 +47,23 @@ function mapMessagesForAnthropic(messages: LlmMessage[]): { messages: Array<{ role: string; content: string }>; } { const systemMessages = messages.filter((m) => m.role === "system"); - const nonSystemMessages = messages.filter( - (m) => m.role !== "system" && m.role !== "tool", - ); + const nonSystemMessages = messages.filter((m) => m.role !== "system"); const system = systemMessages.map((m) => m.content).join("\n\n") || undefined; const anthropicMessages = nonSystemMessages.map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", - content: m.content, + content: m.role === "tool" ? formatToolMessage(m) : m.content, })); return { system, messages: anthropicMessages }; } +function formatToolMessage(message: LlmMessage): string { + const toolName = message.toolName || "unknown-tool"; + return `Tool result from ${toolName}:\n${message.content}`; +} + /** * Anthropic LLM client implementation. */ @@ -128,4 +131,3 @@ export class AnthropicLlm implements LlmClient { }; } } - diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index 1a77279..528df78 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -95,21 +95,21 @@ function mapMessagesForGemini(messages: LlmMessage[]): Array<{ role: string; par continue; } - if (msg.role === "tool") { - // Skip tool messages - Gemini handles these differently - continue; - } - const role = msg.role === "assistant" ? "model" : "user"; geminiMessages.push({ role, - parts: [{ text: msg.content }], + parts: [{ text: msg.role === "tool" ? formatToolMessage(msg) : msg.content }], }); } return geminiMessages; } +function formatToolMessage(message: LlmMessage): string { + const toolName = message.toolName || "unknown-tool"; + return `Tool result from ${toolName}:\n${message.content}`; +} + /** * Extract system message from messages array. */ @@ -227,11 +227,7 @@ export class GeminiLlm implements LlmClient { }; } catch (error) { console.error("[Gemini] Request failed:", error); - console.warn("[Gemini] Returning empty response due to error"); - return { - content: "", - toolCalls: [], - }; + throw error; } } } diff --git a/src/llms/openai.ts b/src/llms/openai.ts index 7261362..88b3001 100644 --- a/src/llms/openai.ts +++ b/src/llms/openai.ts @@ -24,12 +24,10 @@ const ANY_JSON_SCHEMA: JsonObject = { function mapMessages( messages: LlmMessage[], ): Array<{ role: string; content: JsonValue }> { - return messages - .filter((m) => m.role !== "tool") // tool replies are not carried across - .map((m) => ({ - role: m.role, - content: m.content, - })); + return messages.map((m) => ({ + role: m.role === "tool" ? "user" : m.role, + content: m.role === "tool" ? 
formatToolMessage(m) : m.content, + })); } /** @@ -64,6 +62,11 @@ function buildInputForResponses( return [...systemLike, lastTurn]; } +function formatToolMessage(message: LlmMessage): string { + const toolName = message.toolName || "unknown-tool"; + return `Tool result from ${toolName}:\n${message.content}`; +} + /** * Map internal Tool definitions to Responses API tools format. */ @@ -326,10 +329,7 @@ export class OpenAiLlm implements LlmClient { } // Don't retry for 4xx errors (except 429) - console.warn( - "[OpenAI] Non-retryable error, returning empty response", - ); - return null; // Signal non-retryable error + throw new Error(`OpenAI API error ${response.status}: ${text}`); } return response; @@ -338,14 +338,6 @@ export class OpenAiLlm implements LlmClient { "llm-openai", ); - // Handle non-retryable error - if (res === null) { - return { - content: "", - toolCalls: [], - }; - } - const data = (await res.json()) as JsonObject; console.log("[OpenAI] raw response:", JSON.stringify(data)); @@ -487,13 +479,8 @@ export class OpenAiLlm implements LlmClient { toolCalls, }; } catch (error) { - // Catch ANY error (network, parsing, etc.) and return empty response console.error("[OpenAI] Request failed:", error); - console.warn("[OpenAI] Returning empty response due to error"); - return { - content: "", - toolCalls: [], - }; + throw error; } } } diff --git a/src/server.ts b/src/server.ts index f7d9fab..d9e1a13 100644 --- a/src/server.ts +++ b/src/server.ts @@ -3,6 +3,10 @@ import { CopilotEngine } from "./engine/copilotEngine.js"; import { createLlmFromEnv } from "./llmFactory.js"; import { Conversation } from "./types.js"; +function formatErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + /** * Build a preview string for a conversation. * Prioritizes the most recent assistant response over user messages. 
@@ -39,7 +43,7 @@ export function buildPreview(conversation: Conversation): string { console.error( "Error building preview for conversation", conversation.chatId, - error, + formatErrorMessage(error), ); return "Preview unavailable"; } diff --git a/src/types.ts b/src/types.ts index 4731014..42c28cb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -121,6 +121,7 @@ export type ConversationTurn = { assistantResponse?: string; timestamp: number; entities?: Entity[]; + toolResults?: ToolResult[]; executionTrace?: TurnExecutionTrace; // Full execution trace for auditability }; diff --git a/tests/chatNamer.test.ts b/tests/chatNamer.test.ts index d6a2ed9..11f1d9f 100644 --- a/tests/chatNamer.test.ts +++ b/tests/chatNamer.test.ts @@ -239,6 +239,18 @@ describe('ChatNamer', () => { assert.ok(name.toLowerCase().includes('latency')); }); + it('should avoid duplicate topic and service naming', () => { + const namer = new ChatNamer(); + const name = namer.generateName( + 'Show me payment-service issues', + 'Payment-service is degraded', + Date.now(), + [{ type: 'service', value: 'payment-service', extractedAt: Date.now(), source: 'query-services' }] + ); + + assert.equal(name, 'Payment Issues'); + }); + it('should synthesize metric correlation name', () => { const namer = new ChatNamer(); const name = namer.generateName( diff --git a/tests/conversationHistory.test.ts b/tests/conversationHistory.test.ts index 96e6f4c..0696499 100644 --- a/tests/conversationHistory.test.ts +++ b/tests/conversationHistory.test.ts @@ -5,34 +5,21 @@ import { makeEngine, StubMcp } from './helpers/copilotTestUtils.js'; test('passes conversation history to LLM for contextual follow-up questions', async () => { const receivedMessages: LlmMessage[][] = []; + const toolCalls: Array<{ name: string; arguments?: JsonObject }> = []; const llm: LlmClient = { async chat(messages: LlmMessage[], tools: Tool[]) { receivedMessages.push([...messages]); + const latestUserMessage = [...messages].reverse().find((message) => message.role === 'user')?.content ?? ''; - // First turn: "what services are running?" - if (receivedMessages.length === 1 && tools.length > 0) { + if (tools.length > 0 && latestUserMessage === 'what services are running?') { return { content: 'plan', toolCalls: [{ name: 'query-services', arguments: {} }], }; } - // First turn synthesis - if (receivedMessages.length === 2 && tools.length === 0) { - return { - content: JSON.stringify({ - conclusion: 'The payment-service is running.', - evidence: ['Found payment-service in service list'], - confidence: 0.95 - }), - }; - } - - // Second turn: "are there any incidents related to this service" - // Should have history of previous conversation - if (receivedMessages.length === 3 && tools.length > 0) { - // Verify history is present + if (tools.length > 0 && latestUserMessage === 'are there any incidents related to this service') { const hasHistory = messages.some(m => m.role === 'user' && m.content.includes('what services are running') ); @@ -49,12 +36,13 @@ test('passes conversation history to LLM for contextual follow-up questions', as }; } - // Second turn synthesis return { content: JSON.stringify({ - conclusion: 'No incidents for payment-service', + conclusion: latestUserMessage.includes('what services are running') + ? 'The payment-service is running.' 
+ : 'No incidents for payment-service', evidence: [], - confidence: 0.9 + confidence: 0.9, }), }; }, @@ -68,16 +56,11 @@ test('passes conversation history to LLM for contextual follow-up questions', as ]; }, async callTool(call) { + toolCalls.push({ name: call.name, arguments: call.arguments as JsonObject }); if (call.name === 'query-services') { return { name: 'query-services', result: { services: ['payment-service'] } }; } if (call.name === 'query-incidents') { - // Verify the LLM resolved "this service" correctly - assert.deepEqual( - (call.arguments as JsonObject)?.scope, - { service: 'payment-service' }, - 'LLM should resolve "this service" to "payment-service" using history' - ); return { name: 'query-incidents', result: [] }; } return { name: call.name, result: null }; @@ -96,6 +79,13 @@ test('passes conversation history to LLM for contextual follow-up questions', as // Verify we got both answers assert.ok(answer1.conclusion.includes('payment-service')); assert.ok(answer2.conclusion.length > 0); + const incidentCall = toolCalls.find((call) => call.name === 'query-incidents'); + assert.ok(incidentCall, 'Expected query-incidents to run on follow-up'); + assert.deepEqual( + incidentCall?.arguments?.scope, + { service: 'payment-service' }, + 'Follow-up incident query should inherit the referenced service' + ); assert.ok(receivedMessages.length >= 3, 'Should have made multiple LLM calls with history'); }); @@ -106,50 +96,24 @@ test('passes tool results from previous turns to LLM', async () => { const llm: LlmClient = { async chat(messages: LlmMessage[], _tools: Tool[]) { receivedMessages.push([...messages]); - const callIndex = receivedMessages.length; + const latestUserMessage = [...messages].reverse().find((message) => message.role === 'user')?.content ?? ''; - // First turn: "check metrics" - if (callIndex === 1) { + if (latestUserMessage === 'check services') { return { - content: 'Checking metrics...', - toolCalls: [{ name: 'query-metrics', arguments: { metric: 'cpu' } }], + content: 'Checking services...', + toolCalls: [{ name: 'query-services', arguments: {} }], }; } - // First turn synthesis - if (callIndex === 2) { + if (latestUserMessage === 'what did it return?') { return { - content: JSON.stringify({ - conclusion: 'CPU is high (95%).', - evidence: ['CPU metrics show 95% utilization.'], - confidence: 1.0 - }), + content: 'No additional tool needed.', }; } - // Second turn: "why is it high?" - if (receivedMessages.length === 3) { // Note: In our mock counting, this might be index 3 or 4 depending on test run, - // but let's stick to the logic that worked or simplify. - // Actually, checking content is safer. - - const toolMessage = messages.find(m => m.role === 'tool'); - - // Only assert if we are in the Planning call (which has tool messages) - // The Planning call has History. The Analysis call has "You are OpsOrch..." 
- if (messages[0]?.role === 'system') { // Heuristic to identify Plan call - assert.ok(toolMessage, 'History should contain tool messages'); - assert.ok(toolMessage.content.includes('95'), 'History should contain tool result content'); - - return { - content: 'Analyzing logs...', - toolCalls: [{ name: 'query-logs', arguments: {} }] - }; - } - } - return { content: JSON.stringify({ - conclusion: 'Logs show infinite loop.', + conclusion: 'The payment-service is active.', evidence: [], confidence: 0.9 }), @@ -158,10 +122,10 @@ test('passes tool results from previous turns to LLM', async () => { }; const mcp: StubMcp = { - async listTools() { return [{ name: 'query-metrics' }, { name: 'query-logs' }] as Tool[]; }, + async listTools() { return [{ name: 'query-services' }] as Tool[]; }, async callTool(call) { - if (call.name === 'query-metrics') { - return { name: 'query-metrics', result: { value: 95, unit: '%' } }; + if (call.name === 'query-services') { + return { name: 'query-services', result: { services: ['payment-service'] } }; } return { name: call.name, result: null }; } @@ -170,10 +134,17 @@ test('passes tool results from previous turns to LLM', async () => { const engine = makeEngine(llm, mcp); // Turn 1 - const ans1 = await engine.answer('check metrics'); + const ans1 = await engine.answer('check services'); // Turn 2 - await engine.answer('why is it high?', { chatId: ans1.chatId }); - + await engine.answer('what did it return?', { chatId: ans1.chatId }); + + const followUpPlanningMessages = receivedMessages.find((messages) => + messages.some((message) => message.role === 'user' && message.content === 'what did it return?') + ); + assert.ok(followUpPlanningMessages, 'Expected a follow-up planning call'); + const toolMessage = followUpPlanningMessages.find(m => m.role === 'tool'); + assert.ok(toolMessage, 'History should contain tool messages'); + assert.ok(toolMessage.content.includes('payment-service'), 'History should contain tool result content'); assert.ok(receivedMessages.length >= 3); }); diff --git a/tests/conversationManager.test.ts b/tests/conversationManager.test.ts index 4b2b3c8..74c0596 100644 --- a/tests/conversationManager.test.ts +++ b/tests/conversationManager.test.ts @@ -2,7 +2,7 @@ import assert from 'node:assert/strict'; import { test, beforeEach } from 'node:test'; import { ConversationManager, DEFAULT_CONVERSATION_CONFIG } from '../src/engine/conversationManager.js'; import { InMemoryConversationStore } from '../src/stores/inMemoryConversationStore.js'; -import { Entity } from '../src/types.js'; +import { Entity, ToolResult } from '../src/types.js'; test('ConversationManager', async (t) => { let manager: ConversationManager; @@ -109,6 +109,21 @@ test('ConversationManager', async (t) => { assert.strictEqual(messages[1].role, 'assistant'); }); + await t.test('buildMessageHistory includes tool messages from prior turns', async () => { + const toolResults: ToolResult[] = [ + { name: 'query-metrics', result: { value: 95, unit: '%' } }, + ]; + + await manager.addTurn('chat-1', 'Check CPU', 'CPU is high', undefined, undefined, toolResults); + + const messages = await manager.buildMessageHistory('chat-1'); + assert.strictEqual(messages.length, 3); + assert.strictEqual(messages[1].role, 'tool'); + assert.strictEqual(messages[1].toolName, 'query-metrics'); + assert.ok(messages[1].content.includes('95')); + assert.strictEqual(messages[2].role, 'assistant'); + }); + await t.test('deleteConversation removes conversation', async () => { await manager.addTurn('chat-1', 'Hello'); 
await manager.deleteConversation('chat-1'); diff --git a/tests/executionTracer.test.ts b/tests/executionTracer.test.ts index 81f8aa5..87dfaa0 100644 --- a/tests/executionTracer.test.ts +++ b/tests/executionTracer.test.ts @@ -68,6 +68,22 @@ test('ExecutionTracer: records heuristic modifications', () => { assert.deepEqual(trace.iterations[0].heuristicModifications[0], modification); }); +test('ExecutionTracer: updates current iteration plan', () => { + const tracer = new ExecutionTracer(); + const trace = tracer.startTrace('chat-1'); + + tracer.startIteration(trace, [{ name: 'query-incidents', arguments: {} }]); + tracer.updateIterationPlan(trace, [ + { name: 'query-incidents', arguments: { limit: 10 } }, + { name: 'query-logs', arguments: { scope: { service: 'payments' } } }, + ]); + + assert.deepEqual(trace.iterations[0].plannedTools, [ + { name: 'query-incidents', arguments: { limit: 10 } }, + { name: 'query-logs', arguments: { scope: { service: 'payments' } } }, + ]); +}); + test('ExecutionTracer: records tool executions', () => { const tracer = new ExecutionTracer(); const trace = tracer.startTrace('chat-1'); diff --git a/tests/gemini.test.ts b/tests/gemini.test.ts index 4348fa2..3c4cfdb 100644 --- a/tests/gemini.test.ts +++ b/tests/gemini.test.ts @@ -27,7 +27,7 @@ test("gemini", async (t) => { assert.ok(llm, "Should create GeminiLlm instance"); }); - await t.test("should handle errors gracefully and return empty response", async () => { + await t.test("should surface provider errors", async () => { const llm = new GeminiLlm("invalid-key-that-will-fail"); const logs: string[] = []; @@ -46,21 +46,11 @@ test("gemini", async (t) => { }; try { - const response = await llm.chat( - [{ role: "user" as const, content: "Test message" }], - [], - ); - - // Should return empty response on error - assert.strictEqual( - response.content, - "", - "Should return empty content on error", - ); - assert.strictEqual( - response.toolCalls?.length || 0, - 0, - "Should have no tool calls on error", + await assert.rejects( + llm.chat( + [{ role: "user" as const, content: "Test message" }], + [], + ), ); // Should log error details @@ -68,11 +58,6 @@ test("gemini", async (t) => { log.includes("[Gemini] Request failed"), ); assert.ok(hasErrorLog, "Should log request failure"); - - const hasWarningLog = logs.some((log) => - log.includes("[Gemini] Returning empty response due to error"), - ); - assert.ok(hasWarningLog, "Should log empty response warning"); } finally { console.log = originalLog; console.warn = originalWarn; @@ -91,16 +76,18 @@ test("gemini", async (t) => { }; try { - await llm.chat( - [ - { role: "user" as const, content: "First" }, - { role: "assistant" as const, content: "Response" }, - { role: "user" as const, content: "Second" }, - ], - [ - { name: "tool1", description: "Test tool 1" }, - { name: "tool2", description: "Test tool 2" }, - ], + await assert.rejects( + llm.chat( + [ + { role: "user" as const, content: "First" }, + { role: "assistant" as const, content: "Response" }, + { role: "user" as const, content: "Second" }, + ], + [ + { name: "tool1", description: "Test tool 1" }, + { name: "tool2", description: "Test tool 2" }, + ], + ), ); // Should log request metadata diff --git a/tests/llmFactory.test.ts b/tests/llmFactory.test.ts index fd909f9..238161c 100644 --- a/tests/llmFactory.test.ts +++ b/tests/llmFactory.test.ts @@ -5,7 +5,6 @@ import { OpenAiLlm } from '../src/llms/openai.js'; import { AnthropicLlm } from '../src/llms/anthropic.js'; import { GeminiLlm } from 
'../src/llms/gemini.js'; import { MockLlm } from '../src/llms/mock.js'; -import { NullLlm } from '../src/llms/null.js'; test('llmFactory', async (t) => { const originalEnv = { ...process.env }; @@ -23,9 +22,11 @@ test('llmFactory', async (t) => { process.env = originalEnv; }); - await t.test('defaults to openai (mock) if no provider set and no key', () => { - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm, 'Should fallback to MockLlm when key is missing for default provider'); + await t.test('defaults to openai and throws if key is missing', () => { + assert.throws( + () => createLlmFromEnv(), + /OPENAI_API_KEY is required when LLM_PROVIDER=openai/, + ); }); await t.test('creates OpenAiLlm when provider is openai and key is present', () => { @@ -35,11 +36,13 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof OpenAiLlm); }); - await t.test('creates MockLlm when provider is openai but key is missing', () => { + await t.test('throws when provider is openai but key is missing', () => { process.env.LLM_PROVIDER = 'openai'; delete process.env.OPENAI_API_KEY; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm); + assert.throws( + () => createLlmFromEnv(), + /OPENAI_API_KEY is required when LLM_PROVIDER=openai/, + ); }); await t.test('creates AnthropicLlm when provider is anthropic and key is present', () => { @@ -49,11 +52,13 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof AnthropicLlm); }); - await t.test('creates MockLlm when provider is anthropic but key is missing', () => { + await t.test('throws when provider is anthropic but key is missing', () => { process.env.LLM_PROVIDER = 'anthropic'; delete process.env.ANTHROPIC_API_KEY; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm); + assert.throws( + () => createLlmFromEnv(), + /ANTHROPIC_API_KEY is required when LLM_PROVIDER=anthropic/, + ); }); await t.test('creates MockLlm when provider is mock', () => { @@ -62,10 +67,12 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof MockLlm); }); - await t.test('creates NullLlm for unknown provider', () => { + await t.test('throws for unknown provider', () => { process.env.LLM_PROVIDER = 'unknown-provider'; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof NullLlm); + assert.throws( + () => createLlmFromEnv(), + /Unsupported LLM_PROVIDER "unknown-provider"/, + ); }); await t.test('is case insensitive for provider name', () => { @@ -81,10 +88,12 @@ test('llmFactory', async (t) => { assert.ok(llm instanceof GeminiLlm); }); - await t.test('creates MockLlm when provider is gemini but key is missing', () => { + await t.test('throws when provider is gemini but key is missing', () => { process.env.LLM_PROVIDER = 'gemini'; delete process.env.GEMINI_API_KEY; - const llm = createLlmFromEnv(); - assert.ok(llm instanceof MockLlm); + assert.throws( + () => createLlmFromEnv(), + /GEMINI_API_KEY is required when LLM_PROVIDER=gemini/, + ); }); }); diff --git a/tests/parallelToolRunner.test.ts b/tests/parallelToolRunner.test.ts index e7f0f2d..a77ed88 100644 --- a/tests/parallelToolRunner.test.ts +++ b/tests/parallelToolRunner.test.ts @@ -156,6 +156,7 @@ test('ParallelToolRunner: executes independent tools in parallel', async () => { test('ParallelToolRunner: executes dependent tools in correct order', async () => { const runner = new ParallelToolRunner(); const executionOrder: string[] = []; + let timelineArgs: ToolCall['arguments'] | undefined; const mcp = new MockMcp( async () => [ @@ -164,6 +165,12 @@ 
test('ParallelToolRunner: executes dependent tools in correct order', async () = ], async (call) => { executionOrder.push(call.name); + if (call.name === 'query-incidents') { + return { name: call.name, result: [{ id: 'INC-123' }] }; + } + if (call.name === 'get-incident-timeline') { + timelineArgs = call.arguments; + } return { name: call.name, result: { ok: true } }; } ); @@ -187,6 +194,7 @@ test('ParallelToolRunner: executes dependent tools in correct order', async () = // query-incidents must execute before get-incident-timeline assert.equal(executionOrder[0], 'query-incidents'); assert.equal(executionOrder[1], 'get-incident-timeline'); + assert.deepEqual(timelineArgs, { id: 'INC-123' }); }); test('ParallelToolRunner: handles mixed parallel and sequential execution', async () => { diff --git a/tests/resultCache.test.ts b/tests/resultCache.test.ts index aa806b0..cb92e18 100644 --- a/tests/resultCache.test.ts +++ b/tests/resultCache.test.ts @@ -110,6 +110,18 @@ test('invalidates by tool name', () => { assert.deepEqual(cache.get({ name: 'query-logs', arguments: { query: 'error' } }), { name: 'query-logs', result: 'data2' }); }); +test('cache entries are scoped by namespace', () => { + const cache = new ResultCache({ maxSize: 10, ttlMs: 60000 }); + const call: ToolCall = { name: 'query-incidents', arguments: { limit: 5 } }; + + cache.set(call, { name: 'query-incidents', result: 'chat-a' }, 'chat-a'); + cache.set(call, { name: 'query-incidents', result: 'chat-b' }, 'chat-b'); + + assert.deepEqual(cache.get(call, 'chat-a'), { name: 'query-incidents', result: 'chat-a' }); + assert.deepEqual(cache.get(call, 'chat-b'), { name: 'query-incidents', result: 'chat-b' }); + assert.equal(cache.get(call, 'chat-c'), null); +}); + test('LRU: recently accessed items are not evicted', () => { const cache = new ResultCache({ maxSize: 3, ttlMs: 60000 }); @@ -293,4 +305,3 @@ test('expired entries count as misses', async () => { assert.equal(stats.hits, 1); assert.equal(stats.misses, 1); }); - From 62f4ec3702ef144083a113af0a035e73c26a67ae Mon Sep 17 00:00:00 2001 From: yusufaytas Date: Sun, 15 Mar 2026 18:01:05 +0000 Subject: [PATCH 2/2] The cache write path now builds the cache key from the actual executed call shape --- src/engine/copilotEngine.ts | 10 +++- tests/copilotEngine.followups.test.ts | 83 +++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts index 48fd6a9..02ac108 100644 --- a/src/engine/copilotEngine.ts +++ b/src/engine/copilotEngine.ts @@ -225,6 +225,10 @@ export class CopilotEngine { for (let i = 0; i < callsToExecute.length; i++) { const result = freshResults[i]; if (!result) continue; // Skip if somehow undefined + const executedCall: ToolCall = { + ...callsToExecute[i], + arguments: result.arguments ?? 
callsToExecute[i].arguments, + }; const isError = typeof result.result === "object" && @@ -233,14 +237,14 @@ export class CopilotEngine { // Only cache if not an error if (!isError) { - this.resultCache.set(callsToExecute[i], result, chatId); + this.resultCache.set(executedCall, result, chatId); } // Record tool execution in trace if (trace) { this.executionTracer.recordToolExecution(trace, { - toolName: callsToExecute[i].name, - arguments: callsToExecute[i].arguments, + toolName: executedCall.name, + arguments: executedCall.arguments, cacheHit: false, executionTimeMs: Math.round(executionTime / callsToExecute.length), // Approximate per-tool time success: !isError, diff --git a/tests/copilotEngine.followups.test.ts b/tests/copilotEngine.followups.test.ts index f88e2a9..331e2ef 100644 --- a/tests/copilotEngine.followups.test.ts +++ b/tests/copilotEngine.followups.test.ts @@ -251,3 +251,86 @@ test('drills into incident timelines/logs/metrics when user asks for root cause' assert.ok(callNames.includes('query-metrics'), 'Should query metrics'); assert.ok(callNames.includes('query-logs'), 'Should query logs'); }); + +test('caches dependency-resolved detail calls by executed arguments, not unresolved plan args', async () => { + let latestIncidentId = 'INC-100'; + const timelineCalls: string[] = []; + + const llm: LlmClient = { + async chat(messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + const latestUserMessage = + [...messages].reverse().find((message) => message.role === 'user')?.content ?? ''; + + if (latestUserMessage === 'summarize first incident') { + return { + content: 'plan-first', + toolCalls: [ + { name: 'query-incidents', arguments: { query: 'first' } as JsonObject }, + { name: 'get-incident-timeline', arguments: {} as JsonObject }, + ], + }; + } + + if (latestUserMessage === 'summarize second incident') { + return { + content: 'plan-second', + toolCalls: [ + { name: 'query-incidents', arguments: { query: 'second' } as JsonObject }, + { name: 'get-incident-timeline', arguments: {} as JsonObject }, + ], + }; + } + + return { content: 'stop', toolCalls: [] }; + } + + return { + content: JSON.stringify({ conclusion: `timeline for ${latestIncidentId}`, evidence: [] }), + toolCalls: [], + }; + }, + }; + + const mcp: StubMcp = { + async listTools() { + return [ + { name: 'query-incidents' } as Tool, + { name: 'get-incident-timeline' } as Tool, + ]; + }, + async callTool(call): Promise { + if (call.name === 'query-incidents') { + latestIncidentId = (call.arguments.query === 'first') ? 'INC-100' : 'INC-200'; + return { + name: call.name, + arguments: call.arguments, + result: [{ id: latestIncidentId }], + }; + } + + if (call.name === 'get-incident-timeline') { + const incidentId = String(call.arguments.id); + timelineCalls.push(incidentId); + return { + name: call.name, + arguments: call.arguments, + result: [{ incidentId, at: '2024-01-01T00:00:00Z', kind: 'event' }], + }; + } + + throw new Error(`unexpected call ${call.name}`); + }, + }; + + const engine = makeEngine(llm, mcp); + + const first = await engine.answer('summarize first incident'); + await engine.answer('summarize second incident', { chatId: first.chatId }); + + assert.deepEqual( + timelineCalls, + ['INC-100', 'INC-200'], + 'Timeline calls should use each resolved incident id instead of reusing a stale no-id cache entry', + ); +});
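
For reference, a minimal usage sketch (not part of the patch) of the chat-scoped cache and the executed-call keying that the second commit describes. It assumes the import paths and the ToolCall/ToolResult shapes shown in resultCache.ts and types.ts above; cacheExecutedResult and lookup are illustrative helper names, not code from this change.

    import { ResultCache } from "./src/engine/resultCache.js";
    import type { ToolCall, ToolResult } from "./src/types.js";

    // Same config shape the tests use: bounded size plus a TTL.
    const cache = new ResultCache({ maxSize: 100, ttlMs: 60_000 });

    function cacheExecutedResult(chatId: string, planned: ToolCall, result: ToolResult): void {
      // Key by what actually ran: dependency resolution may have filled in an id
      // (e.g. get-incident-timeline inheriting the incident id from query-incidents),
      // so the unresolved plan arguments must not shadow the resolved call.
      const executed: ToolCall = { ...planned, arguments: result.arguments ?? planned.arguments };
      cache.set(executed, result, chatId);
    }

    function lookup(chatId: string, call: ToolCall): ToolResult | null {
      // Scoping by chatId keeps one conversation's results from leaking into another;
      // omitting the scope falls back to the "global" namespace.
      return cache.get(call, chatId);
    }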