From 8e0b4bcf35a01e0b9a55ce0f30db51a957bf916b Mon Sep 17 00:00:00 2001 From: yusufaytas Date: Sun, 15 Mar 2026 22:28:24 +0000 Subject: [PATCH 1/2] Tightened enginee follow-up, scope inference, and reference/cache handling Enabled first-iteration follow-ups to run cleanly and persist their entities, pass resolved questions plus conversation history into synthesis, tighten active-status detection, avoid incorrect log references and scope narrowing, and dedupe fuzzy cache entries. Includes regression tests for follow-ups, long-history synthesis, scope inference, cache behavior, and reference handling. --- src/engine/answerGenerator.ts | 11 +- src/engine/contextManager.ts | 27 ++- src/engine/copilotEngine.ts | 59 ++++++- src/engine/followUpEngine.ts | 23 +-- src/engine/referenceBuilder.ts | 94 +++++------ src/engine/referenceResolver.ts | 6 +- src/engine/resultCache.ts | 13 +- src/engine/scopeInferer.ts | 76 +++------ tests/contextManager.test.ts | 47 ++++++ tests/copilotEngine.followups.test.ts | 228 ++++++++++++++++++++++++++ tests/engine/answerGenerator.test.ts | 63 ++++++- tests/engine/referenceBuilder.test.ts | 34 ++++ tests/engine/scopeInferer.test.ts | 99 +++++++++++ tests/followUpEngine.test.ts | 56 +++++++ tests/referenceResolver.test.ts | 27 +++ tests/resultCache.test.ts | 40 +++++ 16 files changed, 751 insertions(+), 152 deletions(-) diff --git a/src/engine/answerGenerator.ts b/src/engine/answerGenerator.ts index 705edeb..6177fae 100644 --- a/src/engine/answerGenerator.ts +++ b/src/engine/answerGenerator.ts @@ -1,4 +1,4 @@ -import { CopilotAnswer, CopilotAction, CopilotReferences, ToolResult, LlmClient, Correlation, Anomaly, MetricReference, LogReference, MetricExpression, LogExpression, QueryScope } from "../types.js"; +import { CopilotAnswer, CopilotAction, CopilotReferences, ToolResult, LlmClient, LlmMessage, Correlation, Anomaly, MetricReference, LogReference, MetricExpression, LogExpression, QueryScope } from "../types.js"; import { buildFinalAnswerPrompt } from "../prompts.js"; import { HandlerUtils } from "./handlers/utils.js"; import { CorrelationDetector } from "./correlationDetector.js"; @@ -17,6 +17,7 @@ export async function synthesizeCopilotAnswer( results: ToolResult[], chatId: string, llm: LlmClient, + conversationHistory: LlmMessage[] = [], ): Promise { const fallback = createFallbackAnswer(question, results, chatId); if (!results.length) return fallback; @@ -32,8 +33,12 @@ export async function synthesizeCopilotAnswer( // Create a comprehensive prompt for the LLM, including insights const prompt = createSynthesisPrompt(question, results, correlations, anomalies); - // Get LLM analysis - const response = await llm.chat([{ role: "user", content: prompt }], []); + // Get LLM analysis (include conversation history for multi-turn context) + const messages: LlmMessage[] = [ + ...conversationHistory, + { role: "user", content: prompt }, + ]; + const response = await llm.chat(messages, []); if (!response || !response.content) { console.warn(`[Copilot][${chatId}] LLM synthesis failed, using fallback`); diff --git a/src/engine/contextManager.ts b/src/engine/contextManager.ts index d983076..97d9f14 100644 --- a/src/engine/contextManager.ts +++ b/src/engine/contextManager.ts @@ -90,40 +90,35 @@ export function fitMessagesInContext( return messages; // Fits, no truncation needed } - // Separate messages by type + // Separate system messages (always kept, highest priority) const systemMessages = messages.filter((m) => m.role === "system"); - const userMessages = messages.filter((m) => m.role === "user"); + const nonSystemMessages = messages.filter((m) => m.role !== "system"); - // Always keep system messages (highest priority) + // Always keep system messages const result: LlmMessage[] = [...systemMessages]; let budget = config.maxContextTokens - systemMessages.reduce((sum, msg) => sum + estimateTokens(msg.content), 0); - // Keep most recent user message (critical for context) - const lastUserMessage = userMessages[userMessages.length - 1]; - if (lastUserMessage) { - result.push(lastUserMessage); - budget -= estimateTokens(lastUserMessage.content); - } - - // Add recent messages with remaining budget - const recentMessages = messages - .slice(-3) - .filter((m) => m.role !== "system" && m !== lastUserMessage); + // Keep most recent messages first (regardless of role: user, assistant, tool) + // Work backwards from most recent to fill budget + const reversedNonSystem = [...nonSystemMessages].reverse(); - for (const msg of recentMessages) { + for (const msg of reversedNonSystem) { const tokens = estimateTokens(msg.content); if (tokens <= budget) { result.push(msg); budget -= tokens; } else if (budget > 100) { - // Truncate this message to fit + // Truncate this message to fit remaining budget const truncated = { ...msg, content: truncateToTokens(msg.content, budget), }; result.push(truncated); + budget = 0; + break; + } else { break; } } diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts index 02ac108..ba828a8 100644 --- a/src/engine/copilotEngine.ts +++ b/src/engine/copilotEngine.ts @@ -417,12 +417,16 @@ export class CopilotEngine { } // Validate and fix calls (ensures required args like start/end/step are present for metrics) - plannedCalls = await this.planRefiner.refineCalls( - plannedCalls, - this.mcp.getTools(), - conversationTurns, - allResults, - ); + // Note: On first iteration, refineCalls is already called inside applyHeuristics, + // so we only need to call it explicitly on follow-up iterations. + if (!isFirstIteration) { + plannedCalls = await this.planRefiner.refineCalls( + plannedCalls, + this.mcp.getTools(), + conversationTurns, + allResults, + ); + } // Limit calls plannedCalls = this.limitToolCalls(plannedCalls); @@ -461,11 +465,47 @@ export class CopilotEngine { // D. Accumulate allResults.push(...results); + + // D2. Run follow-up heuristics after first iteration results too + // (Previously follow-ups only ran on iteration ≥2, missing orchestration plan lookups + // when problems were detected on the first tool execution) + let iterationResultsForEntityExtraction = results; + + if (isFirstIteration && results.length > 0) { + const formattedForFollowUp = this.formatResultsForPlanning(allResults); + const availableToolNames = new Set(this.mcp.getTools().map((t) => t.name)); + const firstIterationFollowUps = (await this.followUpEngine.applyFollowUps( + formattedForFollowUp, + chatId, + conversationTurns, + questionForPlanning, + plannedCalls, + )).filter((c) => availableToolNames.has(c.name)); + + if (firstIterationFollowUps.length > 0) { + console.log( + `[Copilot][${chatId}] First-iteration follow-ups: ${firstIterationFollowUps.length} call(s): ${firstIterationFollowUps.map((c) => c.name).join(", ")}`, + ); + + const followUpResults = await this.runToolCallsWithCache( + this.limitToolCalls(firstIterationFollowUps), + chatId, + this.mcp.getTools(), + trace, + ); + allResults.push(...followUpResults); + iterationResultsForEntityExtraction = [ + ...results, + ...followUpResults, + ]; + } + } + isFirstIteration = false; // E. Extract entities from results const extractedEntities = await this.entityExtractor.extractFromResults( - results, + iterationResultsForEntityExtraction, chatId, conversationTurns, ); @@ -537,12 +577,13 @@ export class CopilotEngine { ); } - // Step 5: Synthesize final answer + // Step 5: Synthesize final answer (use resolved question and conversation history for context) const answer = await synthesizeCopilotAnswer( - question, + questionForPlanning, allResults, chatId, this.config.llm, + conversationHistory, ); // If calls were planned but no results were collected, tool calls were likely diff --git a/src/engine/followUpEngine.ts b/src/engine/followUpEngine.ts index ad72a7c..b821675 100644 --- a/src/engine/followUpEngine.ts +++ b/src/engine/followUpEngine.ts @@ -221,24 +221,15 @@ export class FollowUpEngine { "active", "investigating", ]); - const resolvedStatuses = new Set([ - "resolved", - "closed", - "mitigated", - "remediated", - ]); for (const record of this.collectRecords(result)) { const status = record.status; if (typeof status === "string") { const normalized = status.toLowerCase(); if (activeStatuses.has(normalized)) return true; - if (resolvedStatuses.has(normalized)) continue; - return true; - } - if (record.id) { - return true; + // Unknown or resolved statuses are not considered active } + // Records without a status are not considered active } return false; @@ -246,7 +237,6 @@ export class FollowUpEngine { private hasActiveAlert(result: JsonValue): boolean { const activeStatuses = new Set(["firing", "acknowledged", "triggered", "open"]); - const resolvedStatuses = new Set(["resolved", "closed", "cleared"]); for (const record of this.collectRecords(result)) { const status = record.status; if (typeof status === "string") { @@ -254,14 +244,9 @@ export class FollowUpEngine { if (activeStatuses.has(normalized)) { return true; } - if (resolvedStatuses.has(normalized)) { - continue; - } - return true; - } - if (record.id) { - return true; + // Unknown or resolved statuses are not considered active } + // Records without a status are not considered active } return false; } diff --git a/src/engine/referenceBuilder.ts b/src/engine/referenceBuilder.ts index eb269bd..4518e26 100644 --- a/src/engine/referenceBuilder.ts +++ b/src/engine/referenceBuilder.ts @@ -393,61 +393,53 @@ export function buildReferences( } } - let query: string | undefined; - - // MCP logQuerySchema: expression is object with search field - if (args.expression && typeof args.expression === "object" && !Array.isArray(args.expression)) { - const expr = args.expression as JsonObject; - // MCP logExpressionSchema: search is z.string().optional() - if (typeof expr.search === "string") query = expr.search; - - // If we have filters but no search string, use "Filtered Logs" or similar placeholder - // or just allow empty search. The Reference structure allows undefined search if we change types, - // but let's check strict types. LogReference.expression.search is optional in types.ts? - // Let's check types again. - // types.ts: export interface LogExpression { search?: string; ... } - // So we can create a LogReference without search string if filters exist. - } - - // Check if we have enough to make a reference (search OR filters) - const hasSearch = typeof query === "string"; - const exprObj = (args.expression as JsonObject) || {}; - const hasFilters = Array.isArray(exprObj.filters) && exprObj.filters.length > 0; - - if (hasSearch || hasFilters) { - // Construct a simple LogReference - const log: LogReference = { - expression: {}, - }; - if (hasSearch) log.expression.search = query; - - // Copy filters if present - if (hasFilters) { - // We can blindly copy filters if structure matches, or leave emptiness. - // Ideally we copy them to be accurate. - // defined in types as: filters?: { field: string; operator: string; value: string }[]; - // Let's copy them correctly. - log.expression.filters = exprObj.filters as { field: string; operator: string; value: string }[]; + // Only extract log references from log tools + if (capabilityType === "log") { + let query: string | undefined; + + // MCP logQuerySchema: expression is object with search field + if (args.expression && typeof args.expression === "object" && !Array.isArray(args.expression)) { + const expr = args.expression as JsonObject; + // MCP logExpressionSchema: search is z.string().optional() + if (typeof expr.search === "string") query = expr.search; } - if (typeof args.start === "string" && args.start.trim()) - log.start = args.start.trim(); - if (typeof args.end === "string" && args.end.trim()) - log.end = args.end.trim(); - if (typeof args.service === "string" && args.service.trim()) - log.scope = { service: args.service.trim() }; + // Check if we have enough to make a reference (search OR filters) + const hasSearch = typeof query === "string"; + const exprObj = (args.expression as JsonObject) || {}; + const hasFilters = Array.isArray(exprObj.filters) && exprObj.filters.length > 0; - // Handle object scope (MCP uses scope object) - if ( - args.scope && - typeof args.scope === "object" && - !Array.isArray(args.scope) && - (args.scope as JsonObject).service - ) { - log.scope = log.scope ?? { service: (args.scope as JsonObject).service as string }; - } + if (hasSearch || hasFilters) { + // Construct a simple LogReference + const log: LogReference = { + expression: {}, + }; + if (hasSearch) log.expression.search = query; + + // Copy filters if present + if (hasFilters) { + log.expression.filters = exprObj.filters as { field: string; operator: string; value: string }[]; + } + + if (typeof args.start === "string" && args.start.trim()) + log.start = args.start.trim(); + if (typeof args.end === "string" && args.end.trim()) + log.end = args.end.trim(); + if (typeof args.service === "string" && args.service.trim()) + log.scope = { service: args.service.trim() }; + + // Handle object scope (MCP uses scope object) + if ( + args.scope && + typeof args.scope === "object" && + !Array.isArray(args.scope) && + (args.scope as JsonObject).service + ) { + log.scope = log.scope ?? { service: (args.scope as JsonObject).service as string }; + } - logs.push(log); + logs.push(log); + } } } diff --git a/src/engine/referenceResolver.ts b/src/engine/referenceResolver.ts index 1747529..0403af7 100644 --- a/src/engine/referenceResolver.ts +++ b/src/engine/referenceResolver.ts @@ -24,10 +24,10 @@ export class ReferenceResolver { conversationHistory?: ConversationTurn[], ): Promise> { const resolutions = new Map(); - const normalized = question.toLowerCase(); - // Extract potential reference patterns from the question - const referencePatterns = this.extractReferencePatterns(normalized); + // Extract potential reference patterns from the original question + // (handlers receive original casing; replacement uses case-insensitive regex) + const referencePatterns = this.extractReferencePatterns(question); for (const { text, entityType } of referencePatterns) { if (this.referenceRegistry.hasHandlers(entityType)) { diff --git a/src/engine/resultCache.ts b/src/engine/resultCache.ts index a8d950e..9d2da36 100644 --- a/src/engine/resultCache.ts +++ b/src/engine/resultCache.ts @@ -166,9 +166,20 @@ export class ResultCache { set(call: ToolCall, result: ToolResult, scopeKey: string = "global"): void { const key = createScopedCacheKey(scopeKey, call); - // Remove existing entry if present + // Remove existing exact-key entry if present if (this.cache.has(key)) { this.accessOrder = this.accessOrder.filter((k) => k !== key); + } else { + // Also remove any fuzzy-matched entry under a different key to prevent duplicates + for (const [existingKey, candidate] of this.cache.entries()) { + if (candidate.scopeKey !== scopeKey) continue; + if (candidate.call.name !== call.name) continue; + if (isFuzzyEqual(call.arguments || {}, candidate.call.arguments || {})) { + this.cache.delete(existingKey); + this.accessOrder = this.accessOrder.filter((k) => k !== existingKey); + break; + } + } } // Evict oldest entry if cache is full diff --git a/src/engine/scopeInferer.ts b/src/engine/scopeInferer.ts index 3d805f8..8b17826 100644 --- a/src/engine/scopeInferer.ts +++ b/src/engine/scopeInferer.ts @@ -178,56 +178,34 @@ export class ScopeInferer { return scope.service; } - // Service from first item in arrays using MCP schema fields + // Service from items in arrays using MCP schema fields + // Only infer if ALL items share the same service (avoids narrowing multi-service results) // incidents, alerts, logs use: service field // services use: name field - if (Array.isArray(obj.incidents) && obj.incidents.length > 0) { - const first = obj.incidents[0]; - if (first && typeof first === "object" && !Array.isArray(first)) { - const firstObj = first as JsonObject; - if (firstObj.service && typeof firstObj.service === "string") { - return firstObj.service; - } - } - } - - if (Array.isArray(obj.alerts) && obj.alerts.length > 0) { - const first = obj.alerts[0]; - if (first && typeof first === "object" && !Array.isArray(first)) { - const firstObj = first as JsonObject; - if (firstObj.service && typeof firstObj.service === "string") { - return firstObj.service; - } - } - } - - if (Array.isArray(obj.logs) && obj.logs.length > 0) { - const first = obj.logs[0]; - if (first && typeof first === "object" && !Array.isArray(first)) { - const firstObj = first as JsonObject; - if (firstObj.service && typeof firstObj.service === "string") { - return firstObj.service; - } - } - } + const arrayFields: Array<{ key: string; serviceField: string }> = [ + { key: "incidents", serviceField: "service" }, + { key: "alerts", serviceField: "service" }, + { key: "logs", serviceField: "service" }, + { key: "services", serviceField: "name" }, + { key: "metrics", serviceField: "service" }, + ]; - // Services are extracted by name field (MCP serviceSchema) - if (Array.isArray(obj.services) && obj.services.length > 0) { - const first = obj.services[0]; - if (first && typeof first === "object" && !Array.isArray(first)) { - const firstObj = first as JsonObject; - if (firstObj.name && typeof firstObj.name === "string") { - return firstObj.name; + for (const { key, serviceField } of arrayFields) { + const arr = obj[key]; + if (Array.isArray(arr) && arr.length > 0) { + const services = new Set(); + for (const item of arr) { + if (item && typeof item === "object" && !Array.isArray(item)) { + const itemObj = item as JsonObject; + const svc = itemObj[serviceField]; + if (typeof svc === "string") { + services.add(svc); + } + } } - } - } - - if (Array.isArray(obj.metrics) && obj.metrics.length > 0) { - const first = obj.metrics[0]; - if (first && typeof first === "object" && !Array.isArray(first)) { - const firstObj = first as JsonObject; - if (firstObj.service && typeof firstObj.service === "string") { - return firstObj.service; + // Only infer scope if exactly one service is present across all items + if (services.size === 1) { + return [...services][0]; } } } @@ -326,13 +304,13 @@ export class ScopeInferer { applyScope(calls: ToolCall[], inference: ScopeInference): ToolCall[] { return calls.map((call) => { // Only apply scope to tools that commonly use scope - const scopeTools = [ + const scopeTools = new Set([ "query-logs", "query-metrics", "query-incidents", "query-alerts", - ]; - if (!scopeTools.some((tool) => call.name.includes(tool.split("-")[1]))) { + ]); + if (!scopeTools.has(call.name)) { return call; } diff --git a/tests/contextManager.test.ts b/tests/contextManager.test.ts index 98d48fc..afcdeb9 100644 --- a/tests/contextManager.test.ts +++ b/tests/contextManager.test.ts @@ -119,3 +119,50 @@ test('summarizes conversation for logging', () => { assert.ok(summary.includes('2 user messages')); assert.ok(summary.includes('1 assistant')); }); + +test('fitMessagesInContext retains tool messages when within budget', () => { + const messages: LlmMessage[] = [ + { role: 'system', content: 'You are a helper.' }, + { role: 'user', content: 'question' }, + { role: 'tool', content: 'tool result data' }, + { role: 'assistant', content: 'here is my analysis' }, + { role: 'user', content: 'follow up question' }, + ]; + + const fitted = fitMessagesInContext(messages, { + maxContextTokens: 10000, + systemPriority: 1.0, + recentPriority: 0.8, + olderPriority: 0.3, + }); + + assert.equal(fitted.length, 5, 'All messages should fit'); + assert.ok(fitted.some(m => m.role === 'tool'), 'Tool messages should be retained'); + assert.ok(fitted.some(m => m.role === 'assistant'), 'Assistant messages should be retained'); +}); + +test('fitMessagesInContext keeps recent tool/assistant messages under tight budget', () => { + const messages: LlmMessage[] = [ + { role: 'system', content: 'sys' }, + { role: 'user', content: 'old question ' + 'x'.repeat(500) }, + { role: 'tool', content: 'old tool result ' + 'y'.repeat(500) }, + { role: 'assistant', content: 'old answer ' + 'z'.repeat(500) }, + { role: 'user', content: 'recent question' }, + { role: 'tool', content: 'recent tool result' }, + { role: 'assistant', content: 'recent answer' }, + ]; + + const fitted = fitMessagesInContext(messages, { + maxContextTokens: 60, + systemPriority: 1.0, + recentPriority: 0.8, + olderPriority: 0.3, + }); + + assert.ok(fitted.some(m => m.role === 'system')); + const hasRecentTool = fitted.some(m => m.content.includes('recent tool')); + const hasRecentAnswer = fitted.some(m => m.content.includes('recent answer')); + const hasRecentQuestion = fitted.some(m => m.content.includes('recent question')); + assert.ok(hasRecentQuestion || hasRecentTool || hasRecentAnswer, + 'At least some recent messages should be kept'); +}); diff --git a/tests/copilotEngine.followups.test.ts b/tests/copilotEngine.followups.test.ts index 331e2ef..815382a 100644 --- a/tests/copilotEngine.followups.test.ts +++ b/tests/copilotEngine.followups.test.ts @@ -334,3 +334,231 @@ test('caches dependency-resolved detail calls by executed arguments, not unresol 'Timeline calls should use each resolved incident id instead of reusing a stale no-id cache entry', ); }); + +test('first iteration follow-ups suggest orchestration plans for active incidents', async () => { + let plannerCalls = 0; + const llm: LlmClient = { + async chat(_messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + plannerCalls++; + if (plannerCalls === 1) { + return { + content: 'plan', + toolCalls: [{ name: 'query-incidents', arguments: {} }], + }; + } + return { content: 'stop', toolCalls: [] }; + } + return { + content: JSON.stringify({ conclusion: 'done', evidence: [] }), + toolCalls: [], + }; + }, + }; + + const calls: ToolCall[] = []; + const mcp: StubMcp = { + async listTools() { + return [ + { name: 'query-incidents' } as Tool, + { name: 'query-orchestration-plans' } as Tool, + ]; + }, + async callTool(call): Promise { + calls.push(call); + if (call.name === 'query-incidents') { + return { + name: call.name, + result: [ + { id: 'INC-001', service: 'payments', status: 'active' }, + ], + }; + } + if (call.name === 'query-orchestration-plans') { + return { name: call.name, result: [] }; + } + return { name: call.name, result: {} }; + }, + }; + + const engine = makeEngine(llm, mcp); + await engine.answer('investigate the payments incident'); + + const callNames = calls.map(c => c.name); + assert.ok( + callNames.includes('query-orchestration-plans'), + 'First iteration should trigger orchestration plan follow-up for active incidents. Got: ' + callNames.join(', '), + ); +}); + +test('first iteration follow-ups do not add unavailable tools', async () => { + const llm: LlmClient = { + async chat(_messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + return { + content: 'plan', + toolCalls: [{ name: 'query-incidents', arguments: {} }], + }; + } + return { + content: JSON.stringify({ conclusion: 'done', evidence: [] }), + toolCalls: [], + }; + }, + }; + + const calls: ToolCall[] = []; + const mcp: StubMcp = { + async listTools() { + // Crucially: query-orchestration-plans is NOT in the tool list + return [{ name: 'query-incidents' } as Tool]; + }, + async callTool(call): Promise { + calls.push(call); + return { + name: call.name, + result: [{ id: 'INC-001', service: 'payments', status: 'active' }], + }; + }, + }; + + const engine = makeEngine(llm, mcp); + await engine.answer('investigate payments incident'); + + const callNames = calls.map(c => c.name); + assert.ok( + !callNames.includes('query-orchestration-plans'), + 'Should not attempt unavailable tools. Got: ' + callNames.join(', '), + ); +}); + +test('first iteration follow-up results contribute entities to conversation state', async () => { + const llm: LlmClient = { + async chat(_messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + return { + content: 'plan', + toolCalls: [{ name: 'query-incidents', arguments: {} }], + }; + } + return { + content: JSON.stringify({ conclusion: 'done', evidence: [] }), + toolCalls: [], + }; + }, + }; + + const mcp: StubMcp = { + async listTools() { + return [ + { name: 'query-incidents' } as Tool, + { name: 'query-orchestration-plans' } as Tool, + ]; + }, + async callTool(call): Promise { + if (call.name === 'query-incidents') { + return { + name: call.name, + result: [{ id: 'INC-001', service: 'payments', status: 'active' }], + }; + } + if (call.name === 'query-orchestration-plans') { + return { + name: call.name, + result: [{ id: 'plan-42', name: 'payments recovery' }], + }; + } + return { name: call.name, result: {} }; + }, + }; + + const engine = makeEngine(llm, mcp); + const answer = await engine.answer('investigate the payments incident'); + + const conversation = await engine.getConversationManager().peekConversation(answer.chatId); + const turnEntities = conversation?.turns[0]?.entities ?? []; + const planEntity = turnEntities.find((entity) => entity.type === 'orchestration_plan'); + + assert.ok(planEntity, 'Expected orchestration plan entity from first-iteration follow-up results'); + assert.equal(planEntity?.value, 'plan-42'); +}); + +test('answer synthesizer receives resolved question with entity references', async () => { + let synthesisPrompt = ''; + const llm: LlmClient = { + async chat(messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + return { + content: 'plan', + toolCalls: [{ name: 'query-incidents', arguments: {} }], + }; + } + // Capture the synthesis prompt + const lastMsg = messages[messages.length - 1]; + synthesisPrompt = lastMsg?.content ?? ''; + return { + content: JSON.stringify({ conclusion: 'done', evidence: [] }), + toolCalls: [], + }; + }, + }; + + const mcp: StubMcp = { + async listTools() { + return [{ name: 'query-incidents' } as Tool]; + }, + async callTool(call): Promise { + return { + name: call.name, + result: [{ id: 'INC-001', service: 'payments', status: 'active' }], + }; + }, + }; + + const engine = makeEngine(llm, mcp); + // Ask using a plain question (no references to resolve) + await engine.answer('Show me active incidents'); + + // The synthesis prompt should contain the question text + assert.ok( + synthesisPrompt.includes('active incidents'), + 'Synthesis prompt should include the question text', + ); +}); + +test('first iteration does not double-validate through refineCalls', async () => { + // This is a structural test: we verify the engine doesn't crash and the + // plan goes through validation only once by checking the tool calls executed. + const llm: LlmClient = { + async chat(_messages: LlmMessage[], tools: Tool[]) { + if (tools.length) { + return { + content: 'plan', + toolCalls: [{ name: 'query-services', arguments: {} }], + }; + } + return { + content: JSON.stringify({ conclusion: 'services ok', evidence: [] }), + toolCalls: [], + }; + }, + }; + + const calls: ToolCall[] = []; + const mcp: StubMcp = { + async listTools() { + return [{ name: 'query-services' } as Tool]; + }, + async callTool(call): Promise { + calls.push(call); + return { name: call.name, result: { services: ['svc-a'] } }; + }, + }; + + const engine = makeEngine(llm, mcp); + await engine.answer('list services'); + + // The tool should be called exactly once (no double execution from double validation) + assert.equal(calls.length, 1, 'Tool should execute exactly once'); + assert.equal(calls[0].name, 'query-services'); +}); diff --git a/tests/engine/answerGenerator.test.ts b/tests/engine/answerGenerator.test.ts index 6c8ea0d..df0a5e3 100644 --- a/tests/engine/answerGenerator.test.ts +++ b/tests/engine/answerGenerator.test.ts @@ -155,6 +155,68 @@ describe('synthesizeCopilotAnswer', () => { assert.strictEqual(answer.chatId, 'chat-error'); }); + test('passes very long multi-turn conversation history alongside the final synthesis prompt', async () => { + let capturedMessages: LlmMessage[] = []; + + const llm: LlmClient = { + async chat(messages: LlmMessage[], _tools: Tool[]) { + capturedMessages = messages; + return { + content: JSON.stringify({ + conclusion: 'Long-context synthesis completed.', + confidence: 0.82, + }), + toolCalls: [], + }; + } + }; + + const conversationHistory: LlmMessage[] = [ + { role: 'user', content: 'Initial incident report ' + 'a'.repeat(4000) }, + { role: 'tool', content: 'query-incidents: ' + 'b'.repeat(3500) }, + { role: 'assistant', content: 'Preliminary analysis ' + 'c'.repeat(4200) }, + { role: 'user', content: 'Can you correlate this with logs and metrics? ' + 'd'.repeat(3800) }, + { role: 'tool', content: 'query-logs: ' + 'e'.repeat(3600) }, + { role: 'assistant', content: 'I found several suspicious signals ' + 'f'.repeat(4100) }, + ]; + + const results: ToolResult[] = [ + { + name: 'query-incidents', + arguments: {}, + result: { + incidents: [{ id: 'inc-900', title: 'Major outage', service: 'payments' }] + } + } + ]; + + const answer = await synthesizeCopilotAnswer( + 'Summarize the current status', + results, + 'chat-long-history', + llm, + conversationHistory, + ); + + assert.strictEqual(answer.chatId, 'chat-long-history'); + assert.strictEqual(capturedMessages.length, conversationHistory.length + 1, + 'Synthesis should append exactly one final prompt after the supplied history'); + assert.deepStrictEqual( + capturedMessages.slice(0, conversationHistory.length), + conversationHistory, + 'Conversation history should be forwarded unchanged and in order', + ); + assert.strictEqual( + capturedMessages[capturedMessages.length - 1]?.role, + 'user', + 'Final synthesis prompt should be appended as the last user message', + ); + assert.ok( + capturedMessages[capturedMessages.length - 1]?.content.includes('Summarize the current status'), + 'Final synthesis prompt should still include the current question', + ); + }); + test('includes correlations and anomalies in fallback on LLM failure', async () => { const llm: LlmClient = { async chat(_messages: LlmMessage[], _tools: Tool[]) { @@ -522,4 +584,3 @@ describe('synthesizeCopilotAnswer', () => { }); }); - diff --git a/tests/engine/referenceBuilder.test.ts b/tests/engine/referenceBuilder.test.ts index 5e7c7d0..bd285b1 100644 --- a/tests/engine/referenceBuilder.test.ts +++ b/tests/engine/referenceBuilder.test.ts @@ -412,6 +412,40 @@ test('referenceBuilder', async (t) => { assert.ok(refs.teams.includes('Velocity Team'), 'Should include team name'); }); + await t.test('does not create log references for metric tools with expression.search', () => { + const results: ToolResult[] = [ + { + name: 'query-metrics', + arguments: { + expression: { metricName: 'cpu_usage', search: 'something' }, + start: '2024-01-01T00:00:00Z', + end: '2024-01-01T01:00:00Z', + step: 60, + }, + result: [], + }, + ]; + + const refs = buildReferences(results); + assert.ok(!refs?.logs?.length, 'Metric tools should not produce log references'); + }); + await t.test('creates log references for actual log tools', () => { + const results: ToolResult[] = [ + { + name: 'query-logs', + arguments: { + expression: { search: 'error 504' }, + start: '2024-01-01T00:00:00Z', + end: '2024-01-01T01:00:00Z', + }, + result: { logs: [] }, + }, + ]; + + const refs = buildReferences(results); + assert.ok(refs?.logs?.length, 'Log tools should produce log references'); + assert.strictEqual(refs!.logs![0].expression.search, 'error 504'); + }); }); diff --git a/tests/engine/scopeInferer.test.ts b/tests/engine/scopeInferer.test.ts index 99f9392..0bd8caf 100644 --- a/tests/engine/scopeInferer.test.ts +++ b/tests/engine/scopeInferer.test.ts @@ -326,3 +326,102 @@ test('ScopeInferer: detects multiple team references', async () => { } }); +test('ScopeInferer: applyScope only matches exact query tool names', () => { + const inferer = new ScopeInferer(); + const inference = { + scope: { service: 'payment-api' }, + confidence: 0.8, + source: 'incident' as const, + reason: 'test', + }; + + const calls = [ + { name: 'query-logs', arguments: {} }, + { name: 'query-metrics', arguments: {} }, + { name: 'query-incidents', arguments: {} }, + { name: 'query-alerts', arguments: {} }, + { name: 'get-incident', arguments: {} }, + { name: 'get-incident-timeline', arguments: {} }, + { name: 'query-tickets', arguments: {} }, + { name: 'get-service', arguments: {} }, + ]; + + const updated = inferer.applyScope(calls, inference); + + // First 4 should have scope injected + for (const call of updated.slice(0, 4)) { + const scope = (call.arguments as JsonObject)?.scope as JsonObject | undefined; + assert.equal(scope?.service, 'payment-api', `${call.name} should get scope`); + } + + // Last 4 should NOT have scope injected + for (const call of updated.slice(4)) { + const scope = (call.arguments as JsonObject)?.scope as JsonObject | undefined; + assert.equal(scope, undefined, `${call.name} should NOT get scope`); + } +}); + +test('ScopeInferer: does not narrow scope when result items have different services', () => { + const inferer = new ScopeInferer(); + + const calls = [{ name: 'query-logs', arguments: {} }]; + + // When inferFromResults returns no service (multi-service), applyScope should not inject one + const noServiceInference = { + scope: {} as { service?: string }, + confidence: 0.75, + source: 'previous_query' as const, + reason: 'test', + }; + + const updated = inferer.applyScope(calls, noServiceInference); + const scope = (updated[0].arguments as JsonObject)?.scope as JsonObject | undefined; + assert.equal(scope, undefined, 'No scope should be applied when no service was inferred'); +}); + +test('ScopeInferer: infers scope when all log items share same service', async () => { + const inferer = new ScopeInferer(); + + const results: ToolResult[] = [ + { + name: 'query-logs', + result: { + logs: [ + { id: 'log-1', service: 'payment-api' }, + { id: 'log-2', service: 'payment-api' }, + ], + }, + }, + ]; + + const inference = await inferer.inferScope('what happened?', results); + + assert.ok(inference, 'Should infer scope when all items share the same service'); + assert.equal(inference.scope.service, 'payment-api'); +}); + +test('ScopeInferer: does not narrow multi-service log results', async () => { + const inferer = new ScopeInferer(); + + const results: ToolResult[] = [ + { + name: 'query-logs', + result: { + logs: [ + { id: 'log-1', service: 'api-gateway' }, + { id: 'log-2', service: 'auth-service' }, + ], + }, + }, + ]; + + const inference = await inferer.inferScope('show me logs', results); + + if (inference?.scope.service) { + assert.ok( + inference.scope.service !== 'api-gateway' && inference.scope.service !== 'auth-service', + `Should not pick one service from multi-service results, got: ${inference.scope.service}`, + ); + } +}); + diff --git a/tests/followUpEngine.test.ts b/tests/followUpEngine.test.ts index e5aea2d..606d1d4 100644 --- a/tests/followUpEngine.test.ts +++ b/tests/followUpEngine.test.ts @@ -159,4 +159,60 @@ test('FollowUpEngine', async (t) => { assert.strictEqual(toolServiceKeys.length, uniqueKeys.size, 'Should have no duplicate tool+service combinations'); }); + + await t.test('resolved incidents do not trigger orchestration plan suggestions', async () => { + const results: ToolResult[] = [ + { + name: 'query-incidents', + result: [{ id: 'INC-001', service: 'payments', status: 'resolved' }], + arguments: {}, + }, + ]; + + const suggestions = await engine.applyFollowUps(results, 'test', [], 'show me services'); + const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans'); + assert.strictEqual(orchestrationCalls.length, 0, 'Resolved incidents should not trigger orchestration plans'); + }); + + await t.test('incidents with unknown status do not trigger problem detection', async () => { + const results: ToolResult[] = [ + { + name: 'query-incidents', + result: [{ id: 'INC-002', service: 'payments', status: 'postmortem' }], + arguments: {}, + }, + ]; + + const suggestions = await engine.applyFollowUps(results, 'test', [], 'list recent entries'); + const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans'); + assert.strictEqual(orchestrationCalls.length, 0, 'Unknown statuses should not trigger orchestration lookups'); + }); + + await t.test('explicitly active incidents still trigger problem detection', async () => { + const results: ToolResult[] = [ + { + name: 'query-incidents', + result: [{ id: 'INC-003', service: 'payments', status: 'triggered' }], + arguments: {}, + }, + ]; + + const suggestions = await engine.applyFollowUps(results, 'test', [], 'what is happening?'); + const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans'); + assert.ok(orchestrationCalls.length > 0, 'Explicitly active incidents should still trigger orchestration plans'); + }); + + await t.test('cleared alerts do not trigger problem detection', async () => { + const results: ToolResult[] = [ + { + name: 'query-alerts', + result: [{ id: 'ALT-001', service: 'api', status: 'cleared' }], + arguments: {}, + }, + ]; + + const suggestions = await engine.applyFollowUps(results, 'test', [], 'show me the dashboard'); + const orchestrationCalls = suggestions.filter(c => c.name === 'query-orchestration-plans'); + assert.strictEqual(orchestrationCalls.length, 0, 'Cleared alerts should not trigger orchestration plans'); + }); }); diff --git a/tests/referenceResolver.test.ts b/tests/referenceResolver.test.ts index 1213d93..f793279 100644 --- a/tests/referenceResolver.test.ts +++ b/tests/referenceResolver.test.ts @@ -310,3 +310,30 @@ test('ReferenceResolver: uses prominence as tiebreaker when timestamps are equal const resolved = resolutions.get('that incident'); assert.equal(resolved, 'inc-005'); }); + +test('ReferenceResolver: extracts patterns from original-case question', async () => { + const resolver = new ReferenceResolver(referenceRegistry); + const context: ConversationContext = { + chatId: 'test-chat', + entities: new Map(), + }; + + // The question has mixed casing — extraction should work without crashing + const resolutions = await resolver.resolveReferences( + 'What about That Incident?', + context, + [], + ); + + assert.ok(resolutions instanceof Map); +}); + +test('ReferenceResolver: applyResolutions works case-insensitively on original text', () => { + const resolver = new ReferenceResolver(referenceRegistry); + const resolutions = new Map(); + resolutions.set('That Incident', 'INC-1234'); + + const result = resolver.applyResolutions('Tell me about That Incident please', resolutions); + assert.ok(result.includes('INC-1234')); + assert.ok(!result.includes('That Incident')); +}); diff --git a/tests/resultCache.test.ts b/tests/resultCache.test.ts index cb92e18..2948b60 100644 --- a/tests/resultCache.test.ts +++ b/tests/resultCache.test.ts @@ -305,3 +305,43 @@ test('expired entries count as misses', async () => { assert.equal(stats.hits, 1); assert.equal(stats.misses, 1); }); + +test('cache.set removes existing fuzzy-matched entry to prevent duplicates', () => { + const cache = new ResultCache({ maxSize: 10, ttlMs: 60000 }); + + const call1: ToolCall = { + name: 'query-logs', + arguments: { + start: '2025-01-01T10:00:15.000Z', + end: '2025-01-01T10:30:00.000Z', + }, + }; + cache.set(call1, { name: 'query-logs', result: 'first' }); + + const call2: ToolCall = { + name: 'query-logs', + arguments: { + start: '2025-01-01T10:00:45.000Z', // +30s + end: '2025-01-01T10:30:00.000Z', + }, + }; + cache.set(call2, { name: 'query-logs', result: 'second' }); + + assert.equal(cache.stats().size, 1, 'Fuzzy-matched entries should replace, not accumulate'); + assert.deepEqual(cache.get(call2), { name: 'query-logs', result: 'second' }); +}); + +test('cache.set does not remove entries with different tool names', () => { + const cache = new ResultCache({ maxSize: 10, ttlMs: 60000 }); + + cache.set( + { name: 'query-logs', arguments: { start: '2025-01-01T10:00:00Z' } }, + { name: 'query-logs', result: 'logs' }, + ); + cache.set( + { name: 'query-metrics', arguments: { start: '2025-01-01T10:00:00Z' } }, + { name: 'query-metrics', result: 'metrics' }, + ); + + assert.equal(cache.stats().size, 2, 'Different tool names should not be deduped'); +}); From 03202785f10ea34b7ce4933f4c12ba881d5ea575 Mon Sep 17 00:00:00 2001 From: yusufaytas Date: Sun, 15 Mar 2026 22:45:30 +0000 Subject: [PATCH 2/2] Applied per iteration safeguard to follow ups and refiniing --- src/engine/copilotEngine.ts | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/engine/copilotEngine.ts b/src/engine/copilotEngine.ts index ba828a8..2a8aa4b 100644 --- a/src/engine/copilotEngine.ts +++ b/src/engine/copilotEngine.ts @@ -482,13 +482,25 @@ export class CopilotEngine { plannedCalls, )).filter((c) => availableToolNames.has(c.name)); - if (firstIterationFollowUps.length > 0) { + // 1. Refine the follow-ups to fill missing/default arguments (like start/end times) + const refinedFollowUps = await this.planRefiner.refineCalls( + firstIterationFollowUps, + this.mcp.getTools(), + conversationTurns, + allResults, + ); + + // 2. Ensure total executed calls do not exceed the per-iteration limit + const remainingCapacity = Math.max(0, MAX_TOOL_CALLS_PER_ITERATION - plannedCalls.length); + const limitedFollowUps = refinedFollowUps.slice(0, remainingCapacity); + + if (limitedFollowUps.length > 0) { console.log( - `[Copilot][${chatId}] First-iteration follow-ups: ${firstIterationFollowUps.length} call(s): ${firstIterationFollowUps.map((c) => c.name).join(", ")}`, + `[Copilot][${chatId}] First-iteration follow-ups (refined/limited): ${limitedFollowUps.length} call(s): ${limitedFollowUps.map((c) => c.name).join(", ")}`, ); const followUpResults = await this.runToolCallsWithCache( - this.limitToolCalls(firstIterationFollowUps), + limitedFollowUps, chatId, this.mcp.getTools(), trace,