diff --git a/index.ts b/index.ts
index 4baf40f9..7b9a9987 100644
--- a/index.ts
+++ b/index.ts
@@ -46,6 +46,7 @@ import {
 import {
   extractReflectionLearningGovernanceCandidates,
   extractInjectableReflectionMappedMemoryItems,
+  isRecallUsed,
 } from "./src/reflection-slices.js";
 import { createReflectionEventId } from "./src/reflection-event-store.js";
 import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js";
@@ -225,6 +226,22 @@ interface PluginConfig {
     skipLowValue?: boolean;
     maxExtractionsPerHour?: number;
   };
+  feedback?: {
+    /** Importance boost applied to a recalled memory that the response used (default: 0.05) */
+    boostOnUse?: number;
+    /** Importance penalty for a recalled memory that was not used and already has a recorded bad recall (default: 0.03) */
+    penaltyOnMiss?: number;
+    /** Extra boost, on top of boostOnUse, when the user's message contains a confirm keyword (default: 0.15) */
+    boostOnConfirm?: number;
+    /** Importance penalty applied to each recalled memory when the user's message contains an error keyword (default: 0.10) */
+    penaltyOnError?: number;
+    /** Minimum injected_count a memory must have before any penalty is applied to it (default: 2) */
+    minRecallCountForPenalty?: number;
+    /** Keywords (matched case-insensitively as substrings) that count as user confirmation */
+    confirmKeywords?: string[];
+    /** Keywords (matched case-insensitively as substrings) that count as a user correction */
+    errorKeywords?: string[];
+  };
 }
 
 type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high";
@@ -2006,6 +2023,32 @@ const memoryLanceDBProPlugin = {
     const autoCapturePendingIngressTexts = new Map();
     const autoCaptureRecentTexts = new Map();
 
+    // ========================================================================
+    // Proposal A Phase 1: Recall Usage Tracking Hooks
+    // ========================================================================
+    // One pending-recall record per session/agent key, consumed by usage scoring.
+    type PendingRecallEntry = {
+      recallIds: string[];
+      responseText: string;
+      injectedAt: number;
+      /** Summary text lines actually injected into the prompt, used for usage detection. */
+      injectedSummaries: string[];
+    };
+    // TTL-based cleanup keeps pendingRecall from growing without bound.
+    // Entries older than PENDING_RECALL_MAX_AGE_MS are removed whenever cleanupPendingRecall() runs.
+    const PENDING_RECALL_MAX_AGE_MS = 10 * 60 * 1000; // 10 minutes
+    function cleanupPendingRecall(): void {
+      const now = Date.now();
+      for (const [key, entry] of pendingRecall.entries()) {
+        if (now - entry.injectedAt > PENDING_RECALL_MAX_AGE_MS) {
+          pendingRecall.delete(key);
+        }
+      }
+    }
+    const pendingRecall = new Map();
+    // NOTE(review): pendingRecall was created on the line above, so this call finds nothing to remove.
+    cleanupPendingRecall();
+
     const logReg = isCliMode() ? api.logger.debug : api.logger.info;
     logReg(
       `memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})`
@@ -2469,7 +2512,9 @@ const memoryLanceDBProPlugin = {
             const nextBadRecallCount = staleInjected
               ? meta.bad_recall_count + 1
               : meta.bad_recall_count;
-            const shouldSuppress = nextBadRecallCount >= 3 && minRepeated > 0;
+            // Suppression threshold lowered from 3 to 2 bad recalls so that it lines up
+            // with the scoring path: the penalty and the suppression start together.
+            const shouldSuppress = nextBadRecallCount >= 2 && minRepeated > 0;
             await store.patchMetadata(
               item.id,
               {
@@ -2496,6 +2541,27 @@ const memoryLanceDBProPlugin = {
           `memory-lancedb-pro: injecting ${selected.length} memories into context for agent ${agentId}`,
         );
 
+        // Create or replace the pendingRecall entry for this turn so the feedback hook
+        // (which runs in the NEXT turn's before_prompt_build, after agent_end)
+        // sees a matching pair: Turn N recallIds + Turn N responseText.
+        // agent_end writes responseText into this same pendingRecall
+        // entry (it only updates responseText and never clears recallIds).
+        // The key includes agentId so that different agents in the same session do not overwrite each other's entry.
+        const sessionKeyForRecall = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentId ?? ""}`;
+        // Also store the injected summary lines so the feedback hook can detect
+        // usage when the agent uses neither stock phrases nor memory IDs
+        // but incorporates the memory content directly into the response.
+        // Only item.summary is stored (no prefix) so that text matching is accurate.
+        const injectedSummaries = selected.map((item) => item.summary);
+        // Run the TTL cleanup before each set() to keep the map bounded.
+        cleanupPendingRecall();
+        pendingRecall.set(sessionKeyForRecall, {
+          recallIds: selected.map((item) => item.id),
+          responseText: "", // Will be populated by agent_end
+          injectedAt: Date.now(),
+          injectedSummaries,
+        });
+
         return {
           prependContext:
             `\n` +
@@ -2539,6 +2605,15 @@ const memoryLanceDBProPlugin = {
         recallHistory.delete(sessionId);
         turnCounter.delete(sessionId);
         lastRawUserMessage.delete(sessionId);
+        // Remove every pendingRecall entry that belongs to this session.
+        // Keys are either the bare session id or a composite "sessionKey:agentId".
+        // sessionKeyToClean falls back to sessionId only when ctx.sessionKey is null/undefined.
+        const sessionKeyToClean = ctx?.sessionKey ?? sessionId;
+        for (const key of pendingRecall.keys()) {
+          if (key === sessionId || key.startsWith(`${sessionId}:`) || (sessionKeyToClean && key.startsWith(`${sessionKeyToClean}:`))) {
+            pendingRecall.delete(key);
+          }
+        }
       }
       // Also clean by channelId/conversationId if present (shared cache key)
       const cacheKey = ctx?.channelId || ctx?.conversationId || "";
@@ -2891,8 +2966,205 @@ const memoryLanceDBProPlugin = {
       };
 
       api.on("agent_end", agentEndAutoCaptureHook);
+
+      // ========================================================================
+      // Proposal A Phase 1: agent_end hook - Store response text for usage tracking
+      // ========================================================================
+      // NOTE: Only writes responseText to an EXISTING pendingRecall entry created
+      // by before_prompt_build (auto-recall). It never creates a new entry.
+      // This keeps recallIds (written by auto-recall in the same turn) and
+      // responseText (written here) paired for the feedback hook.
+      api.on("agent_end", (event: any, ctx: any) => {
+        // Same key format as the auto-recall hook (sessionKey:agentId) so the right entry is updated.
+        const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
+        const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`;
+        if (!sessionKey) return;
+
+        // Text of the LAST entry in event.messages; NOTE(review): its role is not checked here.
+        let lastMsgText: string | null = null;
+        if (event.messages && Array.isArray(event.messages)) {
+          const lastMsg = event.messages[event.messages.length - 1];
+          if (lastMsg && typeof lastMsg === "object") {
+            const msgObj = lastMsg as Record;
+            lastMsgText = extractTextContent(msgObj.content);
+          }
+        }
+
+        // Only update an existing pendingRecall entry — do NOT create one.
+        // This preserves recallIds written by auto-recall earlier in this turn.
+        const existing = pendingRecall.get(sessionKey);
+        if (existing && lastMsgText && lastMsgText.trim().length > 0) {
+          existing.responseText = lastMsgText;
+        }
+      }, { priority: 20 });
+
+      // ========================================================================
+      // Proposal A Phase 1: before_prompt_build hook (priority 5) - Score recalls
+      // ========================================================================
+      api.on("before_prompt_build", async (event: any, ctx: any) => {
+        // Same key format as the auto-recall hook (sessionKey:agentId) so the right entry is read.
+        const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
+        const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`;
+        // Also run the TTL cleanup on the read path, for sessions that never trigger set().
+        cleanupPendingRecall();
+        const pending = pendingRecall.get(sessionKey);
+        if (!pending) return;
+
+        // Guard: only score when responseText has substantial content.
+        const responseText = pending.responseText;
+        if (!responseText || responseText.length <= 24) {
+          // Skip scoring for empty or very short responses, and drop the
+          // entry so the next turn does not re-trigger feedback on stale
+          // recallIds / old responseText.
+          pendingRecall.delete(sessionKey);
+          return;
+        }
+
+        // Recall IDs come straight from the pendingRecall entry that the
+        // auto-recall hook stored on the PREVIOUS turn. This replaces the
+        // earlier regex-based parsing of prependContext, which never matched the
+        // actual [category:scope] format used by auto-recall injection.
+        const injectedIds = pending.recallIds ?? [];
+
+        // The injected summary lines let isRecallUsed detect responses that
+        // incorporate memory content directly.
+        const injectedSummaries = pending.injectedSummaries ?? [];
+
+        // Usage is decided per recall inside the scoring loop below, so no
+        // batch-level isRecallUsed() call is made here.
+
+        // Read the feedback weights. `Number(x ?? 0) ?? d` can never reach `d`
+        // (Number() never returns null/undefined), so an unset option used to
+        // become 0 and silently disabled boosts and penalties. Fall back only when
+        // the option is unset or not a finite number; an explicit 0 is preserved.
+        const readWeight = (raw: unknown, fallback: number): number => {
+          const parsed = raw == null ? fallback : Number(raw);
+          return Number.isFinite(parsed) ? parsed : fallback;
+        };
+        const fb = config.feedback ?? {};
+        const boostOnUse = readWeight(fb.boostOnUse, 0.05);
+        const penaltyOnMiss = readWeight(fb.penaltyOnMiss, 0.03);
+        const boostOnConfirm = readWeight(fb.boostOnConfirm, 0.15);
+        const penaltyOnError = readWeight(fb.penaltyOnError, 0.10);
+        const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2;
+        const confirmKeywords = fb.confirmKeywords ?? ["correct", "right", "yes", "confirmed", "exactly", "對", "沒錯", "正確", "確認", "好的"];
+        const errorKeywords = fb.errorKeywords ?? ["wrong", "incorrect", "not right", "that's wrong", "error", "mistake", "fix it", "change that", "改成", "改為", "不是這樣", "不對", "錯了"];
+
+        // event.prompt is a plain string in the current hook contract (confirmed by codebase usage).
+        // We extract the user's last message from event.messages array instead.
+        let userPromptText = "";
+        try {
+          if (event.messages && Array.isArray(event.messages)) {
+            for (let i = event.messages.length - 1; i >= 0; i--) {
+              const msg = event.messages[i];
+              if (msg && msg.role === "user" && typeof msg.content === "string" && msg.content.trim().length > 0) {
+                userPromptText = msg.content.trim();
+                break;
+              }
+              if (msg && msg.role === "user" && Array.isArray(msg.content)) {
+                // Array-form content: flatten it to text with extractTextContent.
+                const text = extractTextContent(msg.content);
+                if (text && text.trim().length > 0) {
+                  userPromptText = text.trim();
+                  break;
+                }
+              }
+            }
+          }
+        } catch (_e) { // best-effort: an unreadable message list means "no user text"
+          userPromptText = "";
+        }
+
+        // Helper: true when text contains any keyword as a case-insensitive substring.
+        const containsKeyword = (text: string, keywords: string[]): boolean =>
+          keywords.some((kw) => text.toLowerCase().includes(kw.toLowerCase()));
+
+        // Update importance from usage, one recall at a time.
+        // Do NOT compute a single usedRecall for the whole batch: when auto-recall
+        // injects several memories the agent may use only some of them, and a single
+        // decision would boost unused memories or penalize used ones.
+        if (injectedIds.length > 0) {
+          try {
+            // Lookup: recallId -> the summary text injected for that recall (paired by index).
+            const summaryMap = new Map();
+            for (let i = 0; i < injectedIds.length; i++) {
+              if (injectedSummaries[i]) {
+                summaryMap.set(injectedIds[i], injectedSummaries[i]);
+              }
+            }
+
+            for (const recallId of injectedIds) {
+              const summaryText = summaryMap.get(recallId) ?? "";
+              // Usage decision for this recall only (its own ID and its own summary).
+              const usedRecall = isRecallUsed(
+                responseText,
+                [recallId],
+                summaryText ? [summaryText] : [],
+              );
+
+              const entry = await store.getById(recallId, undefined);
+              if (!entry) continue;
+              const meta = parseSmartMetadata(entry.metadata, entry);
+              const currentImportance = meta.importance ?? entry.importance ?? 0.5;
+
+              // errorKeywords are checked BEFORE usedRecall: an explicit user
+              // correction overrides the heuristic usage detection.
+              // last_confirmed_use_at is also written in that branch; the intent stated by the
+              // author is to stop the injection path's staleInjected from double-counting this turn.
+              const hasError = containsKeyword(userPromptText, errorKeywords);
+              if (hasError) {
+                if ((meta.injected_count || 0) >= minRecallCountForPenalty) {
+                  await store.update(recallId, { importance: Math.max(0.1, currentImportance - penaltyOnError) }, undefined);
+                }
+                await store.patchMetadata(recallId, { bad_recall_count: (meta.bad_recall_count || 0) + 1, last_confirmed_use_at: Date.now() }, undefined);
+              } else if (usedRecall) {
+                // Positive use: boost importance (capped at 1.0), plus boostOnConfirm on a confirm keyword.
+                let newImportance = Math.min(1.0, currentImportance + boostOnUse);
+                if (containsKeyword(userPromptText, confirmKeywords)) {
+                  newImportance = Math.min(1.0, newImportance + boostOnConfirm);
+                }
+                await store.update(recallId, { importance: newImportance }, undefined);
+                await store.patchMetadata(recallId, { last_confirmed_use_at: Date.now(), bad_recall_count: 0 }, undefined);
+              } else {
+                // Silent miss: penalize only when a bad recall is already recorded (badCount >= 1)
+                // and the memory has been injected at least minRecallCountForPenalty times.
+                const badCount = meta.bad_recall_count || 0;
+                if (badCount >= 1 && (meta.injected_count || 0) >= minRecallCountForPenalty) {
+                  await store.update(recallId, { importance: Math.max(0.1, currentImportance - penaltyOnMiss) }, undefined);
+                }
+                // bad_recall_count is not incremented here; the injection path does that via staleInjected.
+              }
+            }
+          } catch (err) {
+            api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`);
+          } finally {
+            pendingRecall.delete(sessionKey);
+          }
+        }
+      }, { priority: 5 });
     }
 
+    // ========================================================================
+    // Proposal A Phase 1: session_end hook - Clean up pending recalls
+    // ========================================================================
+    api.on("session_end", (_event: any, ctx: any) => {
+      // Remove every pendingRecall entry for this session, including composite keys.
+      // When autoCapture is false, the auto-capture session_end (priority 10) is skipped,
+      // so this hook must handle composite keys (sessionKey:agentId) as well.
+      const sessionId = ctx?.sessionId || "";
+      const sessionKey = ctx?.sessionKey || "";
+      for (const key of pendingRecall.keys()) {
+        if (
+          key === sessionKey ||
+          key === sessionId ||
+          key.startsWith(`${sessionKey}:`) ||
+          key.startsWith(`${sessionId}:`)
+        ) {
+          pendingRecall.delete(key);
+        }
+      }
+    }, { priority: 20 });
+
     // ========================================================================
     // Integrated Self-Improvement (inheritance + derived)
     // ========================================================================
@@ -4045,6 +4317,12 @@ export function parsePluginConfig(value: unknown): PluginConfig {
             : 30,
         }
       : { skipLowValue: false, maxExtractionsPerHour: 30 },
+    // Return the feedback block (shallow copy) so that custom feedback values
+    // from the deployment config reach the scoring hook; a missing or
+    // non-object value yields an empty object. Individual fields are not validated here.
+    feedback: typeof cfg.feedback === "object" && cfg.feedback !== null
+      ? { ...(cfg.feedback as Record) }
+      : {},
   };
 }
 
diff --git a/src/auto-capture-cleanup.ts b/src/auto-capture-cleanup.ts
index c5c00b7b..8b8953e2 100644
--- a/src/auto-capture-cleanup.ts
+++ b/src/auto-capture-cleanup.ts
@@ -113,6 +113,17 @@ function stripLeadingRuntimeWrappers(text: string): string {
       continue;
     }
 
+    // While still inside the leading wrapper, also strip known boilerplate
+    // continuation lines (e.g. "Results auto-announce to your requester.",
+    // "Do not use any memory tools."). These lines do NOT match the wrapper
+    // prefix regex but are part of the wrapper boilerplate.
+    if (strippingLeadIn) {
+      AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.lastIndex = 0;
+      if (AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.test(current)) {
+        continue;
+      }
+    }
+
     strippingLeadIn = false;
     cleanedLines.push(line);
   }
diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts
index 7d39d8a7..a8d3c4ef 100644
--- a/src/reflection-slices.ts
+++ b/src/reflection-slices.ts
@@ -316,3 +316,98 @@ export function extractReflectionSliceItems(reflectionText: string): ReflectionS
 export function extractInjectableReflectionSliceItems(reflectionText: string): ReflectionSliceItem[] {
   return buildReflectionSliceItemsFromSlices(extractInjectableReflectionSlices(reflectionText));
 }
+
+/**
+ * Check if a recall was actually used by the agent.
+ * Heuristically decides whether the agent's response shows awareness of the injected memories.
+ *
+ * @param responseText - The agent's response text
+ * @param injectedIds - Array of memory IDs that were injected
+ * @param injectedSummaries - Optional array of summary text lines that were injected;
+ *                            a case-insensitive match between a summary and the
+ *                            response is treated as usage even without markers or IDs.
+ * @returns true if the response shows evidence of using the recalled information;
+ *          false when the trimmed response is shorter than 25 characters or when
+ *          neither a memory ID nor a usable (>= 10 character) summary was supplied
+ */
+export function isRecallUsed(
+  responseText: string,
+  injectedIds: string[],
+  injectedSummaries?: string[],
+): boolean {
+  const MIN_RESPONSE_CHARS = 25; // shorter responses are never scored
+  const MIN_SUMMARY_CHARS = 10; // shorter summaries match too easily
+  const ECHO_PREFIX_CHARS = 50; // length of the response opening used by the echo check
+
+  // Tolerate null/undefined lists and non-string entries so that a missing ID
+  // list combined with valid summaries cannot throw further down.
+  // Summaries are trimmed and lowercased once; summaries that are too short to
+  // be a meaningful signal are dropped here.
+  const ids = (injectedIds ?? []).filter((id) => typeof id === "string" && id !== "");
+  const summaries = (injectedSummaries ?? [])
+    .filter((summary) => typeof summary === "string")
+    .map((summary) => summary.trim().toLowerCase())
+    .filter((summary) => summary.length >= MIN_SUMMARY_CHARS);
+
+  // Measure the TRIMMED response. Whitespace padding must not satisfy the
+  // length guard: a blank or tiny response would make the echo check below
+  // search for "" (or a one-word snippet), which matches almost any summary.
+  const responseLower = typeof responseText === "string" ? responseText.trim().toLowerCase() : "";
+  if (responseLower.length < MIN_RESPONSE_CHARS) {
+    return false;
+  }
+  // Nothing usable was injected, so there is nothing to detect.
+  if (ids.length === 0 && summaries.length === 0) {
+    return false;
+  }
+
+  // ID path: the response must quote an injected memory ID AND contain a
+  // generic recall phrase. Both conditions are required (AND logic).
+  const mentionsInjectedId = ids.some((id) => responseLower.includes(id.toLowerCase()));
+  if (mentionsInjectedId) {
+    // Every marker is already lowercase, so it is compared as-is.
+    const usageMarkers = [
+      "remember",
+      "之前",
+      "记得",
+      "according to",
+      "based on what",
+      "as you mentioned",
+      "如前所述",
+      "如您所說",
+      "如您所说的",
+      "我記得",
+      "我记得",
+      "之前你說",
+      "之前你说",
+      "之前提到",
+      "之前提到的",
+      "根据之前",
+      "依据之前",
+      "按照之前",
+      "照您之前",
+      "照你说的",
+      "from previous",
+      "earlier you",
+      "in the memory",
+      "the memory mentioned",
+      "the memories show",
+    ];
+    if (usageMarkers.some((marker) => responseLower.includes(marker))) {
+      return true;
+    }
+  }
+
+  // Summary path: the summary text IS the injected memory, so there is no AND
+  // gate. A summary counts as used when the response contains it verbatim, or
+  // when the summary contains the opening of the response (the agent echoed
+  // it). The opening is at least MIN_RESPONSE_CHARS long thanks to the guard.
+  const responseOpening = responseLower.slice(0, ECHO_PREFIX_CHARS);
+  for (const summary of summaries) {
+    if (responseLower.includes(summary) || summary.includes(responseOpening)) {
+      return true;
+    }
+  }
+
+  return false;
+}
diff --git a/src/retriever.ts b/src/retriever.ts
index 769c248b..6223ff48 100644
--- a/src/retriever.ts
+++ b/src/retriever.ts
@@ -682,7 +682,7 @@ export class MemoryRetriever {
         );
       } else {
         results = await this.hybridRetrieval(
-          query, safeLimit, scopeFilter, category, trace,
+          query, safeLimit, scopeFilter, category, trace, source,
         );
       }
 
@@ -759,7 +759,12 @@ export class MemoryRetriever {
       );
 
       failureStage = "vector.postProcess";
-      const recencyBoosted = this.applyRecencyBoost(mapped);
+      // Bug 7 fix: when decayEngine is active, skip applyRecencyBoost here
+      // because applyDecayBoost already incorporates recency into its composite
+      // score. Calling both double-counts recency for vector-only results.
+      const recencyBoosted = this.decayEngine
+        ? mapped
+        : this.applyRecencyBoost(mapped);
       if (diagnostics) diagnostics.stageCounts.afterRecency = recencyBoosted.length;
       const weighted = this.decayEngine
         ? recencyBoosted