diff --git a/index.ts b/index.ts index ac367227..09481cde 100644 --- a/index.ts +++ b/index.ts @@ -46,6 +46,7 @@ import { import { extractReflectionLearningGovernanceCandidates, extractInjectableReflectionMappedMemoryItems, + isRecallUsed, } from "./src/reflection-slices.js"; import { createReflectionEventId } from "./src/reflection-event-store.js"; import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js"; @@ -223,6 +224,22 @@ interface PluginConfig { skipLowValue?: boolean; maxExtractionsPerHour?: number; }; + feedback?: { + /** Boost importance when a recalled memory is used (default: 0.05) */ + boostOnUse?: number; + /** Penalty when a recalled memory is not used after consecutive misses (default: 0.03) */ + penaltyOnMiss?: number; + /** Extra boost when user explicitly confirms a recalled memory is correct (default: 0.15) */ + boostOnConfirm?: number; + /** Extra penalty when user explicitly corrects a recalled-but-unused memory (default: 0.10) */ + penaltyOnError?: number; + /** Minimum recall (injection) count before penalty applies to this memory (default: 2) */ + minRecallCountForPenalty?: number; + /** Keywords indicating user confirmation of a recalled memory */ + confirmKeywords?: string[]; + /** Keywords indicating user correction/error for a recalled-but-unused memory */ + errorKeywords?: string[]; + }; } type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; @@ -2004,6 +2021,19 @@ const memoryLanceDBProPlugin = { const autoCapturePendingIngressTexts = new Map(); const autoCaptureRecentTexts = new Map(); + // ======================================================================== + // Proposal A Phase 1: Recall Usage Tracking Hooks + // ======================================================================== + // Track pending recalls per session for usage scoring + type PendingRecallEntry = { + recallIds: string[]; + responseText: string; + injectedAt: number; + /** Summary text lines actually injected into the 
prompt, used for usage detection. */ + injectedSummaries: string[]; + }; + const pendingRecall = new Map(); + const logReg = isCliMode() ? api.logger.debug : api.logger.info; logReg( `memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})` @@ -2577,6 +2607,24 @@ const memoryLanceDBProPlugin = { `memory-lancedb-pro: injecting ${selected.length} memories into context for agent ${agentId}`, ); + // Create or update pendingRecall for this turn so the feedback hook + // (which runs in the NEXT turn's before_prompt_build after agent_end) + // sees a matching pair: Turn N recallIds + Turn N responseText. + // agent_end will write responseText into this same pendingRecall + // entry (only updating responseText, never clearing recallIds). + // Include agentId in the key so different agents in the same session do not overwrite each other's pendingRecall. + const sessionKeyForRecall = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentId ?? ""}`; + // Bug 1 fix: also store the injected summary lines so the feedback hook + // can detect usage even when the agent doesn't use stock phrases or IDs + // but directly incorporates the memory content into the response. + const injectedSummaries = selected.map((item) => item.line); + pendingRecall.set(sessionKeyForRecall, { + recallIds: selected.map((item) => item.id), + responseText: "", // Will be populated by agent_end + injectedAt: Date.now(), + injectedSummaries, + }); + return { prependContext: `\n` + @@ -2620,6 +2668,14 @@ const memoryLanceDBProPlugin = { recallHistory.delete(sessionId); turnCounter.delete(sessionId); lastRawUserMessage.delete(sessionId); + // P3 fix: clean all pendingRecall entries for this session. + // pendingRecall keys use format: sessionKey (or sessionKey:agentId with composite key). + // We clean any key that starts with this sessionId. 
+ for (const key of pendingRecall.keys()) { + if (key === sessionId || key.startsWith(`${sessionId}:`) || key.startsWith(`${ctx?.sessionKey ?? ""}:`)) { + pendingRecall.delete(key); + } + } } // Also clean by channelId/conversationId if present (shared cache key) const cacheKey = ctx?.channelId || ctx?.conversationId || ""; @@ -2972,7 +3028,200 @@ const memoryLanceDBProPlugin = { }; api.on("agent_end", agentEndAutoCaptureHook); - } + + // ======================================================================== + // Proposal A Phase 1: agent_end hook - Store response text for usage tracking + // ======================================================================== + // NOTE: Only writes responseText to an EXISTING pendingRecall entry created + // by before_prompt_build (auto-recall). Does NOT create a new entry. + // This ensures recallIds (written by auto-recall in the same turn) and + // responseText (written here) remain paired for the feedback hook. + api.on("agent_end", (event: any, ctx: any) => { + // Use same key format as auto-recall hook (sessionKey:agentId) so we update the right entry. + const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); + const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`; + if (!sessionKey) return; + + // Get the last message content + let lastMsgText: string | null = null; + if (event.messages && Array.isArray(event.messages)) { + const lastMsg = event.messages[event.messages.length - 1]; + if (lastMsg && typeof lastMsg === "object") { + const msgObj = lastMsg as Record; + lastMsgText = extractTextContent(msgObj.content); + } + } + + // Only update an existing pendingRecall entry — do NOT create one. + // This preserves recallIds written by auto-recall earlier in this turn. 
+ const existing = pendingRecall.get(sessionKey); + if (existing && lastMsgText && lastMsgText.trim().length > 0) { + existing.responseText = lastMsgText; + } + }, { priority: 20 }); + + // ======================================================================== + // Proposal A Phase 1: before_prompt_build hook (priority 5) - Score recalls + // ======================================================================== + api.on("before_prompt_build", async (event: any, ctx: any) => { + // Use same key format as auto-recall hook (sessionKey:agentId) so we read the right entry. + const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); + const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`; + const pending = pendingRecall.get(sessionKey); + if (!pending) return; + + // Guard: only score if responseText has substantial content + const responseText = pending.responseText; + if (!responseText || responseText.length <= 24) { + // Skip scoring for empty or very short responses + // Bug 5 fix: also clear pendingRecall so the next turn does not + // re-trigger feedback on stale recallIds / old responseText. + pendingRecall.delete(sessionKey); + return; + } + + // Read recall IDs directly from pendingRecall (populated by auto-recall's + // before_prompt_build hook from the PREVIOUS turn). This replaces the + // broken regex-based parsing of prependContext which never matched the + // actual [category:scope] format used by auto-recall injection. + const injectedIds = pending.recallIds ?? []; + + // Bug 1 fix: also retrieve the injected summary lines so isRecallUsed can + // detect when the agent directly incorporates memory content into the response. + const injectedSummaries = pending.injectedSummaries ?? 
[]; + + // Check if any recall was actually used by checking if the response contains reference to the injected content + // This is a heuristic - we check if the response shows awareness of injected memories + let usedRecall = false; + if (injectedIds.length > 0 || injectedSummaries.length > 0) { + // Use the real isRecallUsed function from reflection-slices + usedRecall = isRecallUsed(responseText, injectedIds, injectedSummaries); + } + + // Read feedback config values with defaults + const fb = config.feedback ?? {}; + const boostOnUse = fb.boostOnUse ?? 0.05; + const penaltyOnMiss = fb.penaltyOnMiss ?? 0.03; + const boostOnConfirm = fb.boostOnConfirm ?? 0.15; + const penaltyOnError = fb.penaltyOnError ?? 0.10; + const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2; + const confirmKeywords = fb.confirmKeywords ?? ["正確", "yes", "right", "沒錯", "確認", "correct", "ok"]; + const errorKeywords = fb.errorKeywords ?? ["不是", "錯", "不對", "wrong", "no", "not right", "錯誤", "更正"]; + + // event.prompt is a plain string in the current hook contract (confirmed by codebase usage). + // We extract the user's last message from event.messages array instead. 
+ let userPromptText = ""; + try { + if (event.messages && Array.isArray(event.messages)) { + for (let i = event.messages.length - 1; i >= 0; i--) { + const msg = event.messages[i]; + if (msg && msg.role === "user" && typeof msg.content === "string" && msg.content.trim().length > 0) { + userPromptText = msg.content.trim(); + break; + } + if (msg && msg.role === "user" && Array.isArray(msg.content)) { + // Handle array-form content + const text = extractTextContent(msg.content); + if (text && text.trim().length > 0) { + userPromptText = text.trim(); + break; + } + } + } + } + } catch (_e) { + userPromptText = ""; + } + + // Helper: check if text contains any of the keywords (case-insensitive) + const containsKeyword = (text: string, keywords: string[]): boolean => + keywords.some((kw) => text.toLowerCase().includes(kw.toLowerCase())); + + // Score the recall - update importance based on usage + // Score each recall individually — do NOT compute a single usedRecall for the whole batch. + // Bug 1 fix (P1): when auto-recall injects multiple memories, the agent may use only some of them. + // Scoring them all with one decision corrupts ranking: unused memories get boosted, used ones get penalized. + if (injectedIds.length > 0) { + try { + // Build lookup: recallId -> injected summary text for this specific recall + const summaryMap = new Map(); + for (let i = 0; i < injectedIds.length; i++) { + if (injectedSummaries[i]) { + summaryMap.set(injectedIds[i], injectedSummaries[i]); + } + } + + for (const recallId of injectedIds) { + const summaryText = summaryMap.get(recallId) ?? ""; + // Score this specific recall independently + const usedRecall = isRecallUsed( + responseText, + [recallId], + summaryText ? [summaryText] : [], + ); + + const entry = await store.getById(recallId, undefined); + if (!entry) continue; + const meta = parseSmartMetadata(entry.metadata, entry); + const currentImportance = meta.importance ?? entry.importance ?? 
0.5; + + if (usedRecall) { + let newImportance = Math.min(1.0, currentImportance + boostOnUse); + if (containsKeyword(userPromptText, confirmKeywords)) { + newImportance = Math.min(1.0, newImportance + boostOnConfirm); + } + await store.update(recallId, { importance: newImportance }, undefined); + await store.patchMetadata( + recallId, + { last_confirmed_use_at: Date.now(), bad_recall_count: 0 }, + undefined, + ); + } else { + const badCount = meta.bad_recall_count || 0; + let newImportance = currentImportance; + if (containsKeyword(userPromptText, errorKeywords)) { + if ((meta.injected_count || 0) >= minRecallCountForPenalty) { + newImportance = Math.max(0.1, newImportance - penaltyOnError); + } + await store.update(recallId, { importance: newImportance }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); + } else if (badCount >= 2) { + if ((meta.injected_count || 0) >= minRecallCountForPenalty) { + newImportance = Math.max(0.1, newImportance - penaltyOnMiss); + } + await store.update(recallId, { importance: newImportance }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); + } + } + } + } catch (err) { + api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`); + } finally { + pendingRecall.delete(sessionKey); + } + } + }, { priority: 5 }); + + // ======================================================================== + // Proposal A Phase 1: session_end hook - Clean up pending recalls + // ======================================================================== + api.on("session_end", (_event: any, ctx: any) => { + // P1 fix: clean all pendingRecall entries for this session, including composite keys. + // When autoCapture is false, the auto-capture session_end (priority 10) is skipped, + // so this hook must handle composite keys (sessionKey:agentId) as well. 
+ const sessionId = ctx?.sessionId || ""; + const sessionKey = ctx?.sessionKey || ""; + for (const key of pendingRecall.keys()) { + if ( + key === sessionKey || + key === sessionId || + key.startsWith(`${sessionKey}:`) || + key.startsWith(`${sessionId}:`) + ) { + pendingRecall.delete(key); + } + } + }, { priority: 20 }); // ======================================================================== // Integrated Self-Improvement (inheritance + derived) @@ -3971,16 +4220,21 @@ export function parsePluginConfig(value: unknown): PluginConfig { typeof cfg.retrieval === "object" && cfg.retrieval !== null ? (() => { const retrieval = { ...(cfg.retrieval as Record) } as Record; - if (typeof retrieval.rerankApiKey === "string") { + // Bug 6 fix: only resolve env vars for rerank fields when reranking is + // actually enabled AND the field contains a ${...} placeholder. + // This prevents startup failures when reranking is disabled and rerankApiKey + // is left as an unresolved placeholder. + const rerankEnabled = retrieval.rerank !== "none"; + if (rerankEnabled && typeof retrieval.rerankApiKey === "string" && retrieval.rerankApiKey.includes("${")) { retrieval.rerankApiKey = resolveEnvVars(retrieval.rerankApiKey); } - if (typeof retrieval.rerankEndpoint === "string") { + if (rerankEnabled && typeof retrieval.rerankEndpoint === "string" && retrieval.rerankEndpoint.includes("${")) { retrieval.rerankEndpoint = resolveEnvVars(retrieval.rerankEndpoint); } - if (typeof retrieval.rerankModel === "string") { + if (rerankEnabled && typeof retrieval.rerankModel === "string" && retrieval.rerankModel.includes("${")) { retrieval.rerankModel = resolveEnvVars(retrieval.rerankModel); } - if (typeof retrieval.rerankProvider === "string") { + if (rerankEnabled && typeof retrieval.rerankProvider === "string" && retrieval.rerankProvider.includes("${")) { retrieval.rerankProvider = resolveEnvVars(retrieval.rerankProvider); } return retrieval as any; @@ -4119,6 +4373,12 @@ export function 
parsePluginConfig(value: unknown): PluginConfig { : 30, } : { skipLowValue: false, maxExtractionsPerHour: 30 }, + // Bug 3 fix: parse and return the feedback config block so deployments + // that specify custom feedback values actually take effect instead of + // falling back to hardcoded defaults. + feedback: typeof cfg.feedback === "object" && cfg.feedback !== null + ? { ...(cfg.feedback as Record) } + : {}, }; } @@ -4131,7 +4391,7 @@ export function resetRegistration() { // Note: WeakSets cannot be cleared by design. In test scenarios where the // same process reloads the module, a fresh module state means a new WeakSet. // For hot-reload scenarios, the module is re-imported fresh. - _registeredApis.clear(); + // (WeakSet.clear() does not exist, so we do nothing here.) } export default memoryLanceDBProPlugin; diff --git a/package-lock.json b/package-lock.json index fcbf1b04..44f1a29c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "memory-lancedb-pro", - "version": "1.1.0-beta.9", + "version": "1.1.0-beta.10", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "memory-lancedb-pro", - "version": "1.1.0-beta.9", + "version": "1.1.0-beta.10", "license": "MIT", "dependencies": { "@lancedb/lancedb": "^0.26.2", @@ -223,6 +223,7 @@ "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-18.1.0.tgz", "integrity": "sha512-v/ShMp57iBnBp4lDgV8Jx3d3Q5/Hac25FWmQ98eMahUiHPXcvwIMKJD0hBIgclm/FCG+LwPkAKtkRO1O/W0YGg==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", diff --git a/src/auto-capture-cleanup.ts b/src/auto-capture-cleanup.ts index c5c00b7b..8b8953e2 100644 --- a/src/auto-capture-cleanup.ts +++ b/src/auto-capture-cleanup.ts @@ -113,6 +113,17 @@ function stripLeadingRuntimeWrappers(text: string): string { continue; } + // Bug fix: also strip known boilerplate continuation lines (e.g. 
+ // "Results auto-announce to your requester.", "Do not use any memory tools.") + // that appear right after the wrapper prefix. These lines do NOT match the + // wrapper prefix regex but are part of the wrapper boilerplate. + if (strippingLeadIn) { + AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.lastIndex = 0; + if (AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.test(current)) { + continue; + } + } + strippingLeadIn = false; cleanedLines.push(line); } diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index 7d39d8a7..036375fa 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -316,3 +316,98 @@ export function extractReflectionSliceItems(reflectionText: string): ReflectionS export function extractInjectableReflectionSliceItems(reflectionText: string): ReflectionSliceItem[] { return buildReflectionSliceItemsFromSlices(extractInjectableReflectionSlices(reflectionText)); } + +/** + * Check if a recall was actually used by the agent. + * This function determines whether the agent's response shows awareness of the injected memories. + * + * @param responseText - The agent's response text + * @param injectedIds - Array of memory IDs that were injected + * @param injectedSummaries - Optional array of summary text lines that were injected; + * if the response contains any of these verbatim or partially, + * it is a strong usage signal even without explicit markers or IDs. + * @returns true if the response shows evidence of using the recalled information + */ +export function isRecallUsed( + responseText: string, + injectedIds: string[], + injectedSummaries?: string[], +): boolean { + if (!responseText || responseText.length <= 24) { + return false; + } + if ((!injectedIds || injectedIds.length === 0) && (!injectedSummaries || injectedSummaries.length === 0)) { + return false; + } + + const responseLower = responseText.toLowerCase(); + + // Step 1: Check if the response contains any specific injected memory ID. 
+ // An ID match enables the marker-based confirmation in Step 2 (AND logic); note the summary-based check below can confirm usage without any ID. + const hasSpecificRecall = injectedIds.some( + (id) => id && responseLower.includes(id.toLowerCase()), + ); + + // Step 2: If a specific ID is present, also check for generic usage phrases. + // Both conditions must be met (AND logic) to confirm the recall was used. + if (hasSpecificRecall) { + const usageMarkers = [ + "remember", + "之前", + "记得", + "according to", + "based on what", + "as you mentioned", + "如前所述", + "如您所說", + "如您所说的", + "我記得", + "我记得", + "之前你說", + "之前你说", + "之前提到", + "之前提到的", + "根据之前", + "依据之前", + "按照之前", + "照您之前", + "照你说的", + "from previous", + "earlier you", + "in the memory", + "the memory mentioned", + "the memories show", + ]; + + for (const marker of usageMarkers) { + if (responseLower.includes(marker.toLowerCase())) { + return true; + } + } + } + + // Bug 1 fix (isRecallUsed): when summaries are provided, check if the response + // contains any of the injected summary text verbatim or as a near-identical + // substring. This catches the case where the agent directly uses the memory + // content without any explicit marker phrase. + if (injectedSummaries && injectedSummaries.length > 0) { + const responseTrimmedLower = responseText.trim().toLowerCase(); + for (const summary of injectedSummaries) { + if (summary && summary.trim().length > 0) { + const summaryLower = summary.trim().toLowerCase(); + // Check for verbatim or near-verbatim presence (at least 10 chars to avoid + // false positives on very short fragments). 
+ if ( + summaryLower.length >= 10 && + (responseTrimmedLower.includes(summaryLower) || + // Also check the reverse (summary contains response snippet — agent echoed it) + summaryLower.includes(responseTrimmedLower.slice(0, Math.min(50, responseTrimmedLower.length)))) + ) { + return true; + } + } + } + } + + return false; +} diff --git a/src/retriever.ts b/src/retriever.ts index 11bec158..4e7d6fd0 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -680,7 +680,7 @@ export class MemoryRetriever { ); } else { results = await this.hybridRetrieval( - query, safeLimit, scopeFilter, category, trace, + query, safeLimit, scopeFilter, category, trace, source, ); } @@ -749,7 +749,12 @@ export class MemoryRetriever { ); failureStage = "vector.postProcess"; - const recencyBoosted = this.applyRecencyBoost(mapped); + // Bug 7 fix: when decayEngine is active, skip applyRecencyBoost here + // because applyDecayBoost already incorporates recency into its composite + // score. Calling both double-counts recency for vector-only results. + const recencyBoosted = this.decayEngine + ? mapped + : this.applyRecencyBoost(mapped); if (diagnostics) diagnostics.stageCounts.afterRecency = recencyBoosted.length; const weighted = this.decayEngine ? recencyBoosted diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index cec72811..75c0eb5f 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -73,7 +73,7 @@ export function stripEnvelopeMetadata(text: string): string { // 0. Strip runtime orchestration wrappers that should never become memories // (sub-agent task scaffolding is execution metadata, not conversation content). 
let cleaned = text.replace( - /^\[(?:Subagent Context|Subagent Task)\]\s*(?:You are running as a subagent.*?(?:$|(?<=\.)\s+)|Results auto-announce to your requester\.?\s*|do not busy-poll for status\.?\s*|Reply with a brief acknowledgment only\.?\s*|Do not use any memory tools\.?\s*)?/gim, + /^\[(?:Subagent Context|Subagent Task)\]\s*(?:You are running as a subagent\b.*?(?:$|(?<=\.)\s+)|Results auto-announce to your requester\.?\s*|do not busy-poll for status\.?\s*|Reply with a brief acknowledgment only\.?\s*|Do not use any memory tools\.?\s*)?/gim, "", ); cleaned = cleaned.replace(