From 12aa085c46b9c7676aaf522416b24cff7a7f93a4 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Fri, 3 Apr 2026 19:08:37 +0800 Subject: [PATCH 01/21] feat: Proposal A Phase 1 - Recall usage tracking hooks - Add pendingRecall Map for tracking session recalls - Add agent_end hook to store response text for usage scoring - Add before_prompt_build hook (priority 5) to score recall usage - Add session_end hook to clean up pending recalls - Add isRecallUsed function to reflection-slices.ts - Guard: skip scoring for empty responseText (<=24 chars) Implements: recall usage tracking for Proposal A Phase 1 --- index.ts | 134 ++++++++++++++++++++++++++++++++++++++- src/reflection-slices.ts | 57 +++++++++++++++++ 2 files changed, 190 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index 4baf40f9..897b66ad 100644 --- a/index.ts +++ b/index.ts @@ -46,6 +46,7 @@ import { import { extractReflectionLearningGovernanceCandidates, extractInjectableReflectionMappedMemoryItems, + isRecallUsed, } from "./src/reflection-slices.js"; import { createReflectionEventId } from "./src/reflection-event-store.js"; import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js"; @@ -2006,6 +2007,17 @@ const memoryLanceDBProPlugin = { const autoCapturePendingIngressTexts = new Map(); const autoCaptureRecentTexts = new Map(); + // ======================================================================== + // Proposal A Phase 1: Recall Usage Tracking Hooks + // ======================================================================== + // Track pending recalls per session for usage scoring + type PendingRecallEntry = { + recallIds: string[]; + responseText: string; + injectedAt: number; + }; + const pendingRecall = new Map(); + const logReg = isCliMode() ? 
api.logger.debug : api.logger.info; logReg( `memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})` @@ -2891,7 +2903,127 @@ const memoryLanceDBProPlugin = { }; api.on("agent_end", agentEndAutoCaptureHook); - } + + // ======================================================================== + // Proposal A Phase 1:agent_end hook - Store response text for usage tracking + // ======================================================================== + api.on("agent_end", (event: any, ctx: any) => { + const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; + if (!sessionKey) return; + + // Get the last message content + let lastMsgText: string | null = null; + if (event.messages && Array.isArray(event.messages)) { + const lastMsg = event.messages[event.messages.length - 1]; + if (lastMsg && typeof lastMsg === "object") { + const msgObj = lastMsg as Record; + lastMsgText = extractTextContent(msgObj.content); + } + } + + // Store in pendingRecall if we have response text + if (lastMsgText && lastMsgText.trim().length > 0) { + pendingRecall.set(sessionKey, { + recallIds: [], // Will be populated by before_prompt_build + responseText: lastMsgText, + injectedAt: Date.now(), + }); + } + }, { priority: 20 }); + + // ======================================================================== + // Proposal A Phase 1: before_prompt_build hook (priority 5) - Score recalls + // ======================================================================== + api.on("before_prompt_build", async (event: any, ctx: any) => { + const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; + const pending = pendingRecall.get(sessionKey); + if (!pending) return; + + // Guard: only score if responseText has substantial content + const responseText = pending.responseText; + if (!responseText || responseText.length <= 24) { + // Skip scoring for empty 
or very short responses + return; + } + + // Extract injected IDs from prependContext if available + // The auto-recall injects memories with IDs in the injectedIds field + const injectedIds: string[] = []; + if (event.prependContext && typeof event.prependContext === "string") { + // Parse IDs from injected context - format is typically "- [category:scope] summary" + // We'll check if any recall IDs are present in the context + const match = event.prependContext.match(/\[([a-f0-9]{8,})\]/gi); + if (match) { + for (const m of match) { + const id = m.slice(1, -1); + if (id.length >= 8) injectedIds.push(id); + } + } + } + + // Update pending recall entry with IDs + pending.recallIds = injectedIds; + + // Check if any recall was actually used by checking if the response contains reference to the injected content + // This is a heuristic - we check if the response shows awareness of injected memories + let usedRecall = false; + if (injectedIds.length > 0) { + // Use the real isRecallUsed function from reflection-slices + usedRecall = isRecallUsed(responseText, injectedIds); + } + + // Score the recall - update importance based on usage + if (injectedIds.length > 0) { + try { + for (const recallId of injectedIds) { + const meta = parseSmartMetadata(undefined, { id: recallId, metadata: "" }); + // If we can't find the entry, skip + if (!meta) continue; + + if (usedRecall) { + // Recall was used - increase importance (cap at 1.0) + const newImportance = Math.min(1.0, (meta.importance || 0.5) + 0.05); + await store.patchMetadata( + recallId, + { + importance: newImportance, + last_confirmed_use_at: Date.now(), + }, + undefined + ); + } else { + // Recall was not used - increment bad_recall_count + const badCount = (meta.bad_recall_count || 0) + 1; + let newImportance = meta.importance || 0.5; + // Apply penalty after threshold (3 consecutive unused) + if (badCount >= 3) { + newImportance = Math.max(0.1, newImportance - 0.03); + } + await store.patchMetadata( + recallId, + 
{ + importance: newImportance, + bad_recall_count: badCount, + }, + undefined + ); + } + } + } catch (err) { + api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`); + } + } + }, { priority: 5 }); + + // ======================================================================== + // Proposal A Phase 1: session_end hook - Clean up pending recalls + // ======================================================================== + api.on("session_end", (_event: any, ctx: any) => { + const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; + if (sessionKey) { + pendingRecall.delete(sessionKey); + } + }, { priority: 20 }); // ======================================================================== // Integrated Self-Improvement (inheritance + derived) diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index 7d39d8a7..1f3b657e 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -316,3 +316,60 @@ export function extractReflectionSliceItems(reflectionText: string): ReflectionS export function extractInjectableReflectionSliceItems(reflectionText: string): ReflectionSliceItem[] { return buildReflectionSliceItemsFromSlices(extractInjectableReflectionSlices(reflectionText)); } + +/** + * Check if a recall was actually used by the agent. + * This function determines whether the agent's response shows awareness of the injected memories. 
+ * + * @param responseText - The agent's response text + * @param injectedIds - Array of memory IDs that were injected + * @returns true if the response shows evidence of using the recalled information + */ +export function isRecallUsed(responseText: string, injectedIds: string[]): boolean { + if (!responseText || responseText.length <= 24) { + return false; + } + if (!injectedIds || injectedIds.length === 0) { + return false; + } + + const responseLower = responseText.toLowerCase(); + + // Check for explicit recall usage markers + const usageMarkers = [ + "remember", + "之前", + "记得", + "记得", + "according to", + "based on what", + "as you mentioned", + "如前所述", + "如您所說", + "如您所说的", + "我記得", + "我记得", + "之前你說", + "之前你说", + "之前提到", + "之前提到的", + "根据之前", + "依据之前", + "按照之前", + "照您之前", + "照你说的", + "from previous", + "earlier you", + "in the memory", + "the memory mentioned", + "the memories show", + ]; + + for (const marker of usageMarkers) { + if (responseLower.includes(marker.toLowerCase())) { + return true; + } + } + + return false; +} From 32d29c8a656416407eaa04f6af4fa04394b80c8c Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Fri, 3 Apr 2026 20:31:50 +0800 Subject: [PATCH 02/21] fix: 3 critical bugs in feat/proposal-a-v3 1. Bug 1 (CRITICAL): injectedIds regex in feedback hook never matched - The feedback hook used a regex /\[([a-f0-9]{8,})\]/gi to parse IDs from prependContext, but auto-recall injects memories in format [preferences:global], [facts:dc-channel], NOT [hex-id]. - Fix: read recallIds directly from pendingRecall (which is populated by auto-recall's before_prompt_build from the previous turn). Also added code in auto-recall to store selected IDs into pendingRecall[sessionKey].recallIds before returning. 2. Bug 2 (MAJOR): stripEnvelopeMetadata regex had literal backspace (0x08) - In src/smart-extractor.ts line 76, a literal backspace character (byte 0x08) was embedded in the regex pattern between 'agent' and '.', producing 'agent[0x08].*?' 
instead of 'agent\b.*?'. - Fix: replaced the 0x08 byte with the proper \b word boundary. 3. Bug 3 (MAJOR): WeakSet.clear() does not exist - In index.ts resetRegistration(), _registeredApis.clear() was called, but WeakSet has no clear() method. - Fix: removed the .clear() call per the comment's own note. --- index.ts | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/index.ts b/index.ts index 897b66ad..ece0626c 100644 --- a/index.ts +++ b/index.ts @@ -2508,6 +2508,18 @@ const memoryLanceDBProPlugin = { `memory-lancedb-pro: injecting ${selected.length} memories into context for agent ${agentId}`, ); + // Store recall IDs in pendingRecall so the feedback hook (which runs + // in the NEXT turn's before_prompt_build after agent_end) can read + // them directly instead of trying to parse prependContext. + // (agent_end runs after before_prompt_build, so pendingRecall for + // this session was already created with empty recallIds by agent_end + // of the PREVIOUS turn.) 
+ const sessionKeyForRecall = ctx?.sessionKey || ctx?.sessionId || "default"; + const existingPending = pendingRecall.get(sessionKeyForRecall); + if (existingPending) { + existingPending.recallIds = selected.map((item) => item.id); + } + return { prependContext: `\n` + @@ -2946,23 +2958,11 @@ const memoryLanceDBProPlugin = { return; } - // Extract injected IDs from prependContext if available - // The auto-recall injects memories with IDs in the injectedIds field - const injectedIds: string[] = []; - if (event.prependContext && typeof event.prependContext === "string") { - // Parse IDs from injected context - format is typically "- [category:scope] summary" - // We'll check if any recall IDs are present in the context - const match = event.prependContext.match(/\[([a-f0-9]{8,})\]/gi); - if (match) { - for (const m of match) { - const id = m.slice(1, -1); - if (id.length >= 8) injectedIds.push(id); - } - } - } - - // Update pending recall entry with IDs - pending.recallIds = injectedIds; + // Read recall IDs directly from pendingRecall (populated by auto-recall's + // before_prompt_build hook from the PREVIOUS turn). This replaces the + // broken regex-based parsing of prependContext which never matched the + // actual [category:scope] format used by auto-recall injection. + const injectedIds = pending.recallIds ?? []; // Check if any recall was actually used by checking if the response contains reference to the injected content // This is a heuristic - we check if the response shows awareness of injected memories From 14100f9abf0e0ad1b3206072600a0ecfa3f7fc94 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Fri, 3 Apr 2026 20:47:02 +0800 Subject: [PATCH 03/21] fix: resolve 3 bugs in Proposal A feedback hooks (pendingRecall timing, parseSmartMetadata, importance row update) Bug 1 (P1): pendingRecall was written with recallIds from Turn N but responseText from Turn N-1, causing feedback to score the wrong memories. 
Fix: before_prompt_build (auto-recall) now CREATES pendingRecall with recallIds. agent_end now only WRITES responseText to an existing entry (never creates). Bug 2 (P2): parseSmartMetadata was called with empty placeholder metadata, returning fallback values instead of real entry data. Fix: use store.getById(recallId) to get the real entry before parsing. Bug 3 (P2): patchMetadata only updates the metadata JSON blob, not the entry.importance ROW column. applyImportanceWeight reads entry.importance, so importance adjustments never affected ranking. Fix: use store.update(id, { importance: newValue }) to update the row directly. --- index.ts | 76 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/index.ts b/index.ts index ece0626c..c18a9357 100644 --- a/index.ts +++ b/index.ts @@ -2508,17 +2508,17 @@ const memoryLanceDBProPlugin = { `memory-lancedb-pro: injecting ${selected.length} memories into context for agent ${agentId}`, ); - // Store recall IDs in pendingRecall so the feedback hook (which runs - // in the NEXT turn's before_prompt_build after agent_end) can read - // them directly instead of trying to parse prependContext. - // (agent_end runs after before_prompt_build, so pendingRecall for - // this session was already created with empty recallIds by agent_end - // of the PREVIOUS turn.) + // Create or update pendingRecall for this turn so the feedback hook + // (which runs in the NEXT turn's before_prompt_build after agent_end) + // sees a matching pair: Turn N recallIds + Turn N responseText. + // agent_end will write responseText into this same pendingRecall + // entry (only updating responseText, never clearing recallIds). 
const sessionKeyForRecall = ctx?.sessionKey || ctx?.sessionId || "default"; - const existingPending = pendingRecall.get(sessionKeyForRecall); - if (existingPending) { - existingPending.recallIds = selected.map((item) => item.id); - } + pendingRecall.set(sessionKeyForRecall, { + recallIds: selected.map((item) => item.id), + responseText: "", // Will be populated by agent_end + injectedAt: Date.now(), + }); return { prependContext: @@ -2917,8 +2917,12 @@ const memoryLanceDBProPlugin = { api.on("agent_end", agentEndAutoCaptureHook); // ======================================================================== - // Proposal A Phase 1:agent_end hook - Store response text for usage tracking + // Proposal A Phase 1: agent_end hook - Store response text for usage tracking // ======================================================================== + // NOTE: Only writes responseText to an EXISTING pendingRecall entry created + // by before_prompt_build (auto-recall). Does NOT create a new entry. + // This ensures recallIds (written by auto-recall in the same turn) and + // responseText (written here) remain paired for the feedback hook. api.on("agent_end", (event: any, ctx: any) => { const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; if (!sessionKey) return; @@ -2933,13 +2937,11 @@ const memoryLanceDBProPlugin = { } } - // Store in pendingRecall if we have response text - if (lastMsgText && lastMsgText.trim().length > 0) { - pendingRecall.set(sessionKey, { - recallIds: [], // Will be populated by before_prompt_build - responseText: lastMsgText, - injectedAt: Date.now(), - }); + // Only update an existing pendingRecall entry — do NOT create one. + // This preserves recallIds written by auto-recall earlier in this turn. 
+ const existing = pendingRecall.get(sessionKey); + if (existing && lastMsgText && lastMsgText.trim().length > 0) { + existing.responseText = lastMsgText; } }, { priority: 20 }); @@ -2976,20 +2978,30 @@ const memoryLanceDBProPlugin = { if (injectedIds.length > 0) { try { for (const recallId of injectedIds) { - const meta = parseSmartMetadata(undefined, { id: recallId, metadata: "" }); - // If we can't find the entry, skip - if (!meta) continue; + // Bug 2 fix: use store.getById to retrieve the real entry so we + // get the actual importance value, instead of calling + // parseSmartMetadata with empty placeholder metadata. + const entry = await store.getById(recallId, undefined); + if (!entry) continue; + const meta = parseSmartMetadata(entry.metadata, entry); if (usedRecall) { // Recall was used - increase importance (cap at 1.0) + // Bug 3 fix: use store.update to directly update the row-level + // importance column. patchMetadata only updates the metadata JSON + // blob but NOT the entry.importance field, so importance changes + // never affected ranking (applyImportanceWeight reads entry.importance). 
const newImportance = Math.min(1.0, (meta.importance || 0.5) + 0.05); + await store.update( + recallId, + { importance: newImportance }, + undefined, + ); + // Also update metadata JSON fields via patchMetadata (separate concern) await store.patchMetadata( recallId, - { - importance: newImportance, - last_confirmed_use_at: Date.now(), - }, - undefined + { last_confirmed_use_at: Date.now() }, + undefined, ); } else { // Recall was not used - increment bad_recall_count @@ -2999,13 +3011,15 @@ const memoryLanceDBProPlugin = { if (badCount >= 3) { newImportance = Math.max(0.1, newImportance - 0.03); } + await store.update( + recallId, + { importance: newImportance }, + undefined, + ); await store.patchMetadata( recallId, - { - importance: newImportance, - bad_recall_count: badCount, - }, - undefined + { bad_recall_count: badCount }, + undefined, ); } } From 6084f7307c90d40ef5c453856c1e7c26d44d01f1 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Fri, 3 Apr 2026 20:59:37 +0800 Subject: [PATCH 04/21] fix(proposal-a): 3 recall-usage bugs in feat/proposal-a-v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 1 [P1]: pendingRecall.delete() moved from session_end to feedback hook finally block — prevents repeated scoring of the same recallIds/ responseText pair when subsequent turns skip auto-recall (greeting, short input). Now deleted immediately after scoring completes. Bug 2 [P2]: confirmed use now resets bad_recall_count to 0 — so penalty threshold (3) only applies to truly consecutive misses, not interleaved confirmed-use/miss patterns. Bug 3 [P3]: retrieveWithTrace now forwards source to hybridRetrieval(), aligning debug/trace retrieval with real manual-recall behavior. 
--- index.ts | 7 ++++++- src/retriever.ts | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/index.ts b/index.ts index c18a9357..a32826a2 100644 --- a/index.ts +++ b/index.ts @@ -3000,7 +3000,7 @@ const memoryLanceDBProPlugin = { // Also update metadata JSON fields via patchMetadata (separate concern) await store.patchMetadata( recallId, - { last_confirmed_use_at: Date.now() }, + { last_confirmed_use_at: Date.now(), bad_recall_count: 0 }, undefined, ); } else { @@ -3025,6 +3025,11 @@ const memoryLanceDBProPlugin = { } } catch (err) { api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`); + } finally { + // Bug 1 fix: delete pendingRecall immediately after scoring so that + // subsequent turns (greeting, short input) that skip auto-recall do not + // re-trigger feedback scoring on the same recallIds/responseText pair. + pendingRecall.delete(sessionKey); } } }, { priority: 5 }); diff --git a/src/retriever.ts b/src/retriever.ts index 769c248b..432d8430 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -682,7 +682,7 @@ export class MemoryRetriever { ); } else { results = await this.hybridRetrieval( - query, safeLimit, scopeFilter, category, trace, + query, safeLimit, scopeFilter, category, trace, source, ); } From 421f63740753b49abe16278c2e36f78bfebb03d9 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Fri, 3 Apr 2026 21:22:04 +0800 Subject: [PATCH 05/21] fix(proposal-a): 4 final bugs (importance fallback, pendingRecall cleanup, env-resolve gate, recency double-boost) --- index.ts | 13 +++++++++++-- src/retriever.ts | 7 ++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/index.ts b/index.ts index a32826a2..6c88eaea 100644 --- a/index.ts +++ b/index.ts @@ -2957,6 +2957,9 @@ const memoryLanceDBProPlugin = { const responseText = pending.responseText; if (!responseText || responseText.length <= 24) { // Skip scoring for empty or very short responses + // Bug 5 fix: also clear pendingRecall so the next 
turn does not + // re-trigger feedback on stale recallIds / old responseText. + pendingRecall.delete(sessionKey); return; } @@ -2991,7 +2994,11 @@ const memoryLanceDBProPlugin = { // importance column. patchMetadata only updates the metadata JSON // blob but NOT the entry.importance field, so importance changes // never affected ranking (applyImportanceWeight reads entry.importance). - const newImportance = Math.min(1.0, (meta.importance || 0.5) + 0.05); + // Bug 4 fix (this file): also fall back to entry.importance (row-level) + // so old records that have no importance in metadata JSON still + // get a correct boost instead of always landing at 0.5. + const currentImportance = meta.importance ?? entry.importance ?? 0.5; + const newImportance = Math.min(1.0, currentImportance + 0.05); await store.update( recallId, { importance: newImportance }, @@ -3006,7 +3013,7 @@ const memoryLanceDBProPlugin = { } else { // Recall was not used - increment bad_recall_count const badCount = (meta.bad_recall_count || 0) + 1; - let newImportance = meta.importance || 0.5; + let newImportance = meta.importance ?? entry.importance ?? 0.5; // Apply penalty after threshold (3 consecutive unused) if (badCount >= 3) { newImportance = Math.max(0.1, newImportance - 0.03); @@ -4043,6 +4050,7 @@ export function parsePluginConfig(value: unknown): PluginConfig { typeof cfg.retrieval === "object" && cfg.retrieval !== null ? (() => { const retrieval = { ...(cfg.retrieval as Record) } as Record; +<<<<<<< HEAD // Bug 6 fix: only resolve env vars for rerank fields when reranking is // actually enabled AND the field contains a ${...} placeholder. 
// This prevents startup failures when reranking is disabled and rerankApiKey @@ -4060,6 +4068,7 @@ export function parsePluginConfig(value: unknown): PluginConfig { if (rerankEnabled && typeof retrieval.rerankProvider === "string" && retrieval.rerankProvider.includes("${")) { retrieval.rerankProvider = resolveEnvVars(retrieval.rerankProvider); } + } return retrieval as any; })() : undefined, diff --git a/src/retriever.ts b/src/retriever.ts index 432d8430..6223ff48 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -759,7 +759,12 @@ export class MemoryRetriever { ); failureStage = "vector.postProcess"; - const recencyBoosted = this.applyRecencyBoost(mapped); + // Bug 7 fix: when decayEngine is active, skip applyRecencyBoost here + // because applyDecayBoost already incorporates recency into its composite + // score. Calling both double-counts recency for vector-only results. + const recencyBoosted = this.decayEngine + ? mapped + : this.applyRecencyBoost(mapped); if (diagnostics) diagnostics.stageCounts.afterRecency = recencyBoosted.length; const weighted = this.decayEngine ? recencyBoosted From d68ee694381c776fd5e1900d7f872e32f2930797 Mon Sep 17 00:00:00 2001 From: Review Claw Date: Fri, 3 Apr 2026 22:24:54 +0800 Subject: [PATCH 06/21] fix(P1-1,P1-2,P2): Codex round-6 verification fixes P1-1 (isRecallUsed): Add direct injected-ID check - The function accepted injectedIds but never used them - Added loop to check if response contains any injected memory ID - This complements the existing stock-phrase check P1-2 (rerank env vars): Add rerank-enabled guard - Only resolve ${...} placeholders when rerank is actually enabled - Prevents startup failure when rerankApiKey has unresolved placeholder but reranking is disabled (rerank='none') P2 (multi-line wrapper stripping): Strip boilerplate continuation lines - stripLeadingRuntimeWrappers now also strips lines matching AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE (e.g. 
'Results auto-announce to your requester.', 'Do not use any memory tools.') while strippingLeadIn is still true, preventing these lines from being kept when they appear right after the wrapper prefix line --- index.ts | 2 -- src/auto-capture-cleanup.ts | 11 +++++++++++ src/reflection-slices.ts | 9 +++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/index.ts b/index.ts index 6c88eaea..b46eef27 100644 --- a/index.ts +++ b/index.ts @@ -4050,7 +4050,6 @@ export function parsePluginConfig(value: unknown): PluginConfig { typeof cfg.retrieval === "object" && cfg.retrieval !== null ? (() => { const retrieval = { ...(cfg.retrieval as Record) } as Record; -<<<<<<< HEAD // Bug 6 fix: only resolve env vars for rerank fields when reranking is // actually enabled AND the field contains a ${...} placeholder. // This prevents startup failures when reranking is disabled and rerankApiKey @@ -4068,7 +4067,6 @@ export function parsePluginConfig(value: unknown): PluginConfig { if (rerankEnabled && typeof retrieval.rerankProvider === "string" && retrieval.rerankProvider.includes("${")) { retrieval.rerankProvider = resolveEnvVars(retrieval.rerankProvider); } - } return retrieval as any; })() : undefined, diff --git a/src/auto-capture-cleanup.ts b/src/auto-capture-cleanup.ts index c5c00b7b..8b8953e2 100644 --- a/src/auto-capture-cleanup.ts +++ b/src/auto-capture-cleanup.ts @@ -113,6 +113,17 @@ function stripLeadingRuntimeWrappers(text: string): string { continue; } + // Bug fix: also strip known boilerplate continuation lines (e.g. + // "Results auto-announce to your requester.", "Do not use any memory tools.") + // that appear right after the wrapper prefix. These lines do NOT match the + // wrapper prefix regex but are part of the wrapper boilerplate. 
+ if (strippingLeadIn) { + AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.lastIndex = 0; + if (AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.test(current)) { + continue; + } + } + strippingLeadIn = false; cleanedLines.push(line); } diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index 1f3b657e..dc6fd20f 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -371,5 +371,14 @@ export function isRecallUsed(responseText: string, injectedIds: string[]): boole } } + // Bug fix (P1-1): also check if the response explicitly references any of the + // injected memory IDs. If the agent mentions the ID (e.g. "based on [abc-123]") + // that is a direct usage signal, not just a stock phrase. + for (const id of injectedIds) { + if (id && responseLower.includes(id.toLowerCase())) { + return true; + } + } + return false; } From 5bb2137537dca9bed64f01f13740436f41f24c9b Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 00:31:27 +0800 Subject: [PATCH 07/21] feat(proposal-a): add user confirmation signals, min_recall_count, and configurable feedback amplitudes --- index.ts | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/index.ts b/index.ts index b46eef27..911e9002 100644 --- a/index.ts +++ b/index.ts @@ -226,6 +226,22 @@ interface PluginConfig { skipLowValue?: boolean; maxExtractionsPerHour?: number; }; + feedback?: { + /** Boost importance when a recalled memory is used (default: 0.05) */ + boostOnUse?: number; + /** Penalty when a recalled memory is not used after consecutive misses (default: 0.03) */ + penaltyOnMiss?: number; + /** Extra boost when user explicitly confirms a recalled memory is correct (default: 0.15) */ + boostOnConfirm?: number; + /** Extra penalty when user explicitly corrects a non-recalled memory (default: 0.10) */ + penaltyOnError?: number; + /** Minimum recall (injection) count before penalty applies to this memory (default: 2) */ + 
minRecallCountForPenalty?: number; + /** Keywords indicating user confirmation of a recalled memory */ + confirmKeywords?: string[]; + /** Keywords indicating user correction/error for a non-recalled memory */ + errorKeywords?: string[]; + }; } type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; @@ -2977,6 +2993,20 @@ const memoryLanceDBProPlugin = { usedRecall = isRecallUsed(responseText, injectedIds); } + // Read feedback config values with defaults + const fb = config.feedback ?? {}; + const boostOnUse = fb.boostOnUse ?? 0.05; + const penaltyOnMiss = fb.penaltyOnMiss ?? 0.03; + const boostOnConfirm = fb.boostOnConfirm ?? 0.15; + const penaltyOnError = fb.penaltyOnError ?? 0.10; + const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2; + const confirmKeywords = fb.confirmKeywords ?? ["正確", "是", "對", "right", "yes", "沒錯", "對的"]; + const errorKeywords = fb.errorKeywords ?? ["不", "錯", "不是", "wrong", "no", "not right"]; + + // Helper: check if text contains any of the keywords (case-insensitive) + const containsKeyword = (text: string, keywords: string[]): boolean => + keywords.some((kw) => text.includes(kw)); + // Score the recall - update importance based on usage if (injectedIds.length > 0) { try { @@ -2987,6 +3017,7 @@ const memoryLanceDBProPlugin = { const entry = await store.getById(recallId, undefined); if (!entry) continue; const meta = parseSmartMetadata(entry.metadata, entry); + const currentImportance = meta.importance ?? entry.importance ?? 0.5; if (usedRecall) { // Recall was used - increase importance (cap at 1.0) @@ -2997,8 +3028,13 @@ const memoryLanceDBProPlugin = { // Bug 4 fix (this file): also fall back to entry.importance (row-level) // so old records that have no importance in metadata JSON still // get a correct boost instead of always landing at 0.5. - const currentImportance = meta.importance ?? entry.importance ?? 
0.5; - const newImportance = Math.min(1.0, currentImportance + 0.05); + let newImportance = Math.min(1.0, currentImportance + boostOnUse); + + // Phase 2 feature (merged into Phase 1): user explicit confirmation signal (+0.15) + if (containsKeyword(responseText, confirmKeywords)) { + newImportance = Math.min(1.0, newImportance + boostOnConfirm); + } + await store.update( recallId, { importance: newImportance }, @@ -3013,11 +3049,24 @@ const memoryLanceDBProPlugin = { } else { // Recall was not used - increment bad_recall_count const badCount = (meta.bad_recall_count || 0) + 1; - let newImportance = meta.importance ?? entry.importance ?? 0.5; - // Apply penalty after threshold (3 consecutive unused) - if (badCount >= 3) { - newImportance = Math.max(0.1, newImportance - 0.03); + let newImportance = currentImportance; + + // Phase 2 feature (merged into Phase 1): user explicit error signal (-0.10) + // Only apply when user explicitly corrects/negates + if (containsKeyword(responseText, errorKeywords)) { + // Only penalize if this memory has been recalled at least minRecallCountForPenalty times + // to avoid penalizing a memory that was just recalled once and didn't fit the context + if ((meta.injected_count || 0) >= minRecallCountForPenalty) { + newImportance = Math.max(0.1, newImportance - penaltyOnError); + } + } else { + // Normal miss: apply penalty after threshold (3 consecutive unused) + // Also gated by minRecallCountForPenalty to avoid penalizing rarely-recalled memories + if (badCount >= 3 && (meta.injected_count || 0) >= minRecallCountForPenalty) { + newImportance = Math.max(0.1, newImportance - penaltyOnMiss); + } } + await store.update( recallId, { importance: newImportance }, From 36a978feb28909f6ab910e09807c31deb7086338 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 00:46:06 +0800 Subject: [PATCH 08/21] fix(proposal-a): 4 bugs in feedback hooks (isRecallUsed summaries, user prompt extraction, parsePluginConfig feedback, bad_recall_count 
double-increment) Bug 1 (P1): isRecallUsed() only checked stock phrases and raw IDs, but auto-recall injects [category:scope] summary format text. Fix: store injectedSummaries (item.line) in pendingRecall on auto-recall injection; pass them to isRecallUsed() which now checks if the response contains any of the injected summary text verbatim. Bug 2 (P1): confirm/error keywords were checked against pending.responseText (previous-turn assistant response) instead of the current-turn user prompt. Fix: read event.prompt (array of {role, content} messages) in the before_prompt_build feedback hook and check keywords against the last user message in that array. Bug 3 (P2): parsePluginConfig() never copied cfg.feedback to the returned config object, so all deployments fell back to hardcoded defaults. Fix: add feedback block to the return object in parsePluginConfig. Bug 4 (P2): bad_recall_count was incremented in BOTH the auto-recall injection path AND the feedback hook, causing double-counting that made the 3-consecutive-miss penalty trigger after only 2 actual misses. Fix: remove +1 from the feedback hook; counter now only increments once (in the auto-recall injection path where staleInjected is evaluated). --- index.ts | 60 ++++++++++++++++++++++++++++++++++------ src/reflection-slices.ts | 42 ++++++++++++++++++++++++---- 2 files changed, 88 insertions(+), 14 deletions(-) diff --git a/index.ts b/index.ts index 911e9002..84d8aec4 100644 --- a/index.ts +++ b/index.ts @@ -2031,6 +2031,8 @@ const memoryLanceDBProPlugin = { recallIds: string[]; responseText: string; injectedAt: number; + /** Summary text lines actually injected into the prompt, used for usage detection. */ + injectedSummaries: string[]; }; const pendingRecall = new Map(); @@ -2530,10 +2532,15 @@ const memoryLanceDBProPlugin = { // agent_end will write responseText into this same pendingRecall // entry (only updating responseText, never clearing recallIds). 
const sessionKeyForRecall = ctx?.sessionKey || ctx?.sessionId || "default"; + // Bug 1 fix: also store the injected summary lines so the feedback hook + // can detect usage even when the agent doesn't use stock phrases or IDs + // but directly incorporates the memory content into the response. + const injectedSummaries = selected.map((item) => item.line); pendingRecall.set(sessionKeyForRecall, { recallIds: selected.map((item) => item.id), responseText: "", // Will be populated by agent_end injectedAt: Date.now(), + injectedSummaries, }); return { @@ -2985,12 +2992,16 @@ const memoryLanceDBProPlugin = { // actual [category:scope] format used by auto-recall injection. const injectedIds = pending.recallIds ?? []; + // Bug 1 fix: also retrieve the injected summary lines so isRecallUsed can + // detect when the agent directly incorporates memory content into the response. + const injectedSummaries = pending.injectedSummaries ?? []; + // Check if any recall was actually used by checking if the response contains reference to the injected content // This is a heuristic - we check if the response shows awareness of injected memories let usedRecall = false; - if (injectedIds.length > 0) { + if (injectedIds.length > 0 || injectedSummaries.length > 0) { // Use the real isRecallUsed function from reflection-slices - usedRecall = isRecallUsed(responseText, injectedIds); + usedRecall = isRecallUsed(responseText, injectedIds, injectedSummaries); } // Read feedback config values with defaults @@ -3003,9 +3014,28 @@ const memoryLanceDBProPlugin = { const confirmKeywords = fb.confirmKeywords ?? ["正確", "是", "對", "right", "yes", "沒錯", "對的"]; const errorKeywords = fb.errorKeywords ?? ["不", "錯", "不是", "wrong", "no", "not right"]; + // Bug 2 fix: confirm/error keywords must be read from the NEXT turn's user + // prompt (event.prompt), not from the previous assistant response (responseText). + // event.prompt is an array of {role, content} message objects. 
+ let userPromptText = ""; + try { + const promptMessages = Array.isArray(event.prompt) ? event.prompt : []; + // Walk backwards to find the last user message + for (let i = promptMessages.length - 1; i >= 0; i--) { + const msg = promptMessages[i]; + if (msg && msg.role === "user" && typeof msg.content === "string" && msg.content.trim().length > 0) { + userPromptText = msg.content.trim(); + break; + } + } + } catch (_e) { + // If we can't read event.prompt, fall back to empty (keyword checks will be skipped) + userPromptText = ""; + } + // Helper: check if text contains any of the keywords (case-insensitive) const containsKeyword = (text: string, keywords: string[]): boolean => - keywords.some((kw) => text.includes(kw)); + keywords.some((kw) => text.toLowerCase().includes(kw.toLowerCase())); // Score the recall - update importance based on usage if (injectedIds.length > 0) { @@ -3031,7 +3061,9 @@ const memoryLanceDBProPlugin = { let newImportance = Math.min(1.0, currentImportance + boostOnUse); // Phase 2 feature (merged into Phase 1): user explicit confirmation signal (+0.15) - if (containsKeyword(responseText, confirmKeywords)) { + // Bug 2 fix: check the next-turn user prompt for confirmation keywords, + // not the previous-turn assistant response. + if (containsKeyword(userPromptText, confirmKeywords)) { newImportance = Math.min(1.0, newImportance + boostOnConfirm); } @@ -3047,13 +3079,19 @@ const memoryLanceDBProPlugin = { undefined, ); } else { - // Recall was not used - increment bad_recall_count - const badCount = (meta.bad_recall_count || 0) + 1; + // Recall was not used. + // Bug 4 fix: do NOT add +1 here — bad_recall_count was already + // incremented by the auto-recall path when this memory was injected + // (staleInjected branch). Adding +1 again would double-count and + // cause the 3-consecutive-miss penalty to trigger after only 2 misses. 
+ const badCount = meta.bad_recall_count || 0; let newImportance = currentImportance; // Phase 2 feature (merged into Phase 1): user explicit error signal (-0.10) - // Only apply when user explicitly corrects/negates - if (containsKeyword(responseText, errorKeywords)) { + // Only apply when user explicitly corrects/negates. + // Bug 2 fix: check the next-turn user prompt for error keywords, + // not the previous-turn assistant response. + if (containsKeyword(userPromptText, errorKeywords)) { // Only penalize if this memory has been recalled at least minRecallCountForPenalty times // to avoid penalizing a memory that was just recalled once and didn't fit the context if ((meta.injected_count || 0) >= minRecallCountForPenalty) { @@ -4252,6 +4290,12 @@ export function parsePluginConfig(value: unknown): PluginConfig { : 30, } : { skipLowValue: false, maxExtractionsPerHour: 30 }, + // Bug 3 fix: parse and return the feedback config block so deployments + // that specify custom feedback values actually take effect instead of + // falling back to hardcoded defaults. + feedback: typeof cfg.feedback === "object" && cfg.feedback !== null + ? { ...(cfg.feedback as Record) } + : {}, }; } diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index dc6fd20f..a551b9ce 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -320,21 +320,28 @@ export function extractInjectableReflectionSliceItems(reflectionText: string): R /** * Check if a recall was actually used by the agent. * This function determines whether the agent's response shows awareness of the injected memories. 
- * - * @param responseText - The agent's response text - * @param injectedIds - Array of memory IDs that were injected + * + * @param responseText - The agent's response text + * @param injectedIds - Array of memory IDs that were injected + * @param injectedSummaries - Optional array of summary text lines that were injected; + * if the response contains any of these verbatim or partially, + * it is a strong usage signal even without explicit markers or IDs. * @returns true if the response shows evidence of using the recalled information */ -export function isRecallUsed(responseText: string, injectedIds: string[]): boolean { +export function isRecallUsed( + responseText: string, + injectedIds: string[], + injectedSummaries?: string[], +): boolean { if (!responseText || responseText.length <= 24) { return false; } - if (!injectedIds || injectedIds.length === 0) { + if ((!injectedIds || injectedIds.length === 0) && (!injectedSummaries || injectedSummaries.length === 0)) { return false; } const responseLower = responseText.toLowerCase(); - + // Check for explicit recall usage markers const usageMarkers = [ "remember", @@ -380,5 +387,28 @@ export function isRecallUsed(responseText: string, injectedIds: string[]): boole } } + // Bug 1 fix (isRecallUsed): when summaries are provided, check if the response + // contains any of the injected summary text verbatim or as a near-identical + // substring. This catches the case where the agent directly uses the memory + // content without any explicit marker phrase. + if (injectedSummaries && injectedSummaries.length > 0) { + const responseTrimmedLower = responseText.trim().toLowerCase(); + for (const summary of injectedSummaries) { + if (summary && summary.trim().length > 0) { + const summaryLower = summary.trim().toLowerCase(); + // Check for verbatim or near-verbatim presence (at least 10 chars to avoid + // false positives on very short fragments). 
+ if ( + summaryLower.length >= 10 && + (responseTrimmedLower.includes(summaryLower) || + // Also check the reverse (summary contains response snippet — agent echoed it) + summaryLower.includes(responseTrimmedLower.slice(0, Math.min(50, responseTrimmedLower.length)))) + ) { + return true; + } + } + } + } + return false; } From c6668eb2062b69187a80137be7d5404df7d3a172 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 01:15:30 +0800 Subject: [PATCH 09/21] fix(proposal-a): OpenCode review fixes - per-recall scoring, event.messages user prompt, agentId keying Bug 1 (P1): Score each recall independently instead of one usedRecall for the whole batch. - Build summaryMap: recallId -> injected summary - Call isRecallUsed per recallId with its specific summary - Prevents unused memories from being boosted or used ones penalized Bug 2 (P2): Extract user prompt from event.messages array, not event.prompt. - event.prompt is a plain string (confirmed by codebase usage), not an array - Extract last user message from event.messages (same pattern as agent_end) Bug 3 (P2): pendingRecall key includes agentId to avoid cross-agent overwrite. - Key format: sessionKey:agentId (both in auto-recall and feedback/agent_end hooks) --- index.ts | 117 ++++++++++++++++++++++++------------------------------- 1 file changed, 51 insertions(+), 66 deletions(-) diff --git a/index.ts b/index.ts index 84d8aec4..7eb15f78 100644 --- a/index.ts +++ b/index.ts @@ -2531,7 +2531,8 @@ const memoryLanceDBProPlugin = { // sees a matching pair: Turn N recallIds + Turn N responseText. // agent_end will write responseText into this same pendingRecall // entry (only updating responseText, never clearing recallIds). - const sessionKeyForRecall = ctx?.sessionKey || ctx?.sessionId || "default"; + // Include agentId in the key so different agents in the same session do not overwrite each other's pendingRecall. + const sessionKeyForRecall = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentId ?? 
""}`; // Bug 1 fix: also store the injected summary lines so the feedback hook // can detect usage even when the agent doesn't use stock phrases or IDs // but directly incorporates the memory content into the response. @@ -2947,7 +2948,9 @@ const memoryLanceDBProPlugin = { // This ensures recallIds (written by auto-recall in the same turn) and // responseText (written here) remain paired for the feedback hook. api.on("agent_end", (event: any, ctx: any) => { - const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; + // Use same key format as auto-recall hook (sessionKey:agentId) so we update the right entry. + const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); + const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`; if (!sessionKey) return; // Get the last message content @@ -2972,7 +2975,9 @@ const memoryLanceDBProPlugin = { // Proposal A Phase 1: before_prompt_build hook (priority 5) - Score recalls // ======================================================================== api.on("before_prompt_build", async (event: any, ctx: any) => { - const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; + // Use same key format as auto-recall hook (sessionKey:agentId) so we read the right entry. + const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); + const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`; const pending = pendingRecall.get(sessionKey); if (!pending) return; @@ -3014,22 +3019,28 @@ const memoryLanceDBProPlugin = { const confirmKeywords = fb.confirmKeywords ?? ["正確", "是", "對", "right", "yes", "沒錯", "對的"]; const errorKeywords = fb.errorKeywords ?? ["不", "錯", "不是", "wrong", "no", "not right"]; - // Bug 2 fix: confirm/error keywords must be read from the NEXT turn's user - // prompt (event.prompt), not from the previous assistant response (responseText). 
- // event.prompt is an array of {role, content} message objects. + // event.prompt is a plain string in the current hook contract (confirmed by codebase usage). + // We extract the user's last message from event.messages array instead. let userPromptText = ""; try { - const promptMessages = Array.isArray(event.prompt) ? event.prompt : []; - // Walk backwards to find the last user message - for (let i = promptMessages.length - 1; i >= 0; i--) { - const msg = promptMessages[i]; - if (msg && msg.role === "user" && typeof msg.content === "string" && msg.content.trim().length > 0) { - userPromptText = msg.content.trim(); - break; + if (event.messages && Array.isArray(event.messages)) { + for (let i = event.messages.length - 1; i >= 0; i--) { + const msg = event.messages[i]; + if (msg && msg.role === "user" && typeof msg.content === "string" && msg.content.trim().length > 0) { + userPromptText = msg.content.trim(); + break; + } + if (msg && msg.role === "user" && Array.isArray(msg.content)) { + // Handle array-form content + const text = extractTextContent(msg.content); + if (text && text.trim().length > 0) { + userPromptText = text.trim(); + break; + } + } } } } catch (_e) { - // If we can't read event.prompt, fall back to empty (keyword checks will be skipped) userPromptText = ""; } @@ -3038,91 +3049,65 @@ const memoryLanceDBProPlugin = { keywords.some((kw) => text.toLowerCase().includes(kw.toLowerCase())); // Score the recall - update importance based on usage + // Score each recall individually — do NOT compute a single usedRecall for the whole batch. + // Bug 1 fix (P1): when auto-recall injects multiple memories, the agent may use only some of them. + // Scoring them all with one decision corrupts ranking: unused memories get boosted, used ones get penalized. 
if (injectedIds.length > 0) { try { + // Build lookup: recallId -> injected summary text for this specific recall + const summaryMap = new Map(); + for (let i = 0; i < injectedIds.length; i++) { + if (injectedSummaries[i]) { + summaryMap.set(injectedIds[i], injectedSummaries[i]); + } + } + for (const recallId of injectedIds) { - // Bug 2 fix: use store.getById to retrieve the real entry so we - // get the actual importance value, instead of calling - // parseSmartMetadata with empty placeholder metadata. + const summaryText = summaryMap.get(recallId) ?? ""; + // Score this specific recall independently + const usedRecall = isRecallUsed( + responseText, + [recallId], + summaryText ? [summaryText] : [], + ); + const entry = await store.getById(recallId, undefined); if (!entry) continue; const meta = parseSmartMetadata(entry.metadata, entry); const currentImportance = meta.importance ?? entry.importance ?? 0.5; if (usedRecall) { - // Recall was used - increase importance (cap at 1.0) - // Bug 3 fix: use store.update to directly update the row-level - // importance column. patchMetadata only updates the metadata JSON - // blob but NOT the entry.importance field, so importance changes - // never affected ranking (applyImportanceWeight reads entry.importance). - // Bug 4 fix (this file): also fall back to entry.importance (row-level) - // so old records that have no importance in metadata JSON still - // get a correct boost instead of always landing at 0.5. let newImportance = Math.min(1.0, currentImportance + boostOnUse); - - // Phase 2 feature (merged into Phase 1): user explicit confirmation signal (+0.15) - // Bug 2 fix: check the next-turn user prompt for confirmation keywords, - // not the previous-turn assistant response. 
if (containsKeyword(userPromptText, confirmKeywords)) { newImportance = Math.min(1.0, newImportance + boostOnConfirm); } - - await store.update( - recallId, - { importance: newImportance }, - undefined, - ); - // Also update metadata JSON fields via patchMetadata (separate concern) + await store.update(recallId, { importance: newImportance }, undefined); await store.patchMetadata( recallId, { last_confirmed_use_at: Date.now(), bad_recall_count: 0 }, undefined, ); } else { - // Recall was not used. - // Bug 4 fix: do NOT add +1 here — bad_recall_count was already - // incremented by the auto-recall path when this memory was injected - // (staleInjected branch). Adding +1 again would double-count and - // cause the 3-consecutive-miss penalty to trigger after only 2 misses. const badCount = meta.bad_recall_count || 0; let newImportance = currentImportance; - - // Phase 2 feature (merged into Phase 1): user explicit error signal (-0.10) - // Only apply when user explicitly corrects/negates. - // Bug 2 fix: check the next-turn user prompt for error keywords, - // not the previous-turn assistant response. 
if (containsKeyword(userPromptText, errorKeywords)) { - // Only penalize if this memory has been recalled at least minRecallCountForPenalty times - // to avoid penalizing a memory that was just recalled once and didn't fit the context if ((meta.injected_count || 0) >= minRecallCountForPenalty) { newImportance = Math.max(0.1, newImportance - penaltyOnError); } - } else { - // Normal miss: apply penalty after threshold (3 consecutive unused) - // Also gated by minRecallCountForPenalty to avoid penalizing rarely-recalled memories - if (badCount >= 3 && (meta.injected_count || 0) >= minRecallCountForPenalty) { + await store.update(recallId, { importance: newImportance }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount }, undefined); + } else if (badCount >= 3) { + if ((meta.injected_count || 0) >= minRecallCountForPenalty) { newImportance = Math.max(0.1, newImportance - penaltyOnMiss); } + await store.update(recallId, { importance: newImportance }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount }, undefined); } - - await store.update( - recallId, - { importance: newImportance }, - undefined, - ); - await store.patchMetadata( - recallId, - { bad_recall_count: badCount }, - undefined, - ); } } } catch (err) { api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`); } finally { - // Bug 1 fix: delete pendingRecall immediately after scoring so that - // subsequent turns (greeting, short input) that skip auto-recall do not - // re-trigger feedback scoring on the same recallIds/responseText pair. 
pendingRecall.delete(sessionKey); } } From 718fd745a8c627c9c831e17c03014f41e20abeb3 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 01:25:00 +0800 Subject: [PATCH 10/21] fix(proposal-a): longer CJK keywords + session_end pendingRecall cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 fix: replace single-char CJK keywords (是/對/不/錯) with longer phrases (是對的/確認/錯誤/更正) to avoid false positives on ordinary conversation. P3 fix: session_end hook was not cleaning pendingRecall at all. Add cleanup of all pendingRecall entries that match the sessionId or sessionKey:agentId composite key pattern. --- index.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/index.ts b/index.ts index 7eb15f78..6e2e6547 100644 --- a/index.ts +++ b/index.ts @@ -2587,6 +2587,14 @@ const memoryLanceDBProPlugin = { recallHistory.delete(sessionId); turnCounter.delete(sessionId); lastRawUserMessage.delete(sessionId); + // P3 fix: clean all pendingRecall entries for this session. + // pendingRecall keys use format: sessionKey (or sessionKey:agentId with composite key). + // We clean any key that starts with this sessionId. + for (const key of pendingRecall.keys()) { + if (key === sessionId || key.startsWith(`${sessionId}:`) || key.startsWith(`${ctx?.sessionKey ?? ""}:`)) { + pendingRecall.delete(key); + } + } } // Also clean by channelId/conversationId if present (shared cache key) const cacheKey = ctx?.channelId || ctx?.conversationId || ""; @@ -3016,8 +3024,8 @@ const memoryLanceDBProPlugin = { const boostOnConfirm = fb.boostOnConfirm ?? 0.15; const penaltyOnError = fb.penaltyOnError ?? 0.10; const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2; - const confirmKeywords = fb.confirmKeywords ?? ["正確", "是", "對", "right", "yes", "沒錯", "對的"]; - const errorKeywords = fb.errorKeywords ?? ["不", "錯", "不是", "wrong", "no", "not right"]; + const confirmKeywords = fb.confirmKeywords ?? 
["正確", "yes", "right", "沒錯", "確認", "correct", "ok"]; + const errorKeywords = fb.errorKeywords ?? ["不是", "錯", "不對", "wrong", "no", "not right", "錯誤", "更正"]; // event.prompt is a plain string in the current hook contract (confirmed by codebase usage). // We extract the user's last message from event.messages array instead. From ec4d643a776712697b716b06362fbbeb6de83ecf Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 02:03:15 +0800 Subject: [PATCH 11/21] fix: change bad_recall threshold from 3 to 2 per spec (issue #445) --- index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.ts b/index.ts index 6e2e6547..be490703 100644 --- a/index.ts +++ b/index.ts @@ -3104,7 +3104,7 @@ const memoryLanceDBProPlugin = { } await store.update(recallId, { importance: newImportance }, undefined); await store.patchMetadata(recallId, { bad_recall_count: badCount }, undefined); - } else if (badCount >= 3) { + } else if (badCount >= 2) { if ((meta.injected_count || 0) >= minRecallCountForPenalty) { newImportance = Math.max(0.1, newImportance - penaltyOnMiss); } From 2a8d0e55608bddfc910c118036eb37038d6bfe48 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 12:57:13 +0800 Subject: [PATCH 12/21] fix(recall): AND logic in isRecallUsed + bad_recall_count increment fix --- index.ts | 4 +- src/reflection-slices.ts | 81 ++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/index.ts b/index.ts index be490703..6c0fe816 100644 --- a/index.ts +++ b/index.ts @@ -3103,13 +3103,13 @@ const memoryLanceDBProPlugin = { newImportance = Math.max(0.1, newImportance - penaltyOnError); } await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata(recallId, { bad_recall_count: badCount }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); } else if (badCount >= 2) { if ((meta.injected_count || 0) >= minRecallCountForPenalty) 
{ newImportance = Math.max(0.1, newImportance - penaltyOnMiss); } await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata(recallId, { bad_recall_count: badCount }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); } } } diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index a551b9ce..036375fa 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -342,48 +342,47 @@ export function isRecallUsed( const responseLower = responseText.toLowerCase(); - // Check for explicit recall usage markers - const usageMarkers = [ - "remember", - "之前", - "记得", - "记得", - "according to", - "based on what", - "as you mentioned", - "如前所述", - "如您所說", - "如您所说的", - "我記得", - "我记得", - "之前你說", - "之前你说", - "之前提到", - "之前提到的", - "根据之前", - "依据之前", - "按照之前", - "照您之前", - "照你说的", - "from previous", - "earlier you", - "in the memory", - "the memory mentioned", - "the memories show", - ]; - - for (const marker of usageMarkers) { - if (responseLower.includes(marker.toLowerCase())) { - return true; - } - } + // Step 1: Check if the response contains any specific injected memory ID. + // This is a prerequisite for confirming actual usage. + const hasSpecificRecall = injectedIds.some( + (id) => id && responseLower.includes(id.toLowerCase()), + ); - // Bug fix (P1-1): also check if the response explicitly references any of the - // injected memory IDs. If the agent mentions the ID (e.g. "based on [abc-123]") - // that is a direct usage signal, not just a stock phrase. - for (const id of injectedIds) { - if (id && responseLower.includes(id.toLowerCase())) { - return true; + // Step 2: If a specific ID is present, also check for generic usage phrases. + // Both conditions must be met (AND logic) to confirm the recall was used. 
+ if (hasSpecificRecall) { + const usageMarkers = [ + "remember", + "之前", + "记得", + "according to", + "based on what", + "as you mentioned", + "如前所述", + "如您所說", + "如您所说的", + "我記得", + "我记得", + "之前你說", + "之前你说", + "之前提到", + "之前提到的", + "根据之前", + "依据之前", + "按照之前", + "照您之前", + "照你说的", + "from previous", + "earlier you", + "in the memory", + "the memory mentioned", + "the memories show", + ]; + + for (const marker of usageMarkers) { + if (responseLower.includes(marker.toLowerCase())) { + return true; + } } } From 0b4aeff79c9f8cbf978b1372d9cf4c43f61c3223 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sat, 4 Apr 2026 13:21:39 +0800 Subject: [PATCH 13/21] fix(proposal-a): session_end hook clean composite keys to prevent memory leak When config.autoCapture === false, the auto-capture session_end (priority 10) was skipped, leaving only the Phase 1 session_end (priority 20) to clean up. The old code only deleted pendingRecall[sessionKey] - a simple key - but not composite keys (sessionKey:agentId). Now uses pattern matching (startsWith) to clean all related keys regardless of format. Fixes: P1 issue from Phase 1 audit --- index.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/index.ts b/index.ts index 6c0fe816..4064266b 100644 --- a/index.ts +++ b/index.ts @@ -3125,9 +3125,20 @@ const memoryLanceDBProPlugin = { // Proposal A Phase 1: session_end hook - Clean up pending recalls // ======================================================================== api.on("session_end", (_event: any, ctx: any) => { - const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default"; - if (sessionKey) { - pendingRecall.delete(sessionKey); + // P1 fix: clean all pendingRecall entries for this session, including composite keys. + // When autoCapture is false, the auto-capture session_end (priority 10) is skipped, + // so this hook must handle composite keys (sessionKey:agentId) as well. 
+ const sessionId = ctx?.sessionId || ""; + const sessionKey = ctx?.sessionKey || ""; + for (const key of pendingRecall.keys()) { + if ( + key === sessionKey || + key === sessionId || + key.startsWith(`${sessionKey}:`) || + key.startsWith(`${sessionId}:`) + ) { + pendingRecall.delete(key); + } } }, { priority: 20 }); From b371f0c75c441be7bff20f2f30badf2a9c60f74d Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 03:36:10 +0800 Subject: [PATCH 14/21] fix(proposal-a): P0-1 pendingRecall TTL cleanup, P0-3 summary AND gate, P0-2 race condition known limitation P0-1: Add TTL-based cleanup to prevent unbounded pendingRecall memory growth P0-3: Enforce AND gate on summary path (hasUsageMarker || hasSpecificRecall required) P0-2: Document known limitation - bad_recall_count read-modify-write is not atomic P1-1: Verified autoCapture block boundary - Proposal A hooks are outside autoCapture block --- index.ts | 20 +++++++++++++ src/reflection-slices.ts | 61 +++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 16 deletions(-) diff --git a/index.ts b/index.ts index 4064266b..9aee4ff9 100644 --- a/index.ts +++ b/index.ts @@ -2034,7 +2034,20 @@ const memoryLanceDBProPlugin = { /** Summary text lines actually injected into the prompt, used for usage detection. */ injectedSummaries: string[]; }; + // P0-1 fix: pendingRecall TTL-based cleanup to prevent unbounded memory growth. + // Entries older than 10 minutes are cleaned up on each set() call. + const PENDING_RECALL_MAX_AGE_MS = 10 * 60 * 1000; // 10 minutes + function cleanupPendingRecall(): void { + const now = Date.now(); + for (const [key, entry] of pendingRecall.entries()) { + if (now - entry.injectedAt > PENDING_RECALL_MAX_AGE_MS) { + pendingRecall.delete(key); + } + } + } const pendingRecall = new Map(); + // Clean up on module load (handles re-registration edge cases) + cleanupPendingRecall(); const logReg = isCliMode() ? 
api.logger.debug : api.logger.info; logReg( @@ -2537,6 +2550,8 @@ const memoryLanceDBProPlugin = { // can detect usage even when the agent doesn't use stock phrases or IDs // but directly incorporates the memory content into the response. const injectedSummaries = selected.map((item) => item.line); + // P0-1 fix: run TTL cleanup before each set to prevent unbounded growth + cleanupPendingRecall(); pendingRecall.set(sessionKeyForRecall, { recallIds: selected.map((item) => item.id), responseText: "", // Will be populated by agent_end @@ -3096,6 +3111,11 @@ const memoryLanceDBProPlugin = { undefined, ); } else { + // P0-2 fix: bad_recall_count read-modify-write. + // KNOWN LIMITATION: This is not atomic. Concurrent before_prompt_build + // invocations for the same recallId may overwrite each other's increments. + // Full atomic fix requires store-layer compare-and-swap support (out of + // Phase 1 scope). The increment is best-effort and may undercount. const badCount = meta.bad_recall_count || 0; let newImportance = currentImportance; if (containsKeyword(userPromptText, errorKeywords)) { diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index 036375fa..46c3061c 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -386,24 +386,53 @@ export function isRecallUsed( } } - // Bug 1 fix (isRecallUsed): when summaries are provided, check if the response - // contains any of the injected summary text verbatim or as a near-identical - // substring. This catches the case where the agent directly uses the memory - // content without any explicit marker phrase. + // P0-3 fix: summary path AND gate — when summaries are provided, + // both the verbatim check AND a usage marker must be present. + // This matches the AND logic of the ID path (hasSpecificRecall + usageMarker). 
if (injectedSummaries && injectedSummaries.length > 0) { const responseTrimmedLower = responseText.trim().toLowerCase(); - for (const summary of injectedSummaries) { - if (summary && summary.trim().length > 0) { - const summaryLower = summary.trim().toLowerCase(); - // Check for verbatim or near-verbatim presence (at least 10 chars to avoid - // false positives on very short fragments). - if ( - summaryLower.length >= 10 && - (responseTrimmedLower.includes(summaryLower) || - // Also check the reverse (summary contains response snippet — agent echoed it) - summaryLower.includes(responseTrimmedLower.slice(0, Math.min(50, responseTrimmedLower.length)))) - ) { - return true; + const usageMarkers = [ + "remember", + "之前", + "记得", + "according to", + "based on what", + "as you mentioned", + "如前所述", + "如您所說", + "如您所说的", + "我記得", + "我记得", + "之前你說", + "之前你说", + "之前提到", + "之前提到的", + "根据之前", + "依据之前", + "按照之前", + "照您之前", + "照你说的", + "from previous", + "earlier you", + "in the memory", + "the memory mentioned", + "the memories show", + ]; + const hasUsageMarker = usageMarkers.some((m) => responseLower.includes(m.toLowerCase())); + if (hasUsageMarker || hasSpecificRecall) { + for (const summary of injectedSummaries) { + if (summary && summary.trim().length > 0) { + const summaryLower = summary.trim().toLowerCase(); + // Check for verbatim or near-verbatim presence (at least 10 chars to avoid + // false positives on very short fragments). 
+ if ( + summaryLower.length >= 10 && + (responseTrimmedLower.includes(summaryLower) || + // Also check the reverse (summary contains response snippet — agent echoed it) + summaryLower.includes(responseTrimmedLower.slice(0, Math.min(50, responseTrimmedLower.length)))) + ) { + return true; + } } } } From d3d0b7188810af68ff1f0741bc313ca5978d0368 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 03:44:36 +0800 Subject: [PATCH 15/21] fix(proposal-a): P2 align suppression threshold with scoring path (>= 2) P2: Change suppression threshold from >= 3 to >= 2 to match the scoring path threshold. After 2 bad recalls, both penalty and suppression now activate simultaneously, preventing one extra injection turn. P0-2: bad_recall_count race condition remains a known limitation (out of Phase 1 scope without store-layer compare-and-swap support). --- index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index 9aee4ff9..a6b5566a 100644 --- a/index.ts +++ b/index.ts @@ -2512,7 +2512,9 @@ const memoryLanceDBProPlugin = { const nextBadRecallCount = staleInjected ? meta.bad_recall_count + 1 : meta.bad_recall_count; - const shouldSuppress = nextBadRecallCount >= 3 && minRepeated > 0; + // P2 fix: suppress threshold aligned with scoring path (>= 2). After 2 bad recalls, + // both the scoring penalty and suppression kick in simultaneously. + const shouldSuppress = nextBadRecallCount >= 2 && minRepeated > 0; await store.patchMetadata( item.id, { From 8990c9d9b796c455a12ed27d47579441b40be5ae Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 03:59:04 +0800 Subject: [PATCH 16/21] fix(proposal-a): P1 natural usage detection in Summary path P1 (Codex): Remove AND gate from Summary path in isRecallUsed(). Summary text overlap (>= 10 chars) is now detected independently, without requiring hasUsageMarker or hasSpecificRecall. 
This fixes false negatives where auto-recall memories are naturally used (without explicit markers) and incorrectly counted as misses. Auto-capture (pendingIngress) remains Phase 2 scope (out of Phase 1). --- src/reflection-slices.ts | 61 +++++++++++----------------------------- 1 file changed, 16 insertions(+), 45 deletions(-) diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts index 46c3061c..a8d3c4ef 100644 --- a/src/reflection-slices.ts +++ b/src/reflection-slices.ts @@ -386,53 +386,24 @@ export function isRecallUsed( } } - // P0-3 fix: summary path AND gate — when summaries are provided, - // both the verbatim check AND a usage marker must be present. - // This matches the AND logic of the ID path (hasSpecificRecall + usageMarker). + // P1 fix: Summary path — detect when injected summary content appears in the response. + // No AND gate here: summary text IS the injected memory, so any verbatim/near-verbatim + // overlap is a strong usage signal. The 10-char minimum prevents false positives on + // common short words. Guards at function entry already ensure injectedSummaries is non-empty. 
if (injectedSummaries && injectedSummaries.length > 0) { const responseTrimmedLower = responseText.trim().toLowerCase(); - const usageMarkers = [ - "remember", - "之前", - "记得", - "according to", - "based on what", - "as you mentioned", - "如前所述", - "如您所說", - "如您所说的", - "我記得", - "我记得", - "之前你說", - "之前你说", - "之前提到", - "之前提到的", - "根据之前", - "依据之前", - "按照之前", - "照您之前", - "照你说的", - "from previous", - "earlier you", - "in the memory", - "the memory mentioned", - "the memories show", - ]; - const hasUsageMarker = usageMarkers.some((m) => responseLower.includes(m.toLowerCase())); - if (hasUsageMarker || hasSpecificRecall) { - for (const summary of injectedSummaries) { - if (summary && summary.trim().length > 0) { - const summaryLower = summary.trim().toLowerCase(); - // Check for verbatim or near-verbatim presence (at least 10 chars to avoid - // false positives on very short fragments). - if ( - summaryLower.length >= 10 && - (responseTrimmedLower.includes(summaryLower) || - // Also check the reverse (summary contains response snippet — agent echoed it) - summaryLower.includes(responseTrimmedLower.slice(0, Math.min(50, responseTrimmedLower.length)))) - ) { - return true; - } + for (const summary of injectedSummaries) { + if (summary && summary.trim().length > 0) { + const summaryLower = summary.trim().toLowerCase(); + // Check for verbatim or near-verbatim presence (at least 10 chars to avoid + // false positives on very short fragments). 
+ if ( + summaryLower.length >= 10 && + (responseTrimmedLower.includes(summaryLower) || + // Also check the reverse (summary contains response snippet — agent echoed it) + summaryLower.includes(responseTrimmedLower.slice(0, Math.min(50, responseTrimmedLower.length)))) + ) { + return true; } } } From d33fe19afda1d99b9daf0b76c16cb42736ef4d69 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 04:05:39 +0800 Subject: [PATCH 17/21] fix(proposal-a): P1 align scoring penalty with injection increment P1 (Codex): Fix double-counting + delayed suppression. - Change scoring condition from >= 2 to >= 1 to sync with injection's staleInjected increment (which increments bad_recall_count when re-injecting an unconfirmed memory from the previous turn). - Don't increment bad_recall_count in scoring path for silent misses: injection path already handles this via staleInjected. - Only increment in scoring for explicit errorKeywords (user corrections). This ensures the penalty applies on second miss (syncs with injection's first increment), not the third. --- index.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/index.ts b/index.ts index a6b5566a..ec30b82e 100644 --- a/index.ts +++ b/index.ts @@ -3113,25 +3113,29 @@ const memoryLanceDBProPlugin = { undefined, ); } else { - // P0-2 fix: bad_recall_count read-modify-write. - // KNOWN LIMITATION: This is not atomic. Concurrent before_prompt_build - // invocations for the same recallId may overwrite each other's increments. - // Full atomic fix requires store-layer compare-and-swap support (out of - // Phase 1 scope). The increment is best-effort and may undercount. + // P1 fix: align scoring penalty threshold with injection increment. + // The injection path increments on staleInjected (previous turn not confirmed). + // To avoid double-counting: scoring only increments for explicit errors, + // and checks >= 1 to sync with injection's first increment. 
const badCount = meta.bad_recall_count || 0; let newImportance = currentImportance; if (containsKeyword(userPromptText, errorKeywords)) { + // Only increment for explicit user error/correction if ((meta.injected_count || 0) >= minRecallCountForPenalty) { newImportance = Math.max(0.1, newImportance - penaltyOnError); } await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); - } else if (badCount >= 2) { + await store.patchMetadata(recallId, { bad_recall_count: badCount + 1, last_confirmed_use_at: Date.now() }, undefined); + } else if (badCount >= 1) { + // P1 fix: check >= 1 to match injection path's staleInjected increment. + // After injection increments (staleInjected), badCount will be 1, so we apply + // penalty on the second miss rather than waiting for the third. if ((meta.injected_count || 0) >= minRecallCountForPenalty) { newImportance = Math.max(0.1, newImportance - penaltyOnMiss); } await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); + // Don't increment here - injection path already increments via staleInjected. + // This prevents double-counting while still applying penalty. } } } From 45247f2c965293bb0ce425dcf2d87b5ecaad16d9 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 04:08:27 +0800 Subject: [PATCH 18/21] fix(proposal-a): P1/P2 refine feedback loop P1 (Codex): Don't write last_confirmed_use_at on explicit error. When user explicitly corrects a recall, we shouldn't mark it as "confirmed use" - otherwise staleInjected logic breaks. P2 (Codex): 1. Run TTL cleanup on read path too (not just set path). This handles idle sessions that never trigger set() again. 2. Refine confirm/error keywords to reduce false positives. Removed "ok" and "no" which are too generic. 
--- index.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/index.ts b/index.ts index ec30b82e..bf79a646 100644 --- a/index.ts +++ b/index.ts @@ -3003,6 +3003,8 @@ const memoryLanceDBProPlugin = { // Use same key format as auto-recall hook (sessionKey:agentId) so we read the right entry. const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`; + // P2 fix: also cleanup on read path to handle idle sessions that never trigger set() + cleanupPendingRecall(); const pending = pendingRecall.get(sessionKey); if (!pending) return; @@ -3041,8 +3043,8 @@ const memoryLanceDBProPlugin = { const boostOnConfirm = fb.boostOnConfirm ?? 0.15; const penaltyOnError = fb.penaltyOnError ?? 0.10; const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2; - const confirmKeywords = fb.confirmKeywords ?? ["正確", "yes", "right", "沒錯", "確認", "correct", "ok"]; - const errorKeywords = fb.errorKeywords ?? ["不是", "錯", "不對", "wrong", "no", "not right", "錯誤", "更正"]; + const confirmKeywords = fb.confirmKeywords ?? ["correct", "right", "yes", "confirmed", "exactly", "對", "沒錯", "正確", "確認", "好的"]; + const errorKeywords = fb.errorKeywords ?? ["wrong", "incorrect", "not right", "that's wrong", "error", "mistake", "fix it", "change that", "改成", "改為", "不是這樣", "不對", "錯了"]; // event.prompt is a plain string in the current hook contract (confirmed by codebase usage). // We extract the user's last message from event.messages array instead. 
@@ -3125,7 +3127,7 @@ const memoryLanceDBProPlugin = { newImportance = Math.max(0.1, newImportance - penaltyOnError); } await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata(recallId, { bad_recall_count: badCount + 1, last_confirmed_use_at: Date.now() }, undefined); + await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); } else if (badCount >= 1) { // P1 fix: check >= 1 to match injection path's staleInjected increment. // After injection increments (staleInjected), badCount will be 1, so we apply From 84ddffa6469c593393bdd5fc61b59e35a9a889bb Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 12:05:39 +0800 Subject: [PATCH 19/21] fix(proposal-a): P1 fix errorKeywords precedence + prevent double-count P1 (Codex 2nd round): 1. errorKeywords now checked BEFORE usedRecall heuristic. If user explicitly corrects, that overrides usage detection. No importance boost is applied for errorKeywords cases. 2. errorKeywords sets last_confirmed_use_at = Date.now(). This prevents injection path's staleInjected from double-counting in the same turn. Next injection will NOT increment bad_recall_count via staleInjected (since last_confirmed_use_at >= last_injected_at). This fixes: - Used but corrected: no boost, single increment, no staleInjected double-count - Used and confirmed: boost + reset - Silent miss: penalty applies at badCount >= 1, injection handles increment --- index.ts | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/index.ts b/index.ts index bf79a646..65e99101 100644 --- a/index.ts +++ b/index.ts @@ -3103,42 +3103,32 @@ const memoryLanceDBProPlugin = { const meta = parseSmartMetadata(entry.metadata, entry); const currentImportance = meta.importance ?? entry.importance ?? 0.5; - if (usedRecall) { + // P1 fix (Codex): check errorKeywords BEFORE usedRecall. 
+ // If user explicitly corrects, that overrides the heuristic usage detection. + // Also set last_confirmed_use_at here to prevent injection path's staleInjected + // from double-counting in the same turn. + const hasError = containsKeyword(userPromptText, errorKeywords); + if (hasError) { + if ((meta.injected_count || 0) >= minRecallCountForPenalty) { + await store.update(recallId, { importance: Math.max(0.1, currentImportance - penaltyOnError) }, undefined); + } + await store.patchMetadata(recallId, { bad_recall_count: (meta.bad_recall_count || 0) + 1, last_confirmed_use_at: Date.now() }, undefined); + } else if (usedRecall) { + // Pure positive use: boost importance let newImportance = Math.min(1.0, currentImportance + boostOnUse); if (containsKeyword(userPromptText, confirmKeywords)) { newImportance = Math.min(1.0, newImportance + boostOnConfirm); } await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata( - recallId, - { last_confirmed_use_at: Date.now(), bad_recall_count: 0 }, - undefined, - ); + await store.patchMetadata(recallId, { last_confirmed_use_at: Date.now(), bad_recall_count: 0 }, undefined); } else { // P1 fix: align scoring penalty threshold with injection increment. - // The injection path increments on staleInjected (previous turn not confirmed). - // To avoid double-counting: scoring only increments for explicit errors, - // and checks >= 1 to sync with injection's first increment. + // Silent miss: apply penalty if badCount >= 1 (injection path handles increment). 
const badCount = meta.bad_recall_count || 0; - let newImportance = currentImportance; - if (containsKeyword(userPromptText, errorKeywords)) { - // Only increment for explicit user error/correction - if ((meta.injected_count || 0) >= minRecallCountForPenalty) { - newImportance = Math.max(0.1, newImportance - penaltyOnError); - } - await store.update(recallId, { importance: newImportance }, undefined); - await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined); - } else if (badCount >= 1) { - // P1 fix: check >= 1 to match injection path's staleInjected increment. - // After injection increments (staleInjected), badCount will be 1, so we apply - // penalty on the second miss rather than waiting for the third. - if ((meta.injected_count || 0) >= minRecallCountForPenalty) { - newImportance = Math.max(0.1, newImportance - penaltyOnMiss); - } - await store.update(recallId, { importance: newImportance }, undefined); - // Don't increment here - injection path already increments via staleInjected. - // This prevents double-counting while still applying penalty. + if (badCount >= 1 && (meta.injected_count || 0) >= minRecallCountForPenalty) { + await store.update(recallId, { importance: Math.max(0.1, currentImportance - penaltyOnMiss) }, undefined); } + // No increment here - injection path already increments via staleInjected. } } } catch (err) { From 3d9f27dfd0245853cfe1d9ad2eb14103c81d7a2a Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 13:22:58 +0800 Subject: [PATCH 20/21] fix(proposal-a): P1 restore autoCapture closing brace + P2 coerce feedback config types P1 (Codex review round 4): - The second `if (config.autoCapture !== false)` block (agent_end auto-capture) was missing its closing brace. Phase 1 hooks were added inside this block but the block was never closed, causing ALL subsequent hooks (self-improvement, reflection, lifecycle, backup) to be conditional on autoCapture. 
Added closing `}` after the Phase 1 before_prompt_build hook (priority 5) to properly close the autoCapture block. P2 (Codex review round 4): - Feedback config values (boostOnUse, penaltyOnMiss, etc.) were used directly without Number() coercion. If deployment provides values as strings (common with env-driven config), Math.min/Math.max would produce NaN. Added Number() coercion with fallback to default values. Both fixes resolve the two issues flagged by Codex in PR review. --- index.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/index.ts b/index.ts index 65e99101..999af289 100644 --- a/index.ts +++ b/index.ts @@ -3037,11 +3037,13 @@ const memoryLanceDBProPlugin = { } // Read feedback config values with defaults + // P2 fix: coerce to Number to handle env-driven string config values. + // Without coercion, string values would cause string concatenation in Math.min/max. const fb = config.feedback ?? {}; - const boostOnUse = fb.boostOnUse ?? 0.05; - const penaltyOnMiss = fb.penaltyOnMiss ?? 0.03; - const boostOnConfirm = fb.boostOnConfirm ?? 0.15; - const penaltyOnError = fb.penaltyOnError ?? 0.10; + const boostOnUse = Number(fb.boostOnUse ?? 0) || 0.05; + const penaltyOnMiss = Number(fb.penaltyOnMiss ?? 0) || 0.03; + const boostOnConfirm = Number(fb.boostOnConfirm ?? 0) || 0.15; + const penaltyOnError = Number(fb.penaltyOnError ?? 0) || 0.10; const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2; const confirmKeywords = fb.confirmKeywords ?? ["correct", "right", "yes", "confirmed", "exactly", "對", "沒錯", "正確", "確認", "好的"]; const errorKeywords = fb.errorKeywords ?? 
["wrong", "incorrect", "not right", "that's wrong", "error", "mistake", "fix it", "change that", "改成", "改為", "不是這樣", "不對", "錯了"]; @@ -3138,6 +3140,7 @@ const memoryLanceDBProPlugin = { } } }, { priority: 5 }); + } // ======================================================================== // Proposal A Phase 1: session_end hook - Clean up pending recalls From 64767ee4bcd1d8b51aeb960955a324baabc8f728 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 14:00:32 +0800 Subject: [PATCH 21/21] fix(proposal-a): P1/P2 Codex round-5 fixes - session cleanup, summary matching, zero config --- index.ts | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/index.ts b/index.ts index 999af289..7b9a9987 100644 --- a/index.ts +++ b/index.ts @@ -2551,7 +2551,8 @@ const memoryLanceDBProPlugin = { // Bug 1 fix: also store the injected summary lines so the feedback hook // can detect usage even when the agent doesn't use stock phrases or IDs // but directly incorporates the memory content into the response. - const injectedSummaries = selected.map((item) => item.line); + // P1 fix: store summary content only (without prefix) for accurate matching. + const injectedSummaries = selected.map((item) => item.summary); // P0-1 fix: run TTL cleanup before each set to prevent unbounded growth cleanupPendingRecall(); pendingRecall.set(sessionKeyForRecall, { @@ -2606,9 +2607,10 @@ const memoryLanceDBProPlugin = { lastRawUserMessage.delete(sessionId); // P3 fix: clean all pendingRecall entries for this session. // pendingRecall keys use format: sessionKey (or sessionKey:agentId with composite key). - // We clean any key that starts with this sessionId. + // P1 fix: use sessionId only when sessionKey is absent to avoid clearing unrelated sessions. + const sessionKeyToClean = ctx?.sessionKey ?? sessionId; for (const key of pendingRecall.keys()) { - if (key === sessionId || key.startsWith(`${sessionId}:`) || key.startsWith(`${ctx?.sessionKey ?? 
""}:`)) { + if (key === sessionId || key.startsWith(`${sessionId}:`) || (sessionKeyToClean && key.startsWith(`${sessionKeyToClean}:`))) { pendingRecall.delete(key); } } @@ -3037,13 +3039,13 @@ const memoryLanceDBProPlugin = { } // Read feedback config values with defaults - // P2 fix: coerce to Number to handle env-driven string config values. - // Without coercion, string values would cause string concatenation in Math.min/max. + // P2 fix: coerce to Number with the default applied INSIDE the coercion. Number() never returns null/undefined, so a trailing `??` can never supply the default; unset or non-numeric values fall back to the default, while an explicit 0 is preserved. + const feedbackNumber = (value: unknown, fallback: number): number => { const parsed = Number(value ?? fallback); return Number.isFinite(parsed) ? parsed : fallback; }; const fb = config.feedback ?? {}; - const boostOnUse = Number(fb.boostOnUse ?? 0) || 0.05; - const penaltyOnMiss = Number(fb.penaltyOnMiss ?? 0) || 0.03; - const boostOnConfirm = Number(fb.boostOnConfirm ?? 0) || 0.15; - const penaltyOnError = Number(fb.penaltyOnError ?? 0) || 0.10; + const boostOnUse = feedbackNumber(fb.boostOnUse, 0.05); + const penaltyOnMiss = feedbackNumber(fb.penaltyOnMiss, 0.03); + const boostOnConfirm = feedbackNumber(fb.boostOnConfirm, 0.15); + const penaltyOnError = feedbackNumber(fb.penaltyOnError, 0.10); const minRecallCountForPenalty = fb.minRecallCountForPenalty ?? 2; const confirmKeywords = fb.confirmKeywords ?? ["correct", "right", "yes", "confirmed", "exactly", "對", "沒錯", "正確", "確認", "好的"]; const errorKeywords = fb.errorKeywords ?? ["wrong", "incorrect", "not right", "that's wrong", "error", "mistake", "fix it", "change that", "改成", "改為", "不是這樣", "不對", "錯了"];