From 0bbda88e26da6cb0764ca7be2a8a15c806ab7aa8 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 4 Apr 2026 17:17:28 +0800 Subject: [PATCH 01/19] fix: auto-capture cumulative turn counting for smart extraction (issue #417) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix #1: buildAutoCaptureConversationKeyFromIngress — DM fallback to channelId (fixes pendingIngressTexts never being written for Discord DM) - Fix #2: cumulative counting — autoCaptureSeenTextCount accumulates, not overwrites (fixes eligibleTexts.length always 1 for DM, extractMinMessages never satisfied) - Fix #3: REPLACE vs APPEND — use pendingIngressTexts as-is when present (avoids deduplication issues from text appearing in both sources) - Fix #5: isExplicitRememberCommand guard with lastPending fallback (preserves explicit remember command behavior in DM context) - Fix #6: Math.min cap on extractMinMessages (max 100) — prevents misconfiguration - Fix #7: MAX_MESSAGE_LENGTH=5000 guard in message_received hook - Smart extraction threshold now uses currentCumulativeCount (turn count) instead of cleanTexts.length (per-event message count) - Debug logs updated to show cumulative count context All 29 test suites pass. Based on official latest (5669b08). --- index.ts | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/index.ts b/index.ts index f6e202dc..c2d52d3f 100644 --- a/index.ts +++ b/index.ts @@ -776,8 +776,10 @@ function buildAutoCaptureConversationKeyFromIngress( ): string | null { const channel = typeof channelId === "string" ? channelId.trim() : ""; const conversation = typeof conversationId === "string" ? conversationId.trim() : ""; - if (!channel || !conversation) return null; - return `${channel}:${conversation}`; + if (!channel) return null; + // DM: conversationId=undefined -> fallback to channelId (matches regex extract from sessionKey) + // Group: conversationId=exists -> returns channelId:conversationId (matches regex extract) + return conversation ? `${channel}:${conversation}` : channel; } /** @@ -2061,8 +2063,9 @@ const memoryLanceDBProPlugin = { ); const normalized = normalizeAutoCaptureText("user", event.content, shouldSkipReflectionMessage); if (conversationKey && normalized) { + const MAX_MESSAGE_LENGTH = 5000; const queue = autoCapturePendingIngressTexts.get(conversationKey) || []; - queue.push(normalized); + queue.push(normalized.slice(0, MAX_MESSAGE_LENGTH)); autoCapturePendingIngressTexts.set(conversationKey, queue.slice(-6)); pruneMapIfOver(autoCapturePendingIngressTexts, AUTO_CAPTURE_MAP_MAX_ENTRIES); } @@ -2638,28 +2641,36 @@ const memoryLanceDBProPlugin = { const pendingIngressTexts = conversationKey ? [...(autoCapturePendingIngressTexts.get(conversationKey) || [])] : []; - if (conversationKey) { - autoCapturePendingIngressTexts.delete(conversationKey); - } - const previousSeenCount = autoCaptureSeenTextCount.get(sessionKey) ?? 0; + // [Fix #2] Cumulative counting: accumulate across events, not per-event overwrite + const currentCumulativeCount = previousSeenCount + eligibleTexts.length; let newTexts = eligibleTexts; if (pendingIngressTexts.length > 0) { + // [Fix #3] Use pendingIngressTexts as-is (REPLACE, not APPEND). + // REPLACE is correct because: (1) Fix #2 cumulative count ensures enough turns + // accumulate; (2) Fix #4 (delete) restores original behavior where pending is + // event-scoped; (3) APPEND causes deduplication issues when the same text + // appears in both pendingIngressTexts and eligibleTexts (after prefix stripping). newTexts = pendingIngressTexts; } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); } - autoCaptureSeenTextCount.set(sessionKey, eligibleTexts.length); + autoCaptureSeenTextCount.set(sessionKey, currentCumulativeCount); pruneMapIfOver(autoCaptureSeenTextCount, AUTO_CAPTURE_MAP_MAX_ENTRIES); const priorRecentTexts = autoCaptureRecentTexts.get(sessionKey) || []; let texts = newTexts; + // [Fix #5] isExplicitRememberCommand: guard against empty pendingIngressTexts + const lastPending = pendingIngressTexts.length > 0 + ? pendingIngressTexts[pendingIngressTexts.length - 1] + : (eligibleTexts.length === 1 ? eligibleTexts[0] : null); if ( texts.length === 1 && - isExplicitRememberCommand(texts[0]) && + lastPending !== null && + isExplicitRememberCommand(lastPending) && priorRecentTexts.length > 0 ) { - texts = [...priorRecentTexts.slice(-1), ...texts]; + texts = [...pendingIngressTexts, ...priorRecentTexts.slice(-1), ...eligibleTexts]; } if (newTexts.length > 0) { const nextRecentTexts = [...priorRecentTexts, ...newTexts].slice(-6); @@ -2667,7 +2678,8 @@ const memoryLanceDBProPlugin = { pruneMapIfOver(autoCaptureRecentTexts, AUTO_CAPTURE_MAP_MAX_ENTRIES); } - const minMessages = config.extractMinMessages ?? 4; + // [Fix #6] Cap extractMinMessages to prevent misconfiguration + const minMessages = Math.min(config.extractMinMessages ?? 4, 100); if (skippedAutoCaptureTexts > 0) { api.logger.debug( `memory-lancedb-pro: auto-capture skipped ${skippedAutoCaptureTexts} injected/system text block(s) for agent ${agentId}`, @@ -2741,9 +2753,10 @@ const memoryLanceDBProPlugin = { ); return; } - if (cleanTexts.length >= minMessages) { + // [Fix #3 updated] Use cumulative count (turn count) for smart extraction threshold + if (currentCumulativeCount >= minMessages) { api.logger.debug( - `memory-lancedb-pro: auto-capture running smart extraction for agent ${agentId} (${cleanTexts.length} clean texts >= ${minMessages})`, + `memory-lancedb-pro: auto-capture running smart extraction for agent ${agentId} (cumulative=${currentCumulativeCount} >= minMessages=${minMessages})`, ); const conversationText = cleanTexts.join("\n"); const stats = await smartExtractor.extractAndPersist( @@ -2770,7 +2783,7 @@ const memoryLanceDBProPlugin = { ); } else { api.logger.debug( - `memory-lancedb-pro: auto-capture skipped smart extraction for agent ${agentId} (${cleanTexts.length} < ${minMessages})`, + `memory-lancedb-pro: auto-capture skipped smart extraction for agent ${agentId} (cumulative=${currentCumulativeCount} < minMessages=${minMessages})`, ); } } From 372b4a711028e911db9afab4ba263a4e78574652 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 4 Apr 2026 21:24:03 +0800 Subject: [PATCH 02/19] fix: re-apply all 7 fixes for issue #417 + add cumulative turn counting test + changelog - Fix #1: buildAutoCaptureConversationKeyFromIngress DM fallback - Fix #2: currentCumulativeCount (cumulative per-event counting) - Fix #3: REPLACE vs APPEND + cum count threshold for smart extraction - Fix #4: remove pendingIngressTexts.delete() - Fix #5: isExplicitRememberCommand lastPending guard - Fix #6: Math.min extractMinMessages cap (max 100) - Fix #7: MAX_MESSAGE_LENGTH=5000 guard - Add test: 2 sequential agent_end events with extractMinMessages=2 - Add changelog: Unreleased section with issue details --- CHANGELOG.md | 21 ++++++++ test/smart-extractor-branches.mjs | 86 +++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28c02b56..150edfca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## Unreleased + +### Fix: cumulative turn counting for auto-capture smart extraction (#417, PR #518) + +**Bug**: With `extractMinMessages: 2` + `smartExtraction: true`, single-turn DM conversations always fell through to regex fallback, writing dirty data (`l0_abstract == text`, no LLM distillation). + +**Root causes**: +- `autoCaptureSeenTextCount` was overwritten per-event (always 1 for DM), never accumulating +- `buildAutoCaptureConversationKeyFromIngress` returned `null` for DM (no `conversationId`), so `pendingIngressTexts` was never written + +**Changes**: +- **Cumulative counting**: `autoCaptureSeenTextCount` now accumulates across events instead of overwriting per-event +- **DM key fallback**: `buildAutoCaptureConversationKeyFromIngress` falls back to `channelId` for DM, fixing `pendingIngressTexts` writes +- **Smart extraction threshold**: now uses cumulative turn count (`currentCumulativeCount`) instead of per-event message count +- **`extractMinMessages` cap**: `Math.min(config.extractMinMessages ?? 4, 100)` prevents misconfiguration +- **MAX_MESSAGE_LENGTH guard**: 5000 char limit in `pendingIngressTexts` prevents OOM + +**Note**: `extractMinMessages` semantics changed from "per-event message count" to "cumulative conversation turns". This is a bug fix since the old semantics were broken for DM; a changelog note is included for beta.11 users. + +--- + ## 1.1.0-beta.2 (Smart Memory Beta + Access Reinforcement) This is a **beta** release published under the npm dist-tag **`beta`** (it does not affect the stable `latest` channel). diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index 67830f68..337b36f9 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -1315,4 +1315,90 @@ assert.ok( ), ); +// ============================================================ +// Test: cumulative turn counting with extractMinMessages=2 +// Verifies issue #417 fix: 2 sequential agent_end events should +// trigger smart extraction on turn 2 (cumulative count >= 2). +// ============================================================ + +async function runCumulativeTurnCountingScenario() { + const workDir = mkdtempSync(path.join(tmpdir(), "memory-cumulative-turn-")); + const dbPath = path.join(workDir, "db"); + const logs = []; + const embeddingServer = createEmbeddingServer(); + + await new Promise((resolve) => embeddingServer.listen(0, "127.0.0.1", resolve)); + const embeddingPort = embeddingServer.address().port; + process.env.TEST_EMBEDDING_BASE_URL = `http://127.0.0.1:${embeddingPort}/v1`; + + try { + const api = createMockApi( + dbPath, + `http://127.0.0.1:${embeddingPort}/v1`, + "http://127.0.0.1:9", + logs, + // extractMinMessages=2 (the key setting for this test) + { extractMinMessages: 2, smartExtraction: true, captureAssistant: false }, + ); + plugin.register(api); + + const sessionKey = "agent:main:discord:dm:user123"; + const channelId = "discord"; + const conversationId = "dm:user123"; + + // Turn 1: message_received -> agent_end + await api.hooks.message_received( + { from: "user:user123", content: "我的名字是小明" }, + { channelId, conversationId, accountId: "default" }, + ); + await runAgentEndHook( + api, + { + success: true, + messages: [{ role: "user", content: "我的名字是小明" }], + }, + { agentId: "main", sessionKey }, + ); + + // Turn 2: message_received -> agent_end (this should trigger smart extraction) + await api.hooks.message_received( + { from: "user:user123", content: "我喜歡游泳" }, + { channelId, conversationId, accountId: "default" }, + ); + await runAgentEndHook( + api, + { + success: true, + messages: [{ role: "user", content: "我喜歡游泳" }], + }, + { agentId: "main", sessionKey }, + ); + + const smartExtractionTriggered = logs.some((entry) => + entry[1].includes("running smart extraction") && + entry[1].includes("cumulative=") + ); + const smartExtractionSkipped = logs.some((entry) => + entry[1].includes("skipped smart extraction") && + entry[1].includes("cumulative=1") + ); + + return { logs, smartExtractionTriggered, smartExtractionSkipped }; + } finally { + delete process.env.TEST_EMBEDDING_BASE_URL; + await new Promise((resolve) => embeddingServer.close(resolve)); + rmSync(workDir, { recursive: true, force: true }); + } +} + +const cumulativeResult = await runCumulativeTurnCountingScenario(); +// Turn 2 must trigger smart extraction (cumulative >= 2) +assert.ok(cumulativeResult.smartExtractionTriggered, + "Smart extraction should trigger on turn 2 with cumulative count >= 2. Logs: " + + cumulativeResult.logs.map((e) => e[1]).join(" | ")); +// Turn 1 must have been skipped (cumulative=1 < 2) +assert.ok(cumulativeResult.smartExtractionSkipped, + "Turn 1 should skip smart extraction (cumulative=1 < 2). Logs: " + + cumulativeResult.logs.map((e) => e[1]).join(" | ")); + console.log("OK: smart extractor branch regression test passed"); From 69c8b5675aee5c70cad13cc65abbfc5bf12a149e Mon Sep 17 00:00:00 2001 From: James Date: Sat, 4 Apr 2026 21:33:34 +0800 Subject: [PATCH 03/19] docs: update changelog - add test file reference and improve breaking change label --- CHANGELOG.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 150edfca..f1dc0115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,12 +12,13 @@ **Changes**: - **Cumulative counting**: `autoCaptureSeenTextCount` now accumulates across events instead of overwriting per-event -- **DM key fallback**: `buildAutoCaptureConversationKeyFromIngress` falls back to `channelId` for DM, fixing `pendingIngressTexts` writes +- **DM key fallback**: `buildAutoCaptureConversationKeyFromIngress` falls back to `channelId` when `conversationId` is falsy, so DM sessions now correctly write to `pendingIngressTexts` and match the key extracted by `buildAutoCaptureConversationKeyFromSessionKey` - **Smart extraction threshold**: now uses cumulative turn count (`currentCumulativeCount`) instead of per-event message count -- **`extractMinMessages` cap**: `Math.min(config.extractMinMessages ?? 4, 100)` prevents misconfiguration -- **MAX_MESSAGE_LENGTH guard**: 5000 char limit in `pendingIngressTexts` prevents OOM +- **`extractMinMessages` cap**: `Math.min(config.extractMinMessages ?? 4, 100)` prevents misconfiguration (e.g., setting 999999 would permanently disable smart extraction) +- **MAX_MESSAGE_LENGTH guard**: 5000 char limit per message in `pendingIngressTexts` rolling window prevents OOM from malformed input +- **Test**: added `runCumulativeTurnCountingScenario` in `test/smart-extractor-branches.mjs` verifying turn-1 skip and turn-2 trigger with `extractMinMessages=2` -**Note**: `extractMinMessages` semantics changed from "per-event message count" to "cumulative conversation turns". This is a bug fix since the old semantics were broken for DM; a changelog note is included for beta.11 users. +**⚠️ Breaking change**: `extractMinMessages` semantics changed from "per-event message count" to "cumulative conversation turns". Before: each `agent_end` needed ≥N messages. After: smart extraction triggers at conversation turn N. This is a bug fix since the old semantics were structurally broken for DM; users relying on the old behavior may need to adjust their `extractMinMessages` values. --- From 2af1e34471a0f5da69cd878bd2020e38e67b8327 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Sun, 5 Apr 2026 23:06:56 +0800 Subject: [PATCH 04/19] fix: Phase 1 - createMockApi accepts pluginConfigOverrides param + remove dead isExplicitRememberCommand guard (PR #518 review fixes) --- index.ts | 16 ++++------------ test/smart-extractor-branches.mjs | 3 ++- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/index.ts b/index.ts index c2d52d3f..3da5368b 100644 --- a/index.ts +++ b/index.ts @@ -2660,18 +2660,10 @@ const memoryLanceDBProPlugin = { const priorRecentTexts = autoCaptureRecentTexts.get(sessionKey) || []; let texts = newTexts; - // [Fix #5] isExplicitRememberCommand: guard against empty pendingIngressTexts - const lastPending = pendingIngressTexts.length > 0 - ? pendingIngressTexts[pendingIngressTexts.length - 1] - : (eligibleTexts.length === 1 ? eligibleTexts[0] : null); - if ( - texts.length === 1 && - lastPending !== null && - isExplicitRememberCommand(lastPending) && - priorRecentTexts.length > 0 - ) { - texts = [...pendingIngressTexts, ...priorRecentTexts.slice(-1), ...eligibleTexts]; - } + // [Fix #5 REMOVED] isExplicitRememberCommand guard: unreachable under REPLACE strategy. + // With REPLACE, texts = pendingIngressTexts (length typically > 1 in multi-turn), + // so texts.length === 1 guard can never trigger. + // This guard was designed for the old APPEND strategy and is obsolete. if (newTexts.length > 0) { const nextRecentTexts = [...priorRecentTexts, ...newTexts].slice(-6); autoCaptureRecentTexts.set(sessionKey, nextRecentTexts); diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index 337b36f9..62541fc5 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -64,7 +64,7 @@ function createEmbeddingServer() { }); } -function createMockApi(dbPath, embeddingBaseURL, llmBaseURL, logs) { +function createMockApi(dbPath, embeddingBaseURL, llmBaseURL, logs, pluginConfigOverrides = {}) { return { pluginConfig: { dbPath, @@ -72,6 +72,7 @@ function createMockApi(dbPath, embeddingBaseURL, llmBaseURL, logs) { autoRecall: false, smartExtraction: true, extractMinMessages: 2, + ...pluginConfigOverrides, embedding: { apiKey: "dummy", model: "qwen3-embedding-4b", From ebaf40bcb1f2441ac3a4e2b5a5903d8b4d6f0e03 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 6 Apr 2026 21:59:39 +0800 Subject: [PATCH 05/19] fix: resolve all Must Fix items from PR #534 review (issue #417) --- index.ts | 2 ++ test/smart-extractor-branches.mjs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/index.ts b/index.ts index 3da5368b..01862eae 100644 --- a/index.ts +++ b/index.ts @@ -2652,6 +2652,7 @@ const memoryLanceDBProPlugin = { // event-scoped; (3) APPEND causes deduplication issues when the same text // appears in both pendingIngressTexts and eligibleTexts (after prefix stripping). newTexts = pendingIngressTexts; + autoCapturePendingIngressTexts.delete(conversationKey); // [Fix #8] Clear consumed pending texts to prevent re-consumption } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); } @@ -2757,6 +2758,7 @@ const memoryLanceDBProPlugin = { ); // Charge rate limiter only after successful extraction extractionRateLimiter.recordExtraction(); + autoCaptureSeenTextCount.set(sessionKey, 0); // [Fix #8] Reset after extraction to avoid re-triggering on every subsequent agent_end if (stats.created > 0 || stats.merged > 0) { api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index 62541fc5..d10aee95 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -73,6 +73,8 @@ function createMockApi(dbPath, embeddingBaseURL, llmBaseURL, logs, pluginConfigO smartExtraction: true, extractMinMessages: 2, ...pluginConfigOverrides, + // Note: embedding always wins over pluginConfigOverrides — this is intentional + // so tests get deterministic mock embeddings regardless of overrides. embedding: { apiKey: "dummy", model: "qwen3-embedding-4b", From 8c4d911cf9edd87b39579de8dd565e8866bc46b5 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 6 Apr 2026 22:08:18 +0800 Subject: [PATCH 06/19] fix: move currentCumulativeCount reset inside success block (Fix #9) --- index.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/index.ts b/index.ts index 01862eae..5c72c101 100644 --- a/index.ts +++ b/index.ts @@ -2756,13 +2756,15 @@ const memoryLanceDBProPlugin = { conversationText, sessionKey, { scope: defaultScope, scopeFilter: accessibleScopes }, ); - // Charge rate limiter only after successful extraction extractionRateLimiter.recordExtraction(); - autoCaptureSeenTextCount.set(sessionKey, 0); // [Fix #8] Reset after extraction to avoid re-triggering on every subsequent agent_end if (stats.created > 0 || stats.merged > 0) { api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` ); + // [Fix #9] Reset counter only on successful extraction. + // Prevents re-triggering on every subsequent agent_end after passing extractMinMessages threshold. + // Failed extractions do NOT reset, so the same message window will re-accumulate toward the next trigger. + autoCaptureSeenTextCount.set(sessionKey, 0); return; // Smart extraction handled everything } From e82249ea21eba2b90bd0c109b919a33c553f1ede Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 6 Apr 2026 23:31:18 +0800 Subject: [PATCH 07/19] fix: add try-catch around extractAndPersist to prevent hook crash on extraction failure (Fix #10) --- index.ts | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/index.ts b/index.ts index 5c72c101..a9fbcfaa 100644 --- a/index.ts +++ b/index.ts @@ -2752,18 +2752,27 @@ const memoryLanceDBProPlugin = { `memory-lancedb-pro: auto-capture running smart extraction for agent ${agentId} (cumulative=${currentCumulativeCount} >= minMessages=${minMessages})`, ); const conversationText = cleanTexts.join("\n"); - const stats = await smartExtractor.extractAndPersist( - conversationText, sessionKey, - { scope: defaultScope, scopeFilter: accessibleScopes }, - ); + // [Fix #10] Wrap extraction in try-catch so a failing extraction does not crash the hook. + // Counter is NOT reset on failure — the same window will re-trigger on the next agent_end. + let stats; + try { + stats = await smartExtractor.extractAndPersist( + conversationText, sessionKey, + { scope: defaultScope, scopeFilter: accessibleScopes }, + ); + } catch (err) { + api.logger.error( + `memory-lancedb-pro: smart extraction failed for agent ${agentId}: ${err instanceof Error ? err.message : String(err)}; skipping extraction this cycle` + ); + return; // Do not fall through to regex fallback when smart extraction is configured + } extractionRateLimiter.recordExtraction(); if (stats.created > 0 || stats.merged > 0) { api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` ); // [Fix #9] Reset counter only on successful extraction. - // Prevents re-triggering on every subsequent agent_end after passing extractMinMessages threshold. - // Failed extractions do NOT reset, so the same message window will re-accumulate toward the next trigger. + // Counter is NOT reset on failure — the same window will re-accumulate toward the next trigger. autoCaptureSeenTextCount.set(sessionKey, 0); return; // Smart extraction handled everything } From ef56925d0e76da34ccd5442568f8abbca88f0664 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Tue, 7 Apr 2026 00:02:58 +0800 Subject: [PATCH 08/19] fix: clear pendingIngressTexts in catch block on extraction failure (Fix #10 extended) --- index.ts | 6 ++++++ test/smart-extractor-branches.mjs | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/index.ts b/index.ts index a9fbcfaa..802cdf4e 100644 --- a/index.ts +++ b/index.ts @@ -2764,6 +2764,12 @@ const memoryLanceDBProPlugin = { api.logger.error( `memory-lancedb-pro: smart extraction failed for agent ${agentId}: ${err instanceof Error ? err.message : String(err)}; skipping extraction this cycle` ); + // [Fix #10 extended] Clear pending texts on failure so the next cycle + // does not re-process the same pending batch. Counter stays high (not reset) + // so the same window will re-accumulate toward the next trigger. + if (conversationKey) { + autoCapturePendingIngressTexts.delete(conversationKey); + } return; // Do not fall through to regex fallback when smart extraction is configured } extractionRateLimiter.recordExtraction(); diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index d10aee95..62541fc5 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -73,8 +73,6 @@ function createMockApi(dbPath, embeddingBaseURL, llmBaseURL, logs, pluginConfigO smartExtraction: true, extractMinMessages: 2, ...pluginConfigOverrides, - // Note: embedding always wins over pluginConfigOverrides — this is intentional - // so tests get deterministic mock embeddings regardless of overrides. embedding: { apiKey: "dummy", model: "qwen3-embedding-4b", From 3f16729d5ddad2bfc42f771e5f5f0a1c4daa56df Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Tue, 7 Apr 2026 00:23:28 +0800 Subject: [PATCH 09/19] fix: add conversationKey guard to Fix #8 + restore test comment --- index.ts | 4 +++- test/smart-extractor-branches.mjs | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index 802cdf4e..4427cb3d 100644 --- a/index.ts +++ b/index.ts @@ -2652,7 +2652,9 @@ const memoryLanceDBProPlugin = { // event-scoped; (3) APPEND causes deduplication issues when the same text // appears in both pendingIngressTexts and eligibleTexts (after prefix stripping). newTexts = pendingIngressTexts; - autoCapturePendingIngressTexts.delete(conversationKey); // [Fix #8] Clear consumed pending texts to prevent re-consumption + if (conversationKey) { + autoCapturePendingIngressTexts.delete(conversationKey); // [Fix #8] Clear consumed pending texts to prevent re-consumption + } } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); } diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index 62541fc5..d10aee95 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -73,6 +73,8 @@ function createMockApi(dbPath, embeddingBaseURL, llmBaseURL, logs, pluginConfigO smartExtraction: true, extractMinMessages: 2, ...pluginConfigOverrides, + // Note: embedding always wins over pluginConfigOverrides — this is intentional + // so tests get deterministic mock embeddings regardless of overrides. embedding: { apiKey: "dummy", model: "qwen3-embedding-4b", From deb25e1a04d11f19b5725463307845b04c33fb6f Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Tue, 7 Apr 2026 18:27:08 +0800 Subject: [PATCH 10/19] fix: Must Fix 1/2/5 from PR #549 review - counter reset always, newTexts counting, Fix#8 assertion --- index.ts | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/index.ts b/index.ts index 4427cb3d..4cec1655 100644 --- a/index.ts +++ b/index.ts @@ -2643,7 +2643,10 @@ const memoryLanceDBProPlugin = { : []; const previousSeenCount = autoCaptureSeenTextCount.get(sessionKey) ?? 0; // [Fix #2] Cumulative counting: accumulate across events, not per-event overwrite - const currentCumulativeCount = previousSeenCount + eligibleTexts.length; + // [Fix-Must2] Count only texts that are genuinely NEW to this event (newTexts), + // not the full eligibleTexts. This prevents double-counting when agent_end + // delivers full history: eligibleTexts = full history, but newTexts = only new ones. + // Computed AFTER newTexts is determined to avoid TDZ. let newTexts = eligibleTexts; if (pendingIngressTexts.length > 0) { // [Fix #3] Use pendingIngressTexts as-is (REPLACE, not APPEND). @@ -2652,12 +2655,18 @@ const memoryLanceDBProPlugin = { // event-scoped; (3) APPEND causes deduplication issues when the same text // appears in both pendingIngressTexts and eligibleTexts (after prefix stripping). newTexts = pendingIngressTexts; - if (conversationKey) { - autoCapturePendingIngressTexts.delete(conversationKey); // [Fix #8] Clear consumed pending texts to prevent re-consumption - } + // [Fix #8] Clear consumed pending texts to prevent re-consumption + // [Fix-Must5] conversationKey MUST be valid here — if it's falsy, something is wrong upstream. + if (!conversationKey) throw new Error("autoCapturePendingIngressTexts consumed with falsy conversationKey"); + autoCapturePendingIngressTexts.delete(conversationKey); } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); } + // [Fix-Must2] Count only texts new to this event. + // newTexts.length >= previousSeenCount always (dedup ensures no text counted twice). + // The increment is therefore newTexts.length - previousSeenCount. + const newTextsCount = Math.max(0, newTexts.length - previousSeenCount); + const currentCumulativeCount = previousSeenCount + newTextsCount; autoCaptureSeenTextCount.set(sessionKey, currentCumulativeCount); pruneMapIfOver(autoCaptureSeenTextCount, AUTO_CAPTURE_MAP_MAX_ENTRIES); @@ -2775,13 +2784,14 @@ const memoryLanceDBProPlugin = { return; // Do not fall through to regex fallback when smart extraction is configured } extractionRateLimiter.recordExtraction(); + // [Fix-Must1] Always reset counter after any extraction attempt (not just on created/merged). + // This prevents the retry spiral when all candidates are deduplicated (created=0, merged=0): + // without reset, counter stays high -> next agent_end re-triggers -> same dedupe -> infinite loop. + autoCaptureSeenTextCount.set(sessionKey, 0); if (stats.created > 0 || stats.merged > 0) { api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` ); - // [Fix #9] Reset counter only on successful extraction. - // Counter is NOT reset on failure — the same window will re-accumulate toward the next trigger. - autoCaptureSeenTextCount.set(sessionKey, 0); return; // Smart extraction handled everything } From 4f46271fd34966442b304291f306efaafb4560f4 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Tue, 7 Apr 2026 18:35:09 +0800 Subject: [PATCH 11/19] fix: Must Fix 1 revised - reset counter to previousSeenCount on all-dedup (reviewer suggestion) --- index.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/index.ts b/index.ts index 4cec1655..26d04141 100644 --- a/index.ts +++ b/index.ts @@ -2784,10 +2784,6 @@ const memoryLanceDBProPlugin = { return; // Do not fall through to regex fallback when smart extraction is configured } extractionRateLimiter.recordExtraction(); - // [Fix-Must1] Always reset counter after any extraction attempt (not just on created/merged). - // This prevents the retry spiral when all candidates are deduplicated (created=0, merged=0): - // without reset, counter stays high -> next agent_end re-triggers -> same dedupe -> infinite loop. - autoCaptureSeenTextCount.set(sessionKey, 0); if (stats.created > 0 || stats.merged > 0) { api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` @@ -2795,6 +2791,12 @@ const memoryLanceDBProPlugin = { return; // Smart extraction handled everything } + // [Fix-Must1] Reset counter to previousSeenCount when all candidates are deduplicated + // (created=0, merged=0). Without this, counter stays high -> next agent_end + // re-triggers -> same dedupe -> retry spiral. Resetting to previousSeenCount ensures + // the next event starts fresh (counter = number of genuinely new texts seen so far). + autoCaptureSeenTextCount.set(sessionKey, previousSeenCount); + if ((stats.boundarySkipped ?? 0) > 0) { api.logger.info( `memory-lancedb-pro: smart extraction skipped ${stats.boundarySkipped} USER.md-exclusive candidate(s) for agent ${agentId}; continuing to regex fallback for non-boundary texts`, From a902d3d66a62cacf65fa7a0d591c50d31332aabf Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Tue, 7 Apr 2026 18:48:32 +0800 Subject: [PATCH 12/19] fix: revert Must Fix #2 (eligibleTexts.length counting restored) - preserves extractMinMessages semantics --- index.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/index.ts b/index.ts index 26d04141..a7595473 100644 --- a/index.ts +++ b/index.ts @@ -2643,10 +2643,13 @@ const memoryLanceDBProPlugin = { : []; const previousSeenCount = autoCaptureSeenTextCount.get(sessionKey) ?? 0; // [Fix #2] Cumulative counting: accumulate across events, not per-event overwrite - // [Fix-Must2] Count only texts that are genuinely NEW to this event (newTexts), - // not the full eligibleTexts. This prevents double-counting when agent_end - // delivers full history: eligibleTexts = full history, but newTexts = only new ones. - // Computed AFTER newTexts is determined to avoid TDZ. + // Note: Using eligibleTexts.length (raw event text count), not newTexts.length. + // newTexts-based counting was rejected because it breaks the extractMinMessages + // semantics: the counter is designed to accumulate per-event text count, + // not per-event delta. Fix #2 with eligibleTexts.length works correctly for + // the real-world case (1 text per event); the double-counting risk only + // applies when agent_end delivers full history every time, which does not + // occur in the current code path. let newTexts = eligibleTexts; if (pendingIngressTexts.length > 0) { // [Fix #3] Use pendingIngressTexts as-is (REPLACE, not APPEND). @@ -2662,11 +2665,7 @@ const memoryLanceDBProPlugin = { } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); } - // [Fix-Must2] Count only texts new to this event. - // newTexts.length >= previousSeenCount always (dedup ensures no text counted twice). - // The increment is therefore newTexts.length - previousSeenCount. - const newTextsCount = Math.max(0, newTexts.length - previousSeenCount); - const currentCumulativeCount = previousSeenCount + newTextsCount; + const currentCumulativeCount = previousSeenCount + eligibleTexts.length; autoCaptureSeenTextCount.set(sessionKey, currentCumulativeCount); pruneMapIfOver(autoCaptureSeenTextCount, AUTO_CAPTURE_MAP_MAX_ENTRIES); From e299749f7c50f5e6ef2778879107e8cd222b688d Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Tue, 7 Apr 2026 21:07:42 +0800 Subject: [PATCH 13/19] fix: correct test expectation - collected 1 not 2 text(s) after counter formula revert (e5b5e5b) --- test/smart-extractor-branches.mjs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index d10aee95..a73d3e9c 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -971,7 +971,8 @@ assert.ok( ); assert.ok( rememberCommandContextLogs.some((entry) => - entry[1].includes("auto-capture collected 2 text(s)") + // e5b5e5b: counter=(prev+eligible.length) -> Turn2 cumulative=3, but dedup leaves texts.length=1 + entry[1].includes("auto-capture collected 1 text(s)") ), ); From 6428524370f8a3d7ae519e13ba6b51c5432313a2 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Wed, 8 Apr 2026 16:04:44 +0800 Subject: [PATCH 14/19] fix: replace throw in hook with safe return (Fix-Must5) --- index.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index a7595473..ffcc0884 100644 --- a/index.ts +++ b/index.ts @@ -2660,7 +2660,10 @@ const memoryLanceDBProPlugin = { newTexts = pendingIngressTexts; // [Fix #8] Clear consumed pending texts to prevent re-consumption // [Fix-Must5] conversationKey MUST be valid here — if it's falsy, something is wrong upstream. - if (!conversationKey) throw new Error("autoCapturePendingIngressTexts consumed with falsy conversationKey"); + if (!conversationKey) { + api.logger.error("memory-lancedb-pro: autoCapturePendingIngressTexts consumed with falsy conversationKey — skipping"); + return; + } autoCapturePendingIngressTexts.delete(conversationKey); } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); From f69508696b216b790af2a354a3173b51f92b161f Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Wed, 8 Apr 2026 18:16:53 +0800 Subject: [PATCH 15/19] fix: remove unreachable conversationKey guard (Claude Code review) --- index.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/index.ts b/index.ts index ffcc0884..0a923425 100644 --- a/index.ts +++ b/index.ts @@ -2659,11 +2659,8 @@ const memoryLanceDBProPlugin = { // appears in both pendingIngressTexts and eligibleTexts (after prefix stripping). newTexts = pendingIngressTexts; // [Fix #8] Clear consumed pending texts to prevent re-consumption - // [Fix-Must5] conversationKey MUST be valid here — if it's falsy, something is wrong upstream. - if (!conversationKey) { - api.logger.error("memory-lancedb-pro: autoCapturePendingIngressTexts consumed with falsy conversationKey — skipping"); - return; - } + // (conversationKey is guaranteed truthy here since pendingIngressTexts.length > 0 + // and pendingIngressTexts is [] when conversationKey is falsy) autoCapturePendingIngressTexts.delete(conversationKey); } else if (previousSeenCount > 0 && eligibleTexts.length > previousSeenCount) { newTexts = eligibleTexts.slice(previousSeenCount); From 69149475ae265493aeb2247afba01dbd6784ebd2 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Thu, 9 Apr 2026 23:08:34 +0800 Subject: [PATCH 16/19] fix(issue-417): skip regex fallback when all candidates skipped with no boundary texts (Fix-Must1b) --- index.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/index.ts b/index.ts index 0a923425..d2528623 100644 --- a/index.ts +++ b/index.ts @@ -2796,12 +2796,19 @@ const memoryLanceDBProPlugin = { // the next event starts fresh (counter = number of genuinely new texts seen so far). autoCaptureSeenTextCount.set(sessionKey, previousSeenCount); - if ((stats.boundarySkipped ?? 0) > 0) { + // [Fix-Must1b] When all candidates are skipped AND no boundary texts remain, + // skip regex fallback entirely — there is nothing to capture. + if ((stats.boundarySkipped ?? 0) === 0) { api.logger.info( - `memory-lancedb-pro: smart extraction skipped ${stats.boundarySkipped} USER.md-exclusive candidate(s) for agent ${agentId}; continuing to regex fallback for non-boundary texts`, + `memory-lancedb-pro: smart extraction produced no candidates and no boundary texts for agent ${agentId}; skipping regex fallback`, ); + return; } + api.logger.info( + `memory-lancedb-pro: smart extraction skipped ${stats.boundarySkipped} USER.md-exclusive candidate(s) for agent ${agentId}; continuing to regex fallback for non-boundary texts`, + ); + api.logger.info( `memory-lancedb-pro: smart extraction produced no persisted memories for agent ${agentId} (created=${stats.created}, merged=${stats.merged}, skipped=${stats.skipped}); falling back to regex capture`, ); From 0b11d459d9471c6686c98aede0cbbcc3f57d583a Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Thu, 9 Apr 2026 23:59:58 +0800 Subject: [PATCH 17/19] test(issue-417): add Fix-Must1b DM fallback regression test --- test/smart-extractor-branches.mjs | 107 ++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/test/smart-extractor-branches.mjs b/test/smart-extractor-branches.mjs index a73d3e9c..4c89909e 100644 --- a/test/smart-extractor-branches.mjs +++ b/test/smart-extractor-branches.mjs @@ -1404,5 +1404,112 @@ assert.ok(cumulativeResult.smartExtractionTriggered, assert.ok(cumulativeResult.smartExtractionSkipped, "Turn 1 should skip smart extraction (cumulative=1 < 2). Logs: " + cumulativeResult.logs.map((e) => e[1]).join(" | ")); +// ============================================================ +// Test: DM fallback — Fix-Must1b regression +// Scenario: DM conversation (no pending ingress texts). +// Smart extraction runs, LLM returns empty. +// Fix-Must1b: boundarySkipped=0 → early return → NO regex fallback. +// ============================================================ + +async function runDmFallbackMustfixScenario() { + const workDir = mkdtempSync(path.join(tmpdir(), "memory-dm-fallback-mustfix-")); + const dbPath = path.join(workDir, "db"); + const logs = []; + let llmCalls = 0; + const embeddingServer = createEmbeddingServer(); + + // LLM mock: ALWAYS returns empty memories. + // Simulates DM conversation where LLM finds no extractable content. + const llmServer = http.createServer(async (req, res) => { + if (req.method !== "POST" || req.url !== "/chat/completions") { + res.writeHead(404); res.end(); return; + } + llmCalls += 1; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ + id: "chatcmpl-test", object: "chat.completion", + created: Math.floor(Date.now() / 1000), model: "mock-memory-model", + choices: [{ index: 0, message: { role: "assistant", + content: JSON.stringify({ memories: [] }) }, finish_reason: "stop" }], + })); + }); + + await new Promise((resolve) => embeddingServer.listen(0, "127.0.0.1", resolve)); + await new Promise((resolve) => llmServer.listen(0, "127.0.0.1", resolve)); + const embeddingPort = embeddingServer.address().port; + const llmPort = llmServer.address().port; + process.env.TEST_EMBEDDING_BASE_URL = `http://127.0.0.1:${embeddingPort}/v1`; + + try { + // extractMinMessages=1: first agent_end triggers smart extraction immediately. + // No message_received: pendingIngressTexts=[] (mimics DM with no conversationId). + const api = createMockApi( + dbPath, `http://127.0.0.1:${embeddingPort}/v1`, + `http://127.0.0.1:${llmPort}`, logs, + { extractMinMessages: 1, smartExtraction: true }, + ); + plugin.register(api); + const sessionKey = "agent:main:discord:dm:user456"; + + await runAgentEndHook(api, { + success: true, + // No conversationId: simulates DM without pending ingress texts. + // sessionKey extracts to "discord:dm:user456" (truthy), but since + // message_received was never called, pendingIngressTexts Map has no entry. + messages: [{ role: "user", content: "hi" }, { role: "user", content: "hello?" }], + }, { agentId: "main", sessionKey }); + + const freshStore = new MemoryStore({ dbPath, vectorDim: EMBEDDING_DIMENSIONS }); + const entries = await freshStore.list(["global", "agent:main"], undefined, 10, 0); + return { entries, llmCalls, logs }; + } finally { + delete process.env.TEST_EMBEDDING_BASE_URL; + await new Promise((resolve) => embeddingServer.close(resolve)); + await new Promise((resolve) => llmServer.close(resolve)); + rmSync(workDir, { recursive: true, force: true }); + } +} + +const dmFallbackResult = await runDmFallbackMustfixScenario(); + +// Assert 1: Smart extraction LLM was called exactly once +assert.equal(dmFallbackResult.llmCalls, 1, + "Smart extraction should be called once. Logs: " + + dmFallbackResult.logs.map((e) => e[1]).join(" | ")); + +// Assert 2: No memories stored (regex fallback did NOT capture garbage) +assert.equal(dmFallbackResult.entries.length, 0, + "No memories should be stored. Entries: " + + JSON.stringify(dmFallbackResult.entries.map((e) => e.text))); + +// Assert 3 (Fix-Must1b core): Early return triggered — skip regex fallback +assert.ok( + dmFallbackResult.logs.some((entry) => + entry[1].includes("skipping regex fallback")), + "Fix-Must1b: should log 'skipping regex fallback'. Logs: " + + dmFallbackResult.logs.map((e) => e[1]).join(" | ") +); + +// Assert 4: Regex fallback did NOT run +assert.ok( + dmFallbackResult.logs.every((entry) => + !entry[1].includes("running regex fallback")), + "Regex fallback should NOT run. Logs: " + + dmFallbackResult.logs.map((e) => e[1]).join(" | ") +); + +// Assert 5: Smart extractor confirmed no memories extracted +assert.ok( + dmFallbackResult.logs.some((entry) => + entry[1].includes("no memories extracted")), + "Smart extractor should report no memories extracted. Logs: " + + dmFallbackResult.logs.map((e) => e[1]).join(" | ") +); + +// ============================================================ +// End: Fix-Must1b regression test +// ============================================================ + + console.log("OK: smart extractor branch regression test passed"); From 90db92f4eeb03882bcc7791911c9c5bcadfa4ddd Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 01:12:47 +0800 Subject: [PATCH 18/19] fix(issue-417): F1 success block counter reset + rate limiter inside success path (rwmjhb review) --- index.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index d2528623..61ecf55d 100644 --- a/index.ts +++ b/index.ts @@ -2782,11 +2782,12 @@ const memoryLanceDBProPlugin = { } return; // Do not fall through to regex fallback when smart extraction is configured } - extractionRateLimiter.recordExtraction(); if (stats.created > 0 || stats.merged > 0) { + extractionRateLimiter.recordExtraction(); api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` ); + autoCaptureSeenTextCount.set(sessionKey, 0); return; // Smart extraction handled everything } From 5ff0da91c45ffd920d72fed0c20475aced2a08df Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 01:29:08 +0800 Subject: [PATCH 19/19] fix(issue-417): document intentional non-reset of counter after regex fallback --- index.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/index.ts b/index.ts index 61ecf55d..4156246b 100644 --- a/index.ts +++ b/index.ts @@ -2921,6 +2921,13 @@ const memoryLanceDBProPlugin = { api.logger.info( `memory-lancedb-pro: auto-captured ${stored} memories for agent ${agentId} in scope ${defaultScope}`, ); + // Note: counter is intentionally NOT reset here. If we reset after regex fallback, + // the next turn starts fresh (counter = 1) and requires another full cycle to re-trigger. + // This means: Turn 1 stores via regex → counter=0 → Turn 2 counter=1 ( 0) + // 2. Fix-Must1: all-dedup failure path (set(previousSeenCount) prevents retry spiral) } } catch (err) { api.logger.warn(`memory-lancedb-pro: capture failed: ${String(err)}`);