Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 266 additions & 6 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import {
import {
extractReflectionLearningGovernanceCandidates,
extractInjectableReflectionMappedMemoryItems,
isRecallUsed,
} from "./src/reflection-slices.js";
import { createReflectionEventId } from "./src/reflection-event-store.js";
import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js";
Expand Down Expand Up @@ -223,6 +224,22 @@ interface PluginConfig {
skipLowValue?: boolean;
maxExtractionsPerHour?: number;
};
feedback?: {
/** Boost importance when a recalled memory is used (default: 0.05) */
boostOnUse?: number;
/** Penalty when a recalled memory is not used after consecutive misses (default: 0.03) */
penaltyOnMiss?: number;
/** Extra boost when user explicitly confirms a recalled memory is correct (default: 0.15) */
boostOnConfirm?: number;
/** Extra penalty when user explicitly corrects a non-recalled memory (default: 0.10) */
penaltyOnError?: number;
/** Minimum recall (injection) count before penalty applies to this memory (default: 2) */
minRecallCountForPenalty?: number;
/** Keywords indicating user confirmation of a recalled memory */
confirmKeywords?: string[];
/** Keywords indicating user correction/error for a non-recalled memory */
errorKeywords?: string[];
};
}

type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high";
Expand Down Expand Up @@ -2004,6 +2021,19 @@ const memoryLanceDBProPlugin = {
// Per-session buffer of user ingress texts queued for auto-capture processing.
// NOTE(review): keyed by a session identifier — confirm key format at usage sites.
const autoCapturePendingIngressTexts = new Map<string, string[]>();
// Per-session list of recently seen texts; presumably used to de-duplicate
// repeated auto-capture candidates — TODO confirm against the capture hook.
const autoCaptureRecentTexts = new Map<string, string[]>();

// ========================================================================
// Proposal A Phase 1: Recall Usage Tracking Hooks
// ========================================================================
// Track pending recalls per session for usage scoring
/**
 * One turn's worth of auto-recall state, keyed by `sessionKey:agentId`.
 * Created by the auto-recall before_prompt_build hook when memories are
 * injected into the prompt; `responseText` is filled in afterwards by the
 * agent_end hook, and the feedback hook (the NEXT turn's before_prompt_build)
 * consumes the paired data to score recall usage, then deletes the entry.
 */
type PendingRecallEntry = {
  // IDs of the memories injected into the prompt this turn.
  recallIds: string[];
  // Agent's final response text for the same turn; "" until agent_end runs.
  responseText: string;
  // Epoch milliseconds at injection time.
  injectedAt: number;
  /** Summary text lines actually injected into the prompt, used for usage detection. */
  injectedSummaries: string[];
};
// Pending recall state per `sessionKey:agentId`; entries are removed after
// scoring or by the session_end cleanup hook.
const pendingRecall = new Map<string, PendingRecallEntry>();

const logReg = isCliMode() ? api.logger.debug : api.logger.info;
logReg(
`memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})`
Expand Down Expand Up @@ -2577,6 +2607,24 @@ const memoryLanceDBProPlugin = {
`memory-lancedb-pro: injecting ${selected.length} memories into context for agent ${agentId}`,
);

// Create or update pendingRecall for this turn so the feedback hook
// (which runs in the NEXT turn's before_prompt_build after agent_end)
// sees a matching pair: Turn N recallIds + Turn N responseText.
// agent_end will write responseText into this same pendingRecall
// entry (only updating responseText, never clearing recallIds).
// Include agentId in the key so different agents in the same session do not overwrite each other's pendingRecall.
const sessionKeyForRecall = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentId ?? ""}`;
// Bug 1 fix: also store the injected summary lines so the feedback hook
// can detect usage even when the agent doesn't use stock phrases or IDs
// but directly incorporates the memory content into the response.
const injectedSummaries = selected.map((item) => item.line);
pendingRecall.set(sessionKeyForRecall, {
recallIds: selected.map((item) => item.id),
responseText: "", // Will be populated by agent_end
injectedAt: Date.now(),
injectedSummaries,
});

return {
prependContext:
`<relevant-memories>\n` +
Expand Down Expand Up @@ -2620,6 +2668,14 @@ const memoryLanceDBProPlugin = {
recallHistory.delete(sessionId);
turnCounter.delete(sessionId);
lastRawUserMessage.delete(sessionId);
// P3 fix: clean all pendingRecall entries for this session.
// pendingRecall keys use format: sessionKey (or sessionKey:agentId with composite key).
// We clean any key that starts with this sessionId.
for (const key of pendingRecall.keys()) {
if (key === sessionId || key.startsWith(`${sessionId}:`) || key.startsWith(`${ctx?.sessionKey ?? ""}:`)) {
pendingRecall.delete(key);
}
}
}
// Also clean by channelId/conversationId if present (shared cache key)
const cacheKey = ctx?.channelId || ctx?.conversationId || "";
Expand Down Expand Up @@ -2972,7 +3028,200 @@ const memoryLanceDBProPlugin = {
};

api.on("agent_end", agentEndAutoCaptureHook);
}

// ========================================================================
Comment on lines 3030 to +3032
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Restore autoCapture block boundary before feedback hooks

The if (config.autoCapture !== false) block is no longer closed before the new feedback-hook section, so subsequent registrations (e.g., selfImprovement hooks starting at if (config.selfImprovement?.enabled !== false)) are now conditionally skipped whenever autoCapture is disabled. This is a behavioral regression for configs that set autoCapture: false but still rely on command/session hooks, and it changes feature wiring far beyond recall feedback.

Useful? React with 👍 / 👎.

// Proposal A Phase 1: agent_end hook - Store response text for usage tracking
// ========================================================================
// NOTE: Only writes responseText to an EXISTING pendingRecall entry created
// by before_prompt_build (auto-recall). Does NOT create a new entry.
// This ensures recallIds (written by auto-recall in the same turn) and
// responseText (written here) remain paired for the feedback hook.
api.on("agent_end", (event: any, ctx: any) => {
  // Use the same key format as the auto-recall hook (sessionKey:agentId) so
  // we update the matching entry.
  const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
  const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`;
  // Fix: removed the old `if (!sessionKey) return;` guard — the template
  // literal above always contains at least ":" and can never be falsy, so
  // the guard was unreachable dead code.

  // Extract the text of the final message emitted by the agent this turn.
  let lastMsgText: string | null = null;
  if (event.messages && Array.isArray(event.messages)) {
    const lastMsg = event.messages[event.messages.length - 1];
    if (lastMsg && typeof lastMsg === "object") {
      const msgObj = lastMsg as Record<string, unknown>;
      lastMsgText = extractTextContent(msgObj.content);
    }
  }

  // Only update an existing pendingRecall entry — do NOT create one.
  // This preserves recallIds written by auto-recall earlier in this turn.
  const existing = pendingRecall.get(sessionKey);
  if (existing && lastMsgText && lastMsgText.trim().length > 0) {
    existing.responseText = lastMsgText;
  }
}, { priority: 20 });

// ========================================================================
// Proposal A Phase 1: before_prompt_build hook (priority 5) - Score recalls
// ========================================================================
api.on("before_prompt_build", async (event: any, ctx: any) => {
  // Use same key format as the auto-recall hook (sessionKey:agentId) so we
  // read the entry written for this agent in the previous turn.
  const agentIdForKey = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
  const sessionKey = `${ctx?.sessionKey || ctx?.sessionId || "default"}:${agentIdForKey ?? ""}`;
  const pending = pendingRecall.get(sessionKey);
  if (!pending) return;

  // Guard: only score if responseText has substantial content.
  const responseText = pending.responseText;
  if (!responseText || responseText.length <= 24) {
    // Skip scoring for empty or very short responses, and clear the entry so
    // the next turn does not re-trigger feedback on stale recallIds.
    pendingRecall.delete(sessionKey);
    return;
  }

  // Recall IDs and injected summary lines were stored by auto-recall's
  // before_prompt_build hook in the PREVIOUS turn. Summaries let isRecallUsed
  // detect when the agent incorporated memory content directly.
  const injectedIds = pending.recallIds ?? [];
  const injectedSummaries = pending.injectedSummaries ?? [];

  // Fix: if nothing was injected there is nothing to score, but the entry
  // must still be cleared — previously it survived forever because the
  // finally-block deletion only ran inside the non-empty branch.
  if (injectedIds.length === 0) {
    pendingRecall.delete(sessionKey);
    return;
  }

  // Read feedback config values with defaults.
  // Fix (P2 review finding): config.feedback is forwarded untyped by
  // parsePluginConfig, so validate field types here. A string where an array
  // is expected (common JSON/YAML misconfiguration) previously threw inside
  // the hook and the catch path silently disabled recall scoring.
  const fb = config.feedback ?? {};
  const asNum = (v: unknown, dflt: number): number =>
    typeof v === "number" && Number.isFinite(v) ? v : dflt;
  const asStrArray = (v: unknown, dflt: string[]): string[] =>
    Array.isArray(v) ? v.filter((s): s is string => typeof s === "string") : dflt;
  const boostOnUse = asNum(fb.boostOnUse, 0.05);
  const penaltyOnMiss = asNum(fb.penaltyOnMiss, 0.03);
  const boostOnConfirm = asNum(fb.boostOnConfirm, 0.15);
  const penaltyOnError = asNum(fb.penaltyOnError, 0.10);
  const minRecallCountForPenalty = asNum(fb.minRecallCountForPenalty, 2);
  const confirmKeywords = asStrArray(fb.confirmKeywords, ["正確", "yes", "right", "沒錯", "確認", "correct", "ok"]);
  const errorKeywords = asStrArray(fb.errorKeywords, ["不是", "錯", "不對", "wrong", "no", "not right", "錯誤", "更正"]);

  // event.prompt is a plain string in the current hook contract, so extract
  // the user's last message from the event.messages array instead.
  let userPromptText = "";
  try {
    if (event.messages && Array.isArray(event.messages)) {
      for (let i = event.messages.length - 1; i >= 0; i--) {
        const msg = event.messages[i];
        if (!msg || msg.role !== "user") continue;
        if (typeof msg.content === "string" && msg.content.trim().length > 0) {
          userPromptText = msg.content.trim();
          break;
        }
        if (Array.isArray(msg.content)) {
          // Handle array-form content.
          const text = extractTextContent(msg.content);
          if (text && text.trim().length > 0) {
            userPromptText = text.trim();
            break;
          }
        }
      }
    }
  } catch (_e) {
    userPromptText = "";
  }

  // Helper: true if the user prompt contains any keyword (case-insensitive).
  // Hoisted: lowercase the haystack once instead of once per keyword.
  const lowerPrompt = userPromptText.toLowerCase();
  const containsKeyword = (keywords: string[]): boolean =>
    keywords.some((kw) => lowerPrompt.includes(kw.toLowerCase()));

  // Score each recall individually — a single batch-level "used" decision
  // corrupts ranking when the agent uses only some of the injected memories.
  // Fix: the old batch-level `let usedRecall` computed before this loop was
  // dead code, immediately shadowed by the per-recall constant below.
  try {
    // Lookup: recallId -> the summary text injected for that specific recall.
    const summaryMap = new Map<string, string>();
    for (let i = 0; i < injectedIds.length; i++) {
      if (injectedSummaries[i]) {
        summaryMap.set(injectedIds[i], injectedSummaries[i]);
      }
    }

    for (const recallId of injectedIds) {
      const summaryText = summaryMap.get(recallId) ?? "";
      // Score this specific recall independently.
      const usedRecall = isRecallUsed(
        responseText,
        [recallId],
        summaryText ? [summaryText] : [],
      );

      const entry = await store.getById(recallId, undefined);
      if (!entry) continue;
      const meta = parseSmartMetadata(entry.metadata, entry);
      const currentImportance = meta.importance ?? entry.importance ?? 0.5;

      if (usedRecall) {
        // Used: boost importance, plus an extra boost when the user
        // explicitly confirms; reset the consecutive-miss counter.
        let newImportance = Math.min(1.0, currentImportance + boostOnUse);
        if (containsKeyword(confirmKeywords)) {
          newImportance = Math.min(1.0, newImportance + boostOnConfirm);
        }
        await store.update(recallId, { importance: newImportance }, undefined);
        await store.patchMetadata(
          recallId,
          { last_confirmed_use_at: Date.now(), bad_recall_count: 0 },
          undefined,
        );
      } else {
        // Not used: penalize only on explicit user correction or repeated
        // misses, and only once the memory has been recalled at least
        // minRecallCountForPenalty times.
        const badCount = meta.bad_recall_count || 0;
        let newImportance = currentImportance;
        if (containsKeyword(errorKeywords)) {
          if ((meta.injected_count || 0) >= minRecallCountForPenalty) {
            newImportance = Math.max(0.1, newImportance - penaltyOnError);
          }
          await store.update(recallId, { importance: newImportance }, undefined);
          await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined);
        } else if (badCount >= 2) {
          if ((meta.injected_count || 0) >= minRecallCountForPenalty) {
            newImportance = Math.max(0.1, newImportance - penaltyOnMiss);
          }
          await store.update(recallId, { importance: newImportance }, undefined);
          await store.patchMetadata(recallId, { bad_recall_count: badCount + 1 }, undefined);
        }
      }
    }
  } catch (err) {
    api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`);
  } finally {
    pendingRecall.delete(sessionKey);
  }
}, { priority: 5 });

// ========================================================================
// Proposal A Phase 1: session_end hook - Clean up pending recalls
// ========================================================================
api.on("session_end", (_event: any, ctx: any) => {
  // Drop every pendingRecall entry belonging to this session, whether stored
  // under a bare session identifier or a composite `sessionKey:agentId` key.
  // When autoCapture is disabled the auto-capture session_end cleanup
  // (priority 10) never runs, so this hook must cover composite keys too.
  const sessionIds = [ctx?.sessionKey || "", ctx?.sessionId || ""];
  const staleKeys = [...pendingRecall.keys()].filter((key) =>
    sessionIds.some((id) => key === id || key.startsWith(`${id}:`)),
  );
  for (const staleKey of staleKeys) {
    pendingRecall.delete(staleKey);
  }
}, { priority: 20 });

// ========================================================================
// Integrated Self-Improvement (inheritance + derived)
Expand Down Expand Up @@ -3971,16 +4220,21 @@ export function parsePluginConfig(value: unknown): PluginConfig {
typeof cfg.retrieval === "object" && cfg.retrieval !== null
? (() => {
const retrieval = { ...(cfg.retrieval as Record<string, unknown>) } as Record<string, unknown>;
if (typeof retrieval.rerankApiKey === "string") {
// Bug 6 fix: only resolve env vars for rerank fields when reranking is
// actually enabled AND the field contains a ${...} placeholder.
// This prevents startup failures when reranking is disabled and rerankApiKey
// is left as an unresolved placeholder.
const rerankEnabled = retrieval.rerank !== "none";
if (rerankEnabled && typeof retrieval.rerankApiKey === "string" && retrieval.rerankApiKey.includes("${")) {
retrieval.rerankApiKey = resolveEnvVars(retrieval.rerankApiKey);
}
if (typeof retrieval.rerankEndpoint === "string") {
if (rerankEnabled && typeof retrieval.rerankEndpoint === "string" && retrieval.rerankEndpoint.includes("${")) {
retrieval.rerankEndpoint = resolveEnvVars(retrieval.rerankEndpoint);
}
if (typeof retrieval.rerankModel === "string") {
if (rerankEnabled && typeof retrieval.rerankModel === "string" && retrieval.rerankModel.includes("${")) {
retrieval.rerankModel = resolveEnvVars(retrieval.rerankModel);
}
if (typeof retrieval.rerankProvider === "string") {
if (rerankEnabled && typeof retrieval.rerankProvider === "string" && retrieval.rerankProvider.includes("${")) {
retrieval.rerankProvider = resolveEnvVars(retrieval.rerankProvider);
}
return retrieval as any;
Expand Down Expand Up @@ -4119,6 +4373,12 @@ export function parsePluginConfig(value: unknown): PluginConfig {
: 30,
}
: { skipLowValue: false, maxExtractionsPerHour: 30 },
// Bug 3 fix: parse and return the feedback config block so deployments
// that specify custom feedback values actually take effect instead of
// falling back to hardcoded defaults.
feedback: typeof cfg.feedback === "object" && cfg.feedback !== null
? { ...(cfg.feedback as Record<string, unknown>) }
Comment on lines +4379 to +4380
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Validate feedback config field types before consumption

parsePluginConfig now forwards cfg.feedback as an untyped object, but the scoring hook later assumes confirmKeywords/errorKeywords are arrays and calls .some(...) on them. If a deployment supplies these as strings (a common JSON/YAML misconfiguration), the hook throws and the catch path disables recall scoring for that turn, so custom feedback settings can silently break the new feature.

Useful? React with 👍 / 👎.

: {},
};
}

Expand All @@ -4131,7 +4391,7 @@ export function resetRegistration() {
// Note: WeakSets cannot be cleared by design. In test scenarios where the
// same process reloads the module, a fresh module state means a new WeakSet.
// For hot-reload scenarios, the module is re-imported fresh.
_registeredApis.clear();
// (WeakSet.clear() does not exist, so we do nothing here.)
}

export default memoryLanceDBProPlugin;
5 changes: 3 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions src/auto-capture-cleanup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ function stripLeadingRuntimeWrappers(text: string): string {
continue;
}

// Bug fix: also strip known boilerplate continuation lines (e.g.
// "Results auto-announce to your requester.", "Do not use any memory tools.")
// that appear right after the wrapper prefix. These lines do NOT match the
// wrapper prefix regex but are part of the wrapper boilerplate.
if (strippingLeadIn) {
AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.lastIndex = 0;
if (AUTO_CAPTURE_RUNTIME_WRAPPER_BOILERPLATE_RE.test(current)) {
continue;
}
}

strippingLeadIn = false;
cleanedLines.push(line);
}
Expand Down
Loading
Loading