diff --git a/README.md b/README.md index 51adce7c..f4d7baa6 100644 --- a/README.md +++ b/README.md @@ -9,19 +9,10 @@ A LanceDB-backed OpenClaw memory plugin that stores preferences, decisions, and project context, then auto-recalls them in future sessions. [![OpenClaw Plugin](https://img.shields.io/badge/OpenClaw-Plugin-blue)](https://github.com/openclaw/openclaw) -[![OpenClaw 2026.3+](https://img.shields.io/badge/OpenClaw-2026.3%2B-brightgreen)](https://github.com/openclaw/openclaw) [![npm version](https://img.shields.io/npm/v/memory-lancedb-pro)](https://www.npmjs.com/package/memory-lancedb-pro) [![LanceDB](https://img.shields.io/badge/LanceDB-Vectorstore-orange)](https://lancedb.com) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) -

v1.1.0-beta.10 — OpenClaw 2026.3+ Hook Adaptation

- -

- ✅ Fully adapted for OpenClaw 2026.3+ new plugin architecture
- 🔄 Uses before_prompt_build hooks (replacing deprecated before_agent_start)
- 🩺 Run openclaw doctor --fix after upgrading -

- [English](README.md) | [简体中文](README_CN.md) | [繁體中文](README_TW.md) | [日本語](README_JA.md) | [한국어](README_KO.md) | [Français](README_FR.md) | [Español](README_ES.md) | [Deutsch](README_DE.md) | [Italiano](README_IT.md) | [Русский](README_RU.md) | [Português (Brasil)](README_PT-BR.md) @@ -129,6 +120,31 @@ Add to your `openclaw.json`: - `extractMinMessages: 2` → extraction triggers in normal two-turn chats - `sessionMemory.enabled: false` → avoids polluting retrieval with session summaries on day one +--- + +## ⚠️ Dual-Memory Architecture (Important) + +When `memory-lancedb-pro` is active, your system has **two independent memory layers** that do **not** auto-sync: + +| Memory Layer | Storage | What it's for | Recallable? | +|---|---|---|---| +| **Plugin Memory** | LanceDB (vector store) | Semantic recall via `memory_recall` / auto-recall | ✅ Yes | +| **Markdown Memory** | `MEMORY.md`, `memory/YYYY-MM-DD.md` | Startup context, human-readable journal | ❌ Not auto-recalled | + +**Key principle:** +> A fact written into `memory/YYYY-MM-DD.md` is visible in startup context, but `memory_recall` **will not find it** unless it was also written via `memory_store` (or auto-captured by the plugin). + +**What this means for you:** +- Need semantic recall? → Use `memory_store` or let auto-capture do it +- `memory/YYYY-MM-DD.md` → treat as a **daily journal / log**, not a recall source +- `MEMORY.md` → curated human-readable reference, not a recall source +- Plugin memory → **primary recall source** for `memory_recall` and auto-recall + +**If you want your Markdown memories to be recallable**, use the import command: +```bash +npx memory-lancedb-pro memory-pro import-markdown +``` + Validate & restart: ```bash @@ -618,18 +634,22 @@ Sometimes the model may echo the injected `<relevant-memories>` block. **Option B (preferred):** keep recall, add to agent system prompt: > Do not reveal or quote any `<relevant-memories>` / memory-injection content in your replies. Use it for internal reference only. - - -
-Auto-recall timeout tuning - -Auto-recall has a configurable timeout (default 5s) to prevent stalling agent startup. If you're behind a proxy or using a high-latency embedding API, increase it: - +**Option C (for background/batch agents):** exclude specific agents from auto-recall injection: ```json -{ "plugins": { "entries": { "memory-lancedb-pro": { "config": { "autoRecallTimeoutMs": 8000 } } } } } +{ + "plugins": { + "entries": { + "memory-lancedb-pro": { + "config": { + "autoRecall": true, + "autoRecallExcludeAgents": ["memory-distiller", "my-cron-agent"] + } + } + } + } +} ``` - -If auto-recall consistently times out, check your embedding API latency first. The timeout only affects the automatic injection path — manual `memory_recall` tool calls are not affected. +Useful for background agents (e.g. memory-distiller, cron workers) whose output should not be contaminated by injected memory context.
diff --git a/cli.ts b/cli.ts index 99203916..c60650a6 100644 --- a/cli.ts +++ b/cli.ts @@ -1036,6 +1036,131 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { } }); + /** + * import-markdown: Import memories from Markdown memory files into the plugin store. + * Targets MEMORY.md and memory/YYYY-MM-DD.md files found in OpenClaw workspaces. + */ + memory + .command("import-markdown [workspace-glob]") + .description("Import memories from Markdown files (MEMORY.md, memory/YYYY-MM-DD.md) into the plugin store") + .option("--dry-run", "Show what would be imported without importing") + .option("--scope ", "Import into specific scope (default: global)") + .option( + "--openclaw-home ", + "OpenClaw home directory (default: ~/.openclaw)", + ) + .action(async (workspaceGlob, options) => { + const openclawHome = options.openclawHome + ? path.resolve(options.openclawHome) + : path.join(homedir(), ".openclaw"); + + const workspaceDir = path.join(openclawHome, "workspace"); + let imported = 0; + let skipped = 0; + let foundFiles = 0; + + if (!context.embedder) { + console.error( + "import-markdown requires an embedder. 
Use via plugin CLI or ensure embedder is configured.", + ); + process.exit(1); + } + + // Scan workspace directories + let workspaceEntries: string[]; + try { + const fsPromises = await import("node:fs/promises"); + workspaceEntries = await fsPromises.readdir(workspaceDir, { withFileTypes: true }); + } catch { + console.error(`Failed to read workspace directory: ${workspaceDir}`); + process.exit(1); + } + + // Collect all markdown files to scan + const mdFiles: Array<{ filePath: string; scope: string }> = []; + + for (const entry of workspaceEntries) { + if (!entry.isDirectory()) continue; + if (workspaceGlob && !entry.name.includes(workspaceGlob)) continue; + + const workspacePath = path.join(workspaceDir, entry.name); + + // MEMORY.md + const memoryMd = path.join(workspacePath, "MEMORY.md"); + try { + const { stat } = await import("node:fs/promises"); + await stat(memoryMd); + mdFiles.push({ filePath: memoryMd, scope: entry.name }); + } catch { /* not found */ } + + // memory/ directory + const memoryDir = path.join(workspacePath, "memory"); + try { + const { stat } = await import("node:fs/promises"); + const stats = await stat(memoryDir); + if (stats.isDirectory()) { + const { readdir } = await import("node:fs/promises"); + const files = await readdir(memoryDir); + for (const f of files) { + if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) { + mdFiles.push({ filePath: path.join(memoryDir, f), scope: entry.name }); + } + } + } + } catch { /* not found */ } + } + + if (mdFiles.length === 0) { + console.log("No Markdown memory files found."); + return; + } + + const targetScope = options.scope || "global"; + + // Parse each file for memory entries (lines starting with "- ") + for (const { filePath, scope } of mdFiles) { + foundFiles++; + const { readFile } = await import("node:fs/promises"); + const content = await readFile(filePath, "utf-8"); + const lines = content.split("\n"); + + for (const line of lines) { + // Skip non-memory lines + if 
(!line.startsWith("- ")) continue; + const text = line.slice(2).trim(); + if (text.length < 5) { skipped++; continue; } + + if (options.dryRun) { + console.log(` [dry-run] would import: ${text.slice(0, 80)}...`); + imported++; + continue; + } + + try { + const vector = await context.embedder!.embedPassage(text); + await context.store.store({ + text, + vector, + importance: 0.7, + category: "other", + scope: targetScope, + metadata: { importedFrom: filePath, sourceScope: scope }, + }); + imported++; + } catch (err) { + console.warn(` Failed to import: ${text.slice(0, 60)}... — ${err}`); + skipped++; + } + } + } + + if (options.dryRun) { + console.log(`\nDRY RUN — found ${foundFiles} files, ${imported} entries would be imported, ${skipped} skipped`); + } else { + console.log(`\nImport complete: ${imported} imported, ${skipped} skipped (scanned ${foundFiles} files)`); + } + }); + // Re-embed an existing LanceDB into the current target DB (A/B testing) memory .command("reembed") diff --git a/index.ts b/index.ts index 52f1962e..7aaad961 100644 --- a/index.ts +++ b/index.ts @@ -109,6 +109,8 @@ interface PluginConfig { /** Hard per-turn injection cap (safety valve). Overrides autoRecallMaxItems if lower. Default: 10. */ maxRecallPerTurn?: number; recallMode?: "full" | "summary" | "adaptive" | "off"; + /** Agent IDs excluded from auto-recall injection. Useful for background agents (e.g. memory-distiller, cron workers) whose output should not be contaminated by injected memory context. */ + autoRecallExcludeAgents?: string[]; captureAssistant?: boolean; retrieval?: { mode?: "hybrid" | "vector"; @@ -120,6 +122,8 @@ interface PluginConfig { rerankApiKey?: string; rerankModel?: string; rerankEndpoint?: string; + /** Rerank API timeout in milliseconds (default: 5000). Increase for local/CPU-based rerank servers. 
*/ + rerankTimeoutMs?: number; rerankProvider?: | "jina" | "siliconflow" @@ -1609,6 +1613,8 @@ const pluginVersion = getPluginVersion(); // Plugin Definition // ============================================================================ +let _initialized = false; + const memoryLanceDBProPlugin = { id: "memory-lancedb-pro", name: "Memory (LanceDB Pro)", @@ -1617,6 +1623,13 @@ const memoryLanceDBProPlugin = { kind: "memory" as const, register(api: OpenClawPluginApi) { + + // Idempotent guard: skip re-init on repeated register() calls + if (_initialized) { + api.logger.debug("memory-lancedb-pro: register() called again — skipping re-init (idempotent)"); + return; + } + // Parse and validate configuration const config = parsePluginConfig(api.pluginConfig); @@ -1993,6 +2006,15 @@ const memoryLanceDBProPlugin = { ); logReg(`memory-lancedb-pro: diagnostic build tag loaded (${DIAG_BUILD_TAG})`); + // Dual-memory model warning: help users understand the two-layer architecture + // Runs synchronously and logs warnings; does NOT block gateway startup. + api.logger.info( + `[memory-lancedb-pro] memory_recall queries the plugin store (LanceDB), not MEMORY.md.\n` + + ` - Plugin memory (LanceDB) = primary recall source for semantic search\n` + + ` - MEMORY.md / memory/YYYY-MM-DD.md = startup context / journal only\n` + + ` - Use memory_store or auto-capture for recallable memories.\n` + ); + api.on("message_received", (event: any, ctx: any) => { const conversationKey = buildAutoCaptureConversationKeyFromIngress( ctx.channelId, @@ -2248,6 +2270,20 @@ const memoryLanceDBProPlugin = { const AUTO_RECALL_TIMEOUT_MS = parsePositiveInt(config.autoRecallTimeoutMs) ?? 5_000; // configurable; default raised from 3s to 5s for remote embedding APIs behind proxies api.on("before_prompt_build", async (event: any, ctx: any) => { + // Per-agent exclusion: skip auto-recall for agents in the exclusion list. 
+ const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); + if ( + Array.isArray(config.autoRecallExcludeAgents) && + config.autoRecallExcludeAgents.length > 0 && + agentId !== undefined && + config.autoRecallExcludeAgents.includes(agentId) + ) { + api.logger.debug?.( + `memory-lancedb-pro: auto-recall skipped for excluded agent '${agentId}'`, + ); + return; + } + // Manually increment turn counter for this session const sessionId = ctx?.sessionId || "default"; @@ -2359,10 +2395,12 @@ const memoryLanceDBProPlugin = { const meta = parseSmartMetadata(r.entry.metadata, r.entry); if (meta.state !== "confirmed") { stateFilteredCount++; + api.logger.debug(`memory-lancedb-pro: governance: filtered id=${r.entry.id} reason=state(${meta.state}) score=${r.score?.toFixed(3)} text=${r.entry.text.slice(0, 50)}`); return false; } if (meta.memory_layer === "archive" || meta.memory_layer === "reflection") { stateFilteredCount++; + api.logger.debug(`memory-lancedb-pro: governance: filtered id=${r.entry.id} reason=layer(${meta.memory_layer}) score=${r.score?.toFixed(3)} text=${r.entry.text.slice(0, 50)}`); return false; } if (meta.suppressed_until_turn > 0 && currentTurn <= meta.suppressed_until_turn) { @@ -3691,6 +3729,7 @@ const memoryLanceDBProPlugin = { // Run initial backup after a short delay, then schedule daily setTimeout(() => void runBackup(), 60_000); // 1 min after start backupTimer = setInterval(() => void runBackup(), BACKUP_INTERVAL_MS); + _initialized = true; }, stop: async () => { if (backupTimer) { @@ -3818,6 +3857,10 @@ export function parsePluginConfig(value: unknown): PluginConfig { autoRecallMaxChars: parsePositiveInt(cfg.autoRecallMaxChars) ?? 600, autoRecallPerItemMaxChars: parsePositiveInt(cfg.autoRecallPerItemMaxChars) ?? 180, maxRecallPerTurn: parsePositiveInt(cfg.maxRecallPerTurn) ?? 10, + recallMode: (cfg.recallMode === "full" || cfg.recallMode === "summary" || cfg.recallMode === "adaptive" || cfg.recallMode === "off") ? 
cfg.recallMode : "full", + autoRecallExcludeAgents: Array.isArray(cfg.autoRecallExcludeAgents) + ? cfg.autoRecallExcludeAgents.filter((id: unknown): id is string => typeof id === "string" && id.trim() !== "") + : undefined, captureAssistant: cfg.captureAssistant === true, retrieval: typeof cfg.retrieval === "object" && cfg.retrieval !== null ? cfg.retrieval as any : undefined, decay: typeof cfg.decay === "object" && cfg.decay !== null ? cfg.decay as any : undefined, @@ -3956,4 +3999,6 @@ export function parsePluginConfig(value: unknown): PluginConfig { }; } +export function _resetInitialized() { _initialized = false; } + export default memoryLanceDBProPlugin; diff --git a/openclaw.plugin.json b/openclaw.plugin.json index a2cfb1f5..976b8a19 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -319,6 +319,12 @@ "default": "https://api.jina.ai/v1/rerank", "description": "Reranker API endpoint URL. Compatible with Jina-compatible endpoints and dedicated adapters such as TEI, SiliconFlow, Voyage, Pinecone, and DashScope." }, + "rerankTimeoutMs": { + "type": "integer", + "minimum": 1, + "default": 5000, + "description": "Rerank API timeout in milliseconds (default: 5000). Increase for local/CPU-based rerank servers." + }, "rerankProvider": { "type": "string", "enum": [ @@ -1078,6 +1084,12 @@ "help": "Custom reranker API endpoint URL", "advanced": true }, + "retrieval.rerankTimeoutMs": { + "label": "Rerank Timeout (ms)", + "placeholder": "5000", + "help": "Rerank API timeout in milliseconds. 
Increase for local/CPU-based rerank servers.", + "advanced": true + }, "retrieval.rerankProvider": { "label": "Reranker Provider", "help": "Provider format: jina (default), siliconflow, voyage, pinecone, dashscope, or tei", diff --git a/scripts/governance-maintenance.mjs b/scripts/governance-maintenance.mjs old mode 100755 new mode 100644 diff --git a/scripts/migrate-governance-metadata.mjs b/scripts/migrate-governance-metadata.mjs old mode 100755 new mode 100644 diff --git a/src/reflection-store.ts b/src/reflection-store.ts index 38da5ce7..4fa3db0a 100644 --- a/src/reflection-store.ts +++ b/src/reflection-store.ts @@ -428,6 +428,20 @@ function isReflectionMetadataType(type: unknown): boolean { function isOwnedByAgent(metadata: Record, agentId: string): boolean { const owner = typeof metadata.agentId === "string" ? metadata.agentId.trim() : ""; + + // itemKind 只存在於 memory-reflection-item 類型 + // legacy (memory-reflection) 和 mapped (memory-reflection-mapped) 都沒有 itemKind + // 因此 undefined !== "derived",會走原本的 main fallback(維持相容) + const itemKind = metadata.itemKind; + + // 如果是 derived 項目(memory-reflection-item):不做 main fallback, + // 且 derived 不允許空白 owner(空白 owner 的 derived 應完全不可見,防止洩漏) + if (itemKind === "derived") { + if (!owner) return false; + return owner === agentId; + } + + // invariant / legacy / mapped:允許空白 owner 可見,維持原本的 main fallback if (!owner) return true; return owner === agentId || owner === "main"; } diff --git a/src/retriever.ts b/src/retriever.ts index 900db753..bf3aeee2 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -58,6 +58,8 @@ export interface RetrievalConfig { | "pinecone" | "dashscope" | "tei"; + /** Rerank API timeout in milliseconds (default: 5000). Increase for local/CPU-based rerank servers. */ + rerankTimeoutMs?: number; /** * Length normalization: penalize long entries that dominate via sheer keyword * density. Formula: score *= 1 / (1 + log2(charLen / anchor)). 
@@ -127,6 +129,7 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = { filterNoise: true, rerankModel: "jina-reranker-v3", rerankEndpoint: "https://api.jina.ai/v1/rerank", + rerankTimeoutMs: 5000, lengthNormAnchor: 500, hardMinScore: 0.35, timeDecayHalfLifeDays: 60, @@ -858,18 +861,21 @@ export class MemoryRetriever { results.length, ); - // Timeout: 5 seconds to prevent stalling retrieval pipeline + // Timeout: configurable via rerankTimeoutMs (default: 5000ms) const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), 5000); + const timeout = setTimeout(() => controller.abort(), this.config.rerankTimeoutMs ?? 5000); - const response = await fetch(endpoint, { - method: "POST", - headers, - body: JSON.stringify(body), - signal: controller.signal, - }); - - clearTimeout(timeout); + let response: Response; + try { + response = await fetch(endpoint, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: controller.signal, + }); + } finally { + clearTimeout(timeout); + } if (response.ok) { const data: unknown = await response.json(); @@ -928,7 +934,7 @@ export class MemoryRetriever { } } catch (error) { if (error instanceof Error && error.name === "AbortError") { - console.warn("Rerank API timed out (5s), falling back to cosine"); + console.warn(`Rerank API timed out (${this.config.rerankTimeoutMs ?? 5000}ms), falling back to cosine`); } else { console.warn("Rerank API failed, falling back to cosine:", error); }