diff --git a/benchmarks/longmemeval/run-research.ts b/benchmarks/longmemeval/run-research.ts
new file mode 100644
index 0000000..131bfea
--- /dev/null
+++ b/benchmarks/longmemeval/run-research.ts
@@ -0,0 +1,36 @@
+// LongMemEval — research-variant benchmark launcher
+//
+// Hands off to benchmarks/longmemeval/run.ts after pointing it at the
+// research server and at a dedicated results directory, so SDM-variant
+// runs do not overwrite the upstream baseline numbers kept in
+// benchmarks/results/.
+//
+// Each default below is applied only when its env var is unset or empty:
+//   MEMFORGE_URL          → http://localhost:3334 (research server port)
+//   BENCHMARK_RESULTS_DIR → benchmarks/results-research
+//
+// Usage (start the research server first):
+//   tsx src/research/research-server.ts &
+//   tsx benchmarks/longmemeval/run-research.ts
+//
+// See /root/.claude/plans/continue-the-research-on-merry-rabbit.md.
+
+// Research defaults: ||= assigns only when the current value is falsy (unset or '').
+process.env['MEMFORGE_URL'] ||= 'http://localhost:3334';
+process.env['BENCHMARK_RESULTS_DIR'] ||= 'benchmarks/results-research';
+
+// Startup banner, printed with one console.log call per entry.
+const bannerLines = [
+  '╔══════════════════════════════════════════╗',
+  '║ MemForge LongMemEval — RESEARCH VARIANT ║',
+  '╚══════════════════════════════════════════╝',
+  `Target server: ${process.env['MEMFORGE_URL']}`,
+  `Results dir: ${process.env['BENCHMARK_RESULTS_DIR']}`,
+  '',
+];
+for (const line of bannerLines) {
+  console.log(line);
+}
+
+// Dynamic import: run.js is loaded only after the defaults above are in place.
+await import('./run.js');
diff --git a/src/memory-manager.ts b/src/memory-manager.ts
index 922a3cb..1fd4fb4 100644
--- a/src/memory-manager.ts
+++ b/src/memory-manager.ts
@@ -376,7 +376,8 @@ export class MemoryManager {
    * Activated by ENABLE_LLM_RERANK=true. Sends top results + question to LLM
    * for relevance-based reordering. Adds ~2K tokens per query.
*/ - private async rerankWithLlm(question: string, results: QueryResult[]): Promise { + // VARIANT: visibility promoted from private → protected for src/research/ subclasses + protected async rerankWithLlm(question: string, results: QueryResult[]): Promise { if (!this.llm || results.length <= 1) return results; const numbered = results @@ -787,7 +788,8 @@ Ranking (numbers only):`; // ─── Hybrid search (reciprocal rank fusion) ─────────────────────────────── - private async queryHybrid( + // VARIANT: visibility promoted from private → protected for src/research/ subclasses + protected async queryHybrid( agentId: string, searchText: string, limit: number, @@ -1938,6 +1940,18 @@ Ranking (numbers only):`; // ─── Sleep Cycle ────────────────────────────────────────────────────────── + // VARIANT: factory hook — src/research/ subclasses override this to supply their own + // SleepCycleEngine. The default forwards all five arguments to the constructor, as the prior inline code did. + protected createSleepEngine( + pool: import('pg').Pool, + llm: LLMProvider, + embedder: EmbeddingProvider, + cycleConfig: SleepCycleConfig, + audit: AuditChain | null, + ): SleepCycleEngine { + return new SleepCycleEngine(pool, llm, embedder, cycleConfig, audit); + } + /** * Execute a sleep cycle — background processing that scores, triages, * revises, and maintains the knowledge base. 
@@ -1971,7 +1985,8 @@ Ranking (numbers only):`; weights: { ...this.config.sleepCycle.weights, ...safeOverrides.weights }, }; - const engine = new SleepCycleEngine(this.pool, revisionLlm, this.embedder, cycleConfig, this.audit); + // VARIANT: dispatch through factory so src/research/ subclasses can swap engines + const engine = this.createSleepEngine(this.pool, revisionLlm, this.embedder, cycleConfig, this.audit); const promise = engine.run(agentId); this.sleepLocks.set(agentId, promise); diff --git a/src/research/research-memory-manager.ts b/src/research/research-memory-manager.ts new file mode 100644 index 0000000..512bd16 --- /dev/null +++ b/src/research/research-memory-manager.ts @@ -0,0 +1,64 @@ +// MemForge — Research-variant MemoryManager +// +// Subclass scaffold for the SDM (Sparse Distributed Memory, Kanerva 1988) +// research variant. Lives in src/research/ to keep core upstream files +// (src/memory-manager.ts) untouched aside from the `protected` promotions of +// `queryHybrid` / `rerankWithLlm` and the new `createSleepEngine` factory hook. +// +// Current state: passthrough — the retrieval overrides delegate to `super` and +// `createSleepEngine` returns a `ResearchSleepCycleEngine`. This scaffold proves the +// extension points compile and gives a clean home for SDM-specific retrieval logic. +// +// Likely first SDM experiment (notes, not yet implemented): +// 1. Compute a high-dim binary/bipolar address from each warm-tier row's +// content (hash or quantized embedding) at consolidation time, store in +// a sidecar table or as a metadata column. +// 2. In queryHybrid, replace RRF with a Hamming-radius readout: +// activate all rows whose address is within Hamming distance H of the query address, +// threshold-vote the activated rows, return top-k. +// 3. Compare R@5 / R@10 against the upstream RRF baseline on LongMemEval. +// +// See /root/.claude/plans/continue-the-research-on-merry-rabbit.md. 
+
+import type { Pool } from 'pg';
+import { MemoryManager } from '../memory-manager.js';
+import { SleepCycleEngine } from '../sleep-cycle.js';
+import { getLogger } from '../logger.js';
+import type { LLMProvider } from '../llm.js';
+import type { EmbeddingProvider } from '../embedding.js';
+import type { AuditChain } from '../audit.js';
+import type { QueryResult, SleepCycleConfig } from '../types.js';
+import { ResearchSleepCycleEngine } from './research-sleep-cycle.js';
+
+const log = getLogger('research-memory-manager');
+
+export class ResearchMemoryManager extends MemoryManager {
+  /** Hybrid-search hook: logs a debug marker, then defers to super (return type inferred from it). */
+  protected override async queryHybrid(
+    agentId: string,
+    searchText: string,
+    limit: number,
+    after?: Date,
+    before?: Date,
+  ) {
+    log.debug({ agentId, mode: 'hybrid', variant: 'sdm-passthrough' }, 'research queryHybrid');
+    return super.queryHybrid(agentId, searchText, limit, after, before);
+  }
+
+  /** Rerank hook: logs a debug marker, then defers to super (return type inferred from it). */
+  protected override async rerankWithLlm(question: string, results: QueryResult[]) {
+    log.debug({ count: results.length, variant: 'sdm-passthrough' }, 'research rerank');
+    return super.rerankWithLlm(question, results);
+  }
+
+  /** Engine factory override: builds a ResearchSleepCycleEngine from the unchanged arguments. */
+  protected override createSleepEngine(
+    pool: Pool,
+    llm: LLMProvider,
+    embedder: EmbeddingProvider,
+    cycleConfig: SleepCycleConfig,
+    audit: AuditChain | null,
+  ): SleepCycleEngine {
+    return new ResearchSleepCycleEngine(pool, llm, embedder, cycleConfig, audit);
+  }
+}
diff --git a/src/research/research-server.ts b/src/research/research-server.ts
new file mode 100644
index 0000000..13cb41a
--- /dev/null
+++ b/src/research/research-server.ts
@@ -0,0 +1,128 @@
+// MemForge — Research-variant standalone server
+//
+// Mirrors src/server.ts but wires `ResearchMemoryManager` (which in turn
+// wires `ResearchSleepCycleEngine`) through createApp(). Use this entrypoint
+// to run the SDM research variant alongside the upstream server for
+// head-to-head benchmarking.
+//
+// Defaults:
+//   PORT=3334 (vs.
upstream default 3333 — coexist on one host)
+//   LOG_LEVEL inherited
+//   all other env vars identical to src/server.ts
+//
+// Run:
+//   tsx src/research/research-server.ts
+//
+// See /root/.claude/plans/continue-the-research-on-merry-rabbit.md.
+
+import { ResearchMemoryManager } from './research-memory-manager.js';
+import { createEmbeddingProvider } from '../embedding.js';
+import { createLLMProvider } from '../llm.js';
+import { closePool, getPool } from '../db.js';
+import { closeRedis } from '../cache.js';
+import { createDefaultRegistry } from '../classifier.js';
+import { wrapLLMProvider } from '../llm-safety.js';
+import { AuditChain } from '../audit.js';
+import { createApp } from '../app.js';
+import { getLogger } from '../logger.js';
+import { configureWebhooks } from '../webhooks.js';
+import type { ConsolidationMode } from '../types.js';
+
+const log = getLogger('research-server');
+
+const PORT = parseInt(process.env['PORT'] ?? '3334', 10);
+const ADMIN_TOKEN = process.env['ADMIN_TOKEN'] ?? '';
+
+const classifierRegistry = createDefaultRegistry();
+const embeddingProvider = createEmbeddingProvider();
+
+// Main LLM provider, passed through wrapLLMProvider together with the classifier
+// registry and the ALLOW_REMOTE_LLM opt-in flag.
+const llmProviderType = process.env['LLM_PROVIDER'] ?? 'none';
+const allowRemoteLLM = process.env['ALLOW_REMOTE_LLM'] === 'true';
+const rawLlmProvider = createLLMProvider();
+const llmProvider = wrapLLMProvider(rawLlmProvider, llmProviderType, classifierRegistry, allowRemoteLLM);
+
+// Revision LLM: a dedicated provider is created only when REVISION_LLM_PROVIDER is
+// set; otherwise null is handed to wrapLLMProvider with the main provider type.
+const revisionProviderType = process.env['REVISION_LLM_PROVIDER'] ?? llmProviderType;
+const rawRevisionLlmProvider = process.env['REVISION_LLM_PROVIDER']
+  ? createLLMProvider(process.env['REVISION_LLM_PROVIDER'] as 'anthropic' | 'openai' | 'ollama')
+  : null;
+const revisionLlmProvider = wrapLLMProvider(
+  rawRevisionLlmProvider,
+  revisionProviderType,
+  classifierRegistry,
+  allowRemoteLLM,
+);
+
+// `|| undefined` turns an empty DATABASE_URL into "not provided" for getPool.
+const auditChain = new AuditChain(getPool(process.env['DATABASE_URL'] || undefined), {
+  hmacKey: process.env['AUDIT_HMAC_KEY'],
+  retentionDays: parseInt(process.env['AUDIT_RETENTION_DAYS'] ?? '90', 10),
+  archiveOnExpiry: process.env['AUDIT_ARCHIVE_ON_EXPIRY'] !== 'false',
+});
+
+const manager = new ResearchMemoryManager({
+  databaseUrl: process.env['DATABASE_URL'],
+  consolidationBatchSize: parseInt(process.env['CONSOLIDATION_BATCH_SIZE'] ?? '500', 10),
+  consolidationThreshold: parseInt(process.env['CONSOLIDATION_THRESHOLD'] ?? '50', 10),
+  autoRegisterAgents: process.env['AUTO_REGISTER_AGENTS'] !== 'false',
+  embeddingProvider,
+  llmProvider,
+  revisionLlmProvider,
+  consolidationMode: (process.env['CONSOLIDATION_MODE'] as ConsolidationMode) ?? 'concat',
+  temporalDecayRate: parseFloat(process.env['TEMPORAL_DECAY_RATE'] ?? '0'),
+  consolidationInnerBatchSize: parseInt(process.env['CONSOLIDATION_INNER_BATCH_SIZE'] ?? '50', 10),
+  keywordOverlapBoost: parseFloat(process.env['KEYWORD_OVERLAP_BOOST'] ?? '0.3'),
+  temporalProximityDays: parseFloat(process.env['TEMPORAL_PROXIMITY_DAYS'] ?? '7'),
+  enableLlmRerank: process.env['ENABLE_LLM_RERANK'] === 'true',
+  enableLlmIngest: process.env['ENABLE_LLM_INGEST'] === 'true',
+  sleepCycle: {
+    tokenBudget: parseInt(process.env['SLEEP_CYCLE_TOKEN_BUDGET'] ?? '100000', 10),
+    evictionThreshold: parseFloat(process.env['SLEEP_CYCLE_EVICTION_THRESHOLD'] ?? '0.1'),
+    revisionThreshold: parseFloat(process.env['SLEEP_CYCLE_REVISION_THRESHOLD'] ?? '0.4'),
+    includeReflection: process.env['SLEEP_CYCLE_INCLUDE_REFLECTION'] !== 'false',
+    // When set, the retention period is clamped to at least one day.
+    coldRetentionDays: process.env['COLD_TIER_RETENTION_DAYS']
+      ? Math.max(1, parseInt(process.env['COLD_TIER_RETENTION_DAYS'], 10))
+      : undefined,
+    weights: { recency: 0.25, frequency: 0.20, centrality: 0.20, reflection: 0.15, stability: 0.20 },
+  },
+  auditChain,
+});
+
+configureWebhooks();
+
+const app = createApp({
+  manager,
+  auditChain,
+  classifierRegistry,
+  adminToken: ADMIN_TOKEN,
+  rateLimitWindowMs: parseInt(process.env['RATE_LIMIT_WINDOW_MS'] ?? '60000', 10),
+  rateLimitMax: parseInt(process.env['RATE_LIMIT_MAX'] ?? '100', 10),
+  port: PORT,
+  corsOrigin: process.env['CORS_ORIGIN'],
+  corsMethods: process.env['CORS_METHODS'],
+  corsHeaders: process.env['CORS_HEADERS'],
+});
+
+const server = app.listen(PORT, () => {
+  log.info(
+    {
+      port: PORT,
+      variant: 'sdm-research',
+      embeddings: manager.embeddingsEnabled,
+      summarization: manager.summarizationEnabled,
+    },
+    'research server started',
+  );
+});
+
+// Set by the first shutdown() call so a repeated SIGINT/SIGTERM is a no-op.
+let shuttingDown = false;
+
+/**
+ * Stops the HTTP server, then closes the database pool and Redis and exits.
+ *
+ * The exit code is 0 after a clean teardown and 1 when cleanup rejects or the
+ * 10-second watchdog fires first. Once a shutdown is in progress, further
+ * calls return immediately so cleanup never runs twice.
+ *
+ * @param signal - name of the signal that triggered the shutdown (logged only)
+ */
+async function shutdown(signal: string): Promise<void> {
+  if (shuttingDown) return;
+  shuttingDown = true;
+
+  log.info({ signal }, 'shutting down research server');
+  server.close(async () => {
+    try {
+      await Promise.all([closePool(), closeRedis()]);
+      process.exit(0);
+    } catch (err: unknown) {
+      // Without this catch a failed close would surface as an unhandled rejection.
+      log.error({ err, signal }, 'research server cleanup failed');
+      process.exit(1);
+    }
+  });
+  // Watchdog: force a non-zero exit if close/cleanup hangs; unref() keeps the
+  // timer itself from holding the event loop open.
+  setTimeout(() => process.exit(1), 10_000).unref();
+}
+
+process.on('SIGTERM', () => void shutdown('SIGTERM'));
+process.on('SIGINT', () => void shutdown('SIGINT'));
+
+export { app };
diff --git a/src/research/research-sleep-cycle.ts b/src/research/research-sleep-cycle.ts
new file mode 100644
index 0000000..f40a8a5
--- /dev/null
+++ b/src/research/research-sleep-cycle.ts
@@ -0,0 +1,46 @@
+// MemForge — Research-variant SleepCycleEngine
+//
+// Subclass scaffold for the SDM (Sparse Distributed Memory) research variant.
+// Mirrors the relationship between ResearchMemoryManager and MemoryManager.
+//
+// Current state: passthrough — every override delegates to `super`.
+//
+// Likely first SDM experiment for sleep phases (notes, not yet implemented):
+//   - phaseScoring: replace the 5-factor composite (recency, frequency,
+//     centrality, reflection, stability) with an SDM-flavored "activation
+//     density" signal — score by how often a memory's hard-locations were
+//     hit during recent retrievals.
+//   - reviseMemory: instead of LLM-rewriting the row's content, reinforce or
+//     decay the activation pattern at the row's hard-locations. (This is a
+//     bigger redesign and likely needs a sidecar address-space table first.)
+//   - phaseTriage: evict on activation density rather than composite
+//     importance.
+//
+// See /root/.claude/plans/continue-the-research-on-merry-rabbit.md.
+
+import { SleepCycleEngine } from '../sleep-cycle.js';
+import { getLogger } from '../logger.js';
+
+const log = getLogger('research-sleep-cycle');
+
+/**
+ * SDM research variant of the sleep-cycle engine.
+ *
+ * Each override currently logs a debug marker tagged `sdm-passthrough` and
+ * then returns the result of the corresponding base-class method.
+ */
+export class ResearchSleepCycleEngine extends SleepCycleEngine {
+  /** Phase 1 hook. The return type is inferred from the base method. */
+  protected override async phaseScoring(agentId: string) {
+    log.debug({ agentId, phase: 1, variant: 'sdm-passthrough' }, 'research phaseScoring');
+    return super.phaseScoring(agentId);
+  }
+
+  /** Phase 2 hook. Resolves to the eviction count and flagged ids reported by the base method. */
+  protected override async phaseTriage(
+    agentId: string,
+  ): Promise<{ evicted: number; flaggedIds: bigint[] }> {
+    log.debug({ agentId, phase: 2, variant: 'sdm-passthrough' }, 'research phaseTriage');
+    return super.phaseTriage(agentId);
+  }
+
+  /**
+   * Phase 3 hook for a single warm-tier row. The return type is inferred from
+   * the base method; the bigint id is stringified for the log payload.
+   */
+  protected override async reviseMemory(agentId: string, warmTierId: bigint) {
+    log.debug(
+      { agentId, phase: 3, warmTierId: String(warmTierId), variant: 'sdm-passthrough' },
+      'research reviseMemory',
+    );
+    return super.reviseMemory(agentId, warmTierId);
+  }
+}
diff --git a/src/sleep-cycle.ts b/src/sleep-cycle.ts
index 0726d76..e3bea94 100644
--- a/src/sleep-cycle.ts
+++ b/src/sleep-cycle.ts
@@ -245,7 +245,8 @@ export class SleepCycleEngine {
 
   // ─── Phase 1: Scoring ──────────────────────────────────────────────────────
 
-  private async phaseScoring(agentId: string): Promise {
+  // VARIANT:
visibility promoted from private → protected for src/research/ subclasses + protected async phaseScoring(agentId: string): Promise { // Load per-agent weights if available, fall back to global defaults const agentWeights = await this.pool.query<{ scoring_weights: Record | null }>( `SELECT scoring_weights FROM agents WHERE id = $1`, [agentId], @@ -303,7 +304,8 @@ export class SleepCycleEngine { // ─── Phase 2: Triage ─────────────────────────────────────────────────────── - private async phaseTriage(agentId: string): Promise<{ evicted: number; flaggedIds: bigint[] }> { + // VARIANT: visibility promoted from private → protected for src/research/ subclasses + protected async phaseTriage(agentId: string): Promise<{ evicted: number; flaggedIds: bigint[] }> { // Graduate high-confidence memories — inspired by claude-code-toolkit (MIT) await this.pool.query( `UPDATE warm_tier SET graduated = true @@ -362,7 +364,8 @@ export class SleepCycleEngine { // ─── Phase 3: Revision ───────────────────────────────────────────────────── - private async reviseMemory(agentId: string, warmTierId: bigint): Promise { + // VARIANT: visibility promoted from private → protected for src/research/ subclasses + protected async reviseMemory(agentId: string, warmTierId: bigint): Promise { // Gather the memory and its context const memory = await this.pool.query<{ content: string; metadata: Record; importance: number }>( `SELECT content, metadata, importance FROM warm_tier WHERE id = $1 AND agent_id = $2`,