From 4f40f55ab3f64a79b07138d2c7647afb221c0c80 Mon Sep 17 00:00:00 2001 From: truffle Date: Sat, 16 May 2026 13:10:29 +0000 Subject: [PATCH] memory: dedup facts within a session by normalized message text Single message matching both correction and preference patterns produced two facts; identical messages repeated within a session produced N facts. Track normalized text (lowercase + trim) in a per-call Set and skip both pattern checks if the key is already seen. First-match wins on the pattern order. The Slack-fragment accumulation in #84 is partly cross-session which this doesn't address, but bounding per-session contribution to one fact per unique utterance is the floor before any cross-session check earns its complexity. Signed-off-by: truffle --- src/memory/__tests__/consolidation.test.ts | 45 ++++++++++++++++++++++ src/memory/consolidation.ts | 8 ++++ 2 files changed, 53 insertions(+) diff --git a/src/memory/__tests__/consolidation.test.ts b/src/memory/__tests__/consolidation.test.ts index ed088ef7..fe01b7b1 100644 --- a/src/memory/__tests__/consolidation.test.ts +++ b/src/memory/__tests__/consolidation.test.ts @@ -135,6 +135,51 @@ describe("consolidateSession", () => { expect(storedFacts.length).toBe(0); }); + test("duplicate identical user messages produce a single fact", async () => { + const { memory, storedFacts } = createMockMemory(); + const data = makeTestSessionData({ + userMessages: [ + "Actually, the staging server is on port 3001 not 3000", + "Actually, the staging server is on port 3001 not 3000", + "Actually, the staging server is on port 3001 not 3000", + ], + }); + + const result = await consolidateSession(memory, data); + + expect(result.factsExtracted).toBe(1); + expect(storedFacts.length).toBe(1); + }); + + test("a message matching both correction and preference patterns produces a single fact", async () => { + const { memory, storedFacts } = createMockMemory(); + const data = makeTestSessionData({ + userMessages: ["No, make sure to use 
feature branches from now on"], + }); + + const result = await consolidateSession(memory, data); + + expect(result.factsExtracted).toBe(1); + expect(storedFacts.length).toBe(1); + expect(storedFacts[0].tags).toContain("correction"); + }); + + test("dedup is case- and whitespace-insensitive within a session", async () => { + const { memory, storedFacts } = createMockMemory(); + const data = makeTestSessionData({ + userMessages: [ + "I prefer PRs over direct pushes", + " I PREFER PRs over direct pushes ", + "i prefer PRs over direct pushes", + ], + }); + + const result = await consolidateSession(memory, data); + + expect(result.factsExtracted).toBe(1); + expect(storedFacts.length).toBe(1); + }); + test("episode detail includes tools and files", async () => { const { memory, storedEpisodes } = createMockMemory(); const data = makeTestSessionData({ diff --git a/src/memory/consolidation.ts b/src/memory/consolidation.ts index a6e4cc1e..dc76454c 100644 --- a/src/memory/consolidation.ts +++ b/src/memory/consolidation.ts @@ -105,8 +105,13 @@ function calculateImportance(data: SessionData): number { function extractFactsFromSession(data: SessionData, episodeId: string): SemanticFact[] { const facts: SemanticFact[] = []; const now = new Date().toISOString(); + const seenKeys = new Set(); for (const message of data.userMessages) { + const key = message.toLowerCase().trim(); + if (seenKeys.has(key)) { + continue; + } const lower = message.toLowerCase(); if (matchesCorrectionPattern(lower)) { @@ -125,6 +130,8 @@ function extractFactsFromSession(data: SessionData, episodeId: string): Semantic category: "user_preference", tags: ["correction"], }); + seenKeys.add(key); + continue; } if (matchesPreferencePattern(lower)) { @@ -143,6 +150,7 @@ function extractFactsFromSession(data: SessionData, episodeId: string): Semantic category: "user_preference", tags: ["preference"], }); + seenKeys.add(key); } }