-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: memory-system-example.ts
More file actions
291 lines (248 loc) · 9.54 KB
/
memory-system-example.ts
File metadata and controls
291 lines (248 loc) · 9.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/**
* Memory System Example
*
* Demonstrates LLM-native memory with extraction, consolidation, and retrieval.
*/
import {
EmbeddedDatabase,
ExtractionPipeline,
Consolidator,
HybridRetriever,
AllowedSet,
} from '../src';
import * as path from 'path';
// Mock LLM extractor (replace with actual LLM API in production)
/**
 * Pattern-based stand-in for an LLM extraction call.
 *
 * Scans `text` for capitalized name runs followed by "is"/"works" (people),
 * capitalized runs introduced by "at"/"for" (organizations),
 * "X works at/for Y" (works_at relations), and "X is (a/an) <role>"
 * (role assertions). In production, replace the body with a real LLM API
 * call (OpenAI, Anthropic, etc.) returning the same shape.
 *
 * @param text - Free-form input text to mine for facts.
 * @returns Entities, relations, and assertions with fixed demo confidences.
 */
async function mockExtractor(text: string) {
  const entities: { name: string; entity_type: string; confidence: number }[] = [];
  const relations: { from_entity: string; relation_type: string; to_entity: string; confidence: number }[] = [];
  const assertions: { subject: string; predicate: string; object: string; confidence: number }[] = [];

  // Capitalized word run immediately followed by "is"/"works" → person.
  // matchAll gives us the capture group directly, so no replace() cleanup
  // and no second regex pass per match.
  for (const m of text.matchAll(/([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\s+(?:is|works)/g)) {
    entities.push({ name: m[1], entity_type: 'person', confidence: 0.9 });
  }

  // Capitalized word run preceded by "at"/"for" → organization.
  for (const m of text.matchAll(/(?:at|for)\s+([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)/g)) {
    entities.push({ name: m[1], entity_type: 'organization', confidence: 0.85 });
  }

  // "X works at/for Y" → works_at relation.
  for (const m of text.matchAll(/([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\s+works\s+(?:at|for)\s+([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)/g)) {
    relations.push({ from_entity: m[1], relation_type: 'works_at', to_entity: m[2], confidence: 0.9 });
  }

  // "X is (a/an) <lowercase words>" → role assertion. NOTE: the lowercase
  // run can swallow trailing prepositions (e.g. "engineer at") — an accepted
  // quirk of this demo extractor, preserved here.
  for (const m of text.matchAll(/([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\s+is\s+(?:an?\s+)?([a-z]+(?:\s[a-z]+)*)/g)) {
    assertions.push({ subject: m[1], predicate: 'role', object: m[2], confidence: 0.85 });
  }

  return { entities, relations, assertions };
}
// Mock embedding function (use real embeddings in production)
/**
 * Produces a deterministic 384-dimensional pseudo-embedding: position i holds
 * charCodeAt(i) / 255 for the first 384 characters of `text`, and 0 beyond
 * the text's length. Swap in a real embedding model for production use.
 *
 * @param text - Text to encode.
 * @returns A 384-element numeric vector.
 */
function mockEmbed(text: string): number[] {
  const dimensions = 384;
  return Array.from({ length: dimensions }, (_, position) =>
    position < text.length ? text.charCodeAt(position) / 255 : 0
  );
}
/**
 * Runs six memory-system demos in sequence against one EmbeddedDatabase:
 * 1) extraction pipeline, 2) retrieval of stored facts, 3) consolidation of
 * (possibly contradicting) assertions, 4) hybrid vector+keyword retrieval,
 * 5) metadata pre-filtering with AllowedSet, 6) batch extraction. Progress is
 * logged to stdout; the database is closed at the end (no try/finally, so an
 * error mid-demo leaves it open — acceptable for an example script).
 */
async function main() {
// Database files live under ../test-data relative to this example file.
const dbPath = path.join(__dirname, '../test-data/memory-example-db');
// Open database
const db = await EmbeddedDatabase.open(dbPath);
console.log('✓ Database opened\n');
// ============================================
// Example 1: Extraction Pipeline
// ============================================
console.log('=== Example 1: Extraction Pipeline ===');
// Pipeline scoped to namespace 'user_123'; facts below minConfidence are
// presumably dropped by the pipeline — confirm against ExtractionPipeline docs.
const pipeline = ExtractionPipeline.fromDatabase(db, 'user_123', {
entityTypes: ['person', 'organization', 'location'],
relationTypes: ['works_at', 'knows', 'located_in'],
minConfidence: 0.7,
});
const text1 = 'Alice works at Acme Corp. Bob is an engineer at Tech Inc.';
// mockExtractor returns snake_case fields (entity_type, from_entity);
// NOTE(review): the results below are read in camelCase (entityType,
// fromEntity) — the pipeline appears to normalize field names; verify.
const result1 = await pipeline.extractAndCommit(text1, mockExtractor);
console.log(`Extracted ${result1.entities.length} entities:`);
for (const entity of result1.entities) {
console.log(` - ${entity.name} (${entity.entityType}) [confidence: ${entity.confidence?.toFixed(2)}]`);
}
console.log(`Extracted ${result1.relations.length} relations:`);
for (const relation of result1.relations) {
console.log(` - ${relation.fromEntity} --${relation.relationType}--> ${relation.toEntity}`);
}
console.log(`Extracted ${result1.assertions.length} assertions:`);
for (const assertion of result1.assertions) {
console.log(` - ${assertion.subject} ${assertion.predicate} ${assertion.object}`);
}
console.log();
// ============================================
// Example 2: Retrieve Stored Facts
// ============================================
console.log('=== Example 2: Retrieve Stored Facts ===');
// Read back everything committed by Example 1.
const allEntities = await pipeline.getEntities();
console.log(`Total entities stored: ${allEntities.length}`);
const allRelations = await pipeline.getRelations();
console.log(`Total relations stored: ${allRelations.length}`);
console.log();
// ============================================
// Example 3: Consolidation
// ============================================
console.log('=== Example 3: Consolidation ===');
// Merges near-duplicate assertions; facts above similarityThreshold are
// presumably treated as the same fact — confirm against Consolidator docs.
const consolidator = Consolidator.fromDatabase(db, 'user_123', {
similarityThreshold: 0.85,
useTemporalUpdates: true,
});
// Add some assertions
// Same fact from two sources, with differing confidences.
await consolidator.add({
fact: { subject: 'Alice', predicate: 'lives_in', object: 'San Francisco' },
source: 'conversation_1',
confidence: 0.9,
});
await consolidator.add({
fact: { subject: 'Alice', predicate: 'lives_in', object: 'San Francisco' },
source: 'conversation_2',
confidence: 0.95,
});
// Add contradicting assertion
await consolidator.addWithContradiction(
{
fact: { subject: 'Alice', predicate: 'lives_in', object: 'New York' },
source: 'conversation_3',
confidence: 0.92,
},
[] // IDs of contradicted assertions (empty for demo)
);
// Run consolidation
const updated = await consolidator.consolidate();
console.log(`✓ Consolidated ${updated} facts`);
// Get canonical facts
const canonicalFacts = await consolidator.getCanonicalFacts();
console.log(`Canonical facts: ${canonicalFacts.length}`);
for (const fact of canonicalFacts) {
console.log(` - Fact: ${JSON.stringify(fact.mergedFact)}`);
console.log(` Confidence: ${fact.confidence.toFixed(3)}, Sources: ${fact.sources.length}`);
}
console.log();
// ============================================
// Example 4: Hybrid Retrieval
// ============================================
console.log('=== Example 4: Hybrid Retrieval ===');
// alpha presumably weights vector vs. keyword scores (0.5 = balanced) —
// confirm against HybridRetriever docs.
const retriever = HybridRetriever.fromDatabase(db, 'user_123', 'documents', {
k: 5,
alpha: 0.5, // Balanced vector + keyword
});
// Index some documents
const documents = [
{
id: 'doc1',
content: 'Machine learning is a subset of artificial intelligence focused on data-driven algorithms.',
embedding: mockEmbed('machine learning AI algorithms'),
metadata: { category: 'AI', author: 'Alice' },
},
{
id: 'doc2',
content: 'Deep learning uses neural networks with multiple layers to learn hierarchical representations.',
embedding: mockEmbed('deep learning neural networks'),
metadata: { category: 'AI', author: 'Bob' },
},
{
id: 'doc3',
content: 'Natural language processing enables computers to understand and generate human language.',
embedding: mockEmbed('NLP language processing'),
metadata: { category: 'NLP', author: 'Alice' },
},
{
id: 'doc4',
content: 'Computer vision allows machines to interpret and understand visual information from images.',
embedding: mockEmbed('computer vision images'),
metadata: { category: 'Vision', author: 'Carol' },
},
];
await retriever.indexDocuments(documents);
console.log(`✓ Indexed ${documents.length} documents`);
// Search with namespace isolation
const query = 'neural networks and AI';
const queryEmbedding = mockEmbed(query);
// Restricts results to the 'user_123' namespace, matching the retriever above.
const allowed = AllowedSet.fromNamespace('user_123');
const searchResults = await retriever.retrieve(query, queryEmbedding, allowed, 3);
console.log(`\nQuery: "${query}"`);
console.log(`Found ${searchResults.results.length} results (${searchResults.queryTime}ms):`);
for (const result of searchResults.results) {
console.log(`\n ${result.id} (score: ${result.score.toFixed(4)})`);
console.log(` Content: ${result.content.substring(0, 80)}...`);
console.log(` Vector rank: ${result.vectorRank}, Keyword rank: ${result.keywordRank}`);
}
console.log();
// ============================================
// Example 5: Pre-Filtering with AllowedSet
// ============================================
console.log('=== Example 5: Pre-Filtering with AllowedSet ===');
// Only Alice's documents
// Predicate-based allow list: only documents whose metadata.author is 'Alice'
// (doc1 and doc3 above) are eligible before ranking.
const aliceAllowed = AllowedSet.fromFilter(
(id, metadata) => metadata?.author === 'Alice'
);
const aliceResults = await retriever.retrieve(query, queryEmbedding, aliceAllowed, 5);
console.log(`Alice-only results: ${aliceResults.results.length}`);
for (const result of aliceResults.results) {
console.log(` - ${result.id} by ${result.metadata?.author}`);
}
console.log();
// ============================================
// Example 6: Multiple Extractions
// ============================================
console.log('=== Example 6: Multiple Extractions ===');
const texts = [
'Carol is a data scientist at Research Labs',
'Dave works at Startup Inc as a software developer',
'Eve is the CEO of Innovation Corp',
];
// Sequential on purpose: keeps per-text log lines grouped and avoids
// concurrent commits into the same pipeline.
for (const text of texts) {
const result = await pipeline.extractAndCommit(text, mockExtractor);
console.log(`✓ Processed: "${text}"`);
console.log(` Entities: ${result.entities.length}, Relations: ${result.relations.length}`);
}
const finalCount = await pipeline.getEntities();
console.log(`\nTotal entities in database: ${finalCount.length}`);
console.log();
// Clean up
await db.close();
console.log('✓ Database closed');
}
// Entry point: run the demo and surface any unhandled error to stderr.
main().catch(console.error);