-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path23_collections_embedded.py
More file actions
138 lines (113 loc) · 4.33 KB
/
23_collections_embedded.py
File metadata and controls
138 lines (113 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python3
"""
SochDB Python SDK - Example: Collections (Embedded FFI)
This example demonstrates collection operations using the embedded
Database class (FFI). No server required!
Collections provide structured document storage with vector embeddings
for semantic search.
"""
import json
from sochdb import Database
def main():
print("=" * 60)
print("SochDB - Collections Example (Embedded FFI)")
print("=" * 60)
print("Note: This uses embedded Database - no server required!\n")
# Open embedded database
db = Database.open("./example_collections_db")
print("--- Creating a Collection ---")
# Collections are stored using a naming convention in the KV store
# Format: collection:{name}:doc:{id}
collection_name = "articles"
# Simulate documents with embeddings
documents = [
{
"id": "article_1",
"title": "Introduction to Machine Learning",
"content": "Machine learning is a subset of AI...",
"category": "AI",
"embedding": [0.1, 0.2, 0.3, 0.4] # Simplified 4D embedding
},
{
"id": "article_2",
"title": "Python Best Practices",
"content": "Writing clean Python code requires...",
"category": "Programming",
"embedding": [0.5, 0.6, 0.7, 0.8]
},
{
"id": "article_3",
"title": "Deep Learning Fundamentals",
"content": "Neural networks are the foundation...",
"category": "AI",
"embedding": [0.15, 0.25, 0.35, 0.45]
}
]
print(f"✓ Adding {len(documents)} documents to '{collection_name}'")
# Store documents
for doc in documents:
key = f"collection:{collection_name}:doc:{doc['id']}".encode()
db.put(key, json.dumps(doc).encode())
print(f" → Added: {doc['title']}")
print("\n--- Querying Documents ---")
# Get a specific document
doc_key = f"collection:{collection_name}:doc:article_1".encode()
raw = db.get(doc_key)
if raw:
doc = json.loads(raw.decode())
print(f"Retrieved: {doc['title']}")
print(f" Category: {doc['category']}")
print("\n--- Scanning Collection ---")
# Scan all documents in collection
prefix = f"collection:{collection_name}:doc:".encode()
ai_articles = []
for key, value in db.scan_prefix(prefix):
doc = json.loads(value.decode())
print(f" • {doc['title']} [{doc['category']}]")
if doc['category'] == "AI":
ai_articles.append(doc)
print(f"\n Found {len(ai_articles)} AI articles")
print("\n--- Using Transactions ---")
# Atomic update of multiple documents
with db.transaction() as txn:
# Update article category
doc_key = f"collection:{collection_name}:doc:article_2".encode()
raw = txn.get(doc_key)
if raw:
doc = json.loads(raw.decode())
doc['category'] = "Python"
doc['updated'] = True
txn.put(doc_key, json.dumps(doc).encode())
print(f"✓ Updated category for: {doc['title']}")
# Add a new document atomically
new_doc = {
"id": "article_4",
"title": "Async Python Patterns",
"content": "Asynchronous programming in Python...",
"category": "Python",
"embedding": [0.6, 0.7, 0.8, 0.9]
}
new_key = f"collection:{collection_name}:doc:article_4".encode()
txn.put(new_key, json.dumps(new_doc).encode())
print(f"✓ Added new document: {new_doc['title']}")
print("\n--- Embedded vs gRPC ---")
print("""
Embedded (Database class):
✓ No server needed - runs in-process
✓ Zero network latency
✓ Simple deployment
✗ Business logic in Python (maintenance overhead)
gRPC (SochDBClient):
✓ All logic in Rust server (single source of truth)
✓ Language-agnostic API
✓ Temporal graphs, policies, and advanced features
✗ Requires server process
Choose embedded for simple apps, gRPC for production.
""")
# Cleanup
db.close()
import shutil
shutil.rmtree("./example_collections_db", ignore_errors=True)
print("✅ Collection example complete!")
if __name__ == "__main__":
main()