-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathseed_data.py
More file actions
135 lines (113 loc) · 4.38 KB
/
seed_data.py
File metadata and controls
135 lines (113 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""Seed demo contacts — pre-load faces + memories for demo.
Run: python3 seed_data.py
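Requires: .env with API keys.
Optional: sample face images in seed_faces/, one per contact, named after the person_id (e.g. alex_kim.jpg); face indexing is skipped when the directory is missing.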
"""
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
sys.path.insert(0, str(Path(__file__).parent / "backend"))
import face_pipeline
import memory_store
from self_learning import face_tracker
# Demo contacts loaded by seed_memories().
# Each entry carries:
#   name       - display name
#   person_id  - identifier passed to the memory store and face tracker; seed_faces()
#                derives the same kind of id from an image's file stem
#   company    - stored as identity metadata
#   role       - stored as identity metadata
#   memories   - free-text notes, each added as a separate memory
#   topics     - topic tags used for the conversation summary
DEMO_CONTACTS = [
    # --- Alex Kim ---------------------------------------------------------
    {
        "name": "Alex Kim",
        "person_id": "alex_kim",
        "company": "Datadog",
        "role": "Senior APM Engineer",
        "memories": [
            "Works on the APM team at Datadog, focused on distributed tracing.",
            "Mentioned they're building a new service map feature.",
            "Interested in AI observability and LLM monitoring.",
            "From San Francisco, moved from Seattle last year.",
        ],
        "topics": ["APM", "distributed tracing", "AI observability"],
    },
    # --- Sarah Chen -------------------------------------------------------
    {
        "name": "Sarah Chen",
        "person_id": "sarah_chen",
        "company": "Anthropic",
        "role": "ML Research Engineer",
        "memories": [
            "Works on safety research at Anthropic.",
            "Previously at Google DeepMind working on reinforcement learning.",
            "Gave a great talk about AI alignment at NeurIPS.",
            "Looking for collaboration on interpretability research.",
        ],
        "topics": ["AI safety", "interpretability", "reinforcement learning"],
    },
    # --- Marcus Johnson ---------------------------------------------------
    {
        "name": "Marcus Johnson",
        "person_id": "marcus_johnson",
        "company": "Stripe",
        "role": "Staff Engineer",
        "memories": [
            "Leads the payments infrastructure team at Stripe.",
            "Built their real-time fraud detection system.",
            "Interested in applying ML to financial compliance.",
            "Organizes the SF Systems meetup group.",
        ],
        "topics": ["payments", "fraud detection", "systems engineering"],
    },
]
def seed_memories():
    """Seed identities, memories and summaries for every demo contact.

    For each entry in ``DEMO_CONTACTS`` this, in order:

    1. stores the identity with ``company`` / ``role`` metadata,
    2. adds every memory, tagged ``{"type": "conversation"}``,
    3. stores a one-line conversation summary built from the contact's
       company, role and topics,
    4. confirms the identity with the face tracker, and
    5. updates the identity mapping in the memory store.

    No face images are needed. Progress is printed to stdout; nothing is
    returned. Errors raised by ``memory_store`` / ``face_tracker`` are not
    caught here and abort the run.
    """
    # Longest memory preview echoed to the console.
    preview_limit = 60

    print("Seeding demo contact memories...\n")
    for contact in DEMO_CONTACTS:
        person_id = contact["person_id"]
        name = contact["name"]
        print(f" {name} ({person_id})")

        # Store identity
        memory_store.store_identity(
            person_id,
            name,
            metadata={
                "company": contact["company"],
                "role": contact["role"],
            },
        )

        # Store memories
        for mem in contact["memories"]:
            memory_store.add_memory(person_id, mem, metadata={"type": "conversation"})
            # Only mark the preview with an ellipsis when text was actually cut;
            # short memories used to be printed with a misleading "..." suffix.
            if len(mem) > preview_limit:
                preview = f"{mem[:preview_limit]}..."
            else:
                preview = mem
            print(f" + {preview}")

        # Store conversation summary
        memory_store.store_conversation_summary(
            person_id,
            f"Met {name} from {contact['company']}. They work as {contact['role']}. "
            f"Discussed: {', '.join(contact['topics'])}.",
            topics=contact["topics"],
        )

        # Register in face tracker
        face_tracker.confirm_identity(person_id, name)

        # Register face mapping
        memory_store.update_identity_mapping(person_id, name)

        print(f" Done ({len(contact['memories'])} memories)\n")

    print(f"Seeded {len(DEMO_CONTACTS)} contacts.")
def seed_faces():
    """Index seed face images from the ``seed_faces/`` directory, if present.

    Looks for ``seed_faces/`` next to this script. When it is missing (or is
    not a directory) a hint is printed and the function returns without doing
    anything. Otherwise every ``*.jpg`` file in it is processed in sorted
    order: the file stem is used as the person id, the image bytes are passed
    to ``face_pipeline.index_face``, and a CLIP embedding computed by
    ``face_pipeline.compute_clip_embedding`` is recorded as a sighting with
    ``face_tracker`` at confidence 99.0.

    Progress is printed to stdout; nothing is returned. Errors raised by
    ``face_pipeline`` / ``face_tracker`` are not caught here and abort the run.
    """
    faces_dir = Path(__file__).parent / "seed_faces"
    # is_dir() rather than exists(): a stray regular file named "seed_faces"
    # must not be treated as the image directory.
    if not faces_dir.is_dir():
        print("\nNo seed_faces/ directory found. Skipping face indexing.")
        print("To seed faces, create seed_faces/ with images named like: alex_kim.jpg")
        return

    print("\nIndexing seed face images...")
    # Path.glob yields entries in arbitrary order; sort so repeated runs index
    # the images (and print their progress) in the same order.
    image_files = sorted(faces_dir.glob("*.jpg"))
    if not image_files:
        # Don't claim indexing completed when there was nothing to index.
        print(" No .jpg images found in seed_faces/. Nothing to index.")
        return

    for img_file in image_files:
        person_id = img_file.stem  # e.g., "alex_kim"
        image_bytes = img_file.read_bytes()

        # Index face in Rekognition
        # NOTE(review): assumes index_face returns a dict-like result — confirm
        # what it returns when no face is detected in the image.
        result = face_pipeline.index_face(image_bytes, person_id=person_id)
        print(f" Indexed {person_id}: face_id={result.get('face_id')}")

        # Compute and store CLIP embedding
        # NOTE(review): .tolist() presumes an array-like return value — confirm.
        clip_emb = face_pipeline.compute_clip_embedding(image_bytes)
        face_tracker.record_sighting(
            person_id=person_id,
            confidence=99.0,  # Seed at high confidence
            clip_embedding=clip_emb.tolist(),
        )

    print("Face indexing complete.")
if __name__ == "__main__":
    # Script entry point: seed the text memories first, then the optional
    # face images, and finish with a hint on what to do next.
    for seed_step in (seed_memories, seed_faces):
        seed_step()
    print("\nDone! Run the backend to start using ORBIT.")