Update docs and tests; rename namespace to collection in the Python bindings

anaslimem · anaslimem · commit 252553c30fcc · 2026-03-08T21:09:38.000+01:00
diff --git a/crates/cortexadb-py/cortexadb/__init__.py b/crates/cortexadb-py/cortexadb/__init__.py
@@ -1,4 +1,4 @@
-from .client import CortexaDB, Namespace
+from .client import CortexaDB, Collection
 from ._cortexadb import (
     Hit,
     Memory,
@@ -15,7 +15,7 @@
 
 __all__ = [
     "CortexaDB",
-    "Namespace",
+    "Collection",
     "Hit",
     "Memory",
     "Stats",
diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py
@@ -208,7 +208,7 @@ def replay(cls, log_path: str, db_path: str, **kwargs) -> "CortexaDB":
                         text=op.get("text"),
                         vector=op.get("embedding"),
                         metadata=op.get("metadata"),
-                        collection=op.get("namespace", "default")
+                        collection=op.get("collection") or op.get("namespace", "default")
                     )
                     id_map[op.get("id")] = new_id
                     report["exported"] += 1
@@ -246,7 +246,7 @@ def add(self, text=None, vector=None, metadata=None, collection=None, **kwargs)
         content = text or ""
         mid = self._inner.remember_embedding(vec, metadata=metadata, collection=collection, content=content)
         if self._recorder:
-            self._recorder.record_remember(id=mid, text=content, embedding=vec, namespace=collection, metadata=metadata)
+            self._recorder.record_remember(id=mid, text=content, embedding=vec, collection=collection, metadata=metadata)
         return mid
 
     def search(
@@ -331,7 +331,7 @@ def export_replay(self, path: str):
                         id=mem.id,
                         text=bytes(mem.content).decode("utf-8") if mem.content else "",
                         embedding=mem.embedding,
-                        namespace=mem.collection,
+                        collection=mem.collection,
                         metadata=mem.metadata
                     )
                     report["exported"] += 1
diff --git a/crates/cortexadb-py/cortexadb/replay.py b/crates/cortexadb-py/cortexadb/replay.py
@@ -16,7 +16,7 @@
 
 Lines 2..N — operation records (one JSON object per line):
 
-    {"op": "remember", "id": 1, "text": "...", "embedding": [...], "namespace": "default", "metadata": null}
+    {"op": "remember", "id": 1, "text": "...", "embedding": [...], "collection": "default", "metadata": null}
     {"op": "connect",  "from_id": 1, "to_id": 2, "relation": "caused_by"}
     {"op": "compact"}
 
@@ -56,7 +56,7 @@ class ReplayWriter:
     Example::
 
         writer = ReplayWriter("session.log", dimension=128, sync="strict")
-        writer.record_remember(id=1, text="hello", embedding=[...], namespace="default")
+        writer.record_remember(id=1, text="hello", embedding=[...], collection="default")
         writer.close()
     """
 
@@ -84,7 +84,7 @@ def record_remember(
         id: int,
         text: str,
         embedding: List[float],
-        namespace: str,
+        collection: str,
         metadata: Optional[Dict[str, str]],
     ) -> None:
         """Append a ``remember`` operation."""
@@ -93,7 +93,7 @@ def record_remember(
             "id": id,
             "text": text,
             "embedding": embedding,
-            "namespace": namespace,
+            "collection": collection,
             "metadata": metadata,
         })
 
diff --git a/crates/cortexadb-py/src/lib.rs b/crates/cortexadb-py/src/lib.rs
@@ -209,6 +209,8 @@ struct PyMemory {
     #[pyo3(get)]
     collection: String,
     #[pyo3(get)]
+    namespace: String,
+    #[pyo3(get)]
     created_at: u64,
     #[pyo3(get)]
     importance: f32,
@@ -545,6 +547,7 @@ impl PyCortexaDB {
         Ok(PyMemory {
             id: entry.id,
             collection: entry.namespace.clone(),
+            namespace: entry.namespace.clone(),
             created_at: entry.created_at,
             importance: entry.importance,
             content: entry.content.clone(),
diff --git a/crates/cortexadb-py/test_smoke.py b/crates/cortexadb-py/test_smoke.py
@@ -22,13 +22,13 @@ def test_cortexadb_basic_flow():
     mid = db.remember("Hello world", embedding=[1.0, 0.0, 0.0])
     
     # 3. Ask
-    hits = db.ask("world", embedding=[1.0, 0.0, 0.0])
+    hits = db.search("world", embedding=[1.0, 0.0, 0.0])
     assert len(hits) == 1
     assert hits[0].id == mid
     
     # 4. Get full memory
     mem = db.get(mid)
-    assert mem.namespace == "default"
+    assert mem.collection == "default"
     assert mem.id == mid
     assert bytes(mem.content).decode("utf-8") == "Hello world"
 
@@ -56,10 +56,10 @@ def test_cortexadb_namespaces():
     id_a = agent_a.remember("I am Agent A", embedding=[1.0, 0.0, 0.0])
     agent_b.remember("I am Agent B", embedding=[0.0, 1.0, 0.0])
 
-    assert db.get(id_a).namespace == "agent_a"
+    assert db.get(id_a).collection == "agent_a"
     
     # Test ask filters by namespace using the wrapper
-    hits_a = agent_a.ask("Agent A", embedding=[1.0, 0.0, 0.0])
+    hits_a = agent_a.search("Agent A", embedding=[1.0, 0.0, 0.0])
     assert len(hits_a) == 1
     assert hits_a[0].id == id_a
 
@@ -125,7 +125,7 @@ def test_open_with_embedder():
     # remember without explicit embedding
     mid = db.remember("Auto-embedded text")
     assert mid > 0
-    hits = db.ask("Auto-embedded text")
+    hits = db.search("Auto-embedded text")
     assert len(hits) >= 1
 
 def test_open_requires_one_of_dimension_or_embedder():
@@ -144,23 +144,23 @@ def test_ingest_document():
     emb = HashEmbedder(dimension=32)
     db = CortexaDB.open(DB_PATH, embedder=emb)
     long_text = ("The quick brown fox jumps over the lazy dog. " * 30).strip()
-    ids = db.ingest_document(long_text, chunk_size=100, overlap=20)
+    ids = db.ingest(long_text, chunk_size=100, overlap=20)
     assert len(ids) > 1
     assert len(set(ids)) == len(ids)   # all IDs unique
     assert db.stats().entries == len(ids)
 
 def test_ingest_document_requires_embedder():
     db = CortexaDB.open(DB_PATH, dimension=16)
     with pytest.raises(CortexaDBError, match="ingest_document"):
-        db.ingest_document("some text")
+        db.ingest("some text")
 
 def test_namespace_auto_embed():
     emb = HashEmbedder(dimension=32)
     db = CortexaDB.open(DB_PATH, embedder=emb)
     ns = db.namespace("agent_a")
     mid = ns.remember("I am agent A")
-    assert db.get(mid).namespace == "agent_a"
-    hits = ns.ask("agent A")
+    assert db.get(mid).collection == "agent_a"
+    hits = ns.search("agent A")
     assert any(h.id == mid for h in hits)
 
 # Namespace Model
@@ -175,8 +175,8 @@ def test_namespace_isolation():
     mid_a = agent_a.remember("I am agent A, secret info")
     mid_b = agent_b.remember("I am agent B, different info")
 
-    hits_a = agent_a.ask("agent A", top_k=10)
-    hits_b = agent_b.ask("agent B", top_k=10)
+    hits_a = agent_a.search("agent A", top_k=10)
+    hits_b = agent_b.search("agent B", top_k=10)
 
     a_ids = {h.id for h in hits_a}
     b_ids = {h.id for h in hits_b}
@@ -188,32 +188,32 @@ def test_namespace_isolation():
 
 
 def test_namespaced_ask_param():
-    """db.ask(query, namespaces=[...]) should scope results correctly."""
+    """db.search(query, collections=[...]) should scope results correctly."""
     emb = HashEmbedder(dimension=32)
     db = CortexaDB.open(DB_PATH, embedder=emb)
 
-    mid_a = db.remember("Agent A private", namespace="agent_a")
-    mid_b = db.remember("Agent B private", namespace="agent_b")
-    mid_s = db.remember("Shared knowledge", namespace="shared")
+    mid_a = db.remember("Agent A private", collection="agent_a")
+    mid_b = db.remember("Agent B private", collection="agent_b")
+    mid_s = db.remember("Shared knowledge", collection="shared")
 
-    # Single namespace via namespaces= param
-    hits = db.ask("knowledge", namespaces=["shared"])
+    # Single collection via collections= param
+    hits = db.search("knowledge", collections=["shared"])
     ids = {h.id for h in hits}
     assert mid_s in ids
     assert mid_a not in ids
     assert mid_b not in ids
 
 
 def test_cross_namespace_fan_out():
-    """namespaces=[a, b] should return merged re-ranked results from both."""
+    """collections=[a, b] should return merged re-ranked results from both."""
     emb = HashEmbedder(dimension=32)
     db = CortexaDB.open(DB_PATH, embedder=emb)
 
-    mid_a = db.remember("Agent A knowledge", namespace="agent_a")
-    mid_s = db.remember("Shared knowledge",  namespace="shared")
-    db.remember("Agent B only",              namespace="agent_b")
+    mid_a = db.remember("Agent A knowledge", collection="agent_a")
+    mid_s = db.remember("Shared knowledge",  collection="shared")
+    db.remember("Agent B only",              collection="agent_b")
 
-    hits = db.ask("knowledge", namespaces=["agent_a", "shared"], top_k=10)
+    hits = db.search("knowledge", collections=["agent_a", "shared"], top_k=10)
     ids = {h.id for h in hits}
 
     # Both agent_a and shared results must be present.
@@ -222,23 +222,23 @@ def test_cross_namespace_fan_out():
 
 
 def test_global_ask_returns_all_namespaces():
-    """db.ask(query) with no namespaces= should search globally."""
+    """db.search(query) with no collections= should search globally."""
     emb = HashEmbedder(dimension=32)
     db = CortexaDB.open(DB_PATH, embedder=emb)
 
-    mid_a = db.remember("Agent A fact", namespace="agent_a")
-    mid_b = db.remember("Agent B fact", namespace="agent_b")
-    mid_s = db.remember("Shared fact",  namespace="shared")
+    mid_a = db.remember("Agent A fact", collection="agent_a")
+    mid_b = db.remember("Agent B fact", collection="agent_b")
+    mid_s = db.remember("Shared fact",  collection="shared")
 
-    hits = db.ask("fact", top_k=10)
+    hits = db.search("fact", top_k=10)
     ids = {h.id for h in hits}
     assert mid_a in ids
     assert mid_b in ids
     assert mid_s in ids
 
 
 def test_readonly_namespace():
-    """A readonly namespace should allow ask() but reject remember()."""
+    """A readonly namespace should allow search() but reject remember()."""
     emb = HashEmbedder(dimension=32)
     db = CortexaDB.open(DB_PATH, embedder=emb)
 
@@ -247,7 +247,7 @@ def test_readonly_namespace():
 
     # Read from a readonly view.
     ro = db.namespace("shared", readonly=True)
-    hits = ro.ask("Public knowledge")
+    hits = ro.search("Public knowledge")
     assert any(h.id == mid for h in hits)
 
     # Writes must be rejected.
@@ -304,14 +304,14 @@ def test_replay_recording_creates_ndjson(cleanup_replay):
 def test_replay_round_trip(cleanup_replay):
     """Replaying a log into a new DB should recreate the same memories."""
     with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db:
-        mid1 = db.remember("Alpha", embedding=[1.0, 0.0, 0.0], namespace="agent_a")
-        mid2 = db.remember("Beta",  embedding=[0.0, 1.0, 0.0], namespace="agent_b")
+        mid1 = db.remember("Alpha", embedding=[1.0, 0.0, 0.0], collection="agent_a")
+        mid2 = db.remember("Beta",  embedding=[0.0, 1.0, 0.0], collection="agent_b")
 
     db2 = CortexaDB.replay(LOG_PATH, REPLAY_DB)
 
     assert len(db2) == 2
 
-    hits = db2.ask("query", embedding=[1.0, 0.0, 0.0], top_k=2)
+    hits = db2.search("query", embedding=[1.0, 0.0, 0.0], top_k=2)
     texts = {db2.get(h.id).content.decode() if isinstance(db2.get(h.id).content, bytes) else db2.get(h.id).content for h in hits}
     assert "Alpha" in texts
     assert "Beta" in texts
@@ -332,13 +332,13 @@ def test_replay_connect_id_mapping(cleanup_replay):
 def test_replay_namespace_preserved(cleanup_replay):
     """Replay should preserve original namespaces."""
     with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db:
-        db.remember("In A", embedding=[1.0, 0.0, 0.0], namespace="agent_a")
-        db.remember("In B", embedding=[0.0, 1.0, 0.0], namespace="agent_b")
+        db.remember("In A", embedding=[1.0, 0.0, 0.0], collection="agent_a")
+        db.remember("In B", embedding=[0.0, 1.0, 0.0], collection="agent_b")
 
     db2 = CortexaDB.replay(LOG_PATH, REPLAY_DB)
 
-    hits_a = db2.ask("query", embedding=[1.0, 0.0, 0.0], namespaces=["agent_a"])
-    hits_b = db2.ask("query", embedding=[0.0, 1.0, 0.0], namespaces=["agent_b"])
+    hits_a = db2.search("query", embedding=[1.0, 0.0, 0.0], collections=["agent_a"])
+    hits_b = db2.search("query", embedding=[0.0, 1.0, 0.0], collections=["agent_b"])
 
     assert len(hits_a) == 1
     assert len(hits_b) == 1
@@ -457,14 +457,14 @@ def test_hybrid_use_graph():
     db.connect(id1, id2, "links_to")
 
     # Vector only: expects id1 with high score, id2 with score ~0
-    hits_normal = db.ask("test", embedding=[1.0, 0.0], top_k=2)
+    hits_normal = db.search("test", embedding=[1.0, 0.0], top_k=2)
     assert hits_normal[0].id == id1
     assert hits_normal[1].id == id2
     assert hits_normal[0].score > 0.9
     assert hits_normal[1].score <= 0.501
 
     # Graph mixed query: id2 gets pulled up via id1's edge (score * 0.9)
-    hits_graph = db.ask("test", embedding=[1.0, 0.0], top_k=2, use_graph=True)
+    hits_graph = db.search("test", embedding=[1.0, 0.0], top_k=2, use_graph=True)
     assert hits_graph[0].id == id1
     assert hits_graph[1].id == id2
     # The score of id2 should be updated because of graph neighbor logic
@@ -475,20 +475,20 @@ def test_hybrid_use_graph_respects_namespaces(monkeypatch):
     import cortexadb
 
     db = cortexadb.CortexaDB.open(DB_PATH, dimension=2, sync="strict")
-    id_a = db.remember("Node A", embedding=[1.0, 0.0], namespace="agent_a")
-    id_b = db.remember("Node B", embedding=[0.0, 1.0], namespace="agent_b")
+    id_a = db.remember("Node A", embedding=[1.0, 0.0], collection="agent_a")
+    id_b = db.remember("Node B", embedding=[0.0, 1.0], collection="agent_b")
 
     def fake_get_neighbors(_mid):
         # Simulate an unexpected backend neighbor response across namespaces.
         return [(id_b, "forced")]
 
     monkeypatch.setattr(type(db._inner), "get_neighbors", lambda self, mid: fake_get_neighbors(mid))
 
-    scoped_hits = db.ask(
+    scoped_hits = db.search(
         "test",
         embedding=[1.0, 0.0],
         top_k=5,
-        namespaces=["agent_a"],
+        collections=["agent_a"],
         use_graph=True,
     )
     scoped_ids = {h.id for h in scoped_hits}
@@ -500,8 +500,8 @@ def test_hybrid_recency_bias():
     db = cortexadb.CortexaDB.open(DB_PATH, dimension=2, sync="strict")
     id1 = db.remember("Node A", embedding=[1.0, 0.0])
 
-    hits_normal = db.ask("test", embedding=[1.0, 0.0], top_k=1)
-    hits_recent = db.ask("test", embedding=[1.0, 0.0], top_k=1, recency_bias=True)
+    hits_normal = db.search("test", embedding=[1.0, 0.0], top_k=1)
+    hits_recent = db.search("test", embedding=[1.0, 0.0], top_k=1, recency_bias=True)
 
     # With exactly 0 delay, the boost is exactly 1.2x.
     assert hits_recent[0].score > hits_normal[0].score
diff --git a/crates/cortexadb-py/test_stress.py b/crates/cortexadb-py/test_stress.py
@@ -65,7 +65,7 @@ def test_concurrent_compaction(clean_db_path):
         # Delete 300 entries to exceed the 20% threshold in segment 0 (if rotation happened)
         # Actually, let's just make sure we have enough deleted entries.
         for i in range(300):
-            db.delete_memory(i + 1) # IDs start at 1
+            db.delete(i + 1) # IDs start at 1
 
         assert len(db) == 700
 
diff --git a/docs/mdx-components.tsx b/docs/mdx-components.tsx
@@ -1,9 +1,13 @@
 import defaultMdxComponents from 'fumadocs-ui/mdx';
 import type { MDXComponents } from 'mdx/types';
+import { Shield, Zap, TrendingUp } from 'lucide-react';
 
 export function getMDXComponents(components?: MDXComponents): MDXComponents {
   return {
     ...defaultMdxComponents,
+    Shield,
+    Zap,
+    TrendingUp,
     ...components,
   };
 }