lupuletic · lupuletic · May 20, 2026 · May 19, 2026 · May 19, 2026 · May 20, 2026
diff --git a/docs/assets/code-recall-activity.svg b/docs/assets/code-recall-activity.svg
diff --git a/docs/assets/code-recall-ai-chat.svg b/docs/assets/code-recall-ai-chat.svg
diff --git a/docs/assets/code-recall-demo.gif b/docs/assets/code-recall-demo.gif
diff --git a/docs/assets/code-recall-related.svg b/docs/assets/code-recall-related.svg
diff --git a/docs/assets/code-recall-search.svg b/docs/assets/code-recall-search.svg
diff --git a/docs/assets/code-recall-why.svg b/docs/assets/code-recall-why.svg
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "code-recall"
-version = "0.2.4"
+version = "0.2.5"
 description = "Semantic search across local coding-agent sessions. Find past conversations by intent, not just keywords."
 readme = "README.md"
 license = "MIT"

diff --git a/src/code_recall/__init__.py b/src/code_recall/__init__.py
@@ -1,6 +1,6 @@
 """code-recall: Semantic search across local coding-agent sessions."""
 
-__version__ = "0.2.4"
+__version__ = "0.2.5"
 
 
 def has_semantic() -> bool:

diff --git a/src/code_recall/db.py b/src/code_recall/db.py
@@ -448,23 +448,48 @@ def setup_vec_table(conn: sqlite3.Connection) -> None:
     """Create the vector table for semantic search. Requires sqlite-vec."""
     if not load_vec_extension(conn):
         return
+
+    # Dim is sourced from the active embedder so the table and embeddings
+    # stay in lockstep when the model changes. If an existing chunks_vec
+    # table has the wrong dim, drop it — the indexer will re-embed all
+    # chunks on the next run.
+    row = conn.execute(
+        "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks_vec'"
+    ).fetchone()
+    if row and not _chunks_vec_uses_current_dim(row["sql"]):
+        conn.execute("DROP TABLE chunks_vec")
+
+    dim = _current_embedding_dim()
     conn.execute(
-        """CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0(
+        f"""CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0(
             chunk_rowid INTEGER PRIMARY KEY,
-            embedding float[384] distance_metric=cosine
+            embedding float[{dim}] distance_metric=cosine
         )"""
     )
-    # Drop old sessions_vec if migrating from v1
     try:
         conn.execute("DROP TABLE IF EXISTS sessions_vec")
     except Exception:
         pass
     conn.commit()
 
 
+def _current_embedding_dim() -> int:
+    """Look up the vector width declared on the embedder class.
+
+    Only the class attribute is read; no embedder instance is constructed here.
+    """
+    # Imported at call time rather than at module import.
+    from code_recall.embedder import Embedder as active_embedder
+
+    return active_embedder.DIM
+
+
+def _chunks_vec_uses_current_dim(sql: str | None) -> bool:
+    """Tell whether chunks_vec DDL text declares the active embedder's vector width.
+
+    A missing or empty ``sql`` counts as a mismatch. Otherwise this is a plain
+    substring test for ``float[<dim>]`` inside the DDL text.
+    """
+    # ``and`` short-circuits, so the dimension lookup only runs for non-empty DDL.
+    return bool(sql) and f"float[{_current_embedding_dim()}]" in sql
+
+
 def has_vec_table(conn: sqlite3.Connection) -> bool:
-    """Check if the vector table exists."""
+    """Check if the vector table exists and matches the active embedder.
+
+    Returns False when chunks_vec is absent from sqlite_master, or when its
+    stored CREATE statement does not pass _chunks_vec_uses_current_dim.
+    """
     row = conn.execute(
-        "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_vec'"
+        "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks_vec'"
     ).fetchone()
-    return row is not None
+    # row["sql"] needs a row type indexable by column name.
+    # NOTE(review): confirm conn.row_factory is set accordingly by the caller.
+    return row is not None and _chunks_vec_uses_current_dim(row["sql"])
diff --git a/src/code_recall/embedder.py b/src/code_recall/embedder.py
@@ -14,20 +14,23 @@
 class Embedder:
     """Wrapper around FastEmbed for generating text embeddings."""
 
-    MODEL = "BAAI/bge-small-en-v1.5"  # 33MB, 384 dimensions, ONNX
+    MODEL = "nomic-ai/nomic-embed-text-v1.5-Q"  # 130MB, 768d, 8K context, ONNX
+    # Output width of MODEL. The chunks_vec schema in db.py is sized from this
+    # value, so it must be updated together with MODEL.
+    DIM = 768
 
     def __init__(self):
         from fastembed import TextEmbedding
 
         self._model = TextEmbedding(model_name=self.MODEL)
 
     def embed(self, texts: list[str]) -> list["np.ndarray"]:
-        """Embed a batch of texts. Returns list of numpy arrays."""
-        return list(self._model.embed(texts))
+        """Embed a batch of documents (uses model's passage prefix)."""
+        # Cast to float32 — nomic returns float64, sqlite-vec stores float32.
+        # copy=False lets numpy skip the copy when an array is already float32.
+        return [arr.astype("float32", copy=False) for arr in self._model.passage_embed(texts)]
 
     def embed_single(self, text: str) -> "np.ndarray":
-        """Embed a single text string."""
-        return list(self._model.embed([text]))[0]
+        """Embed a single query (uses model's query prefix)."""
+        # One text goes in, so only the first yielded vector is taken.
+        arr = list(self._model.query_embed([text]))[0]
+        # Same float32 normalisation as embed().
+        return arr.astype("float32", copy=False)
 
 
 class Reranker:
@@ -37,7 +40,7 @@ class Reranker:
     cross-attention — much more accurate than bi-encoder similarity.
     """
 
-    MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"  # 80MB, 18ms for 20 docs
+    MODEL = "jinaai/jina-reranker-v1-tiny-en"  # 130MB, 8K context, ONNX
 
     def __init__(self):
         from fastembed.rerank.cross_encoder import TextCrossEncoder
@@ -52,7 +55,6 @@ def rerank(
         Returns list of (original_index, score) sorted by score descending.
         """
         scores = list(self._model.rerank(query, documents))
-        # scores is a list of floats, one per document in original order
         indexed_scores = list(enumerate(scores))
         indexed_scores.sort(key=lambda x: x[1], reverse=True)
         return indexed_scores
@@ -85,15 +87,13 @@ def get_reranker(allow_download: bool = False) -> Reranker | None:
         return _reranker_instance
 
     if not allow_download:
-        # Check if model is already cached before loading
         try:
             from fastembed.common.utils import define_cache_dir
 
             cache = define_cache_dir()
-            # Look for the model in cache
-            model_dirs = list(cache.glob("*ms-marco*MiniLM*"))
+            model_dirs = list(cache.glob("*jina-reranker*tiny*"))
             if not model_dirs:
-                return None  # Not downloaded yet — skip reranking
+                return None
         except Exception:
             pass
 

diff --git a/src/code_recall/indexer.py b/src/code_recall/indexer.py
@@ -445,33 +445,58 @@ def _generate_embeddings(
     if not rows:
         return 0
 
-    if verbose:
-        print(f"\n  Generating embeddings for {len(rows)} chunks...", file=sys.stderr)
-
     # Prepare texts and IDs
     texts = [row["chunk_text"] for row in rows if row["chunk_text"].strip()]
     chunk_ids = [row["chunk_id"] for row in rows if row["chunk_text"].strip()]
 
     if not texts:
         return 0
 
-    # Batch embed
-    embeddings = embedder.embed(texts)
+    # Embed in mini-batches so the user sees steady progress; one giant
+    # batched call produces a long silent stretch on heavier models.
+    batch_size = 64
+    use_tqdm = verbose and sys.stderr.isatty()
+    progress = None
+    if use_tqdm:
+        try:
+            from tqdm import tqdm
 
-    # Store in vec table with periodic commits
-    for i, (chunk_id, embedding) in enumerate(zip(chunk_ids, embeddings)):
-        conn.execute(
-            "INSERT OR REPLACE INTO chunks_vec (chunk_rowid, embedding) VALUES (?, ?)",
-            (chunk_id, embedding.tobytes()),
-        )
-        if (i + 1) % 50 == 0:
-            conn.commit()
-            if verbose:
-                print(
-                    f"\r  Embedded {i + 1}/{len(chunk_ids)} chunks...",
-                    end="",
-                    file=sys.stderr,
-                )
+            progress = tqdm(
+                total=len(texts),
+                unit="chunk",
+                desc="  Embedding",
+                file=sys.stderr,
+                leave=True,
+            )
+        except ImportError:
+            progress = None
+    elif verbose:
+        print(f"\n  Generating embeddings for {len(texts)} chunks...", file=sys.stderr)
+
+    embedded = 0
+    for start in range(0, len(texts), batch_size):
+        batch_texts = texts[start : start + batch_size]
+        batch_ids = chunk_ids[start : start + batch_size]
+        batch_embeddings = embedder.embed(batch_texts)
+        for chunk_id, embedding in zip(batch_ids, batch_embeddings):
+            conn.execute(
+                "INSERT OR REPLACE INTO chunks_vec (chunk_rowid, embedding) VALUES (?, ?)",
+                (chunk_id, embedding.tobytes()),
+            )
+            embedded += 1
+        conn.commit()
+        if progress is not None:
+            progress.update(len(batch_texts))
+        elif verbose:
+            print(
+                f"\r  Embedded {embedded}/{len(texts)} chunks...",
+                end="",
+                file=sys.stderr,
+            )
+
+    if progress is not None:
+        progress.close()
+    elif verbose:
+        print(file=sys.stderr)
 
-    conn.commit()
     return len(chunk_ids)
diff --git a/src/code_recall/utils.py b/src/code_recall/utils.py
@@ -805,7 +805,7 @@ def _dedupe(values: list[str]) -> list[str]:
 # Chunk configuration
 CHUNK_SIZE = 5  # messages per chunk
 CHUNK_OVERLAP = 1  # overlapping messages between chunks
-MAX_CHUNK_CHARS = 2000  # max chars per chunk text
+MAX_CHUNK_CHARS = 8000  # max chars per chunk (~2K tokens, well under embedder's 8K limit)
 
 
 def _build_fts_text(

diff --git a/tests/test_db.py b/tests/test_db.py
@@ -15,6 +15,7 @@
     get_related_sessions,
     get_session_mtime,
     get_stats,
+    has_vec_table,
     upsert_chunks,
     upsert_graph_edges,
     upsert_session,
@@ -164,6 +165,19 @@ def test_migrates_v3_database_before_provider_index(self, tmp_path):
         assert index_row is not None
         assert version == "4"
 
+    def test_has_vec_table_rejects_old_embedding_dimension(self, db_conn):
+        """A chunks_vec table declared with a non-current width must not count as usable."""
+        stale_ddl = (
+            "CREATE TABLE chunks_vec (chunk_rowid INTEGER PRIMARY KEY, embedding float[384])"
+        )
+        current_ddl = (
+            "CREATE TABLE chunks_vec (chunk_rowid INTEGER PRIMARY KEY, embedding float[768])"
+        )
+
+        # Plain tables are used here: has_vec_table reads the DDL text from sqlite_master.
+        db_conn.execute(stale_ddl)
+        assert not has_vec_table(db_conn)
+
+        # Recreate the table with the 768 width and expect it to be accepted.
+        db_conn.execute("DROP TABLE chunks_vec")
+        db_conn.execute(current_ddl)
+        assert has_vec_table(db_conn)
+
     def test_chunks_columns(self, db_conn):
         cols = {
             row[1]

diff --git a/uv.lock b/uv.lock