Skip to content

Commit 3fbd89a

Browse files
committed
Add indexer + search tools
1 parent d2f2822 commit 3fbd89a

6 files changed

Lines changed: 1493 additions & 7 deletions

File tree

src/main.rs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ use icrab::cron_runner;
1515
use icrab::heartbeat;
1616
use icrab::llm::HttpProvider;
1717
use icrab::memory::db::BrainDb;
18+
use icrab::memory::indexer::VaultIndexer;
19+
use icrab::tools::SearchVaultTool;
1820
use icrab::telegram::{self, OutboundMsg};
1921
use icrab::tools;
2022
use icrab::tools::cron::{CronStore, CronTool};
@@ -66,8 +68,28 @@ async fn main() {
6668
};
6769
eprintln!("brain db opened: {}", icrab::workspace::brain_db_path(&workspace).display());
6870

69-
// Build subagent registry (core only — no spawn, no cron).
70-
let subagent_registry = Arc::new(tools::build_core_registry(&cfg));
71+
// Kick off the vault indexer in a background task so startup isn't blocked.
72+
// The indexer walks the workspace and upserts any new/modified .md files
73+
// into vault_index (FTS5 stays in sync via triggers). Errors are logged
74+
// but never fatal.
75+
{
76+
let indexer = VaultIndexer::new(Arc::clone(&db));
77+
let ws_clone = workspace.clone();
78+
tokio::spawn(async move {
79+
match tokio::task::spawn_blocking(move || indexer.scan(&ws_clone)).await {
80+
Ok(Ok(stats)) => eprintln!("vault index: {stats}"),
81+
Ok(Err(e)) => eprintln!("vault index warning: {e}"),
82+
Err(e) => eprintln!("vault index task error: {e}"),
83+
}
84+
});
85+
}
86+
87+
// Build subagent registry (core + search — no spawn, no cron).
88+
let subagent_registry = Arc::new({
89+
let reg = tools::build_core_registry(&cfg);
90+
reg.register(SearchVaultTool::new(Arc::clone(&db)));
91+
reg
92+
});
7193

7294
// SubagentManager: owns the subagent config and task map.
7395
let manager = Arc::new(SubagentManager::new(
@@ -79,8 +101,9 @@ async fn main() {
79101
SUBAGENT_MAX_ITERATIONS,
80102
));
81103

82-
// Main registry: core + spawn + cron (cron is main-agent-only).
104+
// Main registry: core + search + spawn + cron (cron is main-agent-only).
83105
let registry = tools::build_core_registry(&cfg);
106+
registry.register(SearchVaultTool::new(Arc::clone(&db)));
84107
registry.register(SpawnTool::new(Arc::clone(&manager)));
85108
registry.register(SubagentTool::new(Arc::clone(&manager)));
86109

src/memory.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
//! Persistent brain: SQLite-backed chat history, vault index, and FTS5 search engine.
22
33
pub mod db;
4+
pub mod indexer;

src/memory/db.rs

Lines changed: 277 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
//! Tables:
66
//! - `chat_history` — persistent chat messages per session (replaces sessions/*.json)
77
//! - `chat_summary` — per-session LLM-generated summary string
8-
//! - `vault_index` — mirrors Obsidian Markdown files (for Phase 3 indexer)
9-
//! - `vault_fts` — FTS5 virtual table with BM25 scoring (for Phase 4 search)
8+
//! - `vault_index` — mirrors Obsidian Markdown files
9+
//! - `vault_fts` — FTS5 virtual table with BM25 scoring
1010
1111
use std::path::Path;
1212
use std::sync::Mutex;
@@ -111,14 +111,14 @@ impl BrainDb {
111111
summary TEXT NOT NULL DEFAULT ''
112112
);
113113
114-
-- ── Vault index (Phase 3 indexer will populate this) ────────────────────
114+
-- ── Vault index ──────────────────────────────────────────────────────
115115
CREATE TABLE IF NOT EXISTS vault_index (
116116
filepath TEXT PRIMARY KEY,
117117
content TEXT,
118118
last_modified INTEGER
119119
);
120120
121-
-- ── Vault FTS5 (Phase 4 search tool will query this) ───────────────────
121+
-- ── Vault FTS5 ──────────────────────────────────────────────────────
122122
CREATE VIRTUAL TABLE IF NOT EXISTS vault_fts USING fts5(
123123
filepath, content,
124124
content=vault_index,
@@ -240,6 +240,176 @@ impl BrainDb {
240240
.map(|c| c.execute_batch("SELECT 1").is_ok())
241241
.unwrap_or(false)
242242
}
243+
244+
// -----------------------------------------------------------------------
245+
// Vault index operations
246+
// -----------------------------------------------------------------------
247+
248+
/// Upsert a vault file entry. The triggers in the schema keep `vault_fts`
249+
/// in sync automatically on every INSERT OR REPLACE.
250+
pub fn upsert_vault_entry(
251+
&self,
252+
filepath: &str,
253+
content: &str,
254+
last_modified: i64,
255+
) -> Result<(), DbError> {
256+
let conn = self
257+
.conn
258+
.lock()
259+
.map_err(|e| DbError(format!("lock: {e}")))?;
260+
261+
conn.execute(
262+
"INSERT OR REPLACE INTO vault_index (filepath, content, last_modified)
263+
VALUES (?1, ?2, ?3)",
264+
params![filepath, content, last_modified],
265+
)?;
266+
Ok(())
267+
}
268+
269+
/// Return the stored `last_modified` timestamp for a vault file, or `None`
270+
/// if the file has not been indexed yet.
271+
pub fn get_vault_last_modified(&self, filepath: &str) -> Result<Option<i64>, DbError> {
272+
let conn = self
273+
.conn
274+
.lock()
275+
.map_err(|e| DbError(format!("lock: {e}")))?;
276+
277+
match conn.query_row(
278+
"SELECT last_modified FROM vault_index WHERE filepath = ?1",
279+
params![filepath],
280+
|row| row.get(0),
281+
) {
282+
Ok(v) => Ok(Some(v)),
283+
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
284+
Err(e) => Err(DbError(e.to_string())),
285+
}
286+
}
287+
288+
/// Delete all `vault_index` rows whose filepath is **not** present in
289+
/// `known_paths`. Returns the number of rows deleted.
290+
///
291+
/// Holds a single lock for the entire operation (no nested locks).
292+
pub fn delete_vault_stale(
293+
&self,
294+
known_paths: &std::collections::HashSet<String>,
295+
) -> Result<usize, DbError> {
296+
let conn = self
297+
.conn
298+
.lock()
299+
.map_err(|e| DbError(format!("lock: {e}")))?;
300+
301+
// Collect all stored filepaths while holding the lock.
302+
let stored: Vec<String> = {
303+
let mut stmt = conn.prepare("SELECT filepath FROM vault_index")?;
304+
stmt.query_map([], |row| row.get(0))?
305+
.collect::<Result<_, _>>()?
306+
};
307+
308+
let mut deleted = 0usize;
309+
for fp in stored {
310+
if !known_paths.contains(&fp) {
311+
deleted +=
312+
conn.execute("DELETE FROM vault_index WHERE filepath = ?1", params![fp])?;
313+
}
314+
}
315+
Ok(deleted)
316+
}
317+
318+
/// Return the filepaths of all entries currently in `vault_index`.
319+
pub fn list_vault_filepaths(&self) -> Result<Vec<String>, DbError> {
320+
let conn = self
321+
.conn
322+
.lock()
323+
.map_err(|e| DbError(format!("lock: {e}")))?;
324+
let mut stmt = conn.prepare("SELECT filepath FROM vault_index ORDER BY filepath ASC")?;
325+
let paths: Vec<String> = stmt
326+
.query_map([], |row| row.get(0))?
327+
.collect::<Result<_, _>>()?;
328+
Ok(paths)
329+
}
330+
331+
// -----------------------------------------------------------------------
332+
// Vault FTS5 queries
333+
// -----------------------------------------------------------------------
334+
335+
/// Count documents whose `vault_fts` entry matches `fts_query` (FTS5
336+
/// syntax, e.g. `"\"squats\""` for exact-phrase match).
337+
///
338+
/// Useful for diagnostics, testing, and the search tool.
339+
pub fn vault_fts_count(&self, fts_query: &str) -> Result<usize, DbError> {
340+
let conn = self
341+
.conn
342+
.lock()
343+
.map_err(|e| DbError(format!("lock: {e}")))?;
344+
345+
let count: i64 = conn
346+
.query_row(
347+
"SELECT COUNT(*) FROM vault_fts WHERE vault_fts MATCH ?1",
348+
params![fts_query],
349+
|row| row.get::<_, i64>(0),
350+
)
351+
.map_err(DbError::from)?;
352+
353+
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
354+
Ok(count as usize)
355+
}
356+
357+
/// Return the stored content of a single vault file, or `None` if not indexed.
358+
pub fn get_vault_content(&self, filepath: &str) -> Result<Option<String>, DbError> {
359+
let conn = self
360+
.conn
361+
.lock()
362+
.map_err(|e| DbError(format!("lock: {e}")))?;
363+
364+
match conn.query_row(
365+
"SELECT content FROM vault_index WHERE filepath = ?1",
366+
params![filepath],
367+
|row| row.get::<_, String>(0),
368+
) {
369+
Ok(c) => Ok(Some(c)),
370+
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
371+
Err(e) => Err(DbError(e.to_string())),
372+
}
373+
}
374+
375+
/// Return a BM25-ranked list of `(filepath, snippet)` pairs for `fts_query`.
376+
///
377+
/// `snippet_col` is the FTS5 column index for `snippet()` (-1 = best).
378+
/// Returns at most `limit` results. This is the foundation for the
379+
pub fn vault_fts_search(
380+
&self,
381+
fts_query: &str,
382+
limit: usize,
383+
) -> Result<Vec<(String, String)>, DbError> {
384+
if fts_query.trim().is_empty() {
385+
return Ok(Vec::new());
386+
}
387+
388+
let conn = self
389+
.conn
390+
.lock()
391+
.map_err(|e| DbError(format!("lock: {e}")))?;
392+
393+
#[allow(clippy::cast_possible_wrap)]
394+
let limit_i64 = limit as i64;
395+
396+
let mut stmt = conn.prepare(
397+
"SELECT filepath, snippet(vault_fts, -1, '**', '**', '...', 10) AS snip
398+
FROM vault_fts
399+
WHERE vault_fts MATCH ?1
400+
ORDER BY bm25(vault_fts)
401+
LIMIT ?2",
402+
)?;
403+
404+
let rows = stmt.query_map(params![fts_query, limit_i64], |row| {
405+
let fp: String = row.get(0)?;
406+
let sn: String = row.get(1)?;
407+
Ok((fp, sn))
408+
})?;
409+
410+
let results: Vec<(String, String)> = rows.collect::<Result<_, _>>()?;
411+
Ok(results)
412+
}
243413
}
244414

245415
// ---------------------------------------------------------------------------
@@ -484,6 +654,109 @@ mod tests {
484654
assert_eq!(count, 1, "vault_fts virtual table should exist");
485655
}
486656

657+
// ── Vault index: BrainDb operations ─────────────────────────────────────
658+
659+
#[test]
660+
fn upsert_vault_entry_and_get_mtime() {
661+
let (_tmp, db) = temp_db();
662+
db.upsert_vault_entry("Daily log/2026-02-20.md", "Ran 5km today.", 1_708_384_000)
663+
.unwrap();
664+
let mtime = db
665+
.get_vault_last_modified("Daily log/2026-02-20.md")
666+
.unwrap();
667+
assert_eq!(mtime, Some(1_708_384_000));
668+
}
669+
670+
#[test]
671+
fn upsert_vault_entry_replaces_existing() {
672+
let (_tmp, db) = temp_db();
673+
db.upsert_vault_entry("note.md", "old content", 100).unwrap();
674+
db.upsert_vault_entry("note.md", "new content", 200).unwrap();
675+
676+
let mtime = db.get_vault_last_modified("note.md").unwrap();
677+
assert_eq!(mtime, Some(200));
678+
679+
// FTS5 should see new content, not old
680+
let conn = db.conn.lock().unwrap();
681+
let count: i64 = conn
682+
.query_row(
683+
"SELECT COUNT(*) FROM vault_fts WHERE vault_fts MATCH '\"new\"'",
684+
[],
685+
|row| row.get(0),
686+
)
687+
.unwrap();
688+
assert_eq!(count, 1);
689+
}
690+
691+
#[test]
692+
fn get_vault_last_modified_missing() {
693+
let (_tmp, db) = temp_db();
694+
let mtime = db.get_vault_last_modified("not_indexed.md").unwrap();
695+
assert_eq!(mtime, None);
696+
}
697+
698+
#[test]
699+
fn list_vault_filepaths_empty() {
700+
let (_tmp, db) = temp_db();
701+
let paths = db.list_vault_filepaths().unwrap();
702+
assert!(paths.is_empty());
703+
}
704+
705+
#[test]
706+
fn list_vault_filepaths_sorted() {
707+
let (_tmp, db) = temp_db();
708+
db.upsert_vault_entry("z.md", "z", 0).unwrap();
709+
db.upsert_vault_entry("a.md", "a", 0).unwrap();
710+
db.upsert_vault_entry("m.md", "m", 0).unwrap();
711+
712+
let paths = db.list_vault_filepaths().unwrap();
713+
assert_eq!(paths, vec!["a.md", "m.md", "z.md"]);
714+
}
715+
716+
#[test]
717+
fn delete_vault_stale_removes_unlisted() {
718+
use std::collections::HashSet;
719+
let (_tmp, db) = temp_db();
720+
db.upsert_vault_entry("keep.md", "kept", 1).unwrap();
721+
db.upsert_vault_entry("stale1.md", "gone1", 2).unwrap();
722+
db.upsert_vault_entry("stale2.md", "gone2", 3).unwrap();
723+
724+
let known: HashSet<String> = vec!["keep.md".to_string()].into_iter().collect();
725+
let deleted = db.delete_vault_stale(&known).unwrap();
726+
assert_eq!(deleted, 2);
727+
728+
let paths = db.list_vault_filepaths().unwrap();
729+
assert_eq!(paths, vec!["keep.md"]);
730+
}
731+
732+
#[test]
733+
fn delete_vault_stale_empty_known_deletes_all() {
734+
use std::collections::HashSet;
735+
let (_tmp, db) = temp_db();
736+
db.upsert_vault_entry("a.md", "a", 1).unwrap();
737+
db.upsert_vault_entry("b.md", "b", 2).unwrap();
738+
739+
let known: HashSet<String> = HashSet::new();
740+
let deleted = db.delete_vault_stale(&known).unwrap();
741+
assert_eq!(deleted, 2);
742+
assert!(db.list_vault_filepaths().unwrap().is_empty());
743+
}
744+
745+
#[test]
746+
fn delete_vault_stale_all_known_deletes_none() {
747+
use std::collections::HashSet;
748+
let (_tmp, db) = temp_db();
749+
db.upsert_vault_entry("a.md", "a", 1).unwrap();
750+
db.upsert_vault_entry("b.md", "b", 2).unwrap();
751+
752+
let known: HashSet<String> = vec!["a.md".to_string(), "b.md".to_string()]
753+
.into_iter()
754+
.collect();
755+
let deleted = db.delete_vault_stale(&known).unwrap();
756+
assert_eq!(deleted, 0);
757+
assert_eq!(db.list_vault_filepaths().unwrap().len(), 2);
758+
}
759+
487760
// ── Vault index: basic insert & fts5 roundtrip ───────────────────────────
488761

489762
#[test]

0 commit comments

Comments
 (0)