|
5 | 5 | //! Tables: |
6 | 6 | //! - `chat_history` — persistent chat messages per session (replaces sessions/*.json) |
7 | 7 | //! - `chat_summary` — per-session LLM-generated summary string |
8 | | -//! - `vault_index` — mirrors Obsidian Markdown files (for Phase 3 indexer) |
9 | | -//! - `vault_fts` — FTS5 virtual table with BM25 scoring (for Phase 4 search) |
| 8 | +//! - `vault_index` — mirrors Obsidian Markdown files |
| 9 | +//! - `vault_fts` — FTS5 virtual table with BM25 scoring |
10 | 10 |
|
11 | 11 | use std::path::Path; |
12 | 12 | use std::sync::Mutex; |
@@ -111,14 +111,14 @@ impl BrainDb { |
111 | 111 | summary TEXT NOT NULL DEFAULT '' |
112 | 112 | ); |
113 | 113 |
|
114 | | - -- ── Vault index (Phase 3 indexer will populate this) ──────────────────── |
| 114 | + -- ── Vault index ────────────────────────────────────────────────────── |
115 | 115 | CREATE TABLE IF NOT EXISTS vault_index ( |
116 | 116 | filepath TEXT PRIMARY KEY, |
117 | 117 | content TEXT, |
118 | 118 | last_modified INTEGER |
119 | 119 | ); |
120 | 120 |
|
121 | | - -- ── Vault FTS5 (Phase 4 search tool will query this) ─────────────────── |
| 121 | + -- ── Vault FTS5 ────────────────────────────────────────────────────── |
122 | 122 | CREATE VIRTUAL TABLE IF NOT EXISTS vault_fts USING fts5( |
123 | 123 | filepath, content, |
124 | 124 | content=vault_index, |
@@ -240,6 +240,176 @@ impl BrainDb { |
240 | 240 | .map(|c| c.execute_batch("SELECT 1").is_ok()) |
241 | 241 | .unwrap_or(false) |
242 | 242 | } |
| 243 | + |
| 244 | + // ----------------------------------------------------------------------- |
| 245 | + // Vault index operations |
| 246 | + // ----------------------------------------------------------------------- |
| 247 | + |
| 248 | + /// Upsert a vault file entry. The triggers in the schema keep `vault_fts` |
| 249 | + /// in sync automatically on every INSERT OR REPLACE. |
| 250 | + pub fn upsert_vault_entry( |
| 251 | + &self, |
| 252 | + filepath: &str, |
| 253 | + content: &str, |
| 254 | + last_modified: i64, |
| 255 | + ) -> Result<(), DbError> { |
| 256 | + let conn = self |
| 257 | + .conn |
| 258 | + .lock() |
| 259 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 260 | + |
| 261 | + conn.execute( |
| 262 | + "INSERT OR REPLACE INTO vault_index (filepath, content, last_modified) |
| 263 | + VALUES (?1, ?2, ?3)", |
| 264 | + params![filepath, content, last_modified], |
| 265 | + )?; |
| 266 | + Ok(()) |
| 267 | + } |
| 268 | + |
| 269 | + /// Return the stored `last_modified` timestamp for a vault file, or `None` |
| 270 | + /// if the file has not been indexed yet. |
| 271 | + pub fn get_vault_last_modified(&self, filepath: &str) -> Result<Option<i64>, DbError> { |
| 272 | + let conn = self |
| 273 | + .conn |
| 274 | + .lock() |
| 275 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 276 | + |
| 277 | + match conn.query_row( |
| 278 | + "SELECT last_modified FROM vault_index WHERE filepath = ?1", |
| 279 | + params![filepath], |
| 280 | + |row| row.get(0), |
| 281 | + ) { |
| 282 | + Ok(v) => Ok(Some(v)), |
| 283 | + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), |
| 284 | + Err(e) => Err(DbError(e.to_string())), |
| 285 | + } |
| 286 | + } |
| 287 | + |
| 288 | + /// Delete all `vault_index` rows whose filepath is **not** present in |
| 289 | + /// `known_paths`. Returns the number of rows deleted. |
| 290 | + /// |
| 291 | + /// Holds a single lock for the entire operation (no nested locks). |
| 292 | + pub fn delete_vault_stale( |
| 293 | + &self, |
| 294 | + known_paths: &std::collections::HashSet<String>, |
| 295 | + ) -> Result<usize, DbError> { |
| 296 | + let conn = self |
| 297 | + .conn |
| 298 | + .lock() |
| 299 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 300 | + |
| 301 | + // Collect all stored filepaths while holding the lock. |
| 302 | + let stored: Vec<String> = { |
| 303 | + let mut stmt = conn.prepare("SELECT filepath FROM vault_index")?; |
| 304 | + stmt.query_map([], |row| row.get(0))? |
| 305 | + .collect::<Result<_, _>>()? |
| 306 | + }; |
| 307 | + |
| 308 | + let mut deleted = 0usize; |
| 309 | + for fp in stored { |
| 310 | + if !known_paths.contains(&fp) { |
| 311 | + deleted += |
| 312 | + conn.execute("DELETE FROM vault_index WHERE filepath = ?1", params![fp])?; |
| 313 | + } |
| 314 | + } |
| 315 | + Ok(deleted) |
| 316 | + } |
| 317 | + |
| 318 | + /// Return the filepaths of all entries currently in `vault_index`. |
| 319 | + pub fn list_vault_filepaths(&self) -> Result<Vec<String>, DbError> { |
| 320 | + let conn = self |
| 321 | + .conn |
| 322 | + .lock() |
| 323 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 324 | + let mut stmt = conn.prepare("SELECT filepath FROM vault_index ORDER BY filepath ASC")?; |
| 325 | + let paths: Vec<String> = stmt |
| 326 | + .query_map([], |row| row.get(0))? |
| 327 | + .collect::<Result<_, _>>()?; |
| 328 | + Ok(paths) |
| 329 | + } |
| 330 | + |
| 331 | + // ----------------------------------------------------------------------- |
| 332 | + // Vault FTS5 queries |
| 333 | + // ----------------------------------------------------------------------- |
| 334 | + |
| 335 | + /// Count documents whose `vault_fts` entry matches `fts_query` (FTS5 |
| 336 | + /// syntax, e.g. `"\"squats\""` for exact-phrase match). |
| 337 | + /// |
| 338 | + /// Useful for diagnostics, testing, and the search tool. |
| 339 | + pub fn vault_fts_count(&self, fts_query: &str) -> Result<usize, DbError> { |
| 340 | + let conn = self |
| 341 | + .conn |
| 342 | + .lock() |
| 343 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 344 | + |
| 345 | + let count: i64 = conn |
| 346 | + .query_row( |
| 347 | + "SELECT COUNT(*) FROM vault_fts WHERE vault_fts MATCH ?1", |
| 348 | + params![fts_query], |
| 349 | + |row| row.get::<_, i64>(0), |
| 350 | + ) |
| 351 | + .map_err(DbError::from)?; |
| 352 | + |
| 353 | + #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
| 354 | + Ok(count as usize) |
| 355 | + } |
| 356 | + |
| 357 | + /// Return the stored content of a single vault file, or `None` if not indexed. |
| 358 | + pub fn get_vault_content(&self, filepath: &str) -> Result<Option<String>, DbError> { |
| 359 | + let conn = self |
| 360 | + .conn |
| 361 | + .lock() |
| 362 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 363 | + |
| 364 | + match conn.query_row( |
| 365 | + "SELECT content FROM vault_index WHERE filepath = ?1", |
| 366 | + params![filepath], |
| 367 | + |row| row.get::<_, String>(0), |
| 368 | + ) { |
| 369 | + Ok(c) => Ok(Some(c)), |
| 370 | + Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), |
| 371 | + Err(e) => Err(DbError(e.to_string())), |
| 372 | + } |
| 373 | + } |
| 374 | + |
| 375 | + /// Return a BM25-ranked list of `(filepath, snippet)` pairs for `fts_query`. |
| 376 | + /// |
| 377 | + /// `snippet_col` is the FTS5 column index for `snippet()` (-1 = best). |
| 378 | + /// Returns at most `limit` results. This is the foundation for the |
| 379 | + pub fn vault_fts_search( |
| 380 | + &self, |
| 381 | + fts_query: &str, |
| 382 | + limit: usize, |
| 383 | + ) -> Result<Vec<(String, String)>, DbError> { |
| 384 | + if fts_query.trim().is_empty() { |
| 385 | + return Ok(Vec::new()); |
| 386 | + } |
| 387 | + |
| 388 | + let conn = self |
| 389 | + .conn |
| 390 | + .lock() |
| 391 | + .map_err(|e| DbError(format!("lock: {e}")))?; |
| 392 | + |
| 393 | + #[allow(clippy::cast_possible_wrap)] |
| 394 | + let limit_i64 = limit as i64; |
| 395 | + |
| 396 | + let mut stmt = conn.prepare( |
| 397 | + "SELECT filepath, snippet(vault_fts, -1, '**', '**', '...', 10) AS snip |
| 398 | + FROM vault_fts |
| 399 | + WHERE vault_fts MATCH ?1 |
| 400 | + ORDER BY bm25(vault_fts) |
| 401 | + LIMIT ?2", |
| 402 | + )?; |
| 403 | + |
| 404 | + let rows = stmt.query_map(params![fts_query, limit_i64], |row| { |
| 405 | + let fp: String = row.get(0)?; |
| 406 | + let sn: String = row.get(1)?; |
| 407 | + Ok((fp, sn)) |
| 408 | + })?; |
| 409 | + |
| 410 | + let results: Vec<(String, String)> = rows.collect::<Result<_, _>>()?; |
| 411 | + Ok(results) |
| 412 | + } |
243 | 413 | } |
244 | 414 |
|
245 | 415 | // --------------------------------------------------------------------------- |
@@ -484,6 +654,109 @@ mod tests { |
484 | 654 | assert_eq!(count, 1, "vault_fts virtual table should exist"); |
485 | 655 | } |
486 | 656 |
|
| 657 | + // ── Vault index: BrainDb operations ───────────────────────────────────── |
| 658 | + |
| 659 | + #[test] |
| 660 | + fn upsert_vault_entry_and_get_mtime() { |
| 661 | + let (_tmp, db) = temp_db(); |
| 662 | + db.upsert_vault_entry("Daily log/2026-02-20.md", "Ran 5km today.", 1_708_384_000) |
| 663 | + .unwrap(); |
| 664 | + let mtime = db |
| 665 | + .get_vault_last_modified("Daily log/2026-02-20.md") |
| 666 | + .unwrap(); |
| 667 | + assert_eq!(mtime, Some(1_708_384_000)); |
| 668 | + } |
| 669 | + |
| 670 | + #[test] |
| 671 | + fn upsert_vault_entry_replaces_existing() { |
| 672 | + let (_tmp, db) = temp_db(); |
| 673 | + db.upsert_vault_entry("note.md", "old content", 100).unwrap(); |
| 674 | + db.upsert_vault_entry("note.md", "new content", 200).unwrap(); |
| 675 | + |
| 676 | + let mtime = db.get_vault_last_modified("note.md").unwrap(); |
| 677 | + assert_eq!(mtime, Some(200)); |
| 678 | + |
| 679 | + // FTS5 should see new content, not old |
| 680 | + let conn = db.conn.lock().unwrap(); |
| 681 | + let count: i64 = conn |
| 682 | + .query_row( |
| 683 | + "SELECT COUNT(*) FROM vault_fts WHERE vault_fts MATCH '\"new\"'", |
| 684 | + [], |
| 685 | + |row| row.get(0), |
| 686 | + ) |
| 687 | + .unwrap(); |
| 688 | + assert_eq!(count, 1); |
| 689 | + } |
| 690 | + |
| 691 | + #[test] |
| 692 | + fn get_vault_last_modified_missing() { |
| 693 | + let (_tmp, db) = temp_db(); |
| 694 | + let mtime = db.get_vault_last_modified("not_indexed.md").unwrap(); |
| 695 | + assert_eq!(mtime, None); |
| 696 | + } |
| 697 | + |
| 698 | + #[test] |
| 699 | + fn list_vault_filepaths_empty() { |
| 700 | + let (_tmp, db) = temp_db(); |
| 701 | + let paths = db.list_vault_filepaths().unwrap(); |
| 702 | + assert!(paths.is_empty()); |
| 703 | + } |
| 704 | + |
| 705 | + #[test] |
| 706 | + fn list_vault_filepaths_sorted() { |
| 707 | + let (_tmp, db) = temp_db(); |
| 708 | + db.upsert_vault_entry("z.md", "z", 0).unwrap(); |
| 709 | + db.upsert_vault_entry("a.md", "a", 0).unwrap(); |
| 710 | + db.upsert_vault_entry("m.md", "m", 0).unwrap(); |
| 711 | + |
| 712 | + let paths = db.list_vault_filepaths().unwrap(); |
| 713 | + assert_eq!(paths, vec!["a.md", "m.md", "z.md"]); |
| 714 | + } |
| 715 | + |
| 716 | + #[test] |
| 717 | + fn delete_vault_stale_removes_unlisted() { |
| 718 | + use std::collections::HashSet; |
| 719 | + let (_tmp, db) = temp_db(); |
| 720 | + db.upsert_vault_entry("keep.md", "kept", 1).unwrap(); |
| 721 | + db.upsert_vault_entry("stale1.md", "gone1", 2).unwrap(); |
| 722 | + db.upsert_vault_entry("stale2.md", "gone2", 3).unwrap(); |
| 723 | + |
| 724 | + let known: HashSet<String> = vec!["keep.md".to_string()].into_iter().collect(); |
| 725 | + let deleted = db.delete_vault_stale(&known).unwrap(); |
| 726 | + assert_eq!(deleted, 2); |
| 727 | + |
| 728 | + let paths = db.list_vault_filepaths().unwrap(); |
| 729 | + assert_eq!(paths, vec!["keep.md"]); |
| 730 | + } |
| 731 | + |
| 732 | + #[test] |
| 733 | + fn delete_vault_stale_empty_known_deletes_all() { |
| 734 | + use std::collections::HashSet; |
| 735 | + let (_tmp, db) = temp_db(); |
| 736 | + db.upsert_vault_entry("a.md", "a", 1).unwrap(); |
| 737 | + db.upsert_vault_entry("b.md", "b", 2).unwrap(); |
| 738 | + |
| 739 | + let known: HashSet<String> = HashSet::new(); |
| 740 | + let deleted = db.delete_vault_stale(&known).unwrap(); |
| 741 | + assert_eq!(deleted, 2); |
| 742 | + assert!(db.list_vault_filepaths().unwrap().is_empty()); |
| 743 | + } |
| 744 | + |
| 745 | + #[test] |
| 746 | + fn delete_vault_stale_all_known_deletes_none() { |
| 747 | + use std::collections::HashSet; |
| 748 | + let (_tmp, db) = temp_db(); |
| 749 | + db.upsert_vault_entry("a.md", "a", 1).unwrap(); |
| 750 | + db.upsert_vault_entry("b.md", "b", 2).unwrap(); |
| 751 | + |
| 752 | + let known: HashSet<String> = vec!["a.md".to_string(), "b.md".to_string()] |
| 753 | + .into_iter() |
| 754 | + .collect(); |
| 755 | + let deleted = db.delete_vault_stale(&known).unwrap(); |
| 756 | + assert_eq!(deleted, 0); |
| 757 | + assert_eq!(db.list_vault_filepaths().unwrap().len(), 2); |
| 758 | + } |
| 759 | + |
487 | 760 | // ── Vault index: basic insert & fts5 roundtrip ─────────────────────────── |
488 | 761 |
|
489 | 762 | #[test] |
|
0 commit comments