From 268e0e0d38079490ba246fd0882c8d6b00a54b3d Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 20:41:35 +0200 Subject: [PATCH 1/9] feat(temporal): add date extraction, heuristic range parsing, and temporal scoring Pure functions for temporal search: extract_note_date from frontmatter/filename, temporal_score for proximity decay, parse_date_range_heuristic for natural language temporal keywords, and parse_date_range_from_json for LLM orchestrator output. 33 unit tests covering all functions. --- Cargo.lock | 12 + Cargo.toml | 2 +- src/lib.rs | 1 + src/temporal.rs | 649 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 663 insertions(+), 1 deletion(-) create mode 100644 src/temporal.rs diff --git a/Cargo.lock b/Cargo.lock index 9be9607..f1ee97e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2235,10 +2235,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", + "itoa", "num-conv", "powerfmt", "serde_core", "time-core", + "time-macros", ] [[package]] @@ -2247,6 +2249,16 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 2cfc549..f3aac95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ indicatif = "0.17" sqlite-vec = "0.1.8-alpha.1" zerocopy = { version = "0.7", features = ["derive"] } rayon = "1" -time = "0.3" +time = { version = "0.3", features = ["parsing", "formatting", "macros"] } strsim = "0.11" ignore = "0.4" rmcp = { version = "1.2", features = 
["transport-io"] }
diff --git a/src/lib.rs b/src/lib.rs
index 3939ba2..237516f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,6 +17,7 @@ pub mod search;
 pub mod serve;
 pub mod store;
 pub mod tags;
+pub mod temporal;
 pub mod vecstore;
 pub mod watcher;
 pub mod writer;
diff --git a/src/temporal.rs b/src/temporal.rs
new file mode 100644
index 0000000..ffd25c7
--- /dev/null
+++ b/src/temporal.rs
@@ -0,0 +1,649 @@
+//! Temporal date extraction, heuristic range parsing, and proximity scoring.
+//!
+//! Provides utilities for:
+//! - Extracting authoring dates from note frontmatter or filenames
+//! - Parsing natural-language temporal expressions into date ranges
+//! - Scoring search results by temporal proximity to a target range
+
+use time::macros::format_description;
+use time::{Date, Duration, Month, OffsetDateTime, Weekday};
+
+// ── Date extraction ─────────────────────────────────────────────
+
+/// Extract a note's authoring date as a Unix timestamp (start of day UTC).
+///
+/// Priority: frontmatter `date: YYYY-MM-DD` → filename `YYYY-MM-DD` pattern → None.
+pub fn extract_note_date(frontmatter: &str, filename: &str) -> Option<i64> {
+    // Try frontmatter first
+    if let Some(ts) = extract_date_from_frontmatter(frontmatter) {
+        return Some(ts);
+    }
+    // Fall back to filename pattern
+    extract_date_from_filename(filename)
+}
+
+/// Parse `date: YYYY-MM-DD` from YAML frontmatter (first `date:` line that parses wins).
+fn extract_date_from_frontmatter(frontmatter: &str) -> Option<i64> {
+    for line in frontmatter.lines() {
+        let trimmed = line.trim();
+        if let Some(rest) = trimmed.strip_prefix("date:") {
+            let value = rest.trim().trim_matches('"').trim_matches('\'');
+            // First 10 bytes only (e.g. 2026-03-25T10:00:00); `get` avoids a char-boundary panic
+            let date_str = value.get(..10).unwrap_or(value);
+            if let Some(ts) = parse_iso_date(date_str) {
+                return Some(ts);
+            }
+        }
+    }
+    None
+}
+
+/// Extract YYYY-MM-DD pattern from a filename.
+fn extract_date_from_filename(filename: &str) -> Option<i64> {
+    // Scan 10-byte windows of the raw bytes instead of slicing the `&str`:
+    // a window can start or end inside a multi-byte character (non-ASCII
+    // filenames), and slicing a `&str` there panics.
+    filename
+        .as_bytes()
+        .windows(10)
+        .filter(|window| window[4] == b'-' && window[7] == b'-')
+        // A YYYY-MM-DD candidate is pure ASCII, so windows that are not valid
+        // UTF-8 cannot be dates and are skipped.
+        .filter_map(|window| std::str::from_utf8(window).ok())
+        .find_map(parse_iso_date)
+}
+
+/// Parse an ISO date string (YYYY-MM-DD) into a Unix timestamp at start of day UTC.
+///
+/// Returns `None` when `s` does not parse as a date in that format.
+fn parse_iso_date(s: &str) -> Option<i64> {
+    let fmt = format_description!("[year]-[month]-[day]");
+    Date::parse(s, &fmt)
+        .ok()
+        .map(|date| date.midnight().assume_utc().unix_timestamp())
+}
+
+// ── Temporal scoring ────────────────────────────────────────────
+
+/// Score a file by temporal proximity to a date range.
+///
+/// - Inside range (both bounds inclusive): 1.0
+/// - Outside range: `1.0 / (1.0 + days_away * 0.1)` (smooth decay), where
+///   `days_away` is the fractional number of days to the nearest bound
+///
+/// All timestamps are Unix seconds (UTC).
+pub fn temporal_score(note_date: i64, range_start: i64, range_end: i64) -> f64 {
+    const SECONDS_PER_DAY: f64 = 86400.0;
+    const DECAY_PER_DAY: f64 = 0.1;
+
+    if (range_start..=range_end).contains(&note_date) {
+        return 1.0;
+    }
+
+    // Distance to the nearest bound. `abs_diff` cannot overflow, unlike plain
+    // subtraction on timestamps near the extremes of `i64`.
+    let seconds_away: u64 = if note_date < range_start {
+        range_start.abs_diff(note_date)
+    } else {
+        note_date.abs_diff(range_end)
+    };
+
+    let days_away = seconds_away as f64 / SECONDS_PER_DAY;
+    1.0 / (1.0 + days_away * DECAY_PER_DAY)
+}
+
+// ── Heuristic date range parsing ────────────────────────────────
+
+/// Scan a natural-language query for temporal keywords and return a date range
+/// as `(start_timestamp, end_timestamp)` in Unix seconds UTC.
+///
+/// Supported patterns, checked in this order (the first match wins):
+/// - "today" / "this morning" → today 00:00–23:59:59
+/// - "yesterday" → yesterday 00:00–23:59:59
+/// - "last week" → previous Monday–Sunday 23:59:59
+/// - "this week" → current Monday–Sunday 23:59:59
+/// - "last month" → previous month 1st–last day 23:59:59
+/// - "this month" → current month 1st–last day 23:59:59
+/// - "recent" / "recently" → last 7 days
+/// - ISO dates: "2026-03-25" → that day
+/// - "January to March" → Jan 1–Mar 31 (current year)
+/// - Month names with optional year: "March 2026", "march"
+/// - No match → None
+pub fn parse_date_range_heuristic(query: &str) -> Option<(i64, i64)> {
+    parse_date_range_heuristic_with_ref(query, OffsetDateTime::now_utc())
+}
+
+/// Internal implementation with injectable reference time for testing.
+///
+/// `now` supplies "today". Keywords are matched as substrings of the
+/// lower-cased query, so "recent" also covers "recently".
+fn parse_date_range_heuristic_with_ref(
+    query: &str,
+    now: OffsetDateTime,
+) -> Option<(i64, i64)> {
+    let lower = query.to_lowercase();
+    let today = now.date();
+    let mentions = |keyword: &str| lower.contains(keyword);
+
+    // "today" or "this morning"
+    if mentions("today") || mentions("this morning") {
+        return Some(day_range(today));
+    }
+
+    // "yesterday"
+    if mentions("yesterday") {
+        return today.previous_day().map(day_range);
+    }
+
+    // "last week" — previous Monday to Sunday
+    if mentions("last week") {
+        let monday = monday_of_week(today).checked_sub(Duration::weeks(1))?;
+        let sunday = monday.checked_add(Duration::days(6))?;
+        return Some((start_of_day(monday), end_of_day(sunday)));
+    }
+
+    // "this week" — current Monday to Sunday
+    if mentions("this week") {
+        let monday = monday_of_week(today);
+        let sunday = monday.checked_add(Duration::days(6))?;
+        return Some((start_of_day(monday), end_of_day(sunday)));
+    }
+
+    // "last month" — previous month 1st to last day
+    if mentions("last month") {
+        let (year, month) = prev_month(today.year(), today.month());
+        return month_range(year, month);
+    }
+
+    // "this month" — current month 1st to last day
+    if mentions("this month") {
+        return month_range(today.year(), today.month());
+    }
+
+    // "recent" / "recently" — today plus the six days before it (7 days total)
+    if mentions("recent") {
+        let window_start = today.checked_sub(Duration::days(6))?;
+        return Some((start_of_day(window_start), end_of_day(today)));
+    }
+
+    // Explicit dates and month names, most specific first:
+    // ISO date, then "January to March", then "March 2026" / bare "march".
+    find_iso_date_in_query(&lower)
+        .or_else(|| parse_month_to_month(&lower, today.year()))
+        .or_else(|| parse_month_with_optional_year(&lower, today.year()))
+}
+
+// ── JSON date range parsing ─────────────────────────────────────
+
+/// Parse a date range from LLM orchestrator JSON.
+///
+/// Expected format: `{"date_range": {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}}`.
+/// The end date gets +86399 seconds (end of day). If the two dates arrive in
+/// reverse order they are swapped, so the returned start never exceeds the end.
+///
+/// Returns `None` when `date_range`, `start` or `end` is missing, is not a
+/// string, or does not parse as an ISO date.
+pub fn parse_date_range_from_json(json: &serde_json::Value) -> Option<(i64, i64)> {
+    let range = json.get("date_range")?;
+    let start_str = range.get("start")?.as_str()?;
+    let end_str = range.get("end")?.as_str()?;
+
+    let first = parse_iso_date(start_str)?;
+    let second = parse_iso_date(end_str)?;
+
+    // Order the day-start timestamps before widening the later one to end of day.
+    let (start_ts, end_day) = if first <= second {
+        (first, second)
+    } else {
+        (second, first)
+    };
+
+    Some((start_ts, end_day + 86399))
+}
+
+// ── Helpers ─────────────────────────────────────────────────────
+
+/// Return (start_of_day, end_of_day) timestamps for a given date.
+fn day_range(date: Date) -> (i64, i64) {
+    (start_of_day(date), end_of_day(date))
+}
+
+/// Unix timestamp for 00:00:00 UTC of the given date.
+fn start_of_day(date: Date) -> i64 {
+    date.midnight().assume_utc().unix_timestamp()
+}
+
+/// Unix timestamp for 23:59:59 UTC of the given date.
+fn end_of_day(date: Date) -> i64 {
+    // 23:59:59 is a valid time of day, so `with_hms` cannot fail here.
+    let last_second = date.with_hms(23, 59, 59).expect("valid HMS");
+    last_second.assume_utc().unix_timestamp()
+}
+
+/// Find the Monday of the ISO week containing `date`.
+///
+/// # Panics
+///
+/// Panics if stepping back to Monday would leave the range `Date` can represent.
+fn monday_of_week(date: Date) -> Date {
+    // 0 for Monday through 6 for Sunday.
+    let days_since_monday = Weekday::number_days_from_monday(date.weekday());
+    date.checked_sub(Duration::days(i64::from(days_since_monday)))
+        .expect("valid date subtraction")
+}
+
+/// Return the previous month and its year.
+fn prev_month(year: i32, month: Month) -> (i32, Month) {
+    // `previous()` wraps January back to December, which belongs to the prior year.
+    let year = if month == Month::January { year - 1 } else { year };
+    (year, month.previous())
+}
+
+/// Return (start_of_day of 1st, end_of_day of last day) for a given year/month.
+///
+/// Returns `None` when either date cannot be constructed for `year`.
+fn month_range(year: i32, month: Month) -> Option<(i64, i64)> {
+    let first = Date::from_calendar_date(year, month, 1).ok()?;
+    let last = last_day_of_month(year, month)?;
+    Some((start_of_day(first), end_of_day(last)))
+}
+
+/// Get the last day of a given month.
+///
+/// Computed as the day before the 1st of the following month, so month
+/// lengths and leap years are handled by `Date` itself.
+fn last_day_of_month(year: i32, month: Month) -> Option<Date> {
+    // `next()` wraps December to January, which belongs to the following year.
+    let following_year = if month == Month::December { year + 1 } else { year };
+    Date::from_calendar_date(following_year, month.next(), 1)
+        .ok()?
+        .previous_day()
+}
+
+/// Find an ISO date (YYYY-MM-DD) in the query and return a day range.
+fn find_iso_date_in_query(query: &str) -> Option<(i64, i64)> {
+    // Scan 10-byte windows of the raw bytes instead of slicing the `&str`:
+    // queries may contain multi-byte characters, and slicing a `&str` in the
+    // middle of one panics.
+    query
+        .as_bytes()
+        .windows(10)
+        .filter(|window| window[4] == b'-' && window[7] == b'-')
+        // A YYYY-MM-DD candidate is pure ASCII; other windows cannot be dates.
+        .filter_map(|window| std::str::from_utf8(window).ok())
+        .find_map(parse_iso_date)
+        // `parse_iso_date` yields 00:00:00 UTC; 23:59:59 is 86399 seconds later.
+        .map(|day_start| (day_start, day_start + 86399))
+}
+
+/// Parse "January to March" style range.
+///
+/// Recognised separators are " to ", " - " and " through ". The word right
+/// before the separator and the word right after it must both be month names;
+/// the range runs from the 1st of the first month to the last day of the
+/// second, both in `current_year`.
+///
+/// NOTE(review): when the second month precedes the first (e.g. "november to
+/// february") the returned start is later than the end — confirm whether
+/// callers should treat that as a range crossing the year boundary.
+fn parse_month_to_month(query: &str, current_year: i32) -> Option<(i64, i64)> {
+    const SEPARATORS: [&str; 3] = [" to ", " - ", " through "];
+
+    SEPARATORS.iter().find_map(|&sep| {
+        // Try every occurrence: an earlier " to " may be ordinary prose
+        // ("how to ..."), which must not hide a later "january to march".
+        query.match_indices(sep).find_map(|(idx, matched)| {
+            let before = &query[..idx];
+            let after = &query[idx + matched.len()..];
+
+            let start_month = parse_month_name(last_word(before))?;
+            let end_month = parse_month_name(first_word(after))?;
+
+            let start = Date::from_calendar_date(current_year, start_month, 1).ok()?;
+            let end = last_day_of_month(current_year, end_month)?;
+            Some((start_of_day(start), end_of_day(end)))
+        })
+    })
+}
+
+/// Parse "March 2026" or bare "march" into a month range.
+///
+/// The first word that is a month name decides the month. If the word after it
+/// is a year between 1900 and 2100 that year is used, otherwise `current_year`.
+/// Punctuation around a word is ignored, so "march," and "2026?" still match.
+fn parse_month_with_optional_year(query: &str, current_year: i32) -> Option<(i64, i64)> {
+    let mut words = query
+        .split_whitespace()
+        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
+        .peekable();
+
+    while let Some(word) = words.next() {
+        if let Some(month) = parse_month_name(word) {
+            // Check if next word is a plausible 4-digit year
+            let year = words
+                .peek()
+                .and_then(|next| next.parse::<i32>().ok())
+                .filter(|year| (1900..=2100).contains(year))
+                .unwrap_or(current_year);
+            return month_range(year, month);
+        }
+    }
+    None
+}
+
+/// Parse a month name (full or 3-letter abbreviation) into a `time::Month`.
+fn parse_month_name(s: &str) -> Option { + match s.to_lowercase().as_str() { + "jan" | "january" => Some(Month::January), + "feb" | "february" => Some(Month::February), + "mar" | "march" => Some(Month::March), + "apr" | "april" => Some(Month::April), + "may" => Some(Month::May), + "jun" | "june" => Some(Month::June), + "jul" | "july" => Some(Month::July), + "aug" | "august" => Some(Month::August), + "sep" | "september" => Some(Month::September), + "oct" | "october" => Some(Month::October), + "nov" | "november" => Some(Month::November), + "dec" | "december" => Some(Month::December), + _ => None, + } +} + +fn first_word(s: &str) -> &str { + s.split_whitespace().next().unwrap_or("") +} + +fn last_word(s: &str) -> &str { + s.split_whitespace().last().unwrap_or("") +} + +// ── Tests ─────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use time::macros::datetime; + + // ── extract_note_date ─────────────────────────────────────── + + #[test] + fn extract_date_from_frontmatter_yaml() { + let fm = "---\ntitle: Test\ndate: 2026-03-25\ntags: [work]\n---"; + let ts = extract_note_date(fm, "random-note.md").unwrap(); + assert_eq!(ts, date_ts(2026, 3, 25)); + } + + #[test] + fn extract_date_from_frontmatter_quoted() { + let fm = "---\ndate: \"2025-12-31\"\n---"; + let ts = extract_note_date(fm, "note.md").unwrap(); + assert_eq!(ts, date_ts(2025, 12, 31)); + } + + #[test] + fn extract_date_from_frontmatter_datetime() { + // date field with full datetime should still extract the date part + let fm = "---\ndate: 2026-01-15T10:30:00\n---"; + let ts = extract_note_date(fm, "note.md").unwrap(); + assert_eq!(ts, date_ts(2026, 1, 15)); + } + + #[test] + fn extract_date_from_filename_pattern() { + let fm = "---\ntitle: Daily Note\n---"; + let ts = extract_note_date(fm, "2026-03-25.md").unwrap(); + assert_eq!(ts, date_ts(2026, 3, 25)); + } + + #[test] + fn extract_date_from_filename_with_prefix() { + let fm = ""; + let ts = 
extract_note_date(fm, "daily-2026-03-25-standup.md").unwrap(); + assert_eq!(ts, date_ts(2026, 3, 25)); + } + + #[test] + fn extract_date_frontmatter_takes_priority_over_filename() { + let fm = "---\ndate: 2026-01-01\n---"; + let ts = extract_note_date(fm, "2026-12-31.md").unwrap(); + // Frontmatter date should win + assert_eq!(ts, date_ts(2026, 1, 1)); + } + + #[test] + fn extract_date_no_date_returns_none() { + let fm = "---\ntitle: No Date Here\ntags: [misc]\n---"; + assert!(extract_note_date(fm, "random-note.md").is_none()); + } + + #[test] + fn extract_date_empty_inputs() { + assert!(extract_note_date("", "").is_none()); + } + + // ── temporal_score ────────────────────────────────────────── + + #[test] + fn score_inside_range() { + let start = date_ts(2026, 3, 20); + let end = date_ts(2026, 3, 26) + 86399; + let note = date_ts(2026, 3, 23); + assert_eq!(temporal_score(note, start, end), 1.0); + } + + #[test] + fn score_at_range_boundary() { + let start = date_ts(2026, 3, 20); + let end = date_ts(2026, 3, 26) + 86399; + // Exactly at start + assert_eq!(temporal_score(start, start, end), 1.0); + // Exactly at end + assert_eq!(temporal_score(end, start, end), 1.0); + } + + #[test] + fn score_one_day_before_range() { + let start = date_ts(2026, 3, 20); + let end = date_ts(2026, 3, 26) + 86399; + let note = date_ts(2026, 3, 19); // 1 day before start + let score = temporal_score(note, start, end); + // 1.0 / (1.0 + 1.0 * 0.1) = 1.0 / 1.1 ≈ 0.909 + let expected = 1.0 / (1.0 + 1.0 * 0.1); + assert!((score - expected).abs() < 1e-10); + } + + #[test] + fn score_one_week_outside_range() { + let start = date_ts(2026, 3, 20); + let end = date_ts(2026, 3, 26) + 86399; + let note = date_ts(2026, 4, 2); // 7 days after end (note is start of Apr 2, end is end of Mar 26) + let score = temporal_score(note, start, end); + // days_away ≈ (note_ts - end_ts) / 86400 + let days_away = (note - end) as f64 / 86400.0; + let expected = 1.0 / (1.0 + days_away * 0.1); + assert!((score - 
expected).abs() < 1e-10); + // Should be significantly less than 1.0 + assert!(score < 0.7); + } + + #[test] + fn score_far_outside_range() { + let start = date_ts(2026, 3, 20); + let end = date_ts(2026, 3, 26) + 86399; + let note = date_ts(2025, 1, 1); // ~15 months before + let score = temporal_score(note, start, end); + // Should be very low but still positive + assert!(score > 0.0); + assert!(score < 0.1); + } + + // ── parse_date_range_heuristic ────────────────────────────── + + // Reference time: 2026-03-26 14:30:00 UTC (Thursday) + fn ref_time() -> OffsetDateTime { + datetime!(2026-03-26 14:30:00 UTC) + } + + #[test] + fn heuristic_today() { + let (start, end) = parse_date_range_heuristic_with_ref("what happened today", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 26)); + assert_eq!(end, date_ts(2026, 3, 26) + 86399); + } + + #[test] + fn heuristic_this_morning() { + let (start, end) = parse_date_range_heuristic_with_ref("notes from this morning", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 26)); + assert_eq!(end, date_ts(2026, 3, 26) + 86399); + } + + #[test] + fn heuristic_yesterday() { + let (start, end) = parse_date_range_heuristic_with_ref("yesterday's standup", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 25)); + assert_eq!(end, date_ts(2026, 3, 25) + 86399); + } + + #[test] + fn heuristic_last_week() { + // 2026-03-26 is Thursday. Last week = Mon Mar 16 – Sun Mar 22 + let (start, end) = parse_date_range_heuristic_with_ref("what did I do last week", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 16)); + assert_eq!(end, date_ts(2026, 3, 22) + 86399); + } + + #[test] + fn heuristic_this_week() { + // 2026-03-26 is Thursday. 
This week = Mon Mar 23 – Sun Mar 29 + let (start, end) = parse_date_range_heuristic_with_ref("this week's tasks", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 23)); + assert_eq!(end, date_ts(2026, 3, 29) + 86399); + } + + #[test] + fn heuristic_last_month() { + // Current: March 2026. Last month = Feb 1 – Feb 28, 2026 + let (start, end) = parse_date_range_heuristic_with_ref("last month summary", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 2, 1)); + assert_eq!(end, date_ts(2026, 2, 28) + 86399); + } + + #[test] + fn heuristic_this_month() { + let (start, end) = parse_date_range_heuristic_with_ref("this month", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 1)); + assert_eq!(end, date_ts(2026, 3, 31) + 86399); + } + + #[test] + fn heuristic_recent() { + // "recent" = last 7 days: Mar 20 – Mar 26 + let (start, end) = parse_date_range_heuristic_with_ref("recent notes", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 20)); + assert_eq!(end, date_ts(2026, 3, 26) + 86399); + } + + #[test] + fn heuristic_recently() { + let result = parse_date_range_heuristic_with_ref("what I recently worked on", ref_time()); + assert!(result.is_some()); + } + + #[test] + fn heuristic_iso_date() { + let (start, end) = parse_date_range_heuristic_with_ref("notes from 2026-03-25", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 25)); + assert_eq!(end, date_ts(2026, 3, 25) + 86399); + } + + #[test] + fn heuristic_month_name_with_year() { + let (start, end) = parse_date_range_heuristic_with_ref("notes from March 2026", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 3, 1)); + assert_eq!(end, date_ts(2026, 3, 31) + 86399); + } + + #[test] + fn heuristic_month_name_bare() { + // Bare month name uses current year + let (start, end) = parse_date_range_heuristic_with_ref("february notes", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 2, 1)); + assert_eq!(end, date_ts(2026, 2, 28) + 86399); + } + + #[test] + fn 
heuristic_month_to_month() { + let (start, end) = parse_date_range_heuristic_with_ref("january to march", ref_time()).unwrap(); + assert_eq!(start, date_ts(2026, 1, 1)); + assert_eq!(end, date_ts(2026, 3, 31) + 86399); + } + + #[test] + fn heuristic_no_temporal_match() { + assert!(parse_date_range_heuristic_with_ref("how does RRF work", ref_time()).is_none()); + } + + #[test] + fn heuristic_no_temporal_match_empty() { + assert!(parse_date_range_heuristic_with_ref("", ref_time()).is_none()); + } + + // ── parse_date_range_from_json ────────────────────────────── + + #[test] + fn json_valid_range() { + let json: serde_json::Value = serde_json::json!({ + "date_range": { + "start": "2026-03-19", + "end": "2026-03-25" + } + }); + let (start, end) = parse_date_range_from_json(&json).unwrap(); + assert_eq!(start, date_ts(2026, 3, 19)); + assert_eq!(end, date_ts(2026, 3, 25) + 86399); + } + + #[test] + fn json_missing_date_range() { + let json: serde_json::Value = serde_json::json!({"query": "test"}); + assert!(parse_date_range_from_json(&json).is_none()); + } + + #[test] + fn json_missing_start() { + let json: serde_json::Value = serde_json::json!({ + "date_range": { + "end": "2026-03-25" + } + }); + assert!(parse_date_range_from_json(&json).is_none()); + } + + #[test] + fn json_missing_end() { + let json: serde_json::Value = serde_json::json!({ + "date_range": { + "start": "2026-03-19" + } + }); + assert!(parse_date_range_from_json(&json).is_none()); + } + + #[test] + fn json_invalid_date_format() { + let json: serde_json::Value = serde_json::json!({ + "date_range": { + "start": "not-a-date", + "end": "2026-03-25" + } + }); + assert!(parse_date_range_from_json(&json).is_none()); + } + + // ── Test helpers ──────────────────────────────────────────── + + /// Helper to get Unix timestamp for start of day UTC. 
+ fn date_ts(year: i32, month: u8, day: u8) -> i64 { + Date::from_calendar_date(year, Month::try_from(month).unwrap(), day) + .unwrap() + .midnight() + .assume_utc() + .unix_timestamp() + } +} From ec55952611442fa0e79be57d8c0bbc14559d21f6 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 20:50:10 +0200 Subject: [PATCH 2/9] feat(store): add note_date column, date range query, update insert_file signature Add note_date INTEGER column to files table via fire-and-forget migration. Extend insert_file with note_date parameter, update all FileRecord construction sites to include the new field, and add get_files_in_date_range + count_files_with_dates query methods. All 141 call sites across 10 files updated. --- src/context.rs | 22 ++-- src/fts.rs | 6 + src/graph.rs | 16 ++- src/health.rs | 16 +-- src/indexer.rs | 14 ++- src/links.rs | 2 + src/store.rs | 244 +++++++++++++++++++++++++++++++--------- src/writer.rs | 34 ++++-- tests/integration.rs | 2 +- tests/write_pipeline.rs | 2 + 10 files changed, 264 insertions(+), 94 deletions(-) diff --git a/src/context.rs b/src/context.rs index c7c2b34..222dca8 100644 --- a/src/context.rs +++ b/src/context.rs @@ -758,10 +758,10 @@ mod tests { let d1 = generate_docid("note.md"); let d2 = generate_docid("other.md"); store - .insert_file("note.md", "h1", 100, &["rust".into()], &d1, None) + .insert_file("note.md", "h1", 100, &["rust".into()], &d1, None, None) .unwrap(); store - .insert_file("other.md", "h2", 100, &[], &d2, None) + .insert_file("other.md", "h2", 100, &[], &d2, None, None) .unwrap(); let f1 = store.get_file("note.md").unwrap().unwrap().id; @@ -808,7 +808,7 @@ mod tests { fn test_read_file_not_on_disk() { let (_tmp, store, root) = setup_vault(); store - .insert_file("ghost.md", "h3", 100, &[], "ggg333", None) + .insert_file("ghost.md", "h3", 100, &[], "ggg333", None, None) .unwrap(); let params = ContextParams { store: &store, @@ -906,10 +906,11 @@ mod tests { &["person".into()], "aaa111", None, + 
None, ) .unwrap(); let f2 = store - .insert_file("daily.md", "h2", 100, &[], "bbb222", None) + .insert_file("daily.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); store.insert_edge(f2, f1, "mention").unwrap(); store @@ -969,10 +970,11 @@ mod tests { &["project".into()], "aaa111", None, + None, ) .unwrap(); let f2 = store - .insert_file("01-Projects/child.md", "h2", 100, &[], "bbb222", None) + .insert_file("01-Projects/child.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); store.insert_edge(f2, f1, "wikilink").unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); @@ -1019,7 +1021,7 @@ mod tests { let store = Store::open_memory().unwrap(); store - .insert_file("result.md", "h1", 100, &["topic".into()], "aaa111", None) + .insert_file("result.md", "h1", 100, &["topic".into()], "aaa111", None, None) .unwrap(); let params = ContextParams { @@ -1052,7 +1054,7 @@ mod tests { let store = Store::open_memory().unwrap(); store - .insert_file("long.md", "h1", 100, &[], "aaa111", None) + .insert_file("long.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); let params = ContextParams { @@ -1085,10 +1087,10 @@ mod tests { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("main.md", "h1", 100, &[], "aaa111", None) + .insert_file("main.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); let f2 = store - .insert_file("related.md", "h2", 100, &[], "bbb222", None) + .insert_file("related.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); @@ -1156,7 +1158,7 @@ mod tests { let content = "# Person\n\n## Role\n\nEngineer\n\n## Interactions\n\nMet on 2026-03-26\n"; std::fs::write(root.join("person.md"), content).unwrap(); store - .insert_file("person.md", "hash", 100, &[], "per123", None) + .insert_file("person.md", "hash", 100, &[], "per123", None, None) .unwrap(); let result = read_section(&store, &root, "person.md", "Interactions").unwrap(); diff --git a/src/fts.rs b/src/fts.rs index 
598043e..6e49dba 100644 --- a/src/fts.rs +++ b/src/fts.rs @@ -28,6 +28,7 @@ mod tests { &[], &generate_docid("notes/ticket.md"), None, + None, ) .unwrap(); @@ -56,6 +57,7 @@ mod tests { &[], &generate_docid("notes/note.md"), None, + None, ) .unwrap(); @@ -79,6 +81,7 @@ mod tests { &[], &generate_docid("notes/a.md"), None, + None, ) .unwrap(); let file_id2 = store @@ -89,6 +92,7 @@ mod tests { &[], &generate_docid("notes/b.md"), None, + None, ) .unwrap(); let file_id3 = store @@ -99,6 +103,7 @@ mod tests { &[], &generate_docid("notes/c.md"), None, + None, ) .unwrap(); @@ -138,6 +143,7 @@ mod tests { &[], &generate_docid("notes/del.md"), None, + None, ) .unwrap(); diff --git a/src/graph.rs b/src/graph.rs index 442ce37..09258a0 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -218,6 +218,7 @@ mod tests { &["rust".into()], &generate_docid("seed.md"), None, + None, ) .unwrap(); let f2 = store @@ -228,6 +229,7 @@ mod tests { &["rust".into()], &generate_docid("linked.md"), None, + None, ) .unwrap(); let _f3 = store @@ -238,6 +240,7 @@ mod tests { &[], &generate_docid("unlinked.md"), None, + None, ) .unwrap(); @@ -268,10 +271,10 @@ mod tests { fn test_graph_expand_skips_seeds() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("a.md", "h1", 100, &[], &generate_docid("a.md"), None) + .insert_file("a.md", "h1", 100, &[], &generate_docid("a.md"), None, None) .unwrap(); let f2 = store - .insert_file("b.md", "h2", 100, &[], &generate_docid("b.md"), None) + .insert_file("b.md", "h2", 100, &[], &generate_docid("b.md"), None, None) .unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); @@ -305,10 +308,10 @@ mod tests { fn test_graph_expand_multi_parent_takes_highest() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("a.md", "h1", 100, &[], &generate_docid("a.md"), None) + .insert_file("a.md", "h1", 100, &[], &generate_docid("a.md"), None, None) .unwrap(); let f2 = store - .insert_file("b.md", "h2", 100, &[], 
&generate_docid("b.md"), None) + .insert_file("b.md", "h2", 100, &[], &generate_docid("b.md"), None, None) .unwrap(); let f3 = store .insert_file( @@ -318,6 +321,7 @@ mod tests { &[], &generate_docid("shared.md"), None, + None, ) .unwrap(); @@ -360,7 +364,7 @@ mod tests { fn test_graph_expand_empty_graph() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("a.md", "h1", 100, &[], "aaa111", None) + .insert_file("a.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); let seeds = vec![RankedResult { @@ -387,6 +391,7 @@ mod tests { &["rust".into(), "cli".into()], &generate_docid("seed.md"), None, + None, ) .unwrap(); let f2 = store @@ -397,6 +402,7 @@ mod tests { &["rust".into()], &generate_docid("linked.md"), None, + None, ) .unwrap(); diff --git a/src/health.rs b/src/health.rs index fc2e980..efd334d 100644 --- a/src/health.rs +++ b/src/health.rs @@ -139,13 +139,13 @@ mod tests { let store = Store::open_memory().unwrap(); // Insert files with edges to test orphan detection. 
let linked_id = store - .insert_file("linked.md", "aaa111", 100, &[], "aaa111", None) + .insert_file("linked.md", "aaa111", 100, &[], "aaa111", None, None) .unwrap(); let orphan_id = store - .insert_file("orphan.md", "bbb222", 100, &[], "bbb222", None) + .insert_file("orphan.md", "bbb222", 100, &[], "bbb222", None, None) .unwrap(); let _daily_id = store - .insert_file("daily/2026-03-26.md", "ccc333", 100, &[], "ccc333", None) + .insert_file("daily/2026-03-26.md", "ccc333", 100, &[], "ccc333", None, None) .unwrap(); // Add edge: linked.md → orphan.md (both files are "connected") store.insert_edge(linked_id, orphan_id, "wikilink").unwrap(); @@ -169,13 +169,13 @@ mod tests { fn test_find_orphans_detects_isolated() { let store = Store::open_memory().unwrap(); store - .insert_file("connected.md", "h1", 100, &[], "d1", None) + .insert_file("connected.md", "h1", 100, &[], "d1", None, None) .unwrap(); let iso_id = store - .insert_file("island.md", "h2", 100, &[], "d2", None) + .insert_file("island.md", "h2", 100, &[], "d2", None, None) .unwrap(); let other_id = store - .insert_file("other.md", "h3", 100, &[], "d3", None) + .insert_file("other.md", "h3", 100, &[], "d3", None, None) .unwrap(); store.insert_edge(iso_id, other_id, "wikilink").unwrap(); @@ -213,10 +213,10 @@ mod tests { fn test_generate_health_report() { let store = Store::open_memory().unwrap(); store - .insert_file("note.md", "h1", 100, &[], "d1", None) + .insert_file("note.md", "h1", 100, &[], "d1", None, None) .unwrap(); store - .insert_file("00-Inbox/unsorted.md", "h2", 100, &[], "d2", None) + .insert_file("00-Inbox/unsorted.md", "h2", 100, &[], "d2", None, None) .unwrap(); store .insert_unresolved_link("note.md", "missing.md") diff --git a/src/indexer.rs b/src/indexer.rs index 774fc98..e8f0d51 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -351,6 +351,7 @@ pub fn index_file( &tags, &docid, created_by.as_deref(), + None, )?; let mut next_vector_id: u64 = store.next_vector_id()?; @@ -780,6 +781,7 @@ 
mod tests { &[], &generate_docid("note.md"), None, + None, ) .unwrap(); @@ -811,6 +813,7 @@ mod tests { &[], &generate_docid("surviving.md"), None, + None, ) .unwrap(); store @@ -821,6 +824,7 @@ mod tests { &[], &generate_docid("deleted.md"), None, + None, ) .unwrap(); @@ -864,13 +868,13 @@ mod tests { let store = Store::open_memory().unwrap(); let f_a = store - .insert_file("a.md", "h1", 100, &[], "aaa111", None) + .insert_file("a.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); let f_b = store - .insert_file("b.md", "h2", 100, &[], "bbb222", None) + .insert_file("b.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); let _f_c = store - .insert_file("c.md", "h3", 100, &[], "ccc333", None) + .insert_file("c.md", "h3", 100, &[], "ccc333", None, None) .unwrap(); let content_a = std::fs::read_to_string(root.join("a.md")).unwrap(); @@ -911,10 +915,10 @@ mod tests { fn test_people_mention_detection() { let store = Store::open_memory().unwrap(); let person = store - .insert_file("People/John Nelson.md", "h1", 100, &[], "aaa111", None) + .insert_file("People/John Nelson.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); let note = store - .insert_file("daily.md", "h2", 100, &[], "bbb222", None) + .insert_file("daily.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); let people = vec![(person, vec!["John Nelson".to_string()])]; diff --git a/src/links.rs b/src/links.rs index 7ed4643..9453c5a 100644 --- a/src/links.rs +++ b/src/links.rs @@ -560,6 +560,7 @@ mod tests { &[], "aaa111", None, + None, ) .unwrap(); store @@ -570,6 +571,7 @@ mod tests { &[], "bbb222", None, + None, ) .unwrap(); diff --git a/src/store.rs b/src/store.rs index b33aa02..168ae5a 100644 --- a/src/store.rs +++ b/src/store.rs @@ -14,6 +14,7 @@ pub struct FileRecord { pub indexed_at: String, pub docid: Option, pub created_by: Option, + pub note_date: Option, } /// A record representing a chunk of a file. 
@@ -227,6 +228,11 @@ impl Store { .conn .execute_batch("ALTER TABLE files ADD COLUMN created_by TEXT;"); + // Add note_date column (idempotent — ignores error if column already exists). + let _ = self + .conn + .execute_batch("ALTER TABLE files ADD COLUMN note_date INTEGER;"); + // Check if edges table exists. let has_edges: bool = { let mut stmt = self @@ -372,20 +378,22 @@ impl Store { tags: &[String], docid: &str, created_by: Option<&str>, + note_date: Option, ) -> Result { let tags_json = serde_json::to_string(tags).unwrap_or_else(|_| "[]".into()); let now = chrono_now(); self.conn.execute( - "INSERT INTO files (path, content_hash, mtime, tags, indexed_at, docid, created_by) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7) + "INSERT INTO files (path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) ON CONFLICT(path) DO UPDATE SET content_hash = excluded.content_hash, mtime = excluded.mtime, tags = excluded.tags, indexed_at = excluded.indexed_at, docid = excluded.docid, - created_by = excluded.created_by", - params![path, hash, mtime, tags_json, now, docid, created_by], + created_by = excluded.created_by, + note_date = excluded.note_date", + params![path, hash, mtime, tags_json, now, docid, created_by, note_date], )?; let file_id: i64 = self.conn.query_row( "SELECT id FROM files WHERE path = ?1", @@ -397,7 +405,7 @@ impl Store { pub fn get_file(&self, path: &str) -> Result> { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by FROM files WHERE path = ?1", + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files WHERE path = ?1", )?; let mut rows = stmt.query_map(params![path], |row| { Ok(FileRecord { @@ -409,6 +417,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; match rows.next() { @@ -419,7 +428,7 @@ impl Store { pub fn 
get_all_files(&self) -> Result> { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by FROM files", + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files", )?; let rows = stmt.query_map([], |row| { Ok(FileRecord { @@ -431,6 +440,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; let mut files = Vec::new(); @@ -729,7 +739,7 @@ impl Store { /// Look up a file record by its row ID. pub fn get_file_by_id(&self, file_id: i64) -> Result> { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by FROM files WHERE id = ?1", + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files WHERE id = ?1", )?; let mut rows = stmt.query_map(params![file_id], |row| { Ok(FileRecord { @@ -741,6 +751,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; match rows.next() { @@ -752,7 +763,7 @@ impl Store { /// Look up a file by its 6-character docid. 
pub fn get_file_by_docid(&self, docid: &str) -> Result> { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by FROM files WHERE docid = ?1", + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files WHERE docid = ?1", )?; let mut rows = stmt.query_map(params![docid], |row| { Ok(FileRecord { @@ -764,6 +775,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; match rows.next() { @@ -976,7 +988,7 @@ impl Store { limit: usize, ) -> Result> { let mut sql = String::from( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by FROM files WHERE 1=1", + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files WHERE 1=1", ); let mut param_values: Vec> = Vec::new(); if let Some(f) = folder { @@ -1005,6 +1017,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; let mut results = Vec::new(); @@ -1054,7 +1067,7 @@ impl Store { /// Most recently indexed files. pub fn recent_files(&self, limit: usize) -> Result> { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files ORDER BY indexed_at DESC LIMIT ?", )?; let rows = stmt.query_map(params![limit as i64], |row| { @@ -1067,6 +1080,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; let mut results = Vec::new(); @@ -1124,7 +1138,7 @@ impl Store { /// Find all files whose path matches a LIKE pattern (e.g., "03-Resources/People/%"). 
pub fn find_files_by_prefix(&self, pattern: &str) -> Result> { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files WHERE path LIKE ?1", )?; let rows = stmt.query_map(params![pattern], |row| { @@ -1137,6 +1151,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; rows.collect::>>() @@ -1175,7 +1190,7 @@ impl Store { // Try each candidate as a case-insensitive basename match. for candidate in &candidates { let mut stmt = self.conn.prepare( - "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date FROM files WHERE lower(path) LIKE '%/' || lower(?1) OR lower(path) = lower(?1) ORDER BY length(path) ASC LIMIT 1", @@ -1190,6 +1205,7 @@ impl Store { indexed_at: row.get(5)?, docid: row.get(6)?, created_by: row.get(7)?, + note_date: row.get(8)?, }) })?; if let Some(row) = rows.next() { @@ -1200,6 +1216,39 @@ impl Store { Ok(None) } + /// Query files whose note_date falls within a given range (inclusive). + pub fn get_files_in_date_range(&self, start: i64, end: i64) -> Result> { + let mut stmt = self.conn.prepare( + "SELECT id, path, content_hash, mtime, tags, indexed_at, docid, created_by, note_date + FROM files WHERE note_date BETWEEN ?1 AND ?2 + ORDER BY note_date ASC", + )?; + let rows = stmt.query_map(params![start, end], |row| { + Ok(FileRecord { + id: row.get(0)?, + path: row.get(1)?, + content_hash: row.get(2)?, + mtime: row.get(3)?, + tags: parse_tags(&row.get::<_, String>(4)?), + indexed_at: row.get(5)?, + docid: row.get(6)?, + created_by: row.get(7)?, + note_date: row.get(8)?, + }) + })?; + rows.collect::, _>>().map_err(Into::into) + } + + /// Count files that have a non-NULL note_date. 
+ pub fn count_files_with_dates(&self) -> Result { + let count: i64 = self.conn.query_row( + "SELECT COUNT(*) FROM files WHERE note_date IS NOT NULL", + [], + |row| row.get(0), + )?; + Ok(count as usize) + } + /// Rename a file's path in the store, preserving its row ID (and thus edge integrity). pub fn update_file_path(&self, old_path: &str, new_path: &str, new_docid: &str) -> Result<()> { if self.get_file(new_path)?.is_some() { @@ -1784,7 +1833,7 @@ mod tests { let tags = vec!["rust".to_string(), "programming".to_string()]; let docid = generate_docid("notes/test.md"); let file_id = store - .insert_file("notes/test.md", "abc123", 1700000000, &tags, &docid, None) + .insert_file("notes/test.md", "abc123", 1700000000, &tags, &docid, None, None) .unwrap(); assert!(file_id > 0); @@ -1807,6 +1856,7 @@ mod tests { &[], &generate_docid("notes/chunk_test.md"), None, + None, ) .unwrap(); @@ -1839,6 +1889,7 @@ mod tests { &[], &generate_docid("notes/del.md"), None, + None, ) .unwrap(); store.insert_chunk(file_id, "H", "snippet", 10, 5).unwrap(); @@ -1889,6 +1940,7 @@ mod tests { &["tag1".to_string()], &docid, None, + None, ) .unwrap(); store.insert_chunk(file_id, "H", "text", 50, 10).unwrap(); @@ -1912,6 +1964,7 @@ mod tests { &["tag1".to_string()], &docid, None, + None, ) .unwrap(); store @@ -1960,7 +2013,7 @@ mod tests { let store = Store::open_memory().unwrap(); let docid = generate_docid("notes/findme.md"); store - .insert_file("notes/findme.md", "hash", 100, &[], &docid, None) + .insert_file("notes/findme.md", "hash", 100, &[], &docid, None, None) .unwrap(); let rec = store.get_file_by_docid(&docid).unwrap().unwrap(); @@ -1983,6 +2036,7 @@ mod tests { &[], &generate_docid("notes/a.md"), None, + None, ) .unwrap(); let b = store @@ -1993,6 +2047,7 @@ mod tests { &[], &generate_docid("notes/b.md"), None, + None, ) .unwrap(); (a, b) @@ -2030,6 +2085,7 @@ mod tests { &[], &generate_docid("notes/c.md"), None, + None, ) .unwrap(); @@ -2058,6 +2114,7 @@ mod tests { &[], 
&generate_docid("notes/c.md"), None, + None, ) .unwrap(); @@ -2101,6 +2158,7 @@ mod tests { &[], &generate_docid("notes/c.md"), None, + None, ) .unwrap(); @@ -2130,13 +2188,13 @@ mod tests { fn test_get_neighbors_depth_1() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("n/f1.md", "h1", 100, &[], &generate_docid("n/f1.md"), None) + .insert_file("n/f1.md", "h1", 100, &[], &generate_docid("n/f1.md"), None, None) .unwrap(); let f2 = store - .insert_file("n/f2.md", "h2", 100, &[], &generate_docid("n/f2.md"), None) + .insert_file("n/f2.md", "h2", 100, &[], &generate_docid("n/f2.md"), None, None) .unwrap(); let f3 = store - .insert_file("n/f3.md", "h3", 100, &[], &generate_docid("n/f3.md"), None) + .insert_file("n/f3.md", "h3", 100, &[], &generate_docid("n/f3.md"), None, None) .unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); @@ -2159,16 +2217,16 @@ mod tests { fn test_get_neighbors_depth_2() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("n/f1.md", "h1", 100, &[], &generate_docid("n/f1.md"), None) + .insert_file("n/f1.md", "h1", 100, &[], &generate_docid("n/f1.md"), None, None) .unwrap(); let f2 = store - .insert_file("n/f2.md", "h2", 100, &[], &generate_docid("n/f2.md"), None) + .insert_file("n/f2.md", "h2", 100, &[], &generate_docid("n/f2.md"), None, None) .unwrap(); let f3 = store - .insert_file("n/f3.md", "h3", 100, &[], &generate_docid("n/f3.md"), None) + .insert_file("n/f3.md", "h3", 100, &[], &generate_docid("n/f3.md"), None, None) .unwrap(); let f4 = store - .insert_file("n/f4.md", "h4", 100, &[], &generate_docid("n/f4.md"), None) + .insert_file("n/f4.md", "h4", 100, &[], &generate_docid("n/f4.md"), None, None) .unwrap(); // f1 -> f2 -> f3 -> f4 @@ -2197,6 +2255,7 @@ mod tests { &["rust".to_string(), "cli".to_string()], &generate_docid("n/f1.md"), None, + None, ) .unwrap(); let f2 = store @@ -2207,6 +2266,7 @@ mod tests { &["rust".to_string(), "web".to_string()], &generate_docid("n/f2.md"), None, + 
None, ) .unwrap(); let _f3 = store @@ -2217,6 +2277,7 @@ mod tests { &["python".to_string()], &generate_docid("n/f3.md"), None, + None, ) .unwrap(); @@ -2236,6 +2297,7 @@ mod tests { &[], &generate_docid("n/fts.md"), None, + None, ) .unwrap(); @@ -2259,6 +2321,7 @@ mod tests { &[], &generate_docid("n/best.md"), None, + None, ) .unwrap(); @@ -2278,17 +2341,17 @@ mod tests { fn test_get_edge_stats() { let store = Store::open_memory().unwrap(); let a = store - .insert_file("n/a.md", "ha", 100, &[], &generate_docid("n/a.md"), None) + .insert_file("n/a.md", "ha", 100, &[], &generate_docid("n/a.md"), None, None) .unwrap(); let b = store - .insert_file("n/b.md", "hb", 100, &[], &generate_docid("n/b.md"), None) + .insert_file("n/b.md", "hb", 100, &[], &generate_docid("n/b.md"), None, None) .unwrap(); let c = store - .insert_file("n/c.md", "hc", 100, &[], &generate_docid("n/c.md"), None) + .insert_file("n/c.md", "hc", 100, &[], &generate_docid("n/c.md"), None, None) .unwrap(); // d is isolated (no edges). 
let _d = store - .insert_file("n/d.md", "hd", 100, &[], &generate_docid("n/d.md"), None) + .insert_file("n/d.md", "hd", 100, &[], &generate_docid("n/d.md"), None, None) .unwrap(); store.insert_edge(a, b, "wikilink").unwrap(); @@ -2314,6 +2377,7 @@ mod tests { &["rust".into()], "aaa111", None, + None, ) .unwrap(); store @@ -2324,6 +2388,7 @@ mod tests { &["health".into()], "bbb222", None, + None, ) .unwrap(); store @@ -2334,6 +2399,7 @@ mod tests { &["rust".into(), "cli".into()], "ccc333", None, + None, ) .unwrap(); let files = store.list_files(None, &[], None, 20).unwrap(); @@ -2344,10 +2410,10 @@ mod tests { fn test_list_files_folder_filter() { let store = Store::open_memory().unwrap(); store - .insert_file("01-Projects/a.md", "h1", 100, &[], "aaa111", None) + .insert_file("01-Projects/a.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); store - .insert_file("02-Areas/b.md", "h2", 200, &[], "bbb222", None) + .insert_file("02-Areas/b.md", "h2", 200, &[], "bbb222", None, None) .unwrap(); let files = store .list_files(Some("01-Projects"), &[], None, 20) @@ -2367,13 +2433,14 @@ mod tests { &["rust".into(), "cli".into()], "aaa111", None, + None, ) .unwrap(); store - .insert_file("b.md", "h2", 200, &["rust".into()], "bbb222", None) + .insert_file("b.md", "h2", 200, &["rust".into()], "bbb222", None, None) .unwrap(); store - .insert_file("c.md", "h3", 300, &["python".into()], "ccc333", None) + .insert_file("c.md", "h3", 300, &["python".into()], "ccc333", None, None) .unwrap(); let files = store.list_files(None, &["rust".into()], None, 20).unwrap(); assert_eq!(files.len(), 2); @@ -2388,13 +2455,13 @@ mod tests { fn test_list_files_created_by_filter() { let store = Store::open_memory().unwrap(); store - .insert_file("a.md", "h1", 100, &[], "aaa111", Some("cli")) + .insert_file("a.md", "h1", 100, &[], "aaa111", Some("cli"), None) .unwrap(); store - .insert_file("b.md", "h2", 200, &[], "bbb222", Some("mcp")) + .insert_file("b.md", "h2", 200, &[], "bbb222", Some("mcp"), 
None) .unwrap(); store - .insert_file("c.md", "h3", 300, &[], "ccc333", None) + .insert_file("c.md", "h3", 300, &[], "ccc333", None, None) .unwrap(); // Filter by "cli" → only the cli-created file @@ -2417,16 +2484,16 @@ mod tests { fn test_folder_note_counts() { let store = Store::open_memory().unwrap(); store - .insert_file("01-Projects/a.md", "h1", 100, &[], "a1", None) + .insert_file("01-Projects/a.md", "h1", 100, &[], "a1", None, None) .unwrap(); store - .insert_file("01-Projects/b.md", "h2", 100, &[], "b2", None) + .insert_file("01-Projects/b.md", "h2", 100, &[], "b2", None, None) .unwrap(); store - .insert_file("02-Areas/c.md", "h3", 100, &[], "c3", None) + .insert_file("02-Areas/c.md", "h3", 100, &[], "c3", None, None) .unwrap(); store - .insert_file("root.md", "h4", 100, &[], "d4", None) + .insert_file("root.md", "h4", 100, &[], "d4", None, None) .unwrap(); let counts = store.folder_note_counts().unwrap(); assert!(counts.iter().any(|(f, c)| f == "01-Projects" && *c == 2)); @@ -2445,6 +2512,7 @@ mod tests { &["rust".into(), "cli".into()], "a1", None, + None, ) .unwrap(); store @@ -2455,10 +2523,11 @@ mod tests { &["rust".into(), "web".into()], "b2", None, + None, ) .unwrap(); store - .insert_file("c.md", "h3", 100, &["rust".into()], "c3", None) + .insert_file("c.md", "h3", 100, &["rust".into()], "c3", None, None) .unwrap(); let tags = store.top_tags(10).unwrap(); assert_eq!(tags[0].0, "rust"); @@ -2469,10 +2538,10 @@ mod tests { fn test_recent_files() { let store = Store::open_memory().unwrap(); store - .insert_file("old.md", "h1", 100, &[], "a1", None) + .insert_file("old.md", "h1", 100, &[], "a1", None, None) .unwrap(); store - .insert_file("new.md", "h2", 200, &[], "b2", None) + .insert_file("new.md", "h2", 200, &[], "b2", None, None) .unwrap(); let recent = store.recent_files(1).unwrap(); assert_eq!(recent.len(), 1); @@ -2482,10 +2551,10 @@ mod tests { fn test_edge_count_for_file() { let store = Store::open_memory().unwrap(); let f1 = store - 
.insert_file("a.md", "h1", 100, &[], "a1", None) + .insert_file("a.md", "h1", 100, &[], "a1", None, None) .unwrap(); let f2 = store - .insert_file("b.md", "h2", 100, &[], "b2", None) + .insert_file("b.md", "h2", 100, &[], "b2", None, None) .unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); store.insert_edge(f2, f1, "wikilink").unwrap(); @@ -2497,10 +2566,10 @@ mod tests { fn test_find_file_by_basename() { let store = Store::open_memory().unwrap(); store - .insert_file("01-Projects/Work/note.md", "h1", 100, &[], "aaa111", None) + .insert_file("01-Projects/Work/note.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); store - .insert_file("root.md", "h2", 100, &[], "bbb222", None) + .insert_file("root.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); let found = store.find_file_by_basename("note").unwrap(); @@ -2518,13 +2587,13 @@ mod tests { fn test_edge_counts_for_files() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("a.md", "h1", 100, &[], "a1", None) + .insert_file("a.md", "h1", 100, &[], "a1", None, None) .unwrap(); let f2 = store - .insert_file("b.md", "h2", 100, &[], "b2", None) + .insert_file("b.md", "h2", 100, &[], "b2", None, None) .unwrap(); let f3 = store - .insert_file("c.md", "h3", 100, &[], "c3", None) + .insert_file("c.md", "h3", 100, &[], "c3", None, None) .unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); store.insert_edge(f2, f1, "wikilink").unwrap(); @@ -2573,7 +2642,7 @@ mod tests { let store = Store::open_memory().unwrap(); // Insert a file + chunk with a vector BLOB. 
let file_id = store - .insert_file("test.md", "hash123", 0, &[], "abc123", None) + .insert_file("test.md", "hash123", 0, &[], "abc123", None, None) .unwrap(); let vector: Vec = (0..256).map(|i| (i as f32) / 256.0).collect(); store @@ -2725,7 +2794,7 @@ mod tests { let store = Store::open_memory().unwrap(); let docid = generate_docid("notes/test.md"); store - .insert_file("notes/test.md", "hash1", 100, &[], &docid, Some("cli")) + .insert_file("notes/test.md", "hash1", 100, &[], &docid, Some("cli"), None) .unwrap(); let rec = store.get_file("notes/test.md").unwrap().unwrap(); assert_eq!(rec.created_by, Some("cli".to_string())); @@ -2736,7 +2805,7 @@ mod tests { let store = Store::open_memory().unwrap(); let docid = generate_docid("notes/test.md"); store - .insert_file("notes/test.md", "hash1", 100, &[], &docid, None) + .insert_file("notes/test.md", "hash1", 100, &[], &docid, None, None) .unwrap(); let rec = store.get_file("notes/test.md").unwrap().unwrap(); assert_eq!(rec.created_by, None); @@ -2747,7 +2816,7 @@ mod tests { let store = Store::open_memory().unwrap(); let old_docid = generate_docid("notes/old.md"); let file_id = store - .insert_file("notes/old.md", "hash1", 100, &[], &old_docid, None) + .insert_file("notes/old.md", "hash1", 100, &[], &old_docid, None, None) .unwrap(); let new_docid = generate_docid("notes/new.md"); @@ -2774,6 +2843,7 @@ mod tests { &[], &generate_docid("notes/a.md"), None, + None, ) .unwrap(); store @@ -2784,6 +2854,7 @@ mod tests { &[], &generate_docid("notes/b.md"), None, + None, ) .unwrap(); @@ -2805,6 +2876,7 @@ mod tests { &[], &generate_docid("notes/vec.md"), None, + None, ) .unwrap(); @@ -2834,6 +2906,7 @@ mod tests { &[], &generate_docid("notes/empty.md"), None, + None, ) .unwrap(); @@ -2932,7 +3005,7 @@ mod tests { fn test_resolve_file_fuzzy_match() { let store = Store::open_memory().unwrap(); store - .insert_file("Steve Barbera.md", "hash1", 100, &[], "ab1234", None) + .insert_file("Steve Barbera.md", "hash1", 100, &[], 
"ab1234", None, None) .unwrap(); // "Steve Barbara" is within Levenshtein 2 of "Steve Barbera" let result = store.resolve_file("Steve Barbara").unwrap(); @@ -2944,10 +3017,10 @@ mod tests { fn test_resolve_file_fuzzy_ambiguous() { let store = Store::open_memory().unwrap(); store - .insert_file("test-a.md", "h1", 100, &[], "aaa111", None) + .insert_file("test-a.md", "h1", 100, &[], "aaa111", None, None) .unwrap(); store - .insert_file("test-b.md", "h2", 100, &[], "bbb222", None) + .insert_file("test-b.md", "h2", 100, &[], "bbb222", None, None) .unwrap(); // "test-c" is equidistant from both — should error, not pick arbitrarily let result = store.resolve_file("test-c"); @@ -2958,7 +3031,7 @@ mod tests { fn test_resolve_file_existing_docid() { let store = Store::open_memory().unwrap(); store - .insert_file("note.md", "hash", 100, &[], "abc123", None) + .insert_file("note.md", "hash", 100, &[], "abc123", None, None) .unwrap(); let result = store.resolve_file("#abc123").unwrap(); assert!(result.is_some()); @@ -3016,7 +3089,7 @@ mod tests { let store = Store::open_memory().unwrap(); let tags = vec!["tag".to_string()]; let file_id = store - .insert_file("delete-me.md", "hash", 100, &tags, "del123", None) + .insert_file("delete-me.md", "hash", 100, &tags, "del123", None, None) .unwrap(); // Insert a chunk + FTS entry + vec entry for the file @@ -3032,7 +3105,7 @@ mod tests { // Insert an edge from this file to itself (just to test edge cleanup) let file_id2 = store - .insert_file("other.md", "hash2", 100, &[], "oth123", None) + .insert_file("other.md", "hash2", 100, &[], "oth123", None, None) .unwrap(); store.insert_edge(file_id, file_id2, "wikilink").unwrap(); store.insert_edge(file_id2, file_id, "wikilink").unwrap(); @@ -3065,4 +3138,63 @@ mod tests { assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("file not found")); } + + #[test] + fn test_insert_file_with_note_date() { + let store = Store::open_memory().unwrap(); + let note_date = 
Some(1774000000i64); + store + .insert_file("dated.md", "hash", 100, &[], "dat123", None, note_date) + .unwrap(); + let file = store.get_file("dated.md").unwrap().unwrap(); + assert_eq!(file.note_date, note_date); + } + + #[test] + fn test_insert_file_without_note_date() { + let store = Store::open_memory().unwrap(); + store + .insert_file("undated.md", "hash", 100, &[], "und123", None, None) + .unwrap(); + let file = store.get_file("undated.md").unwrap().unwrap(); + assert!(file.note_date.is_none()); + } + + #[test] + fn test_get_files_in_date_range() { + let store = Store::open_memory().unwrap(); + let day1 = 1774000000i64; + let day2 = day1 + 86400; + let day3 = day1 + 2 * 86400; + store + .insert_file("a.md", "h1", 100, &[], "aaa111", None, Some(day1)) + .unwrap(); + store + .insert_file("b.md", "h2", 100, &[], "bbb222", None, Some(day2)) + .unwrap(); + store + .insert_file("c.md", "h3", 100, &[], "ccc333", None, Some(day3)) + .unwrap(); + store + .insert_file("d.md", "h4", 100, &[], "ddd444", None, None) + .unwrap(); + let results = store.get_files_in_date_range(day1, day2).unwrap(); + assert_eq!(results.len(), 2); + } + + #[test] + fn test_count_files_with_dates() { + let store = Store::open_memory().unwrap(); + let day1 = 1774000000i64; + store + .insert_file("a.md", "h1", 100, &[], "aaa111", None, Some(day1)) + .unwrap(); + store + .insert_file("b.md", "h2", 100, &[], "bbb222", None, None) + .unwrap(); + store + .insert_file("c.md", "h3", 100, &[], "ccc333", None, Some(day1 + 86400)) + .unwrap(); + assert_eq!(store.count_files_with_dates().unwrap(), 2); + } } diff --git a/src/writer.rs b/src/writer.rs index 4484369..8229661 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -458,6 +458,7 @@ pub fn create_note( &resolved_tags, &docid, Some(&input.created_by), + None, )?; let mut next_vid = store.next_vector_id()?; @@ -495,6 +496,7 @@ pub fn create_note( &resolved_tags, &docid, Some(&input.created_by), + None, )?; // Incrementally update folder centroid with 
new note's mean vector @@ -604,6 +606,7 @@ pub fn append_to_note( &file_record.tags, &docid, file_record.created_by.as_deref(), + None, )?; let mut next_vid = store.next_vector_id()?; @@ -633,6 +636,7 @@ pub fn append_to_note( &file_record.tags, &docid, file_record.created_by.as_deref(), + None, )?; } Err(e) => { @@ -717,6 +721,7 @@ pub fn update_metadata( &tags, &docid, file_record.created_by.as_deref(), + None, )?; // Register tags @@ -966,6 +971,7 @@ pub fn edit_frontmatter( &updated_tags, &docid, file_record.created_by.as_deref(), + None, )?; Ok(EditResult { @@ -1082,6 +1088,7 @@ pub fn move_note( &file_record.tags, &new_docid, file_record.created_by.as_deref(), + None, )?; Ok(()) @@ -1181,6 +1188,7 @@ pub fn delete_note( &tags, &docid, created_by.as_deref(), + None, )?; Ok(()) @@ -1366,6 +1374,7 @@ pub fn unarchive_note( &tags, &docid, Some("unarchive"), + None, )?; let mut next_vid = store.next_vector_id()?; @@ -1570,6 +1579,7 @@ mod tests { &[], &crate::docid::generate_docid("notes/existing.md"), None, + None, ) .unwrap(); store @@ -1580,6 +1590,7 @@ mod tests { &[], &crate::docid::generate_docid("notes/gone.md"), None, + None, ) .unwrap(); @@ -1615,7 +1626,7 @@ mod tests { let content = "# Person\n\n## Interactions\n\nOld entry\n\n## Links\n\nSome links\n"; std::fs::write(root.join("person.md"), content).unwrap(); store - .insert_file("person.md", "hash", 100, &[], "per123", None) + .insert_file("person.md", "hash", 100, &[], "per123", None, None) .unwrap(); let input = EditInput { @@ -1644,7 +1655,7 @@ mod tests { let content = "# Note\n\n## Tasks\n\n- [x] Old task\n\n## Notes\n\nText\n"; std::fs::write(root.join("note.md"), content).unwrap(); store - .insert_file("note.md", "hash", 100, &[], "not123", None) + .insert_file("note.md", "hash", 100, &[], "not123", None, None) .unwrap(); let input = EditInput { @@ -1668,7 +1679,7 @@ mod tests { let content = "# Doc\n\n## Log\n\nExisting line\n\n## Footer\n\nEnd\n"; std::fs::write(root.join("doc.md"), 
content).unwrap(); store - .insert_file("doc.md", "hash", 100, &[], "doc123", None) + .insert_file("doc.md", "hash", 100, &[], "doc123", None, None) .unwrap(); let input = EditInput { @@ -1695,7 +1706,7 @@ mod tests { let content = "# Note\n\n## Existing\n\nContent\n"; std::fs::write(root.join("note.md"), content).unwrap(); store - .insert_file("note.md", "hash", 100, &[], "not123", None) + .insert_file("note.md", "hash", 100, &[], "not123", None, None) .unwrap(); let input = EditInput { @@ -1744,6 +1755,7 @@ mod tests { &["project".to_string()], "rew123", None, + None, ) .unwrap(); @@ -1775,6 +1787,7 @@ mod tests { &["project".to_string()], "efm123", None, + None, ) .unwrap(); @@ -1803,6 +1816,7 @@ mod tests { &["project".to_string(), "old".to_string()], "efm456", None, + None, ) .unwrap(); @@ -1824,7 +1838,7 @@ mod tests { let content = "---\nstatus: draft\n---\n\n# Content\n"; std::fs::write(root.join("note.md"), content).unwrap(); store - .insert_file("note.md", "hash", 100, &[], "efm789", None) + .insert_file("note.md", "hash", 100, &[], "efm789", None, None) .unwrap(); let input = EditFrontmatterInput { @@ -1845,7 +1859,7 @@ mod tests { let content = "---\nstatus: draft\ntitle: Test\n---\n\n# Content\n"; std::fs::write(root.join("note.md"), content).unwrap(); store - .insert_file("note.md", "hash", 100, &[], "efmrm1", None) + .insert_file("note.md", "hash", 100, &[], "efmrm1", None, None) .unwrap(); let input = EditFrontmatterInput { @@ -1873,6 +1887,7 @@ mod tests { &["test".to_string()], "efmal1", None, + None, ) .unwrap(); @@ -1894,7 +1909,7 @@ mod tests { let content = "# Content\n\nJust body, no frontmatter.\n"; std::fs::write(root.join("note.md"), content).unwrap(); store - .insert_file("note.md", "hash", 100, &[], "efmnf1", None) + .insert_file("note.md", "hash", 100, &[], "efmnf1", None, None) .unwrap(); let input = EditFrontmatterInput { @@ -1927,6 +1942,7 @@ mod tests { &["old-tag".to_string()], "efmmo1", None, + None, ) .unwrap(); @@ -1956,7 
+1972,7 @@ mod tests { std::fs::create_dir_all(root.join("04-Archive")).unwrap(); std::fs::write(root.join("deleteme.md"), "# Delete me").unwrap(); store - .insert_file("deleteme.md", "hash", 100, &[], "del123", None) + .insert_file("deleteme.md", "hash", 100, &[], "del123", None, None) .unwrap(); delete_note( @@ -1978,7 +1994,7 @@ mod tests { let (tmp, store, root) = setup_vault(); std::fs::write(root.join("gone.md"), "# Gone forever").unwrap(); store - .insert_file("gone.md", "hash", 100, &[], "gon123", None) + .insert_file("gone.md", "hash", 100, &[], "gon123", None, None) .unwrap(); delete_note(&store, &root, "gone.md", DeleteMode::Hard, "").unwrap(); diff --git a/tests/integration.rs b/tests/integration.rs index 89923d1..049e5c9 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -103,7 +103,7 @@ fn index_vault(vault_path: &Path, data_dir: &Path, config: &Config, rebuild: boo let docid = generate_docid(&rel_str); let file_id = store - .insert_file(&rel_str, &hash, 0, &tags, &docid) + .insert_file(&rel_str, &hash, 0, &tags, &docid, None, None) .unwrap(); for chunk in &chunks { diff --git a/tests/write_pipeline.rs b/tests/write_pipeline.rs index 302f66d..33e88bc 100644 --- a/tests/write_pipeline.rs +++ b/tests/write_pipeline.rs @@ -38,6 +38,8 @@ fn setup(vault_dir: &Path) -> (Store, Embedder) { 0, &[], &docid, + None, + None, ) .unwrap(); From 6b3ae697fae321620e4863a59a8b4bfc93be431c Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 20:51:24 +0200 Subject: [PATCH 3/9] feat(indexer): extract note_date from frontmatter and filename during indexing --- src/indexer.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/indexer.rs b/src/indexer.rs index e8f0d51..e5a3238 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -305,6 +305,9 @@ pub fn index_file( None }); + // Extract note_date from frontmatter or filename + let note_date = crate::temporal::extract_note_date(&frontmatter, rel_path); + // 2. 
Embed all chunks let token_counts: Vec = chunks .iter() @@ -351,7 +354,7 @@ pub fn index_file( &tags, &docid, created_by.as_deref(), - None, + note_date, )?; let mut next_vector_id: u64 = store.next_vector_id()?; From 65a24def40f264ef3c8563bab10e2d3e92c3f0ff Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 20:54:59 +0200 Subject: [PATCH 4/9] feat(llm): add Temporal intent, date_range on OrchestrationResult, heuristic + LLM detection --- src/llm.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/src/llm.rs b/src/llm.rs index 4709320..77ee478 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -94,6 +94,8 @@ pub enum QueryIntent { Relationship, /// User is browsing without a clear target. Exploratory, + /// User is asking about a specific time period. + Temporal, } /// Output produced by an orchestrator model for a query. @@ -103,6 +105,9 @@ pub struct OrchestrationResult { pub intent: QueryIntent, /// Query string(s) to actually run (original + any expansions). pub expansions: Vec, + /// Optional unix-timestamp range for temporal queries (start, end). + #[serde(default)] + pub date_range: Option<(i64, i64)>, } /// Per-lane weights for the RRF fusion step. 
@@ -112,6 +117,7 @@ pub struct LaneWeights { pub fts: f64, pub graph: f64, pub rerank: f64, + pub temporal: f64, } impl LaneWeights { @@ -123,24 +129,35 @@ impl LaneWeights { semantic: 0.6, graph: 0.6, rerank: 0.8, + temporal: 0.0, }, QueryIntent::Conceptual => Self { semantic: 1.2, fts: 0.8, graph: 1.0, rerank: 1.2, + temporal: 0.0, }, QueryIntent::Relationship => Self { graph: 1.5, semantic: 0.8, fts: 0.8, rerank: 1.0, + temporal: 0.0, }, QueryIntent::Exploratory => Self { semantic: 1.0, fts: 1.0, graph: 0.8, rerank: 1.0, + temporal: 0.0, + }, + QueryIntent::Temporal => Self { + semantic: 0.6, + fts: 0.8, + graph: 0.5, + rerank: 0.8, + temporal: 1.5, }, } } @@ -152,6 +169,7 @@ impl LaneWeights { fts: 1.0, graph: 0.8, rerank: 0.0, + temporal: 0.0, } } } @@ -311,6 +329,7 @@ impl OrchestratorModel for MockLlm { Ok(OrchestrationResult { intent: QueryIntent::Exploratory, expansions: vec![query.to_owned()], + date_range: None, }) } } @@ -776,11 +795,22 @@ impl EmbedModel for LlamaEmbed { pub fn heuristic_orchestrate(query: &str) -> OrchestrationResult { let trimmed = query.trim(); + // Temporal: detect date/time references in the query + let date_range = crate::temporal::parse_date_range_heuristic(query); + if date_range.is_some() { + return OrchestrationResult { + intent: QueryIntent::Temporal, + expansions: vec![trimmed.to_string()], + date_range, + }; + } + // Exact: docids (#abc123) or ticket IDs (ABC-1234) if trimmed.starts_with('#') && trimmed.len() <= 8 { return OrchestrationResult { intent: QueryIntent::Exact, expansions: vec![trimmed.to_string()], + date_range: None, }; } // Ticket ID pattern: PREFIX-1234 @@ -793,6 +823,7 @@ pub fn heuristic_orchestrate(query: &str) -> OrchestrationResult { return OrchestrationResult { intent: QueryIntent::Exact, expansions: vec![trimmed.to_string()], + date_range: None, }; } } @@ -803,6 +834,7 @@ pub fn heuristic_orchestrate(query: &str) -> OrchestrationResult { return OrchestrationResult { intent: QueryIntent::Relationship, 
expansions: vec![trimmed.to_string()], + date_range: None, }; } @@ -824,6 +856,7 @@ pub fn heuristic_orchestrate(query: &str) -> OrchestrationResult { OrchestrationResult { intent: QueryIntent::Exploratory, expansions, + date_range: None, } } @@ -843,6 +876,7 @@ pub fn parse_orchestration_json(text: &str) -> Result { "exact" => QueryIntent::Exact, "conceptual" => QueryIntent::Conceptual, "relationship" => QueryIntent::Relationship, + "temporal" => QueryIntent::Temporal, _ => QueryIntent::Exploratory, }; @@ -859,7 +893,14 @@ pub fn parse_orchestration_json(text: &str) -> Result { anyhow::bail!("no expansions in orchestration response"); } - Ok(OrchestrationResult { intent, expansions }) + let date_range = crate::temporal::parse_date_range_from_json(&parsed); + let intent = if date_range.is_some() && intent != QueryIntent::Temporal { + QueryIntent::Temporal + } else { + intent + }; + + Ok(OrchestrationResult { intent, expansions, date_range }) } /// Extract the first JSON object ({...}) from text, handling nested braces. @@ -886,8 +927,11 @@ fn extract_json_object(text: &str) -> Option<&str> { const ORCHESTRATOR_SYSTEM_PROMPT: &str = r#"You are a search query analyzer. Given a user's search query, classify it and expand it. Return JSON with: -- "intent": one of "exact", "conceptual", "relationship", "exploratory" +- "intent": one of "exact", "conceptual", "relationship", "exploratory", "temporal" - "expansions": 2-4 alternative phrasings (always include the original query first) +- "date_range": (only for temporal queries) {"start":"YYYY-MM-DD","end":"YYYY-MM-DD"} + +Use "temporal" intent when the query references a time period (e.g. "yesterday", "last week", "March 2026"). Be concise. 
Only return the JSON object."#; @@ -1512,4 +1556,50 @@ mod tests { let mock = MockLlm::new(256); assert_rerank(&mock); } + + // ── Temporal intent tests ──────────────────────────────────────────────── + + #[test] + fn test_temporal_intent_weights() { + let weights = LaneWeights::from_intent(&QueryIntent::Temporal); + assert!(weights.temporal > weights.semantic); + assert!(weights.temporal > 1.0); + } + + #[test] + fn test_non_temporal_intent_has_zero_temporal() { + let exact = LaneWeights::from_intent(&QueryIntent::Exact); + assert!((exact.temporal - 0.0).abs() < f64::EPSILON); + let conceptual = LaneWeights::from_intent(&QueryIntent::Conceptual); + assert!((conceptual.temporal - 0.0).abs() < f64::EPSILON); + } + + #[test] + fn test_heuristic_orchestrate_temporal() { + let result = heuristic_orchestrate("what happened yesterday"); + assert_eq!(result.intent, QueryIntent::Temporal); + assert!(result.date_range.is_some()); + } + + #[test] + fn test_heuristic_orchestrate_non_temporal() { + let result = heuristic_orchestrate("how does auth work"); + assert!(result.date_range.is_none()); + assert_ne!(result.intent, QueryIntent::Temporal); + } + + #[test] + fn test_parse_json_with_date_range() { + let json = r#"{"intent":"temporal","expansions":["last week updates"],"date_range":{"start":"2026-03-19","end":"2026-03-25"}}"#; + let result = parse_orchestration_json(json).unwrap(); + assert_eq!(result.intent, QueryIntent::Temporal); + assert!(result.date_range.is_some()); + } + + #[test] + fn test_parse_json_without_date_range_backward_compat() { + let json = r#"{"intent":"exact","expansions":["BRE-1234"]}"#; + let result = parse_orchestration_json(json).unwrap(); + assert!(result.date_range.is_none()); + } } From 94606e19c43502b55ece144085c85b6325a35910 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 20:58:04 +0200 Subject: [PATCH 5/9] feat(search): integrate temporal lane with candidate injection and 5-lane RRF --- src/search.rs | 94 
++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 4 deletions(-) diff --git a/src/search.rs b/src/search.rs index eb41be5..3119351 100644 --- a/src/search.rs +++ b/src/search.rs @@ -201,7 +201,37 @@ pub fn search_with_intelligence( let fts_results = dedup_by_file(all_fts); // --- Graph lane from combined seeds --- - let combined_seeds = merge_seeds(&semantic_results, &fts_results); + let mut combined_seeds = merge_seeds(&semantic_results, &fts_results); + + // Inject temporal candidates as graph seeds when date_range is present + let temporal_seeds: Vec = if let Some(range) = &orchestration.date_range { + config + .store + .get_files_in_date_range(range.0, range.1) + .unwrap_or_default() + .iter() + .map(|f| RankedResult { + file_path: f.path.clone(), + file_id: f.id, + score: 1.0, + heading: None, + snippet: String::new(), + docid: f.docid.clone(), + }) + .collect() + } else { + vec![] + }; + for ts in &temporal_seeds { + let dominated = combined_seeds + .iter() + .any(|s| s.file_path == ts.file_path && s.score >= ts.score); + if !dominated { + combined_seeds.retain(|s| s.file_path != ts.file_path); + combined_seeds.push(ts.clone()); + } + } + let graph_results = graph::graph_expand(config.store, &combined_seeds, query, 2, 20).unwrap_or_default(); @@ -217,8 +247,8 @@ pub fn search_with_intelligence( ); // --- Step 4: Reranker (4th lane) if available --- - let final_fused = if let Some(reranker) = &mut config.reranker { - let mut rerank_results: Vec = Vec::new(); + let mut rerank_results: Vec = Vec::new(); + let reranker_used = if let Some(reranker) = &mut config.reranker { for candidate in fused_pass1.iter().take(config.rerank_candidates) { let score = reranker .rerank_score(query, &candidate.snippet) @@ -237,8 +267,63 @@ pub fn search_with_intelligence( .partial_cmp(&a.score) .unwrap_or(std::cmp::Ordering::Equal) }); + true + } else { + false + }; + + // --- Step 5: Temporal lane (5th lane) when date_range is present --- + let 
final_fused = if let Some(range) = &orchestration.date_range { + // Build temporal lane: score ALL candidates from pass1/reranked by date proximity + let base_fused = if reranker_used { + fusion::rrf_fuse( + &[ + ("semantic", &semantic_results, weights.semantic), + ("fts", &fts_results, weights.fts), + ("graph", &graph_results, weights.graph), + ("rerank", &rerank_results, weights.rerank), + ], + RRF_K, + ) + } else { + // Use pass1 as the candidate source; avoid clone by re-referencing + fused_pass1 + }; + let mut temporal_results: Vec = base_fused + .iter() + .filter_map(|c| { + let file = config.store.get_file(&c.file_path).ok()??; + let nd = file.note_date?; + let score = crate::temporal::temporal_score(nd, range.0, range.1); + Some(RankedResult { + file_path: c.file_path.clone(), + file_id: c.file_id, + score, + heading: c.heading.clone(), + snippet: c.snippet.clone(), + docid: c.docid.clone(), + }) + }) + .collect(); + temporal_results.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); - // RRF Pass 2 (4-lane) + // 5-lane RRF (rerank_results is empty when reranker absent, weight 0) + fusion::rrf_fuse( + &[ + ("semantic", &semantic_results, weights.semantic), + ("fts", &fts_results, weights.fts), + ("graph", &graph_results, weights.graph), + ("rerank", &rerank_results, weights.rerank), + ("temporal", &temporal_results, weights.temporal), + ], + RRF_K, + ) + } else if reranker_used { + // Non-temporal with reranker: 4-lane (existing behavior) fusion::rrf_fuse( &[ ("semantic", &semantic_results, weights.semantic), @@ -249,6 +334,7 @@ pub fn search_with_intelligence( RRF_K, ) } else { + // Non-temporal without reranker: 3-lane (existing behavior) fused_pass1 }; From 7e2db8b2dac1723a64651f17b432680c09300494 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 21:02:33 +0200 Subject: [PATCH 6/9] feat(search): confidence % display, date coverage in status --- src/context.rs | 3 +++ src/fusion.rs | 
13 +++++++++++++ src/search.rs | 33 +++++++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/context.rs b/src/context.rs index 222dca8..1807cf2 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1033,6 +1033,7 @@ mod tests { file_path: "result.md".into(), file_id: 1, score: 0.85, + confidence: 100.0, heading: Some("# Result".into()), snippet: "relevant content".into(), docid: Some("aaa111".into()), @@ -1066,6 +1067,7 @@ mod tests { file_path: "long.md".into(), file_id: 1, score: 0.9, + confidence: 100.0, heading: None, snippet: "word word".into(), docid: Some("aaa111".into()), @@ -1103,6 +1105,7 @@ mod tests { file_path: "main.md".into(), file_id: f1, score: 0.8, + confidence: 100.0, heading: None, snippet: "Main".into(), docid: Some("aaa111".into()), diff --git a/src/fusion.rs b/src/fusion.rs index 67d1810..8bd026f 100644 --- a/src/fusion.rs +++ b/src/fusion.rs @@ -25,6 +25,7 @@ pub struct FusedResult { pub snippet: String, pub docid: Option, pub lane_contributions: Vec, + pub confidence: f64, // 0-100% normalized score } /// Per-lane contribution details for --explain output. @@ -110,6 +111,7 @@ pub fn rrf_fuse(lanes: &[(&str, &[RankedResult], f64)], k: usize) -> Vec Vec 0.0 { + (r.rrf_score / max_score) * 100.0 + } else { + 0.0 + }; + } + results } @@ -236,6 +248,7 @@ mod tests { heading: None, snippet: "test".to_string(), docid: None, + confidence: 100.0, lane_contributions: vec![ LaneContribution { lane_name: "semantic".to_string(), diff --git a/src/search.rs b/src/search.rs index 3119351..ee36d1e 100644 --- a/src/search.rs +++ b/src/search.rs @@ -19,6 +19,7 @@ fn orchestration_cache_key(query: &str) -> String { /// A single search result with metadata. 
pub struct SearchResult { pub score: f32, + pub confidence: f64, pub file_path: String, pub heading: Option, pub snippet: String, @@ -31,6 +32,7 @@ pub struct InternalSearchResult { pub file_path: String, pub file_id: i64, pub score: f64, + pub confidence: f64, pub heading: Option, pub snippet: String, pub docid: Option, @@ -346,6 +348,7 @@ pub fn search_with_intelligence( file_path: f.file_path.clone(), file_id: f.file_id, score: f.rrf_score, + confidence: f.confidence, heading: f.heading.clone(), snippet: f.snippet.clone(), docid: f.docid.clone(), @@ -457,6 +460,7 @@ pub fn run_search( .iter() .map(|r| SearchResult { score: r.score as f32, + confidence: r.confidence, file_path: r.file_path.clone(), heading: r.heading.clone(), snippet: r.snippet.clone(), @@ -488,6 +492,7 @@ pub fn run_status(json: bool, data_dir: &Path) -> Result<()> { let db_path = data_dir.join("engraph.db"); let store = Store::open(&db_path).context("opening store")?; let stats = store.stats()?; + let date_count = store.count_files_with_dates().unwrap_or(0); // Compute index size on disk (sqlite db file). let index_size = std::fs::metadata(&db_path).map(|m| m.len()).unwrap_or(0); @@ -501,7 +506,7 @@ pub fn run_status(json: bool, data_dir: &Path) -> Result<()> { "disabled" }; - let output = format_status(&stats, index_size, model_name, intelligence, json); + let output = format_status(&stats, index_size, model_name, intelligence, date_count, json); print!("{output}"); Ok(()) } @@ -526,6 +531,7 @@ pub fn format_results(results: &[SearchResult], json: bool) -> String { json!({ "rank": i + 1, "score": score_rounded, + "confidence": r.confidence, "file": r.file_path, "heading": r.heading, "snippet": r.snippet, @@ -547,9 +553,9 @@ pub fn format_results(results: &[SearchResult], json: bool) -> String { }; let snippet = truncate_snippet(&r.snippet, 200); out.push_str(&format!( - "{:>2}. [{:.2}] {}{}{}\n {}\n", + "{:>2}. 
[{:>3.0}%] {}{}{}\n {}\n", i + 1, - r.score, + r.confidence, r.file_path, heading_part, docid_part, @@ -566,6 +572,7 @@ pub fn format_status( index_size: u64, model_name: &str, intelligence: &str, + date_count: usize, json: bool, ) -> String { let vault = stats.vault_path.as_deref().unwrap_or(""); @@ -581,6 +588,7 @@ pub fn format_status( "index_size": index_size, "model": model_name, "intelligence": intelligence, + "files_with_dates": date_count, }); if let (Some(edges), Some(wl), Some(mn)) = (stats.edge_count, stats.wikilink_count, stats.mention_count) @@ -606,11 +614,14 @@ pub fn format_status( )); } out.push_str(&format!( - "Tombstones: {} (pending cleanup)\n\ + "Dates: {}/{} files\n\ + Tombstones: {} (pending cleanup)\n\ Last index: {}\n\ Index size: {}\n\ Model: {}\n\ Intelligence: {}\n", + date_count, + stats.file_count, stats.tombstone_count, last_indexed, format_bytes(index_size), @@ -660,6 +671,7 @@ mod tests { fn test_format_human_result() { let results = vec![SearchResult { score: 0.87, + confidence: 100.0, file_path: "foo.md".to_string(), heading: Some("## Bar".to_string()), snippet: "Some text...".to_string(), @@ -668,7 +680,7 @@ mod tests { let output = format_results(&results, false); assert_eq!( output, - " 1. [0.87] foo.md > ## Bar #ab12cd\n Some text...\n" + " 1. [100%] foo.md > ## Bar #ab12cd\n Some text...\n" ); } @@ -676,19 +688,21 @@ mod tests { fn test_format_human_result_no_docid() { let results = vec![SearchResult { score: 0.87, + confidence: 100.0, file_path: "foo.md".to_string(), heading: Some("## Bar".to_string()), snippet: "Some text...".to_string(), docid: None, }]; let output = format_results(&results, false); - assert_eq!(output, " 1. [0.87] foo.md > ## Bar\n Some text...\n"); + assert_eq!(output, " 1. 
[100%] foo.md > ## Bar\n Some text...\n"); } #[test] fn test_format_json_result() { let results = vec![SearchResult { score: 0.87, + confidence: 100.0, file_path: "foo.md".to_string(), heading: Some("## Bar".to_string()), snippet: "Some text...".to_string(), @@ -699,6 +713,7 @@ mod tests { assert_eq!(parsed.len(), 1); assert_eq!(parsed[0]["rank"], 1); assert_eq!(parsed[0]["score"], 0.87); + assert_eq!(parsed[0]["confidence"], 100.0); assert_eq!(parsed[0]["file"], "foo.md"); assert_eq!(parsed[0]["heading"], "## Bar"); assert_eq!(parsed[0]["snippet"], "Some text..."); @@ -726,11 +741,12 @@ mod tests { wikilink_count: None, mention_count: None, }; - let output = format_status(&stats, 2_516_582, "all-MiniLM-L6-v2", "disabled", false); + let output = format_status(&stats, 2_516_582, "all-MiniLM-L6-v2", "disabled", 30, false); assert!(output.contains("/path/to/vault"), "missing vault path"); assert!(output.contains("42"), "missing file count"); assert!(output.contains("187"), "missing chunk count"); + assert!(output.contains("30/42 files"), "missing date coverage"); assert!(output.contains("3"), "missing tombstone count"); assert!(output.contains("2026-03-19 14:30:00"), "missing last index"); assert!(output.contains("2.4 MB"), "missing index size"); @@ -750,7 +766,7 @@ mod tests { wikilink_count: None, mention_count: None, }; - let output = format_status(&stats, 2_516_582, "all-MiniLM-L6-v2", "enabled", true); + let output = format_status(&stats, 2_516_582, "all-MiniLM-L6-v2", "enabled", 30, true); let parsed: serde_json::Value = serde_json::from_str(&output).unwrap(); assert_eq!(parsed["vault"], "/path/to/vault"); @@ -761,6 +777,7 @@ mod tests { assert_eq!(parsed["index_size"], 2_516_582); assert_eq!(parsed["model"], "all-MiniLM-L6-v2"); assert_eq!(parsed["intelligence"], "enabled"); + assert_eq!(parsed["files_with_dates"], 30); } #[test] From f66d3c256c7edabebeb9c0cb068a7ae66c620819 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 21:06:00 
+0200 Subject: [PATCH 7/9] fix: resolve clippy warnings for v1.2 --- src/context.rs | 10 +++- src/health.rs | 10 +++- src/indexer.rs | 10 +++- src/llm.rs | 6 ++- src/search.rs | 9 +++- src/store.rs | 141 +++++++++++++++++++++++++++++++++++++++++++----- src/temporal.rs | 74 ++++++++++++++----------- 7 files changed, 211 insertions(+), 49 deletions(-) diff --git a/src/context.rs b/src/context.rs index 1807cf2..083ace8 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1021,7 +1021,15 @@ mod tests { let store = Store::open_memory().unwrap(); store - .insert_file("result.md", "h1", 100, &["topic".into()], "aaa111", None, None) + .insert_file( + "result.md", + "h1", + 100, + &["topic".into()], + "aaa111", + None, + None, + ) .unwrap(); let params = ContextParams { diff --git a/src/health.rs b/src/health.rs index efd334d..3da45c4 100644 --- a/src/health.rs +++ b/src/health.rs @@ -145,7 +145,15 @@ mod tests { .insert_file("orphan.md", "bbb222", 100, &[], "bbb222", None, None) .unwrap(); let _daily_id = store - .insert_file("daily/2026-03-26.md", "ccc333", 100, &[], "ccc333", None, None) + .insert_file( + "daily/2026-03-26.md", + "ccc333", + 100, + &[], + "ccc333", + None, + None, + ) .unwrap(); // Add edge: linked.md → orphan.md (both files are "connected") store.insert_edge(linked_id, orphan_id, "wikilink").unwrap(); diff --git a/src/indexer.rs b/src/indexer.rs index e5a3238..141083d 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -918,7 +918,15 @@ mod tests { fn test_people_mention_detection() { let store = Store::open_memory().unwrap(); let person = store - .insert_file("People/John Nelson.md", "h1", 100, &[], "aaa111", None, None) + .insert_file( + "People/John Nelson.md", + "h1", + 100, + &[], + "aaa111", + None, + None, + ) .unwrap(); let note = store .insert_file("daily.md", "h2", 100, &[], "bbb222", None, None) diff --git a/src/llm.rs b/src/llm.rs index 77ee478..fde78a7 100644 --- a/src/llm.rs +++ b/src/llm.rs @@ -900,7 +900,11 @@ pub fn 
parse_orchestration_json(text: &str) -> Result { intent }; - Ok(OrchestrationResult { intent, expansions, date_range }) + Ok(OrchestrationResult { + intent, + expansions, + date_range, + }) } /// Extract the first JSON object ({...}) from text, handling nested braces. diff --git a/src/search.rs b/src/search.rs index ee36d1e..4a78481 100644 --- a/src/search.rs +++ b/src/search.rs @@ -506,7 +506,14 @@ pub fn run_status(json: bool, data_dir: &Path) -> Result<()> { "disabled" }; - let output = format_status(&stats, index_size, model_name, intelligence, date_count, json); + let output = format_status( + &stats, + index_size, + model_name, + intelligence, + date_count, + json, + ); print!("{output}"); Ok(()) } diff --git a/src/store.rs b/src/store.rs index 168ae5a..3eea4f0 100644 --- a/src/store.rs +++ b/src/store.rs @@ -370,6 +370,7 @@ impl Store { // ── Files ─────────────────────────────────────────────────── + #[allow(clippy::too_many_arguments)] pub fn insert_file( &self, path: &str, @@ -1833,7 +1834,15 @@ mod tests { let tags = vec!["rust".to_string(), "programming".to_string()]; let docid = generate_docid("notes/test.md"); let file_id = store - .insert_file("notes/test.md", "abc123", 1700000000, &tags, &docid, None, None) + .insert_file( + "notes/test.md", + "abc123", + 1700000000, + &tags, + &docid, + None, + None, + ) .unwrap(); assert!(file_id > 0); @@ -2188,13 +2197,37 @@ mod tests { fn test_get_neighbors_depth_1() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("n/f1.md", "h1", 100, &[], &generate_docid("n/f1.md"), None, None) + .insert_file( + "n/f1.md", + "h1", + 100, + &[], + &generate_docid("n/f1.md"), + None, + None, + ) .unwrap(); let f2 = store - .insert_file("n/f2.md", "h2", 100, &[], &generate_docid("n/f2.md"), None, None) + .insert_file( + "n/f2.md", + "h2", + 100, + &[], + &generate_docid("n/f2.md"), + None, + None, + ) .unwrap(); let f3 = store - .insert_file("n/f3.md", "h3", 100, &[], &generate_docid("n/f3.md"), None, 
None) + .insert_file( + "n/f3.md", + "h3", + 100, + &[], + &generate_docid("n/f3.md"), + None, + None, + ) .unwrap(); store.insert_edge(f1, f2, "wikilink").unwrap(); @@ -2217,16 +2250,48 @@ mod tests { fn test_get_neighbors_depth_2() { let store = Store::open_memory().unwrap(); let f1 = store - .insert_file("n/f1.md", "h1", 100, &[], &generate_docid("n/f1.md"), None, None) + .insert_file( + "n/f1.md", + "h1", + 100, + &[], + &generate_docid("n/f1.md"), + None, + None, + ) .unwrap(); let f2 = store - .insert_file("n/f2.md", "h2", 100, &[], &generate_docid("n/f2.md"), None, None) + .insert_file( + "n/f2.md", + "h2", + 100, + &[], + &generate_docid("n/f2.md"), + None, + None, + ) .unwrap(); let f3 = store - .insert_file("n/f3.md", "h3", 100, &[], &generate_docid("n/f3.md"), None, None) + .insert_file( + "n/f3.md", + "h3", + 100, + &[], + &generate_docid("n/f3.md"), + None, + None, + ) .unwrap(); let f4 = store - .insert_file("n/f4.md", "h4", 100, &[], &generate_docid("n/f4.md"), None, None) + .insert_file( + "n/f4.md", + "h4", + 100, + &[], + &generate_docid("n/f4.md"), + None, + None, + ) .unwrap(); // f1 -> f2 -> f3 -> f4 @@ -2341,17 +2406,49 @@ mod tests { fn test_get_edge_stats() { let store = Store::open_memory().unwrap(); let a = store - .insert_file("n/a.md", "ha", 100, &[], &generate_docid("n/a.md"), None, None) + .insert_file( + "n/a.md", + "ha", + 100, + &[], + &generate_docid("n/a.md"), + None, + None, + ) .unwrap(); let b = store - .insert_file("n/b.md", "hb", 100, &[], &generate_docid("n/b.md"), None, None) + .insert_file( + "n/b.md", + "hb", + 100, + &[], + &generate_docid("n/b.md"), + None, + None, + ) .unwrap(); let c = store - .insert_file("n/c.md", "hc", 100, &[], &generate_docid("n/c.md"), None, None) + .insert_file( + "n/c.md", + "hc", + 100, + &[], + &generate_docid("n/c.md"), + None, + None, + ) .unwrap(); // d is isolated (no edges). 
let _d = store - .insert_file("n/d.md", "hd", 100, &[], &generate_docid("n/d.md"), None, None) + .insert_file( + "n/d.md", + "hd", + 100, + &[], + &generate_docid("n/d.md"), + None, + None, + ) .unwrap(); store.insert_edge(a, b, "wikilink").unwrap(); @@ -2566,7 +2663,15 @@ mod tests { fn test_find_file_by_basename() { let store = Store::open_memory().unwrap(); store - .insert_file("01-Projects/Work/note.md", "h1", 100, &[], "aaa111", None, None) + .insert_file( + "01-Projects/Work/note.md", + "h1", + 100, + &[], + "aaa111", + None, + None, + ) .unwrap(); store .insert_file("root.md", "h2", 100, &[], "bbb222", None, None) @@ -2794,7 +2899,15 @@ mod tests { let store = Store::open_memory().unwrap(); let docid = generate_docid("notes/test.md"); store - .insert_file("notes/test.md", "hash1", 100, &[], &docid, Some("cli"), None) + .insert_file( + "notes/test.md", + "hash1", + 100, + &[], + &docid, + Some("cli"), + None, + ) .unwrap(); let rec = store.get_file("notes/test.md").unwrap().unwrap(); assert_eq!(rec.created_by, Some("cli".to_string())); diff --git a/src/temporal.rs b/src/temporal.rs index ffd25c7..4db4899 100644 --- a/src/temporal.rs +++ b/src/temporal.rs @@ -29,7 +29,11 @@ fn extract_date_from_frontmatter(frontmatter: &str) -> Option { if let Some(rest) = trimmed.strip_prefix("date:") { let value = rest.trim().trim_matches('"').trim_matches('\''); // Take only the first 10 chars in case of datetime like 2026-03-25T10:00:00 - let date_str = if value.len() >= 10 { &value[..10] } else { value }; + let date_str = if value.len() >= 10 { + &value[..10] + } else { + value + }; if let Some(ts) = parse_iso_date(date_str) { return Some(ts); } @@ -47,10 +51,11 @@ fn extract_date_from_filename(filename: &str) -> Option { } for i in 0..=bytes.len() - 10 { let candidate = &filename[i..i + 10]; - if candidate.as_bytes()[4] == b'-' && candidate.as_bytes()[7] == b'-' { - if let Some(ts) = parse_iso_date(candidate) { - return Some(ts); - } + if candidate.as_bytes()[4] == b'-' 
+ && candidate.as_bytes()[7] == b'-' + && let Some(ts) = parse_iso_date(candidate) + { + return Some(ts); } } None @@ -109,10 +114,7 @@ pub fn parse_date_range_heuristic(query: &str) -> Option<(i64, i64)> { } /// Internal implementation with injectable reference time for testing. -fn parse_date_range_heuristic_with_ref( - query: &str, - now: OffsetDateTime, -) -> Option<(i64, i64)> { +fn parse_date_range_heuristic_with_ref(query: &str, now: OffsetDateTime) -> Option<(i64, i64)> { let lower = query.to_lowercase(); let today = now.date(); @@ -145,12 +147,12 @@ fn parse_date_range_heuristic_with_ref( // "last month" — previous month 1st to last day if lower.contains("last month") { let (prev_year, prev_month) = prev_month(today.year(), today.month()); - return Some(month_range(prev_year, prev_month)?); + return month_range(prev_year, prev_month); } // "this month" — current month 1st to last day if lower.contains("this month") { - return Some(month_range(today.year(), today.month())?); + return month_range(today.year(), today.month()); } // "recent" / "recently" — last 7 days @@ -226,7 +228,8 @@ fn monday_of_week(date: Date) -> Date { Weekday::Saturday => 5, Weekday::Sunday => 6, }; - date.checked_sub(Duration::days(days_since_monday)).expect("valid date subtraction") + date.checked_sub(Duration::days(days_since_monday)) + .expect("valid date subtraction") } /// Return the previous month and its year. 
@@ -269,15 +272,16 @@ fn find_iso_date_in_query(query: &str) -> Option<(i64, i64)> { } for i in 0..=bytes.len() - 10 { let candidate = &query[i..i + 10]; - if candidate.as_bytes()[4] == b'-' && candidate.as_bytes()[7] == b'-' { - if let Some(ts) = parse_iso_date(candidate) { - let fmt = format_description!("[year]-[month]-[day]"); - if let Ok(date) = Date::parse(candidate, &fmt) { - return Some(day_range(date)); - } - // Fallback: use the parsed timestamp - return Some((ts, ts + 86399)); + if candidate.as_bytes()[4] == b'-' + && candidate.as_bytes()[7] == b'-' + && let Some(ts) = parse_iso_date(candidate) + { + let fmt = format_description!("[year]-[month]-[day]"); + if let Ok(date) = Date::parse(candidate, &fmt) { + return Some(day_range(date)); } + // Fallback: use the parsed timestamp + return Some((ts, ts + 86399)); } } None @@ -481,21 +485,24 @@ mod tests { #[test] fn heuristic_today() { - let (start, end) = parse_date_range_heuristic_with_ref("what happened today", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("what happened today", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 26)); assert_eq!(end, date_ts(2026, 3, 26) + 86399); } #[test] fn heuristic_this_morning() { - let (start, end) = parse_date_range_heuristic_with_ref("notes from this morning", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("notes from this morning", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 26)); assert_eq!(end, date_ts(2026, 3, 26) + 86399); } #[test] fn heuristic_yesterday() { - let (start, end) = parse_date_range_heuristic_with_ref("yesterday's standup", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("yesterday's standup", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 25)); assert_eq!(end, date_ts(2026, 3, 25) + 86399); } @@ -503,7 +510,8 @@ mod tests { #[test] fn heuristic_last_week() { // 2026-03-26 is Thursday. 
Last week = Mon Mar 16 – Sun Mar 22 - let (start, end) = parse_date_range_heuristic_with_ref("what did I do last week", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("what did I do last week", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 16)); assert_eq!(end, date_ts(2026, 3, 22) + 86399); } @@ -511,7 +519,8 @@ mod tests { #[test] fn heuristic_this_week() { // 2026-03-26 is Thursday. This week = Mon Mar 23 – Sun Mar 29 - let (start, end) = parse_date_range_heuristic_with_ref("this week's tasks", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("this week's tasks", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 23)); assert_eq!(end, date_ts(2026, 3, 29) + 86399); } @@ -519,7 +528,8 @@ mod tests { #[test] fn heuristic_last_month() { // Current: March 2026. Last month = Feb 1 – Feb 28, 2026 - let (start, end) = parse_date_range_heuristic_with_ref("last month summary", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("last month summary", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 2, 1)); assert_eq!(end, date_ts(2026, 2, 28) + 86399); } @@ -547,14 +557,16 @@ mod tests { #[test] fn heuristic_iso_date() { - let (start, end) = parse_date_range_heuristic_with_ref("notes from 2026-03-25", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("notes from 2026-03-25", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 25)); assert_eq!(end, date_ts(2026, 3, 25) + 86399); } #[test] fn heuristic_month_name_with_year() { - let (start, end) = parse_date_range_heuristic_with_ref("notes from March 2026", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("notes from March 2026", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 3, 1)); assert_eq!(end, date_ts(2026, 3, 31) + 86399); } @@ -562,14 +574,16 @@ mod tests { #[test] fn heuristic_month_name_bare() { // Bare month name uses 
current year - let (start, end) = parse_date_range_heuristic_with_ref("february notes", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("february notes", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 2, 1)); assert_eq!(end, date_ts(2026, 2, 28) + 86399); } #[test] fn heuristic_month_to_month() { - let (start, end) = parse_date_range_heuristic_with_ref("january to march", ref_time()).unwrap(); + let (start, end) = + parse_date_range_heuristic_with_ref("january to march", ref_time()).unwrap(); assert_eq!(start, date_ts(2026, 1, 1)); assert_eq!(end, date_ts(2026, 3, 31) + 86399); } From 989c5116eafb53e1382667eeba85eaa85726a1c3 Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 21:06:06 +0200 Subject: [PATCH 8/9] docs: update CLAUDE.md, README, CHANGELOG for v1.2 --- CHANGELOG.md | 20 ++++++++++++++++++++ CLAUDE.md | 11 ++++++----- README.md | 26 ++++++++++++++------------ 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c55e604..d53e904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## v1.2.0 — Temporal Search (2026-03-26) + +### Added +- **Temporal search lane** (`temporal.rs`) — 5th RRF lane for time-aware queries +- **Date extraction** — from frontmatter `date:` field or `YYYY-MM-DD` filename pattern +- **Heuristic date parsing** — "today", "yesterday", "last week", "this month", "recent", month names, ISO dates, date ranges +- **LLM date extraction** — orchestrator detects temporal intent and extracts date ranges from natural language +- **Temporal scoring** — smooth decay function for files near but outside the target date range +- **Temporal candidate injection** — date-matched files enter candidate pool as graph seeds +- **Confidence % display** — search results show normalized confidence (0-100%) instead of raw RRF scores +- **Date coverage stats** — `engraph status` shows how many files have extractable dates + +### 
Changed +- `QueryIntent` gains `Temporal` variant with custom lane weights (temporal: 1.5) +- `OrchestrationResult` gains `date_range` field (backward-compatible serde) +- `LaneWeights` gains `temporal` field (0.0 for non-temporal intents) +- `insert_file` signature extended with `note_date` parameter +- Module count: 22 → 23 +- Test count: 318 → 361 + ## [1.1.0] - 2026-03-26 — Complete Vault Gateway ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 19b4840..e2a30c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ Local knowledge graph + intelligence layer for Obsidian vaults. Rust CLI + MCP s ## Architecture -Single binary with 22 modules behind a lib crate: +Single binary with 23 modules behind a lib crate: - `config.rs` — loads `~/.engraph/config.toml` and `vault.toml`, merges CLI args, provides `data_dir()`. Includes `intelligence: Option`, `[models]` section for model overrides, `[obsidian]` section (CLI path, enabled flag), and `[agents]` section (registered AI agent names). `Config::save()` writes back to disk. - `chunker.rs` — smart chunking with break-point scoring algorithm. Finds optimal split points considering headings, code fences, blank lines, and thematic breaks. `split_oversized_chunks()` handles token-aware secondary splitting with overlap @@ -27,13 +27,14 @@ Single binary with 22 modules behind a lib crate: - `profile.rs` — vault profile detection. Auto-detects PARA/Folders/Flat structure, vault type (Obsidian/Logseq/Plain), wikilinks, frontmatter, tags. Content-based role detection for people/daily/archive folders by content patterns (not just names). Writes/loads `vault.toml` - `store.rs` — SQLite persistence. Tables: `meta`, `files` (with docid, created_by), `chunks` (with vector BLOBs), `chunks_fts` (FTS5), `edges` (vault graph), `tombstones`, `tag_registry`, `folder_centroids`, `placement_corrections`, `link_skiplist` (reserved), `llm_cache` (orchestrator result cache), `cli_events` (audit log for CLI operations). 
`vec_chunks` virtual table (sqlite-vec) for KNN search. Dynamic embedding dimension stored in meta. `has_dimension_mismatch()` and `reset_for_reindex()` for migration. Enhanced `resolve_file()` with fuzzy Levenshtein matching as final fallback - `indexer.rs` — orchestrates vault walking (via `ignore` crate for `.gitignore` support), diffing, chunking, embedding, writes to store + sqlite-vec + FTS5, vault graph edge building (wikilinks + people detection), and folder centroid computation. Exposes `index_file`, `remove_file`, `rename_file` as public per-file functions. `run_index_shared` accepts external store/embedder for watcher FullRescan. Dimension migration on model change. -- `search.rs` — hybrid search orchestrator. `search_with_intelligence()` runs the full pipeline: orchestrate (intent + expansions) → 3-lane retrieval per expansion → RRF pass 1 → reranker 4th lane → RRF pass 2. `search_internal()` is a thin wrapper without intelligence models. Adaptive lane weights per query intent. +- `temporal.rs` — temporal search lane. Extracts note dates from frontmatter `date:` field or `YYYY-MM-DD` filename patterns. Heuristic date parsing for natural language ("today", "yesterday", "last week", "this month", "recent", month names, ISO dates, date ranges). Smooth decay scoring for files near but outside target date range. Provides `extract_note_date()` for indexing and `temporal_score()` + `parse_date_range_heuristic()` for search +- `search.rs` — hybrid search orchestrator. `search_with_intelligence()` runs the full pipeline: orchestrate (intent + expansions) → 5-lane RRF retrieval (semantic + FTS5 + graph + reranker + temporal) per expansion → two-pass RRF fusion. `search_internal()` is a thin wrapper without intelligence models. Adaptive lane weights per query intent including temporal (1.5 weight for time-aware queries). Results display normalized confidence percentages (0-100%) instead of raw RRF scores. -`main.rs` is a thin clap CLI (async via `#[tokio::main]`).
Subcommands: `index` (with progress bar), `search` (with `--explain`, loads intelligence models when enabled), `status` (shows intelligence state), `clear`, `init` (intelligence onboarding prompt, detects Obsidian CLI + AI agents), `configure` (`--enable-intelligence`, `--disable-intelligence`, `--model`, `--obsidian-cli`, `--no-obsidian-cli`, `--agent`), `models`, `graph` (show/stats), `context` (read/list/vault-map/who/project/topic), `write` (create/append/update-metadata/move/edit/rewrite/edit-frontmatter/delete), `serve` (MCP stdio server with file watcher + intelligence). +`main.rs` is a thin clap CLI (async via `#[tokio::main]`). Subcommands: `index` (with progress bar), `search` (with `--explain`, loads intelligence models when enabled), `status` (shows intelligence state + date coverage stats), `clear`, `init` (intelligence onboarding prompt, detects Obsidian CLI + AI agents), `configure` (`--enable-intelligence`, `--disable-intelligence`, `--model`, `--obsidian-cli`, `--no-obsidian-cli`, `--agent`), `models`, `graph` (show/stats), `context` (read/list/vault-map/who/project/topic), `write` (create/append/update-metadata/move/edit/rewrite/edit-frontmatter/delete), `serve` (MCP stdio server with file watcher + intelligence). ## Key patterns -- **4-lane hybrid search:** Queries run through up to four lanes — semantic (sqlite-vec KNN embeddings), keyword (FTS5 BM25), graph (wikilink expansion), and cross-encoder reranking. A research orchestrator classifies query intent and sets adaptive lane weights. Two-pass RRF: 3-lane retrieval → reranker scores top 30 → 4-lane fusion. When intelligence is off, falls back to heuristic intent classification with 3-lane search (v0.7 behavior) +- **5-lane hybrid search:** Queries run through up to five lanes — semantic (sqlite-vec KNN embeddings), keyword (FTS5 BM25), graph (wikilink expansion), cross-encoder reranking, and temporal (date-range scoring). 
A research orchestrator classifies query intent and sets adaptive lane weights. Two-pass RRF: retrieval lanes → reranker scores top 30 → 5-lane fusion. When intelligence is off, falls back to heuristic intent classification. Temporal intent detection works with both heuristic and LLM orchestrators - **Vault graph:** `edges` table stores bidirectional wikilink edges and mention edges. Built during indexing after all files are written. People detection scans for person name/alias mentions using notes from the configured People folder - **Graph agent:** Expands seed results by following wikilinks 1-2 hops. Decay: 0.8x for 1-hop, 0.5x for 2-hop. Relevance filter: must contain query term (FTS5) or share tags with seed. Multi-parent merge takes highest score - **Smart chunking:** Break-point scoring algorithm assigns scores to potential split points (headings 50-100, code fences 80, thematic breaks 60, blank lines 20). Code fence protection prevents splitting inside code blocks @@ -73,7 +74,7 @@ Single vault only. Re-indexing a different vault path triggers a confirmation pr ## Testing -- Unit tests in each module (`cargo test --lib`) — 318 tests, no network required +- Unit tests in each module (`cargo test --lib`) — 361 tests, no network required - Integration tests (`cargo test --test integration -- --ignored`) — require GGUF model download - Build requires CMake (for llama.cpp C++ compilation) diff --git a/README.md b/README.md index 646132e..1ec196d 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ engraph turns your markdown vault into a searchable knowledge graph that AI agen Plain vector search treats your notes as isolated documents. But knowledge isn't flat — your notes link to each other, share tags, reference the same people and projects. engraph understands these connections. 
-- **4-lane hybrid search** — semantic embeddings + BM25 full-text + graph expansion + cross-encoder reranking, fused via [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf). An LLM orchestrator classifies queries and adapts lane weights per intent. +- **5-lane hybrid search** — semantic embeddings + BM25 full-text + graph expansion + cross-encoder reranking + temporal scoring, fused via [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf). An LLM orchestrator classifies queries and adapts lane weights per intent. Time-aware queries like "what happened last week" or "March 2026 notes" activate the temporal lane automatically. - **MCP server for AI agents** — `engraph serve` exposes 19 tools (search, read, section-level editing, frontmatter mutations, vault health, context bundles, note creation) that Claude, Cursor, or any MCP client can call directly. - **Section-level editing** — AI agents can read, replace, prepend, or append to specific sections by heading. Full note rewriting with frontmatter preservation. Granular frontmatter mutations (set/remove fields, add/remove tags and aliases). - **Vault health diagnostics** — detect orphan notes, broken wikilinks, stale content, and tag hygiene issues. Available as MCP tool and CLI command. @@ -65,7 +65,7 @@ Your vault (markdown files) ``` 1. **Index** — walks your vault, chunks markdown by headings, embeds with a local GGUF model via llama.cpp (Metal GPU on macOS), stores everything in SQLite with FTS5 + sqlite-vec + a wikilink graph -2. **Search** — an orchestrator classifies the query and sets lane weights, then runs up to four lanes (semantic KNN, BM25 keyword, graph expansion, cross-encoder reranking), fused via RRF +2. **Search** — an orchestrator classifies the query and sets lane weights, then runs up to five lanes (semantic KNN, BM25 keyword, graph expansion, cross-encoder reranking, temporal scoring), fused via RRF 3. 
**Serve** — starts an MCP server that AI agents connect to, with a file watcher that re-indexes changes in real time ## Quick start @@ -98,13 +98,13 @@ engraph search "how does the auth system work" ``` ``` - 1. [0.04] 02-Areas/Development/Auth-Architecture.md > # Auth Architecture #6e1b70 + 1. [97%] 02-Areas/Development/Auth-Architecture.md > # Auth Architecture #6e1b70 OAuth 2.0 with PKCE for all client types. Session tokens stored in HTTP-only cookies... - 2. [0.04] 01-Projects/API-Design.md > # API Design #e3e350 + 2. [95%] 01-Projects/API-Design.md > # API Design #e3e350 All endpoints require Bearer token authentication. Tokens are issued by the OAuth 2.0... - 3. [0.04] 03-Resources/People/Sarah-Chen.md > # Sarah Chen #4adb39 + 3. [91%] 03-Resources/People/Sarah-Chen.md > # Sarah Chen #4adb39 Senior Backend Engineer. Tech lead for authentication and security systems... ``` @@ -145,7 +145,7 @@ engraph configure --enable-intelligence engraph search "how does authentication work" --explain ``` ``` - 1. [0.04] 01-Projects/API-Design.md > # API Design #e3e350 + 1. [97%] 01-Projects/API-Design.md > # API Design #e3e350 All endpoints require Bearer token authentication... 
Intent: Conceptual @@ -248,7 +248,7 @@ Returns orphan notes (no links in or out), broken wikilinks, stale notes, and ta | | engraph | Basic RAG (vector-only) | Obsidian search | |---|---|---|---| -| Search method | 4-lane RRF (semantic + BM25 + graph + reranker) | Vector similarity only | Keyword only | +| Search method | 5-lane RRF (semantic + BM25 + graph + reranker + temporal) | Vector similarity only | Keyword only | | Query understanding | LLM orchestrator classifies intent, adapts weights | None | None | | Understands note links | Yes (wikilink graph traversal) | No | Limited (backlinks panel) | | AI agent access | MCP server (19 tools) | Custom API needed | No | @@ -262,7 +262,9 @@ engraph is not a replacement for Obsidian — it's the intelligence layer that s ## Current capabilities -- 4-lane hybrid search (semantic + FTS5 + graph + cross-encoder reranker) with two-pass RRF fusion +- 5-lane hybrid search (semantic + FTS5 + graph + cross-encoder reranker + temporal) with two-pass RRF fusion +- Temporal search: natural language date queries ("last week", "March 2026", "recent"), date extraction from frontmatter and filenames, smooth decay scoring +- Confidence % display: search results show normalized 0-100% confidence instead of raw RRF scores - LLM research orchestrator: query intent classification + query expansion + adaptive lane weights - llama.cpp inference via Rust bindings (GGUF models, Metal GPU on macOS, CUDA on Linux) - Intelligence opt-in: heuristic fallback when disabled, LLM-powered when enabled @@ -281,7 +283,7 @@ engraph is not a replacement for Obsidian — it's the intelligence layer that s - Enhanced file resolution with fuzzy Levenshtein matching fallback - Content-based folder role detection (people, daily, archive) by content patterns - Configurable model overrides for multilingual support -- 318 unit tests, CI on macOS + Ubuntu +- 361 unit tests, CI on macOS + Ubuntu ## Roadmap @@ -290,7 +292,7 @@ engraph is not a replacement for Obsidian 
— it's the intelligence layer that s - [x] ~~MCP edit/rewrite tools — full note editing for AI agents~~ (v1.1) - [x] ~~Vault health monitor — orphan notes, broken links, stale content, tag hygiene~~ (v1.1) - [x] ~~Obsidian CLI integration — auto-detect and delegate with circuit breaker~~ (v1.1) -- [ ] Temporal search — find notes by time period, detect trends (v1.2) +- [x] ~~Temporal search — find notes by time period, date-aware queries~~ (v1.2) - [ ] HTTP/REST API — complement MCP with a standard web API (v1.3) - [ ] Multi-vault — search across multiple vaults (v1.4) @@ -326,7 +328,7 @@ All data stored in `~/.engraph/` — single SQLite database (~10MB typical), GGU ## Development ```bash -cargo test --lib # 318 unit tests, no network (requires CMake for llama.cpp) +cargo test --lib # 361 unit tests, no network (requires CMake for llama.cpp) cargo clippy -- -D warnings cargo fmt --check @@ -338,7 +340,7 @@ cargo test --test integration -- --ignored Contributions welcome. Please open an issue first to discuss what you'd like to change. -The codebase is 22 Rust modules behind a lib crate. `CLAUDE.md` in the repo root has detailed architecture documentation for AI-assisted development. +The codebase is 23 Rust modules behind a lib crate. `CLAUDE.md` in the repo root has detailed architecture documentation for AI-assisted development. ## License From 98aba42e2f2b037419f161e96ad8c15be038d99a Mon Sep 17 00:00:00 2001 From: Oleksandr Ostrovskyi Date: Thu, 26 Mar 2026 21:10:53 +0200 Subject: [PATCH 9/9] fix(temporal): handle multi-byte UTF-8 filenames in date extraction --- src/temporal.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/temporal.rs b/src/temporal.rs index 4db4899..78b78c5 100644 --- a/src/temporal.rs +++ b/src/temporal.rs @@ -44,12 +44,17 @@ fn extract_date_from_frontmatter(frontmatter: &str) -> Option { /// Extract YYYY-MM-DD pattern from a filename. 
fn extract_date_from_filename(filename: &str) -> Option { - // Look for YYYY-MM-DD pattern anywhere in the filename + // Look for YYYY-MM-DD pattern anywhere in the filename. + // Only slice at UTF-8 char boundaries to avoid panics on multi-byte filenames. let bytes = filename.as_bytes(); if bytes.len() < 10 { return None; } for i in 0..=bytes.len() - 10 { + // Skip windows whose start or end falls inside a multi-byte character + if !filename.is_char_boundary(i) || !filename.is_char_boundary(i + 10) { + continue; + } let candidate = &filename[i..i + 10]; if candidate.as_bytes()[4] == b'-' && candidate.as_bytes()[7] == b'-'