From f0fb084da4deb8cf3c0522e3af6be49beb5a1f5c Mon Sep 17 00:00:00 2001
From: danpark
Date: Tue, 12 May 2026 02:13:56 -0400
Subject: [PATCH] Escape time-like tokens in FTS queries

---
 lancedb_store.py     |  7 ++++++-
 tests/test_search.py | 25 +++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/lancedb_store.py b/lancedb_store.py
index 7d9115b..56671b2 100644
--- a/lancedb_store.py
+++ b/lancedb_store.py
@@ -125,6 +125,11 @@ def _sql_escape(value: str) -> str:
         """Escape single quotes for safe use in SQL WHERE clauses."""
         return value.replace("'", "''")
 
+    @staticmethod
+    def _escape_fts_query(query: str) -> str:
+        """Escape natural-language tokens Tantivy misreads as field syntax."""
+        return re.sub(r"(? None:
         """Raise ValueError if *key* is not a safe SQL identifier."""
@@ -637,7 +642,7 @@ def keyword_search(
             table = self._vs.table
         except TableNotFoundError:
             return []  # Table not created yet — legitimately empty
-        q = table.search(query, query_type="fts")
+        q = table.search(self._escape_fts_query(query), query_type="fts")
         if where:
             q = q.where(where, prefilter=True)
         rows = q.limit(top_k).to_list()
diff --git a/tests/test_search.py b/tests/test_search.py
index ca8c9a6..f68feff 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -656,6 +656,31 @@ def test_prefilter_combined_source_and_folder():
         assert result[0].doc_id == "b.pdf"
 
 
+def test_keyword_search_escapes_time_like_tokens():
+    """FTS keyword search should handle time-like natural-language tokens."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        vec = [1.0] + [0.0] * 767
+        nodes = [
+            _make_node(
+                "appt.md",
+                "c:0",
+                "Appointment at 10:00 for the unit showing",
+                vec,
+                source_name="comm_messages",
+                status="active",
+            ),
+        ]
+        store = _build_store_with_fts(tmpdir, nodes)
+
+        hits = store.keyword_search(
+            "10:00 showing",
+            top_k=10,
+            where="lower(metadata.source_name) = 'comm_messages'",
+        )
+
+        assert [hit.doc_id for hit in hits] == ["appt.md"]
+
+
 def test_prefilter_complex_filter_ast():
     """Complex filter AST should restrict results before search scoring."""
     with tempfile.TemporaryDirectory() as tmpdir: