diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..bb178984d
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,3 @@
+# Auto-generated files — collapsed in GitHub PR diffs
+src/db/index/column/fts_column/gen/** linguist-generated=true
+src/db/sqlengine/antlr/gen/** linguist-generated=true
diff --git a/.gitmodules b/.gitmodules
index 51934dfed..2f501c34b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -40,3 +40,12 @@
 [submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
 	path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
 	url = https://github.com/VectorDB-NTU/RaBitQ-Library.git
+[submodule "thirdparty/cppjieba/cppjieba-5.6.7"]
+	path = thirdparty/cppjieba/cppjieba-5.6.7
+	url = https://github.com/yanyiwu/cppjieba.git
+[submodule "thirdparty/FastPFOR/FastPFOR-0.4.0"]
+	path = thirdparty/FastPFOR/FastPFOR-0.4.0
+	url = https://github.com/fast-pack/FastPFOR.git
+[submodule "thirdparty/limonp/limonp-v1.0.2"]
+	path = thirdparty/limonp/limonp-v1.0.2
+	url = https://github.com/yanyiwu/limonp.git
diff --git a/python/tests/test_fts_query.py b/python/tests/test_fts_query.py
new file mode 100644
index 000000000..b3e132bd0
--- /dev/null
+++ b/python/tests/test_fts_query.py
@@ -0,0 +1,158 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for FTS (Full-Text Search) query support in the Python SDK."""
+
+import pickle
+
+import pytest
+
+from zvec.model.param.query import Fts, Query
+
+
+class TestFtsQueryValidation:
+    """Test FTS parameter validation in Query dataclass."""
+
+    def test_fts_query_string_only(self):
+        """Query with only query_string in Fts should be valid."""
+        q = Query(
+            field_name="content", fts=Fts(query_string='+hello -world "exact phrase"')
+        )
+        q._validate()
+        assert q.fts.query_string == '+hello -world "exact phrase"'
+        assert q.fts.match_string is None
+        assert q.has_fts() is True
+
+    def test_fts_match_string_only(self):
+        """Query with only match_string in Fts should be valid."""
+        q = Query(field_name="content", fts=Fts(match_string="machine learning"))
+        q._validate()
+        assert q.fts.match_string == "machine learning"
+        assert q.fts.query_string is None
+        assert q.has_fts() is True
+
+    def test_fts_query_string_and_match_string_mutually_exclusive(self):
+        """Cannot provide both query_string and match_string in Fts."""
+        q = Query(
+            field_name="content",
+            fts=Fts(query_string="+hello", match_string="hello world"),
+        )
+        with pytest.raises(ValueError, match="mutually exclusive"):
+            q._validate()
+
+    def test_no_fts(self):
+        """Query without FTS fields should have has_fts() == False."""
+        q = Query(field_name="embedding", vector=[0.1, 0.2, 0.3])
+        assert q.has_fts() is False
+
+    def test_vector_and_fts_mutually_exclusive(self):
+        """Cannot combine vector search with FTS in a single Query."""
+        q = Query(
+            field_name="embedding",
+            vector=[0.1, 0.2, 0.3],
+            fts=Fts(match_string="deep learning"),
+        )
+        with pytest.raises(ValueError, match="Cannot combine fts with vector search"):
+            q._validate()
+
+    def test_fts_without_vector_or_id(self):
+        """Query with only FTS (no vector, no id) should be valid."""
+        q = Query(field_name="content", fts=Fts(query_string="hello"))
+        q._validate()
+        assert q.has_vector() is False
+        assert q.has_id() is False
+        assert q.has_fts() is True
+
+
+class TestFtsQueryBinding:
+    """Test FTS binding layer (_FtsQuery)."""
+
+    def test_import_fts_query(self):
+        """_FtsQuery should be importable from _zvec.param."""
+        from _zvec.param import _FtsQuery
+
+        fts = _FtsQuery()
+        assert fts.query_string == ""
+        assert fts.match_string == ""
+
+    def test_fts_query_set_fields(self):
+        """Setting fields on _FtsQuery should work."""
+        from _zvec.param import _FtsQuery
+
+        fts = _FtsQuery()
+        fts.query_string = "+hello -world"
+        assert fts.query_string == "+hello -world"
+
+        fts2 = _FtsQuery()
+        fts2.match_string = "machine learning"
+        assert fts2.match_string == "machine learning"
+
+    def test_fts_query_pickle(self):
+        """_FtsQuery should support pickling."""
+        from _zvec.param import _FtsQuery
+
+        fts = _FtsQuery()
+        fts.query_string = "+vector search"
+        fts.match_string = ""
+
+        data = pickle.dumps(fts)
+        restored = pickle.loads(data)
+        assert restored.query_string == "+vector search"
+        assert restored.match_string == ""
+
+    def test_vector_query_fts_field(self):
+        """_VectorQuery should have fts_query field."""
+        from _zvec.param import _FtsQuery, _VectorQuery
+
+        vq = _VectorQuery()
+        # fts_query should be None by default (optional)
+        assert vq.fts_query is None
+
+        # set fts_query
+        fts = _FtsQuery()
+        fts.query_string = "hello"
+        vq.fts_query = fts
+        assert vq.fts_query is not None
+        assert vq.fts_query.query_string == "hello"
+
+    def test_vector_query_pickle_with_fts(self):
+        """_VectorQuery with fts_query should survive pickling."""
+        from _zvec.param import _FtsQuery, _VectorQuery
+
+        vq = _VectorQuery()
+        vq.topk = 10
+        vq.field_name = "embedding"
+        fts = _FtsQuery()
+        fts.match_string = "test query"
+        vq.fts_query = fts
+
+        data = pickle.dumps(vq)
+        restored = pickle.loads(data)
+        assert restored.topk == 10
+        assert restored.field_name == "embedding"
+        assert restored.fts_query is not None
+        assert restored.fts_query.match_string == "test query"
+
+    def test_vector_query_pickle_without_fts(self):
+        """_VectorQuery without fts_query should survive pickling."""
+        from _zvec.param import _VectorQuery
+
+        vq = _VectorQuery()
+        vq.topk = 5
+        vq.field_name = "vec"
+
+        data = pickle.dumps(vq)
+        restored = pickle.loads(data)
+        assert restored.topk == 5
+        assert restored.field_name == "vec"
+        assert restored.fts_query is None
diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py
index 705f3e366..1f5044f66 100644
--- a/python/zvec/__init__.py
+++ b/python/zvec/__init__.py
@@ -56,11 +56,14 @@
 from .model.doc import Doc
 
 # —— Query & index parameters ——
+# —— FTS params (C++ binding) ——
 from .model.param import (
     AddColumnOption,
     AlterColumnOption,
     CollectionOption,
     FlatIndexParam,
+    FtsIndexParam,
+    FtsQueryParam,
     HnswIndexParam,
     HnswQueryParam,
     HnswRabitqIndexParam,
@@ -73,7 +76,7 @@
     VamanaIndexParam,
     VamanaQueryParam,
 )
-from .model.param.query import Query, VectorQuery
+from .model.param.query import Fts, Query, VectorQuery
 
 # —— Schema & field definitions ——
 from .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema
diff --git a/python/zvec/executor/query_executor.py b/python/zvec/executor/query_executor.py
index 3e54e37d2..b2d2ea847 100644
--- a/python/zvec/executor/query_executor.py
+++ b/python/zvec/executor/query_executor.py
@@ -20,7 +20,7 @@
 
 import numpy as np
 from _zvec import _Collection
-from _zvec.param import _VectorQuery
+from _zvec.param import _FtsQuery, _VectorQuery
 
 from ..extension import ReRanker, RrfReRanker, WeightedReRanker
 from ..model.convert import convert_to_py_doc
@@ -141,6 +141,14 @@ def _do_build_query_wo_vector(self, ctx: QueryContext) -> _VectorQuery:
             core_vector.output_fields = ctx.output_fields
         return core_vector
 
+    def _do_build_fts_query(self, query: Query, core_vector: _VectorQuery) -> None:
+        """Set FTS query on core_vector if the query has FTS parameters."""
+        if query.has_fts():
+            fts = _FtsQuery()
+            fts.query_string = query.fts.query_string or ""
+            fts.match_string = query.fts.match_string or ""
+            core_vector.fts_query = fts
+
     def _do_build_query_with_vector(
         self, ctx: QueryContext, query: Query, collection: _Collection
     ) -> _VectorQuery:
@@ -149,6 +157,16 @@ def _do_build_query_with_vector(
         if query.param:
             core_vector.query_params = query.param
 
+        # set FTS query if provided
+        self._do_build_fts_query(query, core_vector)
+
+        # set output_fields
+        core_vector.output_fields = ctx.output_fields
+
+        # FTS-only query (no vector, no id) — skip vector resolution
+        if query.has_fts() and not query.has_vector() and not query.has_id():
+            return core_vector
+
         vector_schema = (
             self._schema.vector(query.field_name) if query else self._schema.vectors[0]
         )
@@ -156,18 +174,17 @@ def _do_build_query_with_vector(
         if vector_schema is None:
             raise ValueError("No vector field found")
 
-        # set output_fields
-        core_vector.output_fields = ctx.output_fields
-
         # set vector
         if query.has_vector():
             vec_data = query.vector
-        else:
+        elif query.has_id():
             fetched = collection.Fetch([query.id])
             doc = next(iter(fetched.values()))
             if not doc:
                 return core_vector
             vec_data = doc.get_any(vector_schema.name, vector_schema.data_type)
+        else:
+            return core_vector
 
         target_dtype = DTYPE_MAP.get(vector_schema.data_type.value)
         core_vector.set_vector(
diff --git a/python/zvec/model/__init__.py b/python/zvec/model/__init__.py
index f193f10bb..7d5b0689b 100644
--- a/python/zvec/model/__init__.py
+++ b/python/zvec/model/__init__.py
@@ -15,7 +15,7 @@
 
 from .collection import Collection
 from .doc import Doc
-from .param.query import Query, VectorQuery
+from .param.query import Fts, Query, VectorQuery
 from .schema.collection_schema import CollectionSchema
 from .schema.field_schema import FieldSchema
 
@@ -24,6 +24,7 @@
     "CollectionSchema",
     "Doc",
     "FieldSchema",
+    "Fts",
     "Query",
     "VectorQuery",
 ]
diff --git a/python/zvec/model/param/__init__.py b/python/zvec/model/param/__init__.py
index 5758218d9..05909e90c 100644
--- a/python/zvec/model/param/__init__.py
+++ b/python/zvec/model/param/__init__.py
@@ -18,6 +18,8 @@
     AlterColumnOption,
     CollectionOption,
     FlatIndexParam,
+    FtsIndexParam,
+    FtsQueryParam,
     HnswIndexParam,
     HnswQueryParam,
     HnswRabitqIndexParam,
@@ -36,6 +38,8 @@
     "AlterColumnOption",
     "CollectionOption",
     "FlatIndexParam",
+    "FtsIndexParam",
+    "FtsQueryParam",
     "HnswIndexParam",
     "HnswQueryParam",
     "HnswRabitqIndexParam",
diff --git a/python/zvec/model/param/query.py b/python/zvec/model/param/query.py
index f14c28509..f2c15ecd2 100644
--- a/python/zvec/model/param/query.py
+++ b/python/zvec/model/param/query.py
@@ -20,26 +20,42 @@
 from ...common import VectorType
 from . import HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam
 
-__all__ = ["Query", "VectorQuery"]
+__all__ = ["Fts", "Query", "VectorQuery"]
+
+
+@dataclass(frozen=True)
+class Fts:
+    """Full-text search query parameters.
+
+    Attributes:
+        query_string (Optional[str]): FTS query expression
+            (e.g. '+vector -slow "exact phrase"'). Mutually exclusive with match_string.
+        match_string (Optional[str]): Natural language match string,
+            tokenized and combined using the default operator.
+            Mutually exclusive with query_string.
+    """
+
+    query_string: Optional[str] = None
+    match_string: Optional[str] = None
 
 
 @dataclass(frozen=True)
 class Query:
     """Represents a search query for a specific field in a collection.
 
-    A `Query` can be constructed using either a document ID (to look up
-    its vector) or an explicit vector. It may optionally include index-specific
-    query parameters to control search behavior (e.g., `ef` for HNSW, `nprobe` for IVF).
+    A `Query` can be constructed for either vector search or full-text search,
+    but not both simultaneously.
 
-    Exactly one of `id` or `vector` should be provided. If both are given,
-    behavior is implementation-defined (typically `id` takes precedence).
+    For vector search, provide `id` or `vector` (and optionally `param`).
+    For FTS, provide `fts`.
 
     Attributes:
         field_name (str): Name of the field to query.
         id (Optional[str], optional): Document ID to fetch vector from. Default is None.
         vector (VectorType, optional): Explicit query vector. Default is None.
         param (Optional[Union[HnswQueryParam, IVFQueryParam]], optional):
-            Index-specific query parameters. Default is None.
+            Index-specific query parameters for vector search. Default is None.
+        fts (Optional[Fts], optional): Full-text search parameters. Default is None.
 
     Examples:
         >>> import zvec
@@ -51,12 +67,18 @@ class Query:
         ...     vector=[0.1, 0.2, 0.3],
         ...     param=HnswQueryParam(ef=300)
         ... )
+        >>> # FTS query
+        >>> q3 = zvec.Query(
+        ...     field_name="content",
+        ...     fts=Fts(match_string="machine learning")
+        ... )
     """
 
     field_name: str
     id: Optional[str] = None
     vector: VectorType = None
     param: Optional[Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam]] = None
+    fts: Optional[Fts] = None
 
     def has_id(self) -> bool:
         """Check if the query is based on a document ID.
@@ -74,11 +96,32 @@ def has_vector(self) -> bool:
         """
         return self.vector is not None and len(self.vector) > 0
 
+    def has_fts(self) -> bool:
+        """Check if the query contains an FTS (full-text search) condition.
+
+        Returns:
+            bool: True if `fts` is set with a query_string or match_string.
+        """
+        if self.fts is not None:
+            return bool(self.fts.query_string) or bool(self.fts.match_string)
+        return False
+
     def _validate(self) -> None:
         if self.field_name is None:
             raise ValueError("Field name cannot be empty")
         if self.id and self.vector:
             raise ValueError("Cannot provide both id and vector")
+        if self.has_fts() and (
+            self.has_vector() or self.has_id() or self.param is not None
+        ):
+            raise ValueError(
+                "Cannot combine fts with vector search fields (id/vector/param) in a single Query"
+            )
+        if self.fts is not None and self.fts.query_string and self.fts.match_string:
+            raise ValueError(
+                "Cannot provide both query_string and match_string in Fts; "
+                "they are mutually exclusive"
+            )
 
 
 class VectorQuery(Query):
diff --git a/python/zvec/zvec.py b/python/zvec/zvec.py
index 114fb49c9..da44699e8 100644
--- a/python/zvec/zvec.py
+++ b/python/zvec/zvec.py
@@ -38,6 +38,7 @@ def init(
     optimize_threads: Optional[int] = None,
     invert_to_forward_scan_ratio: Optional[float] = None,
     brute_force_by_keys_ratio: Optional[float] = None,
+    fts_brute_force_by_keys_ratio: Optional[float] = None,
     memory_limit_mb: Optional[int] = None,
 ) -> None:
     """Initialize Zvec with configuration options.
@@ -88,6 +89,12 @@ def init(
             Threshold to use brute-force key lookup over index.
             Lower → prefer index; higher → prefer brute-force.
             Range: [0.0, 1.0]. Default: ``0.1``.
+        fts_brute_force_by_keys_ratio (Optional[float], optional):
+            Threshold to switch FTS scan from posting-driven to
+            candidate-driven (brute-force) when the invert filter is
+            highly selective. Independent from ``brute_force_by_keys_ratio``
+            because per-candidate FTS cost is higher.
+            Range: [0.0, 1.0]. Default: ``0.05``.
         memory_limit_mb (Optional[int], optional):
             Soft memory cap in MB. Zvec may throttle or fail operations
             approaching this limit.
@@ -157,6 +164,8 @@ def init(
         config_dict["invert_to_forward_scan_ratio"] = invert_to_forward_scan_ratio
     if brute_force_by_keys_ratio is not None:
         config_dict["brute_force_by_keys_ratio"] = brute_force_by_keys_ratio
+    if fts_brute_force_by_keys_ratio is not None:
+        config_dict["fts_brute_force_by_keys_ratio"] = fts_brute_force_by_keys_ratio
     if memory_limit_mb is not None:
         config_dict["memory_limit_mb"] = memory_limit_mb
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a3787dc6b..807c86208 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -138,10 +138,10 @@ target_include_directories(zvec_shared
 # Strip symbols in release builds to reduce library size
 if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
     if(UNIX AND NOT APPLE)
-        add_custom_command(TARGET zvec_shared POST_BUILD
-            COMMAND ${CMAKE_STRIP} $<TARGET_FILE:zvec_shared>
-            COMMENT "Stripping symbols from libzvec.so"
-        )
+        # add_custom_command(TARGET zvec_shared POST_BUILD
+        #     COMMAND ${CMAKE_STRIP} $<TARGET_FILE:zvec_shared>
+        #     COMMENT "Stripping symbols from libzvec.so"
+        # )
     elseif(APPLE)
         add_custom_command(TARGET zvec_shared POST_BUILD
             COMMAND /usr/bin/strip -x $<TARGET_FILE:zvec_shared>
diff --git a/src/binding/c/c_api.cc b/src/binding/c/c_api.cc
index 2c3489ab9..3e2725fab 100644
--- a/src/binding/c/c_api.cc
+++ b/src/binding/c/c_api.cc
@@ -627,6 +627,27 @@ float zvec_config_data_get_brute_force_by_keys_ratio(
   return cpp_config->brute_force_by_keys_ratio;
 }
 
+zvec_error_code_t zvec_config_data_set_fts_brute_force_by_keys_ratio(
+    zvec_config_data_t *config, float ratio) {
+  if (!config) {
+    SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, "Config pointer is null");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+  auto *cpp_config = reinterpret_cast<zvec::GlobalConfig::ConfigData *>(config);
+  cpp_config->fts_brute_force_by_keys_ratio = ratio;
+  return ZVEC_OK;
+}
+
+float zvec_config_data_get_fts_brute_force_by_keys_ratio(
+    const zvec_config_data_t *config) {
+  if (!config) {
+    return 0.0f;
+  }
+  auto *cpp_config =
+      reinterpret_cast<const zvec::GlobalConfig::ConfigData *>(config);
+  return cpp_config->fts_brute_force_by_keys_ratio;
+}
+
 zvec_error_code_t zvec_config_data_set_optimize_thread_count(
     zvec_config_data_t *config, uint32_t thread_count) {
   if (!config) {
diff --git a/src/binding/python/model/common/python_config.cc b/src/binding/python/model/common/python_config.cc
index bbcbb5bdb..9b8666a0d 100644
--- a/src/binding/python/model/common/python_config.cc
+++ b/src/binding/python/model/common/python_config.cc
@@ -177,6 +177,17 @@ void ZVecPyConfig::Initialize(pybind11::module_ &m) {
       data.brute_force_by_keys_ratio = static_cast<float>(v);
     }
 
+    // set fts_brute_force_by_keys_ratio
+    if (has_key(config_dict, "fts_brute_force_by_keys_ratio")) {
+      auto v =
+          get_if<double>(config_dict, "fts_brute_force_by_keys_ratio").value();
+      if (v < 0.0 || v > 1.0) {
+        throw py::value_error(
+            "fts_brute_force_by_keys_ratio must be in [0.0, 1.0]");
+      }
+      data.fts_brute_force_by_keys_ratio = static_cast<float>(v);
+    }
+
     // initialize (contains validate)
     Status status = GlobalConfig::Instance().Initialize(data);
     if (!status.ok()) {
diff --git a/src/binding/python/model/param/python_param.cc b/src/binding/python/model/param/python_param.cc
index 268246cd6..d9186693f 100644
--- a/src/binding/python/model/param/python_param.cc
+++ b/src/binding/python/model/param/python_param.cc
@@ -35,6 +35,8 @@ static std::string index_type_to_string(const IndexType type) {
       return "HNSW_RABITQ";
     case IndexType::VAMANA:
       return "VAMANA";
+    case IndexType::FTS:
+      return "FTS";
     default:
       return "UNDEFINED";
   }
@@ -251,6 +253,88 @@ Note: Prefix search is always enabled regardless of this setting.
                                                        t[1].cast<bool>());
           }));
 
+  // binding fts index params
+  py::class_<FtsIndexParams, IndexParams, std::shared_ptr<FtsIndexParams>>
+      fts_index_params(m, "FtsIndexParam", R"pbdoc(
+Parameters for configuring a full-text search (FTS) index.
+
+Controls the tokenizer pipeline used during indexing and querying.
+
+Attributes:
+    type (IndexType): Always ``IndexType.FTS``.
+    tokenizer_name (str): Name of the tokenizer (e.g., "standard", "jieba").
+        Default is "standard".
+    filters (list[str]): List of token filter names applied after tokenization.
+        Default is ["lowercase"].
+    extra_params (str): Additional parameters passed to the tokenizer.
+        Default is "".
+
+Examples:
+    >>> params = FtsIndexParam(tokenizer_name="jieba", filters=["lowercase"])
+    >>> print(params.tokenizer_name)
+    jieba
+)pbdoc");
+  fts_index_params
+      .def(py::init<std::string, std::vector<std::string>, std::string>(),
+           py::arg("tokenizer_name") = "standard",
+           py::arg("filters") = std::vector<std::string>{"lowercase"},
+           py::arg("extra_params") = "",
+           R"pbdoc(
+Constructs an FtsIndexParam instance.
+
+Args:
+    tokenizer_name (str, optional): Tokenizer name. Defaults to "standard".
+    filters (list[str], optional): Token filter names. Defaults to ["lowercase"].
+    extra_params (str, optional): Extra tokenizer parameters. Defaults to "".
+)pbdoc")
+      .def_property_readonly("tokenizer_name", &FtsIndexParams::tokenizer_name,
+                             "str: Name of the tokenizer.")
+      .def_property_readonly("filters", &FtsIndexParams::filters,
+                             "list[str]: Token filter names.")
+      .def_property_readonly("extra_params", &FtsIndexParams::extra_params,
+                             "str: Additional tokenizer parameters.")
+      .def(
+          "to_dict",
+          [](const FtsIndexParams &self) -> py::dict {
+            py::dict dict;
+            dict["type"] = index_type_to_string(self.type());
+            dict["tokenizer_name"] = self.tokenizer_name();
+            dict["filters"] = self.filters();
+            dict["extra_params"] = self.extra_params();
+            return dict;
+          },
+          "Convert to dictionary with all fields")
+      .def("__repr__",
+           [](const FtsIndexParams &self) -> std::string {
+             std::string filters_str = "[";
+             for (size_t i = 0; i < self.filters().size(); ++i) {
+               if (i > 0) {
+                 filters_str += ",";
+               }
+               filters_str += "\"" + self.filters()[i] + "\"";
+             }
+             filters_str += "]";
+             return "{"
+                    "\"type\":\"" +
+                    index_type_to_string(self.type()) +
+                    "\", \"tokenizer_name\":\"" + self.tokenizer_name() +
+                    "\", \"filters\":" + filters_str + ", \"extra_params\":\"" +
+                    self.extra_params() + "\"}";
+           })
+      .def(py::pickle(
+          [](const FtsIndexParams &self) {
+            return py::make_tuple(self.tokenizer_name(), self.filters(),
+                                  self.extra_params());
+          },
+          [](py::tuple t) {
+            if (t.size() != 3) {
+              throw std::runtime_error("Invalid state for FtsIndexParams");
+            }
+            return std::make_shared<FtsIndexParams>(
+                t[0].cast<std::string>(), t[1].cast<std::vector<std::string>>(),
+                t[2].cast<std::string>());
+          }));
+
   // binding base vector index params
   py::class_<VectorIndexParams, IndexParams, std::shared_ptr<VectorIndexParams>>
       vector_params(m, "VectorIndexParam", R"pbdoc(
@@ -1102,6 +1186,64 @@ Constructs a VamanaQueryParam instance.
             obj->set_is_using_refiner(t[3].cast<bool>());
             return obj;
           }));
+
+  // binding fts query params
+  py::class_<FtsQueryParams, QueryParams, std::shared_ptr<FtsQueryParams>>
+      fts_query_params(m, "FtsQueryParam", R"pbdoc(
+Query parameters for full-text search (FTS) index.
+
+Controls the default boolean operator used to combine adjacent bare terms
+in a query string.
+
+Attributes:
+    type (IndexType): Always ``IndexType.FTS``.
+    default_operator (str): Default boolean operator for adjacent bare terms.
+        Supported values (case-insensitive): "OR" (default), "AND".
+
+Examples:
+    >>> params = FtsQueryParam(default_operator="AND")
+    >>> print(params.default_operator)
+    AND
+)pbdoc");
+  fts_query_params
+      .def(py::init([](const std::string &default_operator) {
+             auto params = std::make_shared<FtsQueryParams>();
+             if (!default_operator.empty()) {
+               params->set_default_operator(default_operator);
+             }
+             return params;
+           }),
+           py::arg("default_operator") = "",
+           R"pbdoc(
+Constructs an FtsQueryParam instance.
+
+Args:
+    default_operator (str, optional): Default boolean operator for adjacent
+        bare terms. Supported: "OR", "AND". Defaults to "" (uses engine default).
+)pbdoc")
+      .def_property_readonly("default_operator",
+                             &FtsQueryParams::default_operator,
+                             "str: Default boolean operator for bare terms.")
+      .def("__repr__",
+           [](const FtsQueryParams &self) -> std::string {
+             return "{"
+                    "\"type\":\"" +
+                    index_type_to_string(self.type()) +
+                    "\", \"default_operator\":\"" + self.default_operator() +
+                    "\"}";
+           })
+      .def(py::pickle(
+          [](const FtsQueryParams &self) {
+            return py::make_tuple(self.default_operator());
+          },
+          [](py::tuple t) {
+            if (t.size() != 1) {
+              throw std::runtime_error("Invalid state for FtsQueryParams");
+            }
+            auto obj = std::make_shared<FtsQueryParams>();
+            obj->set_default_operator(t[0].cast<std::string>());
+            return obj;
+          }));
 }
 
 void ZVecPyParams::bind_options(py::module_ &m) {  // binding collection options
@@ -1372,6 +1514,24 @@ Constructs an AlterColumnOption instance.
 }
 
 void ZVecPyParams::bind_vector_query(py::module_ &m) {
+  // bind FtsQuery
+  py::class_<FtsQuery>(m, "_FtsQuery")
+      .def(py::init<>())
+      .def_readwrite("query_string", &FtsQuery::query_string_)
+      .def_readwrite("match_string", &FtsQuery::match_string_)
+      .def(py::pickle(
+          [](const FtsQuery &self) {
+            return py::make_tuple(self.query_string_, self.match_string_);
+          },
+          [](py::tuple t) {
+            if (t.size() != 2)
+              throw std::runtime_error("Invalid pickle data for FtsQuery");
+            FtsQuery obj{};
+            obj.query_string_ = t[0].cast<std::string>();
+            obj.match_string_ = t[1].cast<std::string>();
+            return obj;
+          }));
+
   py::class_<VectorQuery>(m, "_VectorQuery")
       .def(py::init<>())
       // properties
@@ -1381,6 +1541,21 @@ void ZVecPyParams::bind_vector_query(py::module_ &m) {
       .def_readwrite("include_vector", &VectorQuery::include_vector_)
       .def_readwrite("query_params", &VectorQuery::query_params_)
       .def_readwrite("output_fields", &VectorQuery::output_fields_)
+      .def_property(
+          "fts_query",
+          [](const VectorQuery &self) -> py::object {
+            if (self.fts_query_.has_value()) {
+              return py::cast(self.fts_query_.value());
+            }
+            return py::none();
+          },
+          [](VectorQuery &self, const py::object &obj) {
+            if (obj.is_none()) {
+              self.fts_query_ = std::nullopt;
+            } else {
+              self.fts_query_ = obj.cast<FtsQuery>();
+            }
+          })
       // vector
       .def("set_vector",
            [](VectorQuery &self, const FieldSchema &field_schema,
@@ -1588,11 +1763,16 @@ void ZVecPyParams::bind_vector_query(py::module_ &m) {
             return py::make_tuple(
                 self.topk_, self.field_name_, self.query_vector_,
                 self.query_sparse_indices_, self.query_sparse_values_,
-                self.filter_, self.include_vector_, self.output_fields_,
-                self.query_params_ ? py::cast(self.query_params_) : py::none());
+                self.filter_, self.include_vector_,
+                self.output_fields_.has_value()
+                    ? py::cast(self.output_fields_.value())
+                    : py::none(),
+                self.query_params_ ? py::cast(self.query_params_) : py::none(),
+                self.fts_query_.has_value() ? py::cast(self.fts_query_.value())
+                                            : py::none());
           },
           [](py::tuple t) {
-            if (t.size() != 9)
+            if (t.size() != 10)
               throw std::runtime_error("Invalid pickle data for VectorQuery");
 
             VectorQuery obj{};
@@ -1603,11 +1783,16 @@ void ZVecPyParams::bind_vector_query(py::module_ &m) {
             obj.query_sparse_values_ = t[4].cast<std::string>();
             obj.filter_ = t[5].cast<std::string>();
             obj.include_vector_ = t[6].cast<bool>();
-            obj.output_fields_ = t[7].cast<std::vector<std::string>>();
 
+            if (!t[7].is_none()) {
+              obj.output_fields_ = t[7].cast<std::vector<std::string>>();
+            }
             if (!t[8].is_none()) {
               obj.query_params_ = t[8].cast<QueryParams::Ptr>();
             }
+            if (!t[9].is_none()) {
+              obj.fts_query_ = t[9].cast<FtsQuery>();
+            }
             return obj;
           }));
 }
diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt
index b2689278a..4a756a880 100644
--- a/src/db/CMakeLists.txt
+++ b/src/db/CMakeLists.txt
@@ -13,6 +13,23 @@ cc_directory(sqlengine)
 
 file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h)
 
+# Ensure bitpacked_simd_sse41.cc is compiled with SSE4.1 flag and
+# bitpacked_simd_avx2.cc with AVX2 flag in the packed zvec_db target as well
+# (they are also compiled separately in zvec_index).
+if(NOT ANDROID AND AUTO_DETECT_ARCH)
+    if(HOST_ARCH MATCHES "^(x86|x64)$")
+        setup_compiler_march_for_x86(_DB_MARCH_SSE _DB_MARCH_AVX2 _DB_MARCH_AVX512 _DB_MARCH_AVX512FP16)
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/index/column/fts_column/posting/bitpacked_simd_sse41.cc
+            PROPERTIES COMPILE_FLAGS "${_DB_MARCH_SSE}"
+        )
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/index/column/fts_column/posting/bitpacked_simd_avx2.cc
+            PROPERTIES COMPILE_FLAGS "${_DB_MARCH_AVX2}"
+        )
+    endif()
+endif()
+
 cc_library(
   NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED
   SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc
@@ -26,6 +43,8 @@ cc_library(
     rocksdb
     antlr4
     libprotobuf
+    FastPFOR
+    cppjieba
     Arrow::arrow_static
     Arrow::arrow_compute
     Arrow::arrow_dataset
diff --git a/src/db/collection.cc b/src/db/collection.cc
index 4e9fa2275..b19f9d2aa 100644
--- a/src/db/collection.cc
+++ b/src/db/collection.cc
@@ -1582,8 +1582,13 @@ Result<DocPtrList> CollectionImpl::Query(const VectorQuery &query) const {
   CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);
 
   VectorQuery sanitized = query;
-  auto s = sanitized.validate_and_sanitize(
-      schema_->get_vector_field(sanitized.field_name_));
+  // When field_name_ is set, use get_field to retrieve the schema uniformly.
+  // validate_and_sanitize checks that the field type matches the query type
+  // (FTS query requires an FTS field, vector query requires a vector field).
+  const FieldSchema *field_schema =
+      sanitized.field_name_.empty() ? nullptr
+                                    : schema_->get_field(sanitized.field_name_);
+  auto s = sanitized.validate_and_sanitize(field_schema);
   CHECK_RETURN_STATUS_EXPECTED(s);
 
   auto segments = get_all_segments();
diff --git a/src/db/common/config.cc b/src/db/common/config.cc
index 5938f5375..13d1c3607 100644
--- a/src/db/common/config.cc
+++ b/src/db/common/config.cc
@@ -37,6 +37,7 @@ GlobalConfig::ConfigData::ConfigData()
       query_thread_count(CgroupUtil::getCpuLimit()),
       invert_to_forward_scan_ratio(0.9),
       brute_force_by_keys_ratio(0.1),
+      fts_brute_force_by_keys_ratio(0.05),
       optimize_thread_count(CgroupUtil::getCpuLimit()) {}
 
 Status GlobalConfig::Validate(const ConfigData &config) const {
@@ -69,6 +70,13 @@ Status GlobalConfig::Validate(const ConfigData &config) const {
         "brute_force_by_keys_ratio must be between 0 and 1");
   }
 
+  // Validate fts_brute_force_by_keys_ratio (should be between 0 and 1)
+  if (config.fts_brute_force_by_keys_ratio < 0.0f ||
+      config.fts_brute_force_by_keys_ratio > 1.0f) {
+    return Status::InvalidArgument(
+        "fts_brute_force_by_keys_ratio must be between 0 and 1");
+  }
+
   // Validate optimize thread count
   if (config.optimize_thread_count == 0) {
     return Status::InvalidArgument(
diff --git a/src/db/common/constants.h b/src/db/common/constants.h
index f987aa289..3aa0512a5 100644
--- a/src/db/common/constants.h
+++ b/src/db/common/constants.h
@@ -80,5 +80,11 @@ const std::string INVERT_KEY_SEALED{"$ZVEC$SEALED"};
 
 const uint32_t INVERT_ID_LIST_SIZE_THRESHOLD = 3;
 
+// FTS (Full-Text Search) column family name suffixes and shared CF name
+constexpr const char *kFtsPositionsSuffix = "$POSITIONS";
+constexpr const char *kFtsTfSuffix = "$TF";
+constexpr const char *kFtsMaxTfSuffix = "$MAX_TF";
+constexpr const char *kFtsDocLenSuffix = "$DOC_LEN";
+constexpr const char *kFtsStatCfName = "$FTS_STAT";
 
 }  // namespace zvec
diff --git a/src/db/common/file_helper.h b/src/db/common/file_helper.h
index 065c80bd7..c983f4a86 100644
--- a/src/db/common/file_helper.h
+++ b/src/db/common/file_helper.h
@@ -139,6 +139,16 @@ class FileHelper {
         ailego::StringHelper::Concat("scalar.index.", block_id, ".rocksdb"));
   }
 
+  // e.g.: **/seg1/fts.rocksdb
+  static const std::string MakeFtsIndexPath(const std::string &path,
+                                            uint32_t seg_id) {
+    return ailego::FileHelper::PathJoin(path, seg_id, "fts.rocksdb");
+  }
+
+  static const std::string MakeFtsIndexPath(const std::string &seg_path) {
+    return ailego::FileHelper::PathJoin(seg_path, "fts.rocksdb");
+  }
+
   static const std::string MakeVectorIndexPath(const std::string &path,
                                                const std::string &column,
                                                uint32_t seg_id,
diff --git a/src/db/common/rocksdb_context.cc b/src/db/common/rocksdb_context.cc
index 42867cc7e..4bad92793 100644
--- a/src/db/common/rocksdb_context.cc
+++ b/src/db/common/rocksdb_context.cc
@@ -15,6 +15,8 @@
 
 #include "rocksdb_context.h"
 #include <rocksdb/filter_policy.h>
+#include <rocksdb/memtablerep.h>
+#include <rocksdb/slice_transform.h>
 #include <rocksdb/statistics.h>
 #include <rocksdb/table.h>
 #include <rocksdb/utilities/checkpoint.h>
@@ -27,39 +29,14 @@ namespace zvec {
 Status RocksdbContext::create(
     const std::string &db_path,
     std::shared_ptr<rocksdb::MergeOperator> merge_op) {
-  std::lock_guard<std::mutex> lock(mutex_);
-
-  if (db_) {
-    LOG_ERROR("RocksDB[%s] is already opened", db_path_.c_str());
-    return Status::PermissionDenied();
-  }
-
-  if (auto s = validate_and_set_db_path(db_path, false); !s.ok()) {
-    return s;
-  }
-
-  create_opts_.create_if_missing = true;
-  prepare_options(merge_op);
-
-  // Open RocksDB
-  rocksdb::DB *db;
-  if (auto s = rocksdb::DB::Open(create_opts_, db_path, &db); !s.ok()) {
-    LOG_ERROR("Failed to create RocksDB[%s], code[%d], reason[%s]",
-              db_path.c_str(), s.code(), s.ToString().c_str());
-    return Status::InternalError();
-  }
-
-  db_.reset(db);
-  read_only_ = false;
-  write_opts_.disableWAL = true;
-  LOG_DEBUG("Created RocksDB[%s]", db_path.c_str());
-  return Status::OK();
+  return create(Args{db_path, {}, std::move(merge_op), {}});
 }
 
 
-Status RocksdbContext::create(
-    const std::string &db_path, const std::vector<std::string> &column_names,
-    std::shared_ptr<rocksdb::MergeOperator> merge_op) {
+Status RocksdbContext::create(Args args) {
+  per_cf_merge_ops_ = std::move(args.per_cf_merge_ops);
+  enable_hash_skiplist_ = args.enable_hash_skiplist;
+
   std::lock_guard<std::mutex> lock(mutex_);
 
   if (db_) {
@@ -67,26 +44,24 @@ Status RocksdbContext::create(
     return Status::PermissionDenied();
   }
 
-  if (auto s = validate_and_set_db_path(db_path, false); !s.ok()) {
+  if (auto s = validate_and_set_db_path(args.db_path, false); !s.ok()) {
     return s;
   }
 
   create_opts_.create_if_missing = true;
-  prepare_options(merge_op);
+  prepare_options(std::move(args.merge_op));
 
-  // Open RocksDB
   rocksdb::DB *db;
-  rocksdb::Status s = rocksdb::DB::Open(create_opts_, db_path, &db);
+  rocksdb::Status s = rocksdb::DB::Open(create_opts_, args.db_path, &db);
   if (!s.ok()) {
     LOG_ERROR("Failed to create RocksDB[%s], code[%d], reason[%s]",
-              db_path.c_str(), s.code(), s.ToString().c_str());
+              args.db_path.c_str(), s.code(), s.ToString().c_str());
     return Status::InternalError();
   }
   db_.reset(db);
 
-  // Create column families
   bool has_default = false;
-  for (auto const &column_name : column_names) {
+  for (const auto &column_name : args.column_names) {
     if (column_name == rocksdb::kDefaultColumnFamilyName) {
       cf_handles_.push_back(db->DefaultColumnFamily());
       has_default = true;
@@ -94,10 +69,14 @@ Status RocksdbContext::create(
     }
     rocksdb::ColumnFamilyHandle *cf_handle{nullptr};
     rocksdb::ColumnFamilyOptions cf_options(create_opts_);
+    auto it = per_cf_merge_ops_.find(column_name);
+    if (it != per_cf_merge_ops_.end() && it->second) {
+      cf_options.merge_operator = it->second;
+    }
     s = db->CreateColumnFamily(cf_options, column_name, &cf_handle);
     if (!s.ok()) {
       LOG_ERROR("Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]",
-                column_name.c_str(), db_path.c_str(), s.code(),
+                column_name.c_str(), args.db_path.c_str(), s.code(),
                 s.ToString().c_str());
       delete_cf_handles();
       db->Close();
@@ -112,53 +91,28 @@ Status RocksdbContext::create(
 
   read_only_ = false;
   write_opts_.disableWAL = true;
-  LOG_DEBUG("Created RocksDB[%s]", db_path.c_str());
+  LOG_DEBUG("Created RocksDB[%s] with Args", args.db_path.c_str());
   return Status::OK();
 }
 
 
-Status RocksdbContext::open(const std::string &db_path, bool read_only,
-                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {
-  std::lock_guard<std::mutex> lock(mutex_);
-
-  if (db_) {
-    LOG_ERROR("RocksDB[%s] is already opened", db_path_.c_str());
-    return Status::PermissionDenied();
-  }
-
-  if (auto s = validate_and_set_db_path(db_path, true); !s.ok()) {
-    return s;
-  }
-
-  create_opts_.create_if_missing = false;
-  prepare_options(merge_op);
+Status RocksdbContext::create(
+    const std::string &db_path, const std::vector<std::string> &column_names,
+    std::shared_ptr<rocksdb::MergeOperator> merge_op) {
+  return create(Args{db_path, column_names, std::move(merge_op), {}});
+}
 
-  // Open RocksDB
-  rocksdb::DB *db;
-  rocksdb::Status s;
-  if (read_only) {
-    s = rocksdb::DB::OpenForReadOnly(create_opts_, db_path, &db);
-  } else {
-    s = rocksdb::DB::Open(create_opts_, db_path, &db);
-  }
-  if (!s.ok()) {
-    LOG_ERROR("Failed to open RocksDB[%s], code[%d], reason[%s]",
-              db_path.c_str(), s.code(), s.ToString().c_str());
-    return Status::InternalError();
-  }
 
-  db_.reset(db);
-  read_only_ = read_only;
-  write_opts_.disableWAL = true;
-  LOG_DEBUG("Opened RocksDB[%s]", db_path.c_str());
-  return Status::OK();
+Status RocksdbContext::open(const std::string &db_path, bool read_only,
+                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {
+  return open(Args{db_path, {}, std::move(merge_op), {}}, read_only);
 }
 
 
-Status RocksdbContext::open(const std::string &db_path,
-                            const std::vector<std::string> &column_names,
-                            bool read_only,
-                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {
+Status RocksdbContext::open(Args args, bool read_only) {
+  per_cf_merge_ops_ = std::move(args.per_cf_merge_ops);
+  enable_hash_skiplist_ = args.enable_hash_skiplist;
+
   std::lock_guard<std::mutex> lock(mutex_);
 
   if (db_) {
@@ -166,36 +120,44 @@ Status RocksdbContext::open(const std::string &db_path,
     return Status::PermissionDenied();
   }
 
-  if (auto s = validate_and_set_db_path(db_path, true); !s.ok()) {
+  if (auto s = validate_and_set_db_path(args.db_path, true); !s.ok()) {
     return s;
   }
 
   create_opts_.create_if_missing = false;
-  prepare_options(merge_op);
+  prepare_options(std::move(args.merge_op));
 
-  // Set up column families
   rocksdb::Status s;
   std::vector<std::string> existing_cf_names{};
   std::vector<rocksdb::ColumnFamilyDescriptor> cf_descriptors{};
-  s = rocksdb::DB::ListColumnFamilies(create_opts_, db_path,
+  s = rocksdb::DB::ListColumnFamilies(create_opts_, args.db_path,
                                       &existing_cf_names);
   if (!s.ok()) {
     LOG_ERROR("Failed to list cf in RocksDB[%s], code[%d], reason[%s]",
-              db_path.c_str(), s.code(), s.ToString().c_str());
+              args.db_path.c_str(), s.code(), s.ToString().c_str());
     return Status::InternalError();
   }
-  rocksdb::ColumnFamilyOptions cf_options(create_opts_);
-  if (column_names.empty()) {  // Get all column families from DB
-    for (auto const &column_name : existing_cf_names) {
-      cf_descriptors.emplace_back(column_name, cf_options);
+
+  auto make_cf_options = [&](const std::string &cf_name) {
+    rocksdb::ColumnFamilyOptions cf_options(create_opts_);
+    auto it = per_cf_merge_ops_.find(cf_name);
+    if (it != per_cf_merge_ops_.end() && it->second) {
+      cf_options.merge_operator = it->second;
+    }
+    return cf_options;
+  };
+
+  if (args.column_names.empty()) {
+    for (const auto &column_name : existing_cf_names) {
+      cf_descriptors.emplace_back(column_name, make_cf_options(column_name));
     }
   } else {
     bool has_default = false;
-    for (const auto &column_name : column_names) {
+    for (const auto &column_name : args.column_names) {
       if (std::find(existing_cf_names.begin(), existing_cf_names.end(),
                     column_name) == existing_cf_names.end()) {
-        LOG_ERROR("Column family[%s] does not exist in RocksDB[%s]",
-                  column_name.c_str(), db_path.c_str());
+        LOG_WARN("Column family[%s] does not exist in RocksDB[%s]",
+                 column_name.c_str(), args.db_path.c_str());
         return Status::InvalidArgument();
       }
       if (column_name == rocksdb::kDefaultColumnFamilyName) {
@@ -203,43 +165,51 @@ Status RocksdbContext::open(const std::string &db_path,
       }
     }
     if (read_only) {
-      for (const auto &column_name : column_names) {
-        cf_descriptors.emplace_back(column_name, cf_options);
+      for (const auto &column_name : args.column_names) {
+        cf_descriptors.emplace_back(column_name, make_cf_options(column_name));
       }
       if (!has_default) {
-        cf_descriptors.emplace_back(rocksdb::kDefaultColumnFamilyName,
-                                    cf_options);
+        cf_descriptors.emplace_back(
+            rocksdb::kDefaultColumnFamilyName,
+            make_cf_options(rocksdb::kDefaultColumnFamilyName));
       }
-    } else {  // Rocksdb must be opened with all column families in write mode
-      for (auto const &column_name : existing_cf_names) {
-        cf_descriptors.emplace_back(column_name, cf_options);
+    } else {
+      for (const auto &column_name : existing_cf_names) {
+        cf_descriptors.emplace_back(column_name, make_cf_options(column_name));
       }
     }
   }
 
-  // Open RocksDB
   rocksdb::DB *db;
   if (read_only) {
-    s = rocksdb::DB::OpenForReadOnly(create_opts_, db_path, cf_descriptors,
+    s = rocksdb::DB::OpenForReadOnly(create_opts_, args.db_path, cf_descriptors,
                                      &cf_handles_, &db);
   } else {
-    s = rocksdb::DB::Open(create_opts_, db_path, cf_descriptors, &cf_handles_,
-                          &db);
+    s = rocksdb::DB::Open(create_opts_, args.db_path, cf_descriptors,
+                          &cf_handles_, &db);
   }
   if (!s.ok()) {
     LOG_ERROR("Failed to open RocksDB[%s], code[%d], reason[%s]",
-              db_path.c_str(), s.code(), s.ToString().c_str());
+              args.db_path.c_str(), s.code(), s.ToString().c_str());
     return Status::InternalError();
   }
 
   db_.reset(db);
   read_only_ = read_only;
   write_opts_.disableWAL = true;
-  LOG_DEBUG("Opened RocksDB[%s]", db_path.c_str());
+  LOG_DEBUG("Opened RocksDB[%s] with Args", args.db_path.c_str());
   return Status::OK();
 }
 
 
+Status RocksdbContext::open(const std::string &db_path,
+                            const std::vector<std::string> &column_names,
+                            bool read_only,
+                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {
+  return open(Args{db_path, column_names, std::move(merge_op), {}}, read_only);
+}
+
+
 Status RocksdbContext::validate_and_set_db_path(const std::string &db_path,
                                                 bool should_exist) {
   if (db_path.empty()) {
@@ -321,6 +291,18 @@ void RocksdbContext::prepare_options(
 
   // Disable direct reads (use buffered I/O instead)
   create_opts_.use_direct_reads = false;
+
+  // Hash skip list memtable for prefix-based lookups
+  if (enable_hash_skiplist_) {
+    create_opts_.prefix_extractor.reset(rocksdb::NewCappedPrefixTransform(8));
+    create_opts_.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory(
+        1000000,  // bucket_count
+        4,        // skiplist_height
+        4         // skiplist_branching_factor
+        ));
+    create_opts_.allow_concurrent_memtable_write = false;
+    read_opts_.total_order_seek = true;
+  }
 }
 
 
@@ -443,8 +425,13 @@ Status RocksdbContext::create_cf(const std::string &cf_name) {
   }
 
   rocksdb::ColumnFamilyHandle *cf_handle{nullptr};
-  auto s = db_->CreateColumnFamily(rocksdb::ColumnFamilyOptions(create_opts_),
-                                   cf_name, &cf_handle);
+  rocksdb::ColumnFamilyOptions cf_options(create_opts_);
+  // Apply per-CF merge operator if one was registered for this CF name
+  auto it = per_cf_merge_ops_.find(cf_name);
+  if (it != per_cf_merge_ops_.end() && it->second) {
+    cf_options.merge_operator = it->second;
+  }
+  auto s = db_->CreateColumnFamily(cf_options, cf_name, &cf_handle);
   if (s.ok()) {
     cf_handles_.push_back(cf_handle);
     LOG_DEBUG("Created cf[%s] in RocksDB[%s]", cf_name.c_str(),
@@ -590,6 +577,4 @@ size_t RocksdbContext::count() {
     return 0;
   }
 }
-
-
 }  // namespace zvec
\ No newline at end of file
diff --git a/src/db/common/rocksdb_context.h b/src/db/common/rocksdb_context.h
index 302d7ca8c..d47d90245 100644
--- a/src/db/common/rocksdb_context.h
+++ b/src/db/common/rocksdb_context.h
@@ -16,7 +16,12 @@
 #pragma once
 
 
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
 #include <rocksdb/db.h>
+#include <rocksdb/write_batch.h>
 #include <zvec/ailego/io/file.h>
 #include <zvec/db/status.h>
 
@@ -27,9 +32,18 @@ namespace zvec {
 // A very thin wrapper around RocksDB
 struct RocksdbContext {
  public:
+  struct Args {
+    std::string db_path;
+    std::vector<std::string> column_names;
+    std::shared_ptr<rocksdb::MergeOperator> merge_op;
+    std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+        per_cf_merge_ops;
+    bool enable_hash_skiplist = false;
+  };
   std::unique_ptr<rocksdb::DB> db_{nullptr};
   std::string db_path_;
   bool read_only_;
+  bool enable_hash_skiplist_{false};
   std::vector<rocksdb::ColumnFamilyHandle *> cf_handles_;
   rocksdb::Options create_opts_;
   rocksdb::WriteOptions write_opts_;
@@ -37,6 +51,9 @@ struct RocksdbContext {
   rocksdb::FlushOptions flush_opts_;
   rocksdb::CompactRangeOptions compact_range_opts_;
   std::mutex mutex_;
+  // Per-CF merge operators (keyed by CF name)
+  std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+      per_cf_merge_ops_;
 
 
  public:
@@ -79,7 +96,7 @@ struct RocksdbContext {
   rocksdb::ColumnFamilyHandle *get_cf(const std::string &cf_name);
 
 
-  // Create a column family
+  // Create a column family (uses per_cf_merge_ops_ if set for cf_name)
   Status create_cf(const std::string &cf_name);
 
 
@@ -103,6 +120,13 @@ struct RocksdbContext {
   size_t count();
 
 
+  // Create a Rocksdb instance from Args
+  Status create(Args args);
+
+  // Open an existing Rocksdb instance from Args
+  Status open(Args args, bool read_only);
+
+
  private:
   using FILE = ailego::File;
 
diff --git a/src/db/index/CMakeLists.txt b/src/db/index/CMakeLists.txt
index 4420050e6..d4efc32c9 100644
--- a/src/db/index/CMakeLists.txt
+++ b/src/db/index/CMakeLists.txt
@@ -1,9 +1,25 @@
 include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
 include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
 
+if(NOT ANDROID AND AUTO_DETECT_ARCH)
+    if (HOST_ARCH MATCHES "^(x86|x64)$")
+        setup_compiler_march_for_x86(INDEX_MARCH_FLAG_SSE INDEX_MARCH_FLAG_AVX2 INDEX_MARCH_FLAG_AVX512 INDEX_MARCH_FLAG_AVX512FP16)
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/column/fts_column/posting/bitpacked_simd_sse41.cc
+            PROPERTIES
+            COMPILE_FLAGS "${INDEX_MARCH_FLAG_SSE}"
+        )
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/column/fts_column/posting/bitpacked_simd_avx2.cc
+            PROPERTIES
+            COMPILE_FLAGS "${INDEX_MARCH_FLAG_AVX2}"
+        )
+    endif()
+endif()
+
 cc_library(
     NAME zvec_index STATIC STRICT
-    SRCS *.cc segment/*.cc column/vector_column/*.cc column/inverted_column/*.cc storage/*.cc storage/wal/*.cc common/*.cc
+    SRCS *.cc segment/*.cc column/vector_column/*.cc column/inverted_column/*.cc column/fts_column/*.cc column/fts_column/tokenizer/*.cc column/fts_column/posting/*.cc column/fts_column/iterator/*.cc storage/*.cc storage/wal/*.cc common/*.cc
     LIBS zvec_common
          zvec_proto
          rocksdb
@@ -11,6 +27,8 @@ cc_library(
          Arrow::arrow_static
          Arrow::arrow_compute
          Arrow::arrow_dataset
+         cppjieba
+         FastPFOR
     INCS .  ${PROJECT_ROOT_DIR}/src
     VERSION "${PROXIMA_ZVEC_VERSION}"
   )
diff --git a/src/db/index/column/fts_column/FtsLexer.g4 b/src/db/index/column/fts_column/FtsLexer.g4
new file mode 100644
index 000000000..1456e4ba5
--- /dev/null
+++ b/src/db/index/column/fts_column/FtsLexer.g4
@@ -0,0 +1,59 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+lexer grammar FtsLexer;
+
+// ── Boolean operators ────────────────────────────────────────────────────────
+OR  : [Oo][Rr];
+AND : [Aa][Nn][Dd];
+NOT : [Nn][Oo][Tt];
+
+// ── Modifier prefixes ────────────────────────────────────────────────────────
+PLUS_SIGN:   '+';
+MINUS_SIGN:  '-';
+
+COLON:       ':';
+CARET:       '^';
+
+// ── Grouping ─────────────────────────────────────────────────────────────────
+LP:          '(';
+RP:          ')';
+
+// ── Quoted strings (phrase queries) ──────────────────────────────────────────
+DQUOTA_STRING
+    : '"' (~["\\\r\n] | '\\' .)* '"'
+    ;
+
+
+fragment ASCII_ALNUM : [A-Za-z0-9_];
+fragment ESCAPED_CHAR
+    : '\\' [-+=&|!(){}[\]^"~*?:\\/]
+    ;
+fragment UNI_CHAR    : [\u0080-\uFFFF];
+fragment TERM_START  : ASCII_ALNUM | UNI_CHAR;
+fragment TERM_BODY   : ASCII_ALNUM | UNI_CHAR | [._#/%\-'@] | ESCAPED_CHAR;
+
+// Matches sequences of letters, digits, underscores and hyphens that start
+// with a letter or underscore (same as the original SQLLexer REGULAR_ID).
+REGULAR_ID: [A-Za-z_] [A-Za-z0-9_\-]*;
+
+NUMBER: [0-9]+ ('.' [0-9]+)?;
+
+// Generic term
+TERM: TERM_START TERM_BODY*;
+
+// ── Whitespace (skip) ─────────────────────────────────────────────────────────
+SPACES: [ \t\r\n]+ -> skip;
+
+DEFAULT: . ;
diff --git a/src/db/index/column/fts_column/FtsParser.g4 b/src/db/index/column/fts_column/FtsParser.g4
new file mode 100644
index 000000000..82613748e
--- /dev/null
+++ b/src/db/index/column/fts_column/FtsParser.g4
@@ -0,0 +1,92 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+parser grammar FtsParser;
+
+options { tokenVocab = FtsLexer; }
+
+// ── Entry point ───────────────────────────────────────────────────────────────
+fts_query_unit
+    : fts_or_expr EOF
+    ;
+
+// ── OR (lowest precedence) ────────────────────────────────────────────────────
+fts_or_expr
+    : fts_and_expr (OR fts_and_expr)*
+    ;
+
+// ── AND / NOT (same precedence) ──────────────────────────────────────────────
+// `a NOT b` is the binary `a AND NOT b` operator: documents matching `a`
+// excluding those matching `b`. The explicit form `a AND NOT b` is also
+// accepted for readability; semantically it is identical to `a NOT b`.
+fts_and_expr
+    : fts_seq_expr ((AND NOT? | NOT) fts_seq_expr)*
+    ;
+
+// ── Implicit adjacency ────────────────────────────────────────────────────────
+// Adjacent atoms without an explicit operator are grouped together; the
+// builder treats them as an implicit OR (same behaviour as the original SQL
+// parser).
+fts_seq_expr
+    : fts_unary+
+    ;
+
+// ── Unary modifier ────────────────────────────────────────────────────────────
+// NOT is *not* a unary modifier here — it is consumed by fts_and_expr above
+// as a binary operator. Unary modifiers are limited to `+` (must) and `-`
+// (must_not).
+fts_unary
+    : PLUS_SIGN  fts_atom   # must_atom
+    | MINUS_SIGN fts_atom   # must_not_atom
+    | fts_atom              # plain_atom
+    ;
+
+// ── Atom: optional field prefix + primary + optional boost ───────────────────
+fts_atom
+    : fts_field_prefix? fts_primary fts_boost?
+    ;
+
+// ── Field prefix: REGULAR_ID ':' ─────────────────────────────────────────────
+fts_field_prefix
+    : REGULAR_ID COLON
+    ;
+
+// ── Primary: term | phrase | parenthesised sub-expression ────────────────────
+fts_primary
+    : fts_term
+    | fts_phrase
+    | LP fts_or_expr RP
+    ;
+
+// ── Boost: '^' NUMBER ────────────────────────────────────────────────────────
+fts_boost
+    : CARET NUMBER
+    ;
+
+fts_natural_term
+    : DEFAULT+   // One or more default characters forming a natural language term
+    ;
+
+// ── Term: identifier, number, or generic token ───────────────────────────────
+fts_term
+    : TERM
+    | REGULAR_ID
+    | NUMBER
+    | fts_natural_term
+    ;
+
+// ── Phrase: double-quoted string ─────────────────────────────────────────────
+fts_phrase
+    : DQUOTA_STRING
+    ;
diff --git a/src/db/index/column/fts_column/bm25_scorer.cc b/src/db/index/column/fts_column/bm25_scorer.cc
new file mode 100644
index 000000000..df989998a
--- /dev/null
+++ b/src/db/index/column/fts_column/bm25_scorer.cc
@@ -0,0 +1,181 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bm25_scorer.h"
+#include <cmath>
+#include <cstring>
+#include <zvec/ailego/logger/logger.h>
+#include "fts_utils.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// BM25Scorer implementation
+// ============================================================
+
+int BM25Scorer::load_segment_stats(const std::string &field_name,
+                                   RocksdbContext *ctx,
+                                   rocksdb::ColumnFamilyHandle *stat_cf) {
+  if (!ctx || !stat_cf) {
+    LOG_WARN("BM25Scorer::load_segment_stats: null ctx/stat_cf for field[%s]",
+             field_name.c_str());
+    return -1;
+  }
+
+  // Read total_docs
+  std::string total_docs_value;
+  auto ret = ctx->db_->Get(ctx->read_opts_, stat_cf,
+                           make_total_docs_key(field_name), &total_docs_value);
+  if (!ret.ok()) {
+    LOG_ERROR(
+        "BM25Scorer::load_segment_stats: failed to read total_docs. "
+        "field[%s]",
+        field_name.c_str());
+    return -1;
+  }
+  if (total_docs_value.size() < sizeof(uint64_t)) {
+    LOG_ERROR(
+        "BM25Scorer::load_segment_stats: total_docs value too small. "
+        "field[%s] value_size[%zu]",
+        field_name.c_str(), total_docs_value.size());
+    return -1;
+  }
+  uint64_t total_docs = decode_uint64_value(total_docs_value.data());
+  stats_.total_docs.store(total_docs, std::memory_order_release);
+
+  // Read total_tokens
+  std::string total_tokens_value;
+  auto status =
+      ctx->db_->Get(ctx->read_opts_, stat_cf, make_total_tokens_key(field_name),
+                    &total_tokens_value);
+  if (!status.ok()) {
+    LOG_ERROR(
+        "BM25Scorer::load_segment_stats: failed to read total_tokens. "
+        "field[%s]",
+        field_name.c_str());
+    return -1;
+  }
+  if (total_tokens_value.size() < sizeof(uint64_t)) {
+    LOG_ERROR(
+        "BM25Scorer::load_segment_stats: total_tokens value too small. "
+        "field[%s] value_size[%zu]",
+        field_name.c_str(), total_tokens_value.size());
+    return -1;
+  }
+  uint64_t total_tokens = decode_uint64_value(total_tokens_value.data());
+  stats_.total_tokens.store(total_tokens, std::memory_order_release);
+
+  return 0;
+}
+
+float BM25Scorer::idf(uint64_t term_doc_freq) const {
+  const auto snap = stats_.snapshot();
+  if (snap.total_docs == 0) {
+    return 0.0f;
+  }
+  // Robertson-Sparck Jones IDF formula (with smoothing):
+  // IDF(t) = ln((N - df + 0.5) / (df + 0.5) + 1)
+  const float total_docs = static_cast<float>(snap.total_docs);
+  const float df = static_cast<float>(term_doc_freq);
+  return std::log((total_docs - df + 0.5f) / (df + 0.5f) + 1.0f);
+}
+
+float BM25Scorer::score(uint64_t term_doc_freq, uint32_t term_freq,
+                        uint32_t doc_len) const {
+  // Take a single snapshot so that IDF and TF normalization use the same
+  // consistent values of total_docs / total_tokens.
+  const auto snap = stats_.snapshot();
+  if (snap.total_docs == 0) {
+    return 0.0f;
+  }
+
+  // IDF
+  const float total_docs = static_cast<float>(snap.total_docs);
+  const float df = static_cast<float>(term_doc_freq);
+  const float idf_value =
+      std::log((total_docs - df + 0.5f) / (df + 0.5f) + 1.0f);
+  if (idf_value <= 0.0f) {
+    return 0.0f;
+  }
+
+  // TF normalization
+  const float tf = static_cast<float>(term_freq);
+  const float doc_length = static_cast<float>(doc_len);
+  const float avg_dl = snap.avg_doc_len();
+
+  // BM25 TF normalization formula:
+  // tf_norm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * |d| / avgdl))
+  const float tf_norm =
+      tf * (params_.k1 + 1.0f) /
+      (tf + params_.k1 * (1.0f - params_.b + params_.b * doc_length / avg_dl));
+
+  return idf_value * tf_norm;
+}
+
+float BM25Scorer::score_with_idf(float idf_value, uint32_t term_freq,
+                                 uint32_t doc_len) const {
+  if (idf_value <= 0.0f) {
+    return 0.0f;
+  }
+  const auto snap = stats_.snapshot();
+  if (snap.total_docs == 0) {
+    return 0.0f;
+  }
+
+  const float tf = static_cast<float>(term_freq);
+  const float doc_length = static_cast<float>(doc_len);
+  const float avg_dl = snap.avg_doc_len();
+
+  const float tf_norm =
+      tf * (params_.k1 + 1.0f) /
+      (tf + params_.k1 * (1.0f - params_.b + params_.b * doc_length / avg_dl));
+
+  return idf_value * tf_norm;
+}
+
+// ============================================================
+// WandOptimizer implementation
+// ============================================================
+
+int WandOptimizer::open(BM25ScorerPtr scorer, RocksdbContext *ctx,
+                        rocksdb::ColumnFamilyHandle *max_tf_cf, uint32_t topk) {
+  if (!scorer || !ctx || !max_tf_cf) {
+    LOG_ERROR(
+        "WandOptimizer open failed: null arguments scorer[%p] ctx[%p] "
+        "max_tf_cf[%p]",
+        (void *)scorer.get(), (void *)ctx, (void *)max_tf_cf);
+    return -1;
+  }
+  scorer_ = std::move(scorer);
+  ctx_ = ctx;
+  max_tf_cf_ = max_tf_cf;
+  topk_ = topk;
+  return 0;
+}
+
+uint32_t WandOptimizer::read_max_tf(const std::string &term) const {
+  if (!max_tf_cf_) {
+    return 1;
+  }
+  std::string max_tf_value;
+  if (!ctx_->db_->Get(ctx_->read_opts_, max_tf_cf_, term, &max_tf_value).ok() ||
+      max_tf_value.size() < sizeof(uint32_t)) {
+    return 1;  // Default max term frequency is 1
+  }
+  uint32_t max_tf = 0;
+  std::memcpy(&max_tf, max_tf_value.data(), sizeof(uint32_t));
+  return max_tf;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/bm25_scorer.h b/src/db/index/column/fts_column/bm25_scorer.h
new file mode 100644
index 000000000..6a31a393b
--- /dev/null
+++ b/src/db/index/column/fts_column/bm25_scorer.h
@@ -0,0 +1,195 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include "db/common/rocksdb_context.h"
+
+namespace zvec::fts {
+
+/*! BM25 scoring parameters
+ */
+struct BM25Params {
+  // Term frequency saturation parameter, typical value 1.2
+  float k1{1.2f};
+  // Document length normalization parameter, typical value 0.75
+  float b{0.75f};
+};
+
+/*! Plain snapshot of per-segment BM25 statistics (non-atomic, for callers)
+ */
+struct SegmentStatsSnapshot {
+  uint64_t total_docs{0};
+  uint64_t total_tokens{0};
+
+  float avg_doc_len() const {
+    if (total_docs == 0) {
+      return 1.0f;
+    }
+    return static_cast<float>(total_tokens) / static_cast<float>(total_docs);
+  }
+};
+
+/*! Per-segment BM25 statistics (thread-safe)
+ *  Fields are std::atomic so that concurrent insert (writer) and search
+ *  (reader) threads do not race on the raw values.
+ */
+struct SegmentStats {
+  // Total number of documents in segment
+  std::atomic<uint64_t> total_docs{0};
+  // Total number of tokens in all documents in segment (used to calculate
+  // average document length)
+  std::atomic<uint64_t> total_tokens{0};
+
+  SegmentStats() = default;
+
+  // std::atomic is neither copyable nor movable; provide manual move
+  // semantics so that BM25Scorer (which embeds SegmentStats) stays movable.
+  // These are only used during single-threaded construction / NRVO and are
+  // therefore safe with relaxed ordering.
+  SegmentStats(SegmentStats &&other) noexcept
+      : total_docs(other.total_docs.load(std::memory_order_relaxed)),
+        total_tokens(other.total_tokens.load(std::memory_order_relaxed)) {}
+
+  SegmentStats &operator=(SegmentStats &&other) noexcept {
+    total_docs.store(other.total_docs.load(std::memory_order_relaxed),
+                     std::memory_order_relaxed);
+    total_tokens.store(other.total_tokens.load(std::memory_order_relaxed),
+                       std::memory_order_relaxed);
+    return *this;
+  }
+
+  SegmentStats(const SegmentStats &) = delete;
+  SegmentStats &operator=(const SegmentStats &) = delete;
+
+  // Take a consistent snapshot: load total_tokens first (the value that
+  // grows together with total_docs) so the pair is *at least* as fresh as
+  // the docs count, avoiding avg_doc_len() returning an inflated value.
+  SegmentStatsSnapshot snapshot() const {
+    const uint64_t tokens = total_tokens.load(std::memory_order_acquire);
+    const uint64_t docs = total_docs.load(std::memory_order_acquire);
+    return {docs, tokens};
+  }
+
+  // Average document length (total_tokens / total_docs)
+  float avg_doc_len() const {
+    return snapshot().avg_doc_len();
+  }
+};
+
+/*! BM25 scorer
+ *  Encapsulates standard BM25 formula, supports per-segment statistics loading
+ * and WAND optimization
+ *
+ *  BM25 formula:
+ *    score(q, d) = Σ IDF(t) * (tf(t,d) * (k1+1)) / (tf(t,d) +
+ * k1*(1-b+b*|d|/avgdl)) IDF(t) = ln((N - df(t) + 0.5) / (df(t) + 0.5) + 1)
+ */
+class BM25Scorer {
+ public:
+  explicit BM25Scorer(BM25Params params = BM25Params{}) : params_(params) {}
+
+  /*! Load per-segment statistics from $SEGMENT_STAT CF
+   *  \param field_name  Field name
+   *  \param stat_cf     $SEGMENT_STAT CF
+   *  \return 0 for success, non-0 for failure
+   */
+  int load_segment_stats(const std::string &field_name, RocksdbContext *ctx,
+                         rocksdb::ColumnFamilyHandle *stat_cf);
+
+  /*! Calculate BM25 contribution score of a single term for a single document
+   *  \param term_doc_freq   Document frequency of this term in segment (df)
+   *  \param term_freq       Term frequency of this term in current document
+   * (tf) \param doc_len         Length of current document (number of tokens)
+   *  \return BM25 score contribution
+   */
+  float score(uint64_t term_doc_freq, uint32_t term_freq,
+              uint32_t doc_len) const;
+
+  /*! Calculate IDF value of a term
+   *  \param term_doc_freq  Document frequency of this term in segment (df)
+   *  \return IDF value
+   */
+  float idf(uint64_t term_doc_freq) const;
+
+  /*! Calculate BM25 score using a pre-computed IDF value.
+   *  Avoids recomputing log() on every call — IDF is constant per term.
+   *  \param idf_value      Pre-computed IDF value (from idf())
+   *  \param term_freq      Term frequency in current document
+   *  \param doc_len        Document length (number of tokens)
+   *  \return BM25 score contribution
+   */
+  float score_with_idf(float idf_value, uint32_t term_freq,
+                       uint32_t doc_len) const;
+
+  /*! Update in-memory segment statistics (called by FtsColumnIndexer after
+   *  each insert so that search() uses up-to-date stats for BM25 scoring)
+   *  \param total_docs    Current total number of documents
+   *  \param total_tokens  Current total number of tokens
+   */
+  void update_stats(uint64_t total_docs, uint64_t total_tokens) {
+    // Store total_docs first so that a concurrent reader calling snapshot()
+    // (which loads total_tokens before total_docs) never sees a new docs
+    // count paired with a stale tokens count, which would deflate avg_doc_len.
+    stats_.total_docs.store(total_docs, std::memory_order_release);
+    stats_.total_tokens.store(total_tokens, std::memory_order_release);
+  }
+
+  SegmentStatsSnapshot stats() const {
+    return stats_.snapshot();
+  }
+  const BM25Params &params() const {
+    return params_;
+  }
+
+ private:
+  BM25Params params_;
+  SegmentStats stats_;
+};
+
+using BM25ScorerPtr = std::shared_ptr<BM25Scorer>;
+
+/*! WAND optimizer
+ *  Uses $MAX_TF as upper bound for TopK pruning, reduces unnecessary document
+ * scoring
+ */
+class WandOptimizer {
+ public:
+  /*! Initialize WAND optimizer
+   *  \param scorer        BM25 scorer (with segment statistics loaded)
+   *  \param max_tf_cf     $MAX_TF CF (stores maximum term frequency for each
+   * term) \param topk          Number of TopK results to return
+   */
+  int open(BM25ScorerPtr scorer, RocksdbContext *ctx,
+           rocksdb::ColumnFamilyHandle *max_tf_cf, uint32_t topk);
+
+  /*! Read the maximum term frequency for a term from $MAX_TF CF.
+   *  Used by TermDocIterator to precompute WAND upper bound score.
+   *  \param term  The term to look up
+   *  \return Maximum term frequency, or 1 if not found
+   */
+  uint32_t read_max_tf(const std::string &term) const;
+
+ private:
+  BM25ScorerPtr scorer_;
+  RocksdbContext *ctx_{nullptr};
+  rocksdb::ColumnFamilyHandle *max_tf_cf_{nullptr};
+  uint32_t topk_{10};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_column_indexer.cc b/src/db/index/column/fts_column/fts_column_indexer.cc
new file mode 100644
index 000000000..2a9cfadff
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_column_indexer.cc
@@ -0,0 +1,889 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_column_indexer.h"
+#include <chrono>
+#include <cstring>
+#include <queue>
+#include <thread>
+#include <unordered_map>
+#include <roaring/roaring.h>
+#include <rocksdb/write_batch.h>
+#include <zvec/ailego/logger/logger.h>
+#include <zvec/db/status.h>
+#include "db/common/typedef.h"
+#include "iterator/fts_candidate_iterator.h"
+#include "iterator/fts_conjunction_iterator.h"
+#include "iterator/fts_disjunction_iterator.h"
+#include "iterator/fts_phrase_iterator.h"
+#include "iterator/fts_term_iterator.h"
+#include "posting/bitpacked_posting_list.h"
+#include "tokenizer/tokenizer_pipeline_manager.h"
+#include "fts_utils.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// Lifecycle
+// ============================================================
+
+FtsColumnIndexer::~FtsColumnIndexer() {
+  // Pipeline release is handled by FtsIndexParams destructor via fts_params_.
+  if (opened_.load()) {
+    (void)close();
+  }
+}
+
+// ============================================================
+// Initialization — shared reader core
+// ============================================================
+
+Result<void> FtsColumnIndexer::open_reader(
+    const std::string &field_name, RocksdbContext *ctx,
+    rocksdb::ColumnFamilyHandle *postings_cf,
+    rocksdb::ColumnFamilyHandle *positions_cf,
+    rocksdb::ColumnFamilyHandle *term_freq_cf,
+    rocksdb::ColumnFamilyHandle *max_tf_cf,
+    rocksdb::ColumnFamilyHandle *doc_len_cf,
+    rocksdb::ColumnFamilyHandle *stat_cf, BM25Params bm25_params) {
+  if (opened_.load()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer already opened. field=", field_name));
+  }
+
+  field_name_ = field_name;
+  ctx_ = ctx;
+  postings_cf_ = postings_cf;
+  positions_cf_ = positions_cf;
+  term_freq_cf_ = term_freq_cf;
+  max_tf_cf_ = max_tf_cf;
+  doc_len_cf_ = doc_len_cf;
+  stat_cf_ = stat_cf;
+
+  scorer_ = std::make_shared<BM25Scorer>(bm25_params);
+
+  // doc_len_cf == nullptr → immutable path, load persisted stats.
+  // doc_len_cf != nullptr → mutable path, stats maintained in-memory.
+  if (doc_len_cf == nullptr) {
+    int ret = scorer_->load_segment_stats(field_name, ctx, stat_cf);
+    if (ret != 0) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsColumnIndexer failed to load segment stats. field=", field_name));
+    }
+  }
+
+  opened_.store(true);
+  return {};
+}
+
+// ============================================================
+// Initialization — read+write (mutable)
+// ============================================================
+
+Result<void> FtsColumnIndexer::open(FieldSchema::Ptr field_meta,
+                                    RocksdbContext *ctx,
+                                    rocksdb::ColumnFamilyHandle *postings_cf,
+                                    rocksdb::ColumnFamilyHandle *positions_cf,
+                                    rocksdb::ColumnFamilyHandle *term_freq_cf,
+                                    rocksdb::ColumnFamilyHandle *max_tf_cf,
+                                    rocksdb::ColumnFamilyHandle *doc_len_cf,
+                                    rocksdb::ColumnFamilyHandle *stat_cf) {
+  if (!field_meta || !ctx) {
+    return tl::make_unexpected(
+        Status::InvalidArgument("FtsColumnIndexer: null field_meta or ctx"));
+  }
+
+  // Obtain FtsIndexParams from field_meta's index_params.
+  auto index_params = field_meta->index_params();
+  auto fts_param =
+      std::dynamic_pointer_cast<zvec::FtsIndexParams>(index_params);
+  if (!fts_param) {
+    return tl::make_unexpected(Status::InvalidArgument(
+        "FtsColumnIndexer: field has no FtsIndexParams. field=",
+        field_meta->name()));
+  }
+
+  auto pipeline_result = fts_param->create_pipeline();
+  if (!pipeline_result.has_value()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer: failed to create tokenizer pipeline. field=",
+        field_meta->name(), " err=", pipeline_result.error().message()));
+  }
+
+  field_meta_ = std::move(field_meta);
+  tokenizer_pipeline_ = std::move(pipeline_result.value());
+  fts_params_ = fts_param;
+
+  return open_reader(field_meta_->name(), ctx, postings_cf, positions_cf,
+                     term_freq_cf, max_tf_cf, doc_len_cf, stat_cf);
+}
+
+// ============================================================
+// Initialization — read-only (immutable / standalone)
+// ============================================================
+
+// ============================================================
+// Close
+// ============================================================
+
+Result<void> FtsColumnIndexer::close() {
+  if (!opened_.load()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer::close: not opened. field=", field_name_));
+  }
+
+  postings_cf_ = nullptr;
+  positions_cf_ = nullptr;
+  term_freq_cf_.store(nullptr, std::memory_order_release);
+  max_tf_cf_.store(nullptr, std::memory_order_release);
+  doc_len_cf_.store(nullptr, std::memory_order_release);
+  stat_cf_ = nullptr;
+  scorer_.reset();
+
+  opened_.store(false);
+  return {};
+}
+
+// ============================================================
+// Query entry point
+// ============================================================
+
+Result<std::vector<FtsResult>> FtsColumnIndexer::search(
+    const FtsAstNode &ast, const FtsQueryParams &query_params) const {
+  if (!scorer_) {
+    LOG_ERROR("FtsColumnIndexer::search: not opened. field[%s]",
+              field_name_.c_str());
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer::search: not opened. field=", field_name_));
+  }
+
+  if (ast.must_not) {
+    LOG_WARN(
+        "FtsColumnIndexer::search: must_not on root is not allowed. field[%s]",
+        field_name_.c_str());
+    return tl::make_unexpected(Status::InvalidArgument(
+        "FtsColumnIndexer::search: must_not on root is not allowed. field=",
+        field_name_));
+  }
+
+  auto iter_result = build_iterator(ast);
+  if (!iter_result.has_value()) {
+    LOG_ERROR("FtsColumnIndexer::search: build_iterator failed. field[%s] %s",
+              field_name_.c_str(), iter_result.error().message().c_str());
+    return tl::make_unexpected(iter_result.error());
+  }
+  DocIteratorPtr root_iter = std::move(iter_result.value());
+  if (!root_iter) {
+    // No matching terms found — valid empty result, not an error.
+    return std::vector<FtsResult>{};
+  }
+
+  // Candidate-driven mode: AND a CandidateDocIterator into the root so the
+  // small candidate set leads (Conjunction sorts by cost asc), turning the
+  // posting walk into per-candidate advance()+matches()+score().
+  if (!query_params.candidate_ids.empty()) {
+    std::vector<DocIteratorPtr> musts;
+    musts.reserve(2);
+    musts.push_back(
+        std::make_unique<CandidateDocIterator>(query_params.candidate_ids));
+    musts.push_back(std::move(root_iter));
+    root_iter = std::make_unique<ConjunctionIterator>(
+        std::move(musts), std::vector<DocIteratorPtr>{});
+  }
+
+  const uint32_t topk = query_params.topk;
+  const zvec::IndexFilter *filter_ptr = query_params.filter.get();
+
+  using MinHeap = std::priority_queue<FtsResult, std::vector<FtsResult>,
+                                      std::greater<FtsResult>>;
+  MinHeap min_heap;
+
+  // Filter pushdown: when a filter is present, use the filter-aware next_doc
+  // overload so composite iterators skip filtered docs before paying for
+  // block-max binary search, do_next alignment, or phase-2 position checks.
+  uint32_t doc_id =
+      filter_ptr ? root_iter->next_doc(filter_ptr) : root_iter->next_doc();
+  while (doc_id != DocIterator::NO_MORE_DOCS) {
+    const uint64_t global_doc_id = static_cast<uint64_t>(doc_id);
+
+    if (root_iter->matches()) {
+      float s = root_iter->score();
+      if (s > 0.0f) {
+        if (min_heap.size() < topk) {
+          min_heap.push({global_doc_id, s});
+          if (min_heap.size() == topk) {
+            root_iter->set_min_competitive_score(min_heap.top().score);
+          }
+        } else if (s > min_heap.top().score) {
+          min_heap.pop();
+          min_heap.push({global_doc_id, s});
+          root_iter->set_min_competitive_score(min_heap.top().score);
+        }
+      }
+    }
+    doc_id =
+        filter_ptr ? root_iter->next_doc(filter_ptr) : root_iter->next_doc();
+  }
+
+  std::vector<FtsResult> results(min_heap.size());
+  for (auto it = results.rbegin(); it != results.rend(); ++it) {
+    *it = min_heap.top();
+    min_heap.pop();
+  }
+
+  return results;
+}
+
+// ============================================================
+// Side CF reset (dump path)
+// ============================================================
+
+void FtsColumnIndexer::reset_side_cfs() {
+  cf_dropped_.store(true);
+  while (cf_counter_.load() > 0) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+  }
+  term_freq_cf_.store(nullptr, std::memory_order_release);
+  max_tf_cf_.store(nullptr, std::memory_order_release);
+  doc_len_cf_.store(nullptr, std::memory_order_release);
+}
+
+// ============================================================
+// Iterator tree construction
+// ============================================================
+
+Result<DocIteratorPtr> FtsColumnIndexer::build_iterator(
+    const FtsAstNode &node) const {
+  switch (node.type()) {
+    case FtsNodeType::TERM:
+      return build_term_iterator(static_cast<const TermNode &>(node));
+    case FtsNodeType::PHRASE:
+      return build_phrase_iterator(static_cast<const PhraseNode &>(node));
+    case FtsNodeType::AND:
+      return build_and_iterator(static_cast<const AndNode &>(node));
+    case FtsNodeType::OR:
+      return build_or_iterator(static_cast<const OrNode &>(node));
+    default:
+      return tl::make_unexpected(Status::InternalError(
+          "FtsColumnIndexer::build_iterator: unknown node type. field=",
+          field_name_));
+  }
+}
+
+Result<DocIteratorPtr> FtsColumnIndexer::create_term_iterator_from_raw(
+    const std::string &term, rocksdb::PinnableSlice raw_data) const {
+  if (BitPackedPostingList::is_bitpacked_format(raw_data.data(),
+                                                raw_data.size())) {
+    BitPackedPostingIterator probe;
+    if (probe.open(raw_data.data(), raw_data.size()) != 0) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsColumnIndexer: failed to open BitPacked postings. field=",
+          field_name_, " term=", term));
+    }
+    const uint64_t df = probe.cost();
+    if (df == 0) {
+      return DocIteratorPtr{nullptr};
+    }
+    const float max_score_val = probe.max_score();
+    return std::make_unique<TermDocIterator>(term, std::move(raw_data), df,
+                                             scorer_, max_score_val);
+  }
+
+  roaring_bitmap_t *bitmap = roaring_bitmap_portable_deserialize_safe(
+      raw_data.data(), raw_data.size());
+  if (!bitmap) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer: failed to deserialize roaring bitmap. field=",
+        field_name_, " term=", term));
+  }
+
+  const uint64_t df = roaring_bitmap_get_cardinality(bitmap);
+  if (df == 0) {
+    roaring_bitmap_free(bitmap);
+    return nullptr;
+  }
+
+  ++cf_counter_;
+  auto *term_freq_cf = term_freq_cf_.load(std::memory_order_acquire);
+  auto *doc_len_cf = doc_len_cf_.load(std::memory_order_acquire);
+  auto *max_tf_cf = max_tf_cf_.load(std::memory_order_acquire);
+  auto *cf_counter = &cf_counter_;
+  if (cf_dropped_) {
+    term_freq_cf = nullptr;
+    doc_len_cf = nullptr;
+    cf_counter = nullptr;
+    max_tf_cf = nullptr;
+    --cf_counter_;
+  }
+
+  float max_score_val = 0.0f;
+  if (max_tf_cf) {
+    WandOptimizer wand;
+    if (wand.open(scorer_, ctx_, max_tf_cf, 0) == 0) {
+      uint32_t max_tf = wand.read_max_tf(term);
+      uint32_t min_dl = min_doc_count_.load(std::memory_order_relaxed);
+      if (min_dl == std::numeric_limits<uint32_t>::max()) {
+        min_dl = 1;
+      }
+      max_score_val = scorer_->score(df, max_tf, min_dl);
+    }
+  }
+
+  return std::make_unique<TermDocIterator>(term, bitmap, df, scorer_,
+                                           max_score_val, ctx_, term_freq_cf,
+                                           doc_len_cf, cf_counter);
+}
+
+Result<DocIteratorPtr> FtsColumnIndexer::build_term_iterator(
+    const TermNode &term_node) const {
+  const std::string &term = term_node.term;
+
+  rocksdb::PinnableSlice raw_data;
+  auto s = ctx_->db_->Get(ctx_->read_opts_, postings_cf_, term, &raw_data);
+  if (!s.ok() || raw_data.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  return create_term_iterator_from_raw(term, std::move(raw_data));
+}
+
+std::vector<rocksdb::PinnableSlice> FtsColumnIndexer::batch_get_postings(
+    const std::vector<rocksdb::Slice> &terms) const {
+  std::vector<rocksdb::PinnableSlice> raw_postings(terms.size());
+  if (terms.empty()) {
+    return raw_postings;
+  }
+
+  std::vector<rocksdb::ColumnFamilyHandle *> cfs(terms.size(), postings_cf_);
+  std::vector<rocksdb::Status> statuses(terms.size());
+  ctx_->db_->MultiGet(ctx_->read_opts_, terms.size(), cfs.data(), terms.data(),
+                      raw_postings.data(), statuses.data());
+  // Ignore failed lookups as callers can check via empty()
+  return raw_postings;
+}
+
+Result<DocIteratorPtr> FtsColumnIndexer::build_phrase_iterator(
+    const PhraseNode &phrase_node) const {
+  if (phrase_node.terms.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  const std::vector<std::string> &terms = phrase_node.terms;
+  std::vector<rocksdb::Slice> term_slices;
+  term_slices.reserve(terms.size());
+  for (const auto &t : terms) {
+    term_slices.emplace_back(t);
+  }
+  auto raw_postings = batch_get_postings(term_slices);
+
+  std::vector<DocIteratorPtr> term_iterators;
+  term_iterators.reserve(terms.size());
+
+  for (size_t i = 0; i < terms.size(); ++i) {
+    if (raw_postings[i].empty()) {
+      return DocIteratorPtr{nullptr};
+    }
+    auto iter_result =
+        create_term_iterator_from_raw(terms[i], std::move(raw_postings[i]));
+    if (!iter_result.has_value()) {
+      return iter_result;
+    }
+    if (!iter_result.value()) {
+      return DocIteratorPtr{nullptr};
+    }
+    term_iterators.push_back(std::move(iter_result.value()));
+  }
+
+  if (term_iterators.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  auto conjunction = std::make_unique<ConjunctionIterator>(
+      std::move(term_iterators), std::vector<DocIteratorPtr>{});
+
+  return std::make_unique<PhraseDocIterator>(std::move(conjunction), terms,
+                                             ctx_, positions_cf_);
+}
+
+Result<DocIteratorPtr> FtsColumnIndexer::build_and_iterator(
+    const AndNode &and_node) const {
+  if (and_node.children.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  std::vector<rocksdb::Slice> term_key_slices;
+  std::vector<size_t> term_child_indices;
+  term_key_slices.reserve(and_node.children.size());
+  term_child_indices.reserve(and_node.children.size());
+
+  for (size_t i = 0; i < and_node.children.size(); ++i) {
+    const auto &child = and_node.children[i];
+    if (child && child->type() == FtsNodeType::TERM) {
+      term_key_slices.emplace_back(static_cast<const TermNode &>(*child).term);
+      term_child_indices.push_back(i);
+    }
+  }
+
+  auto term_raw_postings = batch_get_postings(term_key_slices);
+
+  std::vector<DocIteratorPtr> must_iterators;
+  std::vector<DocIteratorPtr> must_not_iterators;
+  size_t batched_cursor = 0;
+
+  for (size_t i = 0; i < and_node.children.size(); ++i) {
+    const auto &child = and_node.children[i];
+    const bool is_must_not = child->must_not;
+
+    DocIteratorPtr iter;
+    if (batched_cursor < term_child_indices.size() &&
+        term_child_indices[batched_cursor] == i) {
+      rocksdb::PinnableSlice &raw = term_raw_postings[batched_cursor];
+      const std::string &term = static_cast<const TermNode &>(*child).term;
+      if (!raw.empty()) {
+        auto iter_result = create_term_iterator_from_raw(term, std::move(raw));
+        if (!iter_result.has_value()) {
+          return iter_result;
+        }
+        iter = std::move(iter_result.value());
+      }
+      ++batched_cursor;
+    } else {
+      auto iter_result = build_iterator(*child);
+      if (!iter_result.has_value()) {
+        return iter_result;
+      }
+      iter = std::move(iter_result.value());
+    }
+
+    if (!iter) {
+      if (!is_must_not) {
+        return DocIteratorPtr{nullptr};
+      }
+      continue;
+    }
+
+    if (is_must_not) {
+      must_not_iterators.push_back(std::move(iter));
+    } else {
+      must_iterators.push_back(std::move(iter));
+    }
+  }
+
+  if (must_iterators.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  if (must_iterators.size() == 1 && must_not_iterators.empty()) {
+    return std::move(must_iterators[0]);
+  }
+
+  return std::make_unique<ConjunctionIterator>(std::move(must_iterators),
+                                               std::move(must_not_iterators));
+}
+
+Result<DocIteratorPtr> FtsColumnIndexer::build_or_iterator(
+    const OrNode &or_node) const {
+  if (or_node.children.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  std::vector<rocksdb::Slice> term_key_slices;
+  std::vector<size_t> term_child_indices;
+  term_key_slices.reserve(or_node.children.size());
+  term_child_indices.reserve(or_node.children.size());
+
+  for (size_t i = 0; i < or_node.children.size(); ++i) {
+    const auto &child = or_node.children[i];
+    if (child && child->type() == FtsNodeType::TERM) {
+      term_key_slices.emplace_back(static_cast<const TermNode &>(*child).term);
+      term_child_indices.push_back(i);
+    }
+  }
+
+  auto term_raw_postings = batch_get_postings(term_key_slices);
+
+  std::vector<DocIteratorPtr> positive_iterators;
+  std::vector<DocIteratorPtr> must_not_iterators;
+  size_t batched_cursor = 0;
+
+  for (size_t i = 0; i < or_node.children.size(); ++i) {
+    const auto &child = or_node.children[i];
+    const bool is_must_not = child->must_not;
+
+    DocIteratorPtr iter;
+    if (batched_cursor < term_child_indices.size() &&
+        term_child_indices[batched_cursor] == i) {
+      rocksdb::PinnableSlice &raw = term_raw_postings[batched_cursor];
+      const std::string &term = static_cast<const TermNode &>(*child).term;
+      if (!raw.empty()) {
+        auto iter_result = create_term_iterator_from_raw(term, std::move(raw));
+        if (!iter_result.has_value()) {
+          return iter_result;
+        }
+        iter = std::move(iter_result.value());
+      }
+      ++batched_cursor;
+    } else {
+      auto iter_result = build_iterator(*child);
+      if (!iter_result.has_value()) {
+        return iter_result;
+      }
+      iter = std::move(iter_result.value());
+    }
+
+    if (!iter) {
+      continue;
+    }
+
+    if (is_must_not) {
+      must_not_iterators.push_back(std::move(iter));
+    } else {
+      positive_iterators.push_back(std::move(iter));
+    }
+  }
+
+  if (positive_iterators.empty()) {
+    return DocIteratorPtr{nullptr};
+  }
+
+  DocIteratorPtr or_iter;
+  if (positive_iterators.size() == 1) {
+    or_iter = std::move(positive_iterators[0]);
+  } else {
+    or_iter =
+        std::make_unique<DisjunctionIterator>(std::move(positive_iterators));
+  }
+
+  if (!must_not_iterators.empty()) {
+    std::vector<DocIteratorPtr> must_vec;
+    must_vec.push_back(std::move(or_iter));
+    return std::make_unique<ConjunctionIterator>(std::move(must_vec),
+                                                 std::move(must_not_iterators));
+  }
+
+  return or_iter;
+}
+
+// ============================================================
+// Write operations
+// ============================================================
+
+Result<void> FtsColumnIndexer::insert(uint64_t seg_doc_id,
+                                      const std::string &text) {
+  // safe access check
+
+  if (!tokenizer_pipeline_ || !ctx_) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer::insert: not opened. field=", field_name_));
+  }
+
+  // Tokenize
+  std::vector<Token> tokens = tokenizer_pipeline_->process(text);
+  const uint32_t doc_len = static_cast<uint32_t>(tokens.size());
+
+  // Aggregate position lists by term
+  std::unordered_map<std::string, std::vector<uint32_t>> term_positions;
+  for (const auto &token : tokens) {
+    term_positions[token.text].push_back(token.position);
+  }
+
+  // Store seg_doc_id in RocksDB directly, similar to invert indexer
+  const uint32_t doc_id_32 = static_cast<uint32_t>(seg_doc_id);
+
+  // Pre-serialize a single-element Roaring Bitmap for this doc_id once,
+  // reused across all terms to avoid repeated create/serialize/free overhead.
+  roaring_bitmap_t *single_bitmap = roaring_bitmap_create_with_capacity(1);
+  roaring_bitmap_add(single_bitmap, doc_id_32);
+  size_t bitmap_size = roaring_bitmap_portable_size_in_bytes(single_bitmap);
+  std::string bitmap_data(bitmap_size, '\0');
+  roaring_bitmap_portable_serialize(single_bitmap, bitmap_data.data());
+  roaring_bitmap_free(single_bitmap);
+
+  // Batch all writes for this document into a single cross-CF WriteBatch,
+  // reducing 4N+1 individual RocksDB Write() calls to one atomic write.
+  rocksdb::WriteBatch batch;
+
+  for (const auto &[term, positions] : term_positions) {
+    const uint32_t tf = static_cast<uint32_t>(positions.size());
+
+    // 1. Postings CF: merge doc_id bitmap
+    batch.Merge(postings_cf_, term, bitmap_data);
+
+    // 2. Positions CF: term\0doc_id -> delta-varint positions
+    const std::string doc_term_key = make_doc_term_key(term, doc_id_32);
+    batch.Put(positions_cf_, doc_term_key, encode_positions(positions));
+
+    // 3. Term-freq CF: term\0doc_id -> uint32_t tf
+    std::string tf_value(sizeof(uint32_t), '\0');
+    std::memcpy(tf_value.data(), &tf, sizeof(uint32_t));
+    batch.Put(term_freq_cf_.load(), doc_term_key, tf_value);
+
+    // 4. Max-TF CF: term -> max(tf) via merge
+    batch.Merge(max_tf_cf_.load(), term, tf_value);
+  }
+
+  // 5. Doc-len CF: doc_id -> uint32_t doc_len
+  std::string doc_id_key(sizeof(uint32_t), '\0');
+  std::memcpy(doc_id_key.data(), &doc_id_32, sizeof(uint32_t));
+  std::string doc_len_value(sizeof(uint32_t), '\0');
+  std::memcpy(doc_len_value.data(), &doc_len, sizeof(uint32_t));
+  batch.Put(doc_len_cf_.load(), doc_id_key, doc_len_value);
+
+  if (!ctx_->db_->Write(ctx_->write_opts_, &batch).ok()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer::insert: write batch failed. field=", field_name_));
+  }
+
+  // 6. Update in-memory statistics atomically so concurrent search() calls
+  //    see up-to-date values for BM25 scoring.
+  const uint64_t new_total_docs =
+      total_docs_.fetch_add(1, std::memory_order_relaxed) + 1;
+  const uint64_t new_total_tokens =
+      total_tokens_.fetch_add(doc_len, std::memory_order_relaxed) + doc_len;
+
+  // Propagate updated stats to the scorer so that search() uses current avgdl.
+  if (scorer_) {
+    scorer_->update_stats(new_total_docs, new_total_tokens);
+  }
+
+  // CAS-update min_doc_count_ only when this document has tokens (doc_len > 0).
+  if (doc_len > 0) {
+    uint32_t cur = min_doc_count_.load(std::memory_order_relaxed);
+    while (doc_len < cur && !min_doc_count_.compare_exchange_weak(
+                                cur, doc_len, std::memory_order_relaxed)) {
+    }
+  }
+
+  return {};
+}
+
+Result<void> FtsColumnIndexer::flush() {
+  // safe access check
+
+  if (!stat_cf_) {
+    return {};
+  }
+
+  // Write total_docs and total_tokens to $SEGMENT_STAT CF.
+  // Use acquire ordering so we see all inserts that happened before flush().
+  const uint64_t snapshot_total_docs =
+      total_docs_.load(std::memory_order_acquire);
+  const uint64_t snapshot_total_tokens =
+      total_tokens_.load(std::memory_order_acquire);
+
+  ctx_->db_->Put(ctx_->write_opts_, stat_cf_, make_total_docs_key(field_name_),
+                 encode_uint64_value(snapshot_total_docs));
+  ctx_->db_->Put(ctx_->write_opts_, stat_cf_,
+                 make_total_tokens_key(field_name_),
+                 encode_uint64_value(snapshot_total_tokens));
+
+  return {};
+}
+
+// ============================================================
+// BitPacked conversion (called by MutableSegment::dump_fts_column_indexers)
+// ============================================================
+
+Result<void> FtsColumnIndexer::convert_postings_to_bitpacked() {
+  // safe access check
+
+  if (!postings_cf_ || !term_freq_cf_ || !doc_len_cf_ || !scorer_) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsColumnIndexer::convert_postings_to_bitpacked: not opened. field=",
+        field_name_));
+  }
+
+  // ---------------------------------------------------------------
+  // 1) Load doc_len_cf into an in-memory vector indexed by local doc_id.
+  //    Single segment is at most a few MB even for 1M docs (4B per doc),
+  //    so a flat vector is by far the cheapest lookup structure.
+  // ---------------------------------------------------------------
+  std::vector<uint32_t> doc_lens;
+  {
+    std::unique_ptr<rocksdb::Iterator> iter(
+        ctx_->db_->NewIterator(ctx_->read_opts_, doc_len_cf_.load()));
+    iter->SeekToFirst();
+    while (iter->Valid()) {
+      const std::string key = iter->key().ToString();
+      const std::string value = iter->value().ToString();
+      if (key.size() != sizeof(uint32_t) || value.size() != sizeof(uint32_t)) {
+        LOG_WARN(
+            "FtsColumnIndexer::convert_postings_to_bitpacked: malformed "
+            "doc_len entry. field[%s] key_size[%zu] value_size[%zu]",
+            field_name_.c_str(), key.size(), value.size());
+        iter->Next();
+        continue;
+      }
+      uint32_t local_doc_id = 0;
+      uint32_t doc_len = 0;
+      std::memcpy(&local_doc_id, key.data(), sizeof(uint32_t));
+      std::memcpy(&doc_len, value.data(), sizeof(uint32_t));
+      if (local_doc_id >= doc_lens.size()) {
+        // Resize with default 1 to avoid divide-by-zero / log(0) downstream
+        // if a stray doc_id ever shows up without a doc_len entry.
+        doc_lens.resize(local_doc_id + 1, 1);
+      }
+      doc_lens[local_doc_id] = doc_len;
+      iter->Next();
+    }
+  }
+
+  // ---------------------------------------------------------------
+  // 2) Streaming scan of term_freq_cf, grouped by term.
+  //    RocksDB BytewiseComparator + big-endian doc_id encoding guarantees
+  //    that within a term, doc_ids appear in ascending order — exactly what
+  //    BitPackedPostingList::encode() requires.
+  // ---------------------------------------------------------------
+  std::string current_term;
+  std::vector<uint32_t> doc_ids;
+  std::vector<uint32_t> tfs;
+  std::vector<uint32_t> term_doc_lens;  // reused buffer
+
+  auto flush_current_term = [&]() -> Result<void> {
+    if (current_term.empty() || doc_ids.empty()) {
+      return {};
+    }
+    // Idempotency: skip if this term's postings are already BitPacked.
+    // Important for crash-recovery — a re-run of dump after a partial
+    // conversion must not double-encode.
+    std::string existing;
+    auto get_ret =
+        ctx_->db_->Get(ctx_->read_opts_, postings_cf_, current_term, &existing);
+    if (get_ret.ok() && !existing.empty() &&
+        BitPackedPostingList::is_bitpacked_format(existing.data(),
+                                                  existing.size())) {
+      return {};
+    }
+
+    term_doc_lens.assign(doc_ids.size(), 1);
+    for (size_t i = 0; i < doc_ids.size(); ++i) {
+      const uint32_t did = doc_ids[i];
+      if (did < doc_lens.size() && doc_lens[did] > 0) {
+        term_doc_lens[i] = doc_lens[did];
+      }
+    }
+    std::string packed = BitPackedPostingList::encode(
+        doc_ids.data(), tfs.data(), term_doc_lens.data(), doc_ids.size(),
+        /*df=*/doc_ids.size(), *scorer_);
+    if (!ctx_->db_->Put(ctx_->write_opts_, postings_cf_, current_term, packed)
+             .ok()) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsColumnIndexer::convert_postings_to_bitpacked: put failed. field=",
+          field_name_, " term=", current_term));
+    }
+    return {};
+  };
+
+  {
+    std::unique_ptr<rocksdb::Iterator> iter(
+        ctx_->db_->NewIterator(ctx_->read_opts_, term_freq_cf_.load()));
+    iter->SeekToFirst();
+    while (iter->Valid()) {
+      const std::string key = iter->key().ToString();
+      const std::string value = iter->value().ToString();
+      std::string term;
+      uint32_t local_doc_id = 0;
+      if (!parse_doc_term_key(key, &term, &local_doc_id) ||
+          value.size() != sizeof(uint32_t)) {
+        LOG_WARN(
+            "FtsColumnIndexer::convert_postings_to_bitpacked: malformed "
+            "term_freq entry. field[%s] key_size[%zu] value_size[%zu]",
+            field_name_.c_str(), key.size(), value.size());
+        iter->Next();
+        continue;
+      }
+      uint32_t tf = 0;
+      std::memcpy(&tf, value.data(), sizeof(uint32_t));
+
+      if (term != current_term) {
+        auto ret = flush_current_term();
+        if (!ret) {
+          return ret;
+        }
+        current_term = std::move(term);
+        doc_ids.clear();
+        tfs.clear();
+      }
+      doc_ids.push_back(local_doc_id);
+      tfs.push_back(tf);
+      iter->Next();
+    }
+  }
+  // Flush the last term.
+  auto ret = flush_current_term();
+  if (!ret) {
+    return ret;
+  }
+
+  // ---------------------------------------------------------------
+  // 3) Clear $TF / $DOC_LEN / $MAX_TF CFs via DeleteRange.
+  //
+  // All payloads (tf, doc_len, max_score) have been inlined into the
+  // BitPacked postings in step 2.  Wiping them here ensures the SST files
+  // are cleaned up during the dump-side compaction, so the dumped immutable
+  // segment is significantly smaller.  MutableSegment then drops the CFs
+  // entirely after all indexers finish conversion.
+  //
+  // DeleteRange uses [begin, end) semantics; an empty begin and a 256-byte
+  // 0xFF end together cover every possible key in these CFs.
+  // ---------------------------------------------------------------
+  static const std::string kClearBegin{};
+  static const std::string kClearEnd(256, '\xFF');
+
+  const std::pair<const char *, rocksdb::ColumnFamilyHandle *> cfs_to_clear[] =
+      {
+          {"$TF", term_freq_cf_.load()},
+          {"$DOC_LEN", doc_len_cf_.load()},
+          {"$MAX_TF", max_tf_cf_.load()},
+      };
+  for (const auto &[cf_name, cf] : cfs_to_clear) {
+    if (cf == nullptr) {
+      continue;
+    }
+    if (!ctx_->db_->DeleteRange(ctx_->write_opts_, cf, kClearBegin, kClearEnd)
+             .ok()) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsColumnIndexer::convert_postings_to_bitpacked: failed to clear ",
+          cf_name, " CF. field=", field_name_));
+    }
+  }
+
+  return {};
+}
+
+// ============================================================
+// Private helper methods
+// ============================================================
+
+void FtsColumnIndexer::encode_varint(uint32_t value, std::string *output) {
+  while (value >= 0x80) {
+    output->push_back(static_cast<char>((value & 0x7F) | 0x80));
+    value >>= 7;
+  }
+  output->push_back(static_cast<char>(value));
+}
+
+std::string FtsColumnIndexer::encode_positions(
+    const std::vector<uint32_t> &positions) {
+  std::string result;
+  uint32_t prev_position = 0;
+  for (uint32_t position : positions) {
+    // Delta encoding: store the difference between adjacent positions
+    encode_varint(position - prev_position, &result);
+    prev_position = position;
+  }
+  return result;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_column_indexer.h b/src/db/index/column/fts_column/fts_column_indexer.h
new file mode 100644
index 000000000..48bf84805
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_column_indexer.h
@@ -0,0 +1,223 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+#include <zvec/db/schema.h>
+#include <zvec/db/status.h>
+#include "db/common/rocksdb_context.h"
+#include "db/index/column/fts_column/fts_types.h"
+#include "iterator/fts_doc_iterator.h"
+#include "tokenizer/tokenizer_factory.h"
+#include "bm25_scorer.h"
+#include "fts_query_ast.h"
+
+
+namespace zvec::fts {
+
+/*! Single document in FTS query results.
+ *
+ *  Note: `doc_id` here is the GLOBAL doc_id */
+struct FtsResult {
+  uint64_t doc_id{0};
+  float score{0.0f};
+
+  bool operator>(const FtsResult &other) const {
+    return score > other.score;
+  }
+};
+
+/*! FTS column indexer
+ *  Handles both read (search with BM25 + WAND) and write (insert / flush)
+ *  operations on a single FTS column backed by RocksDB.
+ *  Uses cross-CF WriteBatch to batch all per-document writes into a single
+ *  atomic RocksDB Write() call for optimal write throughput.
+ */
+class FtsColumnIndexer {
+ public:
+  FtsColumnIndexer() = default;
+  ~FtsColumnIndexer();
+
+  // -----------------------------------------------------------------
+  // Initialization
+  // -----------------------------------------------------------------
+
+  /*! Initialize for read+write (mutable path).
+   *  \param field_meta    Field meta describing this FTS field; provides both
+   *                       the field name and the tokenizer extra params used
+   *                       to acquire/release the shared pipeline.
+   *  \param ctx           RocksdbContext pointer
+   *  \param postings_cf   postings CF (main CF)
+   *  \param positions_cf  $POS CF
+   *  \param term_freq_cf  $TF CF
+   *  \param max_tf_cf     $MAX_TF CF
+   *  \param doc_len_cf    $DOC_LEN CF
+   *  \param stat_cf       $SEGMENT_STAT CF
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> open(FieldSchema::Ptr field_meta, RocksdbContext *ctx,
+                    rocksdb::ColumnFamilyHandle *postings_cf,
+                    rocksdb::ColumnFamilyHandle *positions_cf,
+                    rocksdb::ColumnFamilyHandle *term_freq_cf,
+                    rocksdb::ColumnFamilyHandle *max_tf_cf,
+                    rocksdb::ColumnFamilyHandle *doc_len_cf,
+                    rocksdb::ColumnFamilyHandle *stat_cf);
+
+  /*! Initialize for read-only (immutable / standalone reader path).
+   *  No tokenizer is acquired; insert() will fail if called.
+   *  \param field_name   Field name
+   *  \param ctx          RocksdbContext pointer
+   *  \param postings_cf  postings CF
+   *  \param positions_cf $POS CF
+   *  \param term_freq_cf $TF CF  (may be nullptr for immutable)
+   *  \param max_tf_cf    $MAX_TF CF (may be nullptr)
+   *  \param doc_len_cf   $DOC_LEN CF (may be nullptr)
+   *  \param stat_cf      $SEGMENT_STAT CF
+   *  \param bm25_params  BM25 parameters (k1, b)
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> open_reader(const std::string &field_name, RocksdbContext *ctx,
+                           rocksdb::ColumnFamilyHandle *postings_cf,
+                           rocksdb::ColumnFamilyHandle *positions_cf,
+                           rocksdb::ColumnFamilyHandle *term_freq_cf,
+                           rocksdb::ColumnFamilyHandle *max_tf_cf,
+                           rocksdb::ColumnFamilyHandle *doc_len_cf,
+                           rocksdb::ColumnFamilyHandle *stat_cf,
+                           BM25Params bm25_params = BM25Params{});
+
+  /*! Release all CF pointers and reset internal state.
+   *  Thread-safe: waits for in-flight search() calls to drain before
+   *  invalidating any state. Must be called before the underlying
+   *  RocksdbStore is closed.
+   *  \return Result<void> on success, or Status on failure (e.g. already
+   *  closed).
+   */
+  Result<void> close();
+
+  // -----------------------------------------------------------------
+  // Query
+  // -----------------------------------------------------------------
+
+  /*! Execute FTS query and return result list with BM25 scores
+   *  \param ast          Pre-parsed FTS AST (caller owns the parse step)
+   *  \param query_params Query parameters (topk, filter, etc.)
+   *  \return Result containing sorted results (descending score), or Status
+   */
+  Result<std::vector<FtsResult>> search(
+      const FtsAstNode &ast, const FtsQueryParams &query_params) const;
+
+  /*! Atomically reset $TF/$MAX_TF/$DOC_LEN CF pointers to nullptr.
+   *  Called before dropping these CFs so that concurrent search() calls
+   *  on the Roaring path gracefully degrade (return default tf=1/doc_len=1).
+   */
+  void reset_side_cfs();
+
+  // -----------------------------------------------------------------
+  // Write
+  // -----------------------------------------------------------------
+
+  /*! Insert FTS field content for a document
+   *  \param seg_doc_id  Segment-local document ID
+   *  \param text        UTF-8 encoded text content
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> insert(uint64_t seg_doc_id, const std::string &text);
+
+  /*! Flush in-memory statistics to RocksDB (called before segment dump)
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> flush();
+
+  /*! Convert all Roaring-format postings in postings_cf to BitPacked format
+   *  with inline tf/doc_len/max_score payloads, then DeleteRange-clear the
+   *  $TF, $DOC_LEN, and $MAX_TF CFs.
+   *
+   *  Called by MutableSegment::dump_fts_column_indexers() right before the
+   *  SST dump.  After all indexers finish conversion, MutableSegment drops
+   *  the $TF/$MAX_TF/$DOC_LEN CFs entirely (via reset_side_cfs() +
+   *  RocksdbStore::drop_column_family()), so the dumped immutable segment
+   *  no longer contains these CFs at all.
+   *
+   *  Idempotent: terms whose postings are already in BitPacked format are
+   *  skipped, so re-running after a partial-failure dump is safe.
+   *
+   *  Must be called after flush() so that the BM25 scorer used by encode()
+   *  sees the up-to-date segment statistics.
+   *
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> convert_postings_to_bitpacked();
+
+  uint64_t total_docs() const {
+    return total_docs_.load(std::memory_order_relaxed);
+  }
+  uint64_t total_tokens() const {
+    return total_tokens_.load(std::memory_order_relaxed);
+  }
+
+ private:
+  // --- Iterator tree construction (search internals) ---
+  Result<DocIteratorPtr> build_iterator(const FtsAstNode &node) const;
+  Result<DocIteratorPtr> build_term_iterator(const TermNode &term_node) const;
+  Result<DocIteratorPtr> build_phrase_iterator(
+      const PhraseNode &phrase_node) const;
+  Result<DocIteratorPtr> build_and_iterator(const AndNode &and_node) const;
+  Result<DocIteratorPtr> build_or_iterator(const OrNode &or_node) const;
+  Result<DocIteratorPtr> create_term_iterator_from_raw(
+      const std::string &term, rocksdb::PinnableSlice raw_data) const;
+  std::vector<rocksdb::PinnableSlice> batch_get_postings(
+      const std::vector<rocksdb::Slice> &terms) const;
+
+  // --- Write helpers ---
+  static void encode_varint(uint32_t value, std::string *output);
+  static std::string encode_positions(const std::vector<uint32_t> &positions);
+
+  // --- Tokenizer (write path only) ---
+  FieldSchema::Ptr field_meta_{};
+  TokenizerPipelinePtr tokenizer_pipeline_{nullptr};
+  std::shared_ptr<zvec::FtsIndexParams> fts_params_;
+
+  // --- Reader state ---
+  std::string field_name_;
+  RocksdbContext *ctx_{nullptr};
+  BM25ScorerPtr scorer_;
+
+  rocksdb::ColumnFamilyHandle *postings_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *positions_cf_{nullptr};
+  std::atomic<rocksdb::ColumnFamilyHandle *> term_freq_cf_{nullptr};
+  std::atomic<rocksdb::ColumnFamilyHandle *> max_tf_cf_{nullptr};
+  std::atomic<rocksdb::ColumnFamilyHandle *> doc_len_cf_{nullptr};
+  mutable std::atomic<int> cf_counter_{0};
+  std::atomic<bool> cf_dropped_{false};
+  rocksdb::ColumnFamilyHandle *stat_cf_{nullptr};
+
+  std::atomic<uint32_t> min_doc_count_{std::numeric_limits<uint32_t>::max()};
+
+  mutable std::atomic<int> counter_{0};
+  std::atomic<bool> opened_{false};
+
+  // --- Write-path statistics ---
+  std::atomic<uint64_t> total_docs_{0};
+  std::atomic<uint64_t> total_tokens_{0};
+};
+
+using FtsColumnIndexerPtr = std::shared_ptr<FtsColumnIndexer>;
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_index_results.h b/src/db/index/column/fts_column/fts_index_results.h
new file mode 100644
index 000000000..dc65c42a8
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_index_results.h
@@ -0,0 +1,85 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include "db/common/constants.h"
+#include "db/index/column/common/index_results.h"
+#include "db/index/column/fts_column/fts_column_indexer.h"
+
+namespace zvec {
+
+// IndexResults adapter for FTS search results (doc_id + BM25 score pairs).
+// Results are ordered by descending score from FtsColumnIndexer::search().
+class FtsIndexResults : public IndexResults,
+                        public std::enable_shared_from_this<FtsIndexResults> {
+ public:
+  using Ptr = std::shared_ptr<FtsIndexResults>;
+
+  explicit FtsIndexResults(std::vector<fts::FtsResult> results)
+      : results_(std::move(results)) {}
+
+  size_t count() const override {
+    return results_.size();
+  }
+
+  const std::vector<fts::FtsResult> &results() const {
+    return results_;
+  }
+
+  class FtsIterator : public Iterator {
+   public:
+    explicit FtsIterator(std::shared_ptr<const FtsIndexResults> owner)
+        : owner_(std::move(owner)), pos_(0) {}
+
+    idx_t doc_id() const override {
+      if (pos_ < owner_->results_.size()) {
+        return static_cast<idx_t>(owner_->results_[pos_].doc_id);
+      }
+      return INVALID_DOC_ID;
+    }
+
+    float score() const override {
+      if (pos_ < owner_->results_.size()) {
+        return owner_->results_[pos_].score;
+      }
+      return 0.0f;
+    }
+
+    void next() override {
+      if (pos_ < owner_->results_.size()) {
+        ++pos_;
+      }
+    }
+
+    bool valid() const override {
+      return pos_ < owner_->results_.size();
+    }
+
+   private:
+    std::shared_ptr<const FtsIndexResults> owner_;
+    size_t pos_;
+  };
+
+  IteratorUPtr create_iterator() override {
+    return std::make_unique<FtsIterator>(shared_from_this());
+  }
+
+ private:
+  std::vector<fts::FtsResult> results_;
+};
+
+}  // namespace zvec
diff --git a/src/db/index/column/fts_column/fts_query_ast.h b/src/db/index/column/fts_column/fts_query_ast.h
new file mode 100644
index 000000000..45a9a9a94
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_query_ast.h
@@ -0,0 +1,155 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace zvec::fts {
+
+/*! AST node type enumeration
+ */
+enum class FtsNodeType {
+  TERM,    // Term node, e.g., "vector"
+  PHRASE,  // Phrase node, e.g., "\"exact phrase\""
+  AND,     // AND combination node (intersection)
+  OR,      // OR combination node (union)
+};
+
+/*! AST node base class
+ *  All FTS AST nodes carry must/must_not modifiers so that the +/- prefix
+ *  (and AND NOT semantics) can be applied uniformly to terms, phrases and
+ *  composite (AND/OR) sub-expressions.
+ */
+struct FtsAstNode {
+  bool must{false};      // Prefix + means must
+  bool must_not{false};  // Prefix - / right-hand side of AND NOT means must_not
+
+  virtual ~FtsAstNode() = default;
+  virtual FtsNodeType type() const = 0;
+
+  // Return a human-readable text representation for debugging / logging
+  virtual std::string text() const = 0;
+
+ protected:
+  // Helper: prepend +/- modifier prefix
+  std::string modifier_prefix() const {
+    if (must) {
+      return "+";
+    }
+    if (must_not) {
+      return "-";
+    }
+    return "";
+  }
+};
+
+using FtsAstNodePtr = std::unique_ptr<FtsAstNode>;
+
+/*! Term node
+ *  Represents a single query term, can have must (+) or must_not (-) modifiers
+ *  inherited from FtsAstNode.
+ */
+struct TermNode : public FtsAstNode {
+  std::string term;
+
+  explicit TermNode(std::string term_text, bool is_must = false,
+                    bool is_must_not = false)
+      : term(std::move(term_text)) {
+    must = is_must;
+    must_not = is_must_not;
+  }
+
+  FtsNodeType type() const override {
+    return FtsNodeType::TERM;
+  }
+
+  std::string text() const override {
+    return modifier_prefix() + term;
+  }
+};
+
+/*! Phrase node
+ *  Represents an exact phrase query, e.g., "exact phrase"
+ *  Requires exact match of word order and adjacent positions
+ */
+struct PhraseNode : public FtsAstNode {
+  std::vector<std::string> terms;  // Individual words in the phrase
+
+  FtsNodeType type() const override {
+    return FtsNodeType::PHRASE;
+  }
+
+  std::string text() const override {
+    std::string result = modifier_prefix() + "\"";
+    for (size_t i = 0; i < terms.size(); ++i) {
+      if (i > 0) {
+        result += " ";
+      }
+      result += terms[i];
+    }
+    result += "\"";
+    return result;
+  }
+};
+
+/*! AND combination node
+ *  All child nodes must match (intersection semantics)
+ */
+struct AndNode : public FtsAstNode {
+  std::vector<FtsAstNodePtr> children;
+
+  FtsNodeType type() const override {
+    return FtsNodeType::AND;
+  }
+
+  std::string text() const override {
+    std::string result = modifier_prefix() + "AND(";
+    for (size_t i = 0; i < children.size(); ++i) {
+      if (i > 0) {
+        result += " ";
+      }
+      result += children[i]->text();
+    }
+    result += ")";
+    return result;
+  }
+};
+
+/*! OR combination node
+ *  Any child node matches (union semantics)
+ */
+struct OrNode : public FtsAstNode {
+  std::vector<FtsAstNodePtr> children;
+
+  FtsNodeType type() const override {
+    return FtsNodeType::OR;
+  }
+
+  std::string text() const override {
+    std::string result = modifier_prefix() + "OR(";
+    for (size_t i = 0; i < children.size(); ++i) {
+      if (i > 0) {
+        result += " ";
+      }
+      result += children[i]->text();
+    }
+    result += ")";
+    return result;
+  }
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_rocksdb_merge.cc b/src/db/index/column/fts_column/fts_rocksdb_merge.cc
new file mode 100644
index 000000000..737e321ca
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_rocksdb_merge.cc
@@ -0,0 +1,181 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_rocksdb_merge.h"
+#include <cstring>
+#include <roaring/roaring.h>
+#include <zvec/ailego/logger/logger.h>
+#include "db/index/column/fts_column/posting/bitpacked_posting_list.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// Helper: deserialize a posting value (Roaring Bitmap or BitPacked) into a
+// Roaring Bitmap. Caller owns the returned bitmap and must free it.
+// Returns nullptr on failure.
+// ============================================================
+
+static roaring_bitmap_t *deserialize_posting_to_roaring(const char *data,
+                                                        size_t size) {
+  if (BitPackedPostingList::is_bitpacked_format(data, size)) {
+    // Decode BitPacked format into a new Roaring Bitmap
+    BitPackedPostingIterator bp_iter;
+    if (bp_iter.open(data, size) != 0) {
+      LOG_ERROR(
+          "FtsPostingsMerge: failed to open bitpacked posting during merge, "
+          "size[%zu]",
+          size);
+      return nullptr;
+    }
+    roaring_bitmap_t *bitmap = roaring_bitmap_create();
+    uint32_t doc_id = bp_iter.next_doc();
+    while (doc_id != BitPackedPostingIterator::NO_MORE_DOCS) {
+      roaring_bitmap_add(bitmap, doc_id);
+      doc_id = bp_iter.next_doc();
+    }
+    return bitmap;
+  }
+
+  // Roaring Bitmap format
+  return roaring_bitmap_portable_deserialize_safe(data, size);
+}
+
+// ============================================================
+// FtsPostingsMerge: Roaring Bitmap OR merge (supports BitPacked input)
+// ============================================================
+
+bool FtsPostingsMerge::FullMergeV2(const MergeOperationInput &merge_in,
+                                   MergeOperationOutput *merge_out) const {
+  // If there is only one operand and no existing_value, return directly
+  if (merge_in.existing_value == nullptr && merge_in.operand_list.size() == 1) {
+    merge_out->new_value = std::string(merge_in.operand_list[0].data(),
+                                       merge_in.operand_list[0].size());
+    return true;
+  }
+
+  // Deserialize bitmap from existing_value
+  roaring_bitmap_t *result_bitmap = roaring_bitmap_create();
+
+  if (merge_in.existing_value != nullptr) {
+    roaring_bitmap_t *existing_bitmap = deserialize_posting_to_roaring(
+        merge_in.existing_value->data(), merge_in.existing_value->size());
+    if (existing_bitmap != nullptr) {
+      roaring_bitmap_or_inplace(result_bitmap, existing_bitmap);
+      roaring_bitmap_free(existing_bitmap);
+    }
+  }
+
+  // Merge all operands
+  for (const auto &operand : merge_in.operand_list) {
+    roaring_bitmap_t *operand_bitmap =
+        deserialize_posting_to_roaring(operand.data(), operand.size());
+    if (operand_bitmap != nullptr) {
+      roaring_bitmap_or_inplace(result_bitmap, operand_bitmap);
+      roaring_bitmap_free(operand_bitmap);
+    }
+  }
+
+  // Serialize result as Roaring Bitmap
+  roaring_bitmap_run_optimize(result_bitmap);
+  size_t serialized_size = roaring_bitmap_portable_size_in_bytes(result_bitmap);
+  merge_out->new_value.resize(serialized_size);
+  roaring_bitmap_portable_serialize(result_bitmap, merge_out->new_value.data());
+  roaring_bitmap_free(result_bitmap);
+  return true;
+}
+
+bool FtsPostingsMerge::PartialMerge(const rocksdb::Slice & /*key*/,
+                                    const rocksdb::Slice &left_operand,
+                                    const rocksdb::Slice &right_operand,
+                                    std::string *new_value,
+                                    rocksdb::Logger * /*logger*/) const {
+  roaring_bitmap_t *left_bitmap =
+      deserialize_posting_to_roaring(left_operand.data(), left_operand.size());
+  roaring_bitmap_t *right_bitmap = deserialize_posting_to_roaring(
+      right_operand.data(), right_operand.size());
+
+  if (left_bitmap == nullptr || right_bitmap == nullptr) {
+    LOG_ERROR(
+        "FtsPostingsMerge::PartialMerge: failed to deserialize operand. "
+        "left_size[%zu] right_size[%zu]",
+        left_operand.size(), right_operand.size());
+    if (left_bitmap != nullptr) roaring_bitmap_free(left_bitmap);
+    if (right_bitmap != nullptr) roaring_bitmap_free(right_bitmap);
+    return false;
+  }
+
+  roaring_bitmap_or_inplace(left_bitmap, right_bitmap);
+  roaring_bitmap_free(right_bitmap);
+
+  size_t serialized_size = roaring_bitmap_portable_size_in_bytes(left_bitmap);
+  new_value->resize(serialized_size);
+  roaring_bitmap_portable_serialize(left_bitmap, new_value->data());
+  roaring_bitmap_free(left_bitmap);
+  return true;
+}
+
+// ============================================================
+// FtsMaxTfMerge: uint32_t max merge
+// ============================================================
+
+bool FtsMaxTfMerge::FullMergeV2(const MergeOperationInput &merge_in,
+                                MergeOperationOutput *merge_out) const {
+  uint32_t max_tf = 0;
+
+  if (merge_in.existing_value != nullptr &&
+      merge_in.existing_value->size() >= sizeof(uint32_t)) {
+    std::memcpy(&max_tf, merge_in.existing_value->data(), sizeof(uint32_t));
+  }
+
+  for (const auto &operand : merge_in.operand_list) {
+    if (operand.size() >= sizeof(uint32_t)) {
+      uint32_t operand_tf = 0;
+      std::memcpy(&operand_tf, operand.data(), sizeof(uint32_t));
+      if (operand_tf > max_tf) {
+        max_tf = operand_tf;
+      }
+    }
+  }
+
+  merge_out->new_value.resize(sizeof(uint32_t));
+  std::memcpy(merge_out->new_value.data(), &max_tf, sizeof(uint32_t));
+  return true;
+}
+
+bool FtsMaxTfMerge::PartialMerge(const rocksdb::Slice & /*key*/,
+                                 const rocksdb::Slice &left_operand,
+                                 const rocksdb::Slice &right_operand,
+                                 std::string *new_value,
+                                 rocksdb::Logger * /*logger*/) const {
+  if (left_operand.size() < sizeof(uint32_t) ||
+      right_operand.size() < sizeof(uint32_t)) {
+    LOG_ERROR(
+        "FtsMaxTfMerge::PartialMerge: operand too small. "
+        "left_size[%zu] right_size[%zu] expected[%zu]",
+        left_operand.size(), right_operand.size(), sizeof(uint32_t));
+    return false;
+  }
+
+  uint32_t left_tf = 0;
+  uint32_t right_tf = 0;
+  std::memcpy(&left_tf, left_operand.data(), sizeof(uint32_t));
+  std::memcpy(&right_tf, right_operand.data(), sizeof(uint32_t));
+
+  uint32_t max_tf = (left_tf > right_tf) ? left_tf : right_tf;
+  new_value->resize(sizeof(uint32_t));
+  std::memcpy(new_value->data(), &max_tf, sizeof(uint32_t));
+  return true;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_rocksdb_merge.h b/src/db/index/column/fts_column/fts_rocksdb_merge.h
new file mode 100644
index 000000000..1bed8f4b6
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_rocksdb_merge.h
@@ -0,0 +1,59 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <rocksdb/merge_operator.h>
+
+namespace zvec::fts {
+
+/*! FTS postings CF-specific Merge Operator
+ *  Performs OR merge on Roaring Bitmap serialized values, used for
+ * incrementally updating term document lists
+ */
+class FtsPostingsMerge : public ROCKSDB_NAMESPACE::MergeOperator {
+ public:
+  bool FullMergeV2(const MergeOperationInput &merge_in,
+                   MergeOperationOutput *merge_out) const override;
+
+  bool PartialMerge(const rocksdb::Slice &key,
+                    const rocksdb::Slice &left_operand,
+                    const rocksdb::Slice &right_operand, std::string *new_value,
+                    rocksdb::Logger *logger) const override;
+
+  const char *Name() const override {
+    return "FtsPostingsMerge";
+  }
+};
+
+/*! FTS $MAX_TF CF-specific Merge Operator
+ *  Performs max merge on uint32_t values, used for maintaining the maximum term
+ * frequency for each term (WAND upper bound)
+ */
+class FtsMaxTfMerge : public ROCKSDB_NAMESPACE::MergeOperator {
+ public:
+  bool FullMergeV2(const MergeOperationInput &merge_in,
+                   MergeOperationOutput *merge_out) const override;
+
+  bool PartialMerge(const rocksdb::Slice &key,
+                    const rocksdb::Slice &left_operand,
+                    const rocksdb::Slice &right_operand, std::string *new_value,
+                    rocksdb::Logger *logger) const override;
+
+  const char *Name() const override {
+    return "FtsMaxTfMerge";
+  }
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_rocksdb_reducer.cc b/src/db/index/column/fts_column/fts_rocksdb_reducer.cc
new file mode 100644
index 000000000..f4ea5aa93
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_rocksdb_reducer.cc
@@ -0,0 +1,488 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_rocksdb_reducer.h"
+#include <cstring>
+#include <vector>
+#include <zvec/ailego/logger/logger.h>
+#include <zvec/db/status.h>
+#include "db/index/column/fts_column/fts_utils.h"
+#include "db/index/column/fts_column/posting/bitpacked_posting_list.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// Design notes
+// ============================================================
+//
+// Every immutable FTS segment stores its data in three CFs:
+//   - postings_cf   : term -> BitPacked posting list (inline
+//                     tf / doc_len / per-block max_score)
+//   - positions_cf  : term\0doc_id -> varint delta-encoded positions
+//                     (needed for phrase queries)
+//   - stat_cf       : field_name_total_docs / field_name_total_tokens
+//
+// The reducer performs a multi-way merge of N source segments into one
+// destination segment.  It iterates each source segment's BitPacked
+// postings_cf, decodes (doc_id, tf, doc_len) triples directly from the
+// inline payloads, applies the delete filter, remaps doc_ids to the new
+// segment's local range, and emits a single merged BitPacked posting list
+// per term into dst_postings_cf.  positions_cf is merged key-by-key for
+// phrase support.  stat_cf is recomputed from the surviving docs.
+//
+// All input postings_cf values must be in BitPacked format.
+//
+// doc_id encoding contract (aligned with InvertRocksdbStreamer2):
+// every src segment's RocksDB stores LOCAL doc_ids, i.e.
+//   local_doc_id = global_doc_id - segment_stats[i].min_doc_id
+// so that values fit into uint32_t and reduce_* logic can safely
+// reconstruct global_doc_id via
+//   global_doc_id = stats.min_doc_id + local_doc_id
+// and remap into the dst segment local space via
+//   new_local_doc_id = global_doc_id - dst_min_doc_id_.
+// FtsColumnIndexer::insert() is responsible for storing local doc_id
+// (see start_doc_id_ in FtsColumnIndexer).
+//
+// Two-pass streaming design:
+//
+// Pass 1 (collect_effective_stats): iterates all source posting lists to
+// compute effective_total_docs_ and effective_total_tokens_ WITHOUT
+// storing any PostingEntry.
+// - effective_total_docs_ is derived from each segment's
+//   [min_doc_id, max_doc_id] range minus filtered docs.
+// - effective_total_tokens_ is accumulated from inline doc_len payloads
+//   of surviving docs (empty docs contribute 0).
+// - Per-segment seen-doc dedup uses vector<bool> instead of
+//   unordered_set<uint32_t> (~125KB vs ~40MB per million docs).
+//
+// Pass 2 (merge_and_flush_postings): opens N RocksDB iterators (one per
+// source segment) and performs a multi-way merge by term in lexicographic
+// order.  For each term, entries from all segments are aggregated into a
+// temporary vector, immediately encoded as BitPacked and put to
+// dst_postings_cf, then the vector is cleared.  Peak memory is bounded
+// by the single largest term's entries rather than all terms combined.
+//
+// No Roaring intermediate format is involved, and no $TF/$MAX_TF/$DOC_LEN
+// side CF is read or written.
+
+// ============================================================
+// Public interface
+// ============================================================
+
+Result<void> FtsRocksdbReducer::init(
+    const std::string &field_name, RocksdbContext *ctx,
+    rocksdb::ColumnFamilyHandle *dst_postings_cf,
+    rocksdb::ColumnFamilyHandle *dst_positions_cf,
+    rocksdb::ColumnFamilyHandle *dst_stat_cf) {
+  if (!dst_postings_cf || !dst_positions_cf || !dst_stat_cf) {
+    return tl::make_unexpected(Status::InvalidArgument(
+        "FtsRocksdbReducer: null destination CF. field=", field_name));
+  }
+
+  field_name_ = field_name;
+  ctx_ = ctx;
+  dst_postings_cf_ = dst_postings_cf;
+  dst_positions_cf_ = dst_positions_cf;
+  dst_stat_cf_ = dst_stat_cf;
+
+  state_ = STATE_INITED;
+  return {};
+}
+
+Result<void> FtsRocksdbReducer::cleanup() {
+  segment_stats_.clear();
+  src_ctxs_.clear();
+  src_postings_cfs_.clear();
+  src_positions_cfs_.clear();
+  num_segments_ = 0;
+  state_ = STATE_UNINITED;
+  return {};
+}
+
+Result<void> FtsRocksdbReducer::feed(
+    FtsSegmentStats segment_stats, RocksdbContext *src_ctx,
+    rocksdb::ColumnFamilyHandle *src_postings_cf,
+    rocksdb::ColumnFamilyHandle *src_positions_cf) {
+  if (state_ != STATE_INITED && state_ != STATE_FEED) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsRocksdbReducer: call init() before feed(). field=", field_name_));
+  }
+
+  if (!src_postings_cf || !src_positions_cf) {
+    return tl::make_unexpected(Status::InvalidArgument(
+        "FtsRocksdbReducer: null source CF. field=", field_name_));
+  }
+
+  // Track global min_doc_id from the first segment; require consecutive
+  // doc_id ranges across segments so that downstream remap is safe.
+  if (segment_stats_.empty()) {
+    min_doc_id_ = segment_stats.min_doc_id;
+  } else {
+    if (segment_stats.min_doc_id != segment_stats_.back().max_doc_id + 1) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsRocksdbReducer: segments not in consecutive doc_id order. field=",
+          field_name_));
+    }
+  }
+
+  segment_stats_.emplace_back(std::move(segment_stats));
+  src_ctxs_.emplace_back(src_ctx);
+  src_postings_cfs_.emplace_back(src_postings_cf);
+  src_positions_cfs_.emplace_back(src_positions_cf);
+  ++num_segments_;
+
+  state_ = STATE_FEED;
+  return {};
+}
+
+Result<void> FtsRocksdbReducer::reduce(const IndexFilter &filter) {
+  if (state_ != STATE_FEED || num_segments_ == 0) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsRocksdbReducer: call feed() before reduce(). field=", field_name_));
+  }
+
+  effective_total_docs_ = 0;
+  effective_total_tokens_ = 0;
+
+  // Phase 1: Streaming per-term merge across all source segments.  Decodes
+  // BitPacked postings inline, applies the filter, remaps doc_ids, and
+  // emits one merged BitPacked posting list per term to dst_postings_cf.
+  // Also accumulates effective_total_docs_ / effective_total_tokens_ from
+  // inline doc_len payloads (each surviving doc counted once across all
+  // its terms within a segment).
+  auto ret = reduce_postings(filter);
+  if (!ret) {
+    LOG_ERROR("FtsRocksdbReducer: reduce_postings failed. field[%s]",
+              field_name_.c_str());
+    return ret;
+  }
+
+  // Phase 2: Merge positions CF per segment for phrase query support.
+  for (uint32_t segment_index = 0; segment_index < num_segments_;
+       ++segment_index) {
+    ret = reduce_positions(segment_index, filter);
+    if (!ret) {
+      LOG_ERROR(
+          "FtsRocksdbReducer: reduce_positions failed. segment[%u] field[%s]",
+          segment_index, field_name_.c_str());
+      return ret;
+    }
+  }
+
+  // Phase 3: Persist effective stats so search-time IDF / avgdl matches the
+  // encode-time block_max_score (single source of truth, derived from the
+  // documents that actually survived the filter).
+  ret = flush_stat(effective_total_docs_, effective_total_tokens_);
+  if (!ret) {
+    LOG_ERROR("FtsRocksdbReducer: flush_stat failed. field[%s]",
+              field_name_.c_str());
+    return ret;
+  }
+
+  state_ = STATE_REDUCE;
+  LOG_INFO(
+      "FtsRocksdbReducer: reduce done. field[%s] segments[%u] "
+      "effective_docs[%zu] effective_tokens[%zu]",
+      field_name_.c_str(), num_segments_, (size_t)effective_total_docs_,
+      (size_t)effective_total_tokens_);
+  return {};
+}
+
+// ============================================================
+// Private: streaming postings merge (single stage, BitPacked in/out)
+// ============================================================
+
+Result<void> FtsRocksdbReducer::reduce_postings(const IndexFilter &filter) {
+  // Pass 1: collect effective stats (no PostingEntry storage).
+  auto ret = collect_effective_stats(filter);
+  if (!ret) {
+    return ret;
+  }
+
+  // Initialize BM25 scorer with final effective stats.
+  scorer_ = std::make_shared<BM25Scorer>();
+  scorer_->update_stats(effective_total_docs_, effective_total_tokens_);
+
+  // Pass 2: multi-way merge + streaming encode/flush.
+  return merge_and_flush_postings(filter);
+}
+
+// ============================================================
+// Private: Pass 1 — collect effective stats without storing entries
+// ============================================================
+
+Result<void> FtsRocksdbReducer::collect_effective_stats(
+    const IndexFilter &filter) {
+  effective_total_docs_ = 0;
+  effective_total_tokens_ = 0;
+
+  for (uint32_t seg = 0; seg < num_segments_; ++seg) {
+    const auto &stats = segment_stats_[seg];
+    const uint64_t seg_doc_count = stats.max_doc_id - stats.min_doc_id + 1;
+
+    // ---------- effective_total_docs_: from doc_id range - filtered ----------
+    // Count how many docs in [min_doc_id, max_doc_id] survive the filter.
+    // This includes empty docs (no tokens), matching mutable indexer semantics
+    // where total_docs_++ on every insert regardless of doc_len.
+    uint64_t seg_filtered = 0;
+    for (uint64_t gid = stats.min_doc_id; gid <= stats.max_doc_id; ++gid) {
+      if (filter.is_filtered(gid)) {
+        ++seg_filtered;
+      }
+    }
+    effective_total_docs_ += (seg_doc_count - seg_filtered);
+
+    // ---------- effective_total_tokens_: from posting inline doc_len
+    // ---------- Use vector<bool> for per-segment seen-doc dedup (local_doc_id
+    // is a contiguous small integer).  Memory: ~125KB per million docs vs ~40MB
+    // for unordered_set<uint32_t>.
+    const uint64_t local_range = seg_doc_count;
+    std::vector<bool> seen_docs(local_range, false);
+
+    auto *src_cf = src_postings_cfs_[seg];
+    auto iter = std::unique_ptr<rocksdb::Iterator>(
+        src_ctxs_[seg]->db_->NewIterator(src_ctxs_[seg]->read_opts_, src_cf));
+    iter->SeekToFirst();
+
+    while (iter->Valid()) {
+      const std::string posting_data = iter->value().ToString();
+
+      if (!BitPackedPostingList::is_bitpacked_format(posting_data.data(),
+                                                     posting_data.size())) {
+        return tl::make_unexpected(Status::InternalError(
+            "FtsRocksdbReducer: source postings is not BitPacked. field=",
+            field_name_));
+      }
+
+      BitPackedPostingIterator bp_iter;
+      if (bp_iter.open(posting_data.data(), posting_data.size()) != 0) {
+        return tl::make_unexpected(Status::InternalError(
+            "FtsRocksdbReducer: failed to open bitpacked postings. field=",
+            field_name_));
+      }
+
+      uint32_t local_doc_id = bp_iter.next_doc();
+      while (local_doc_id != BitPackedPostingIterator::NO_MORE_DOCS) {
+        const uint64_t global_doc_id =
+            stats.min_doc_id + static_cast<uint64_t>(local_doc_id);
+        if (!filter.is_filtered(global_doc_id)) {
+          if (local_doc_id < local_range && !seen_docs[local_doc_id]) {
+            seen_docs[local_doc_id] = true;
+            effective_total_tokens_ += bp_iter.doc_len();
+          }
+        }
+        local_doc_id = bp_iter.next_doc();
+      }
+      iter->Next();
+    }
+  }
+
+  LOG_INFO(
+      "FtsRocksdbReducer: collect_effective_stats done. field[%s] "
+      "effective_docs[%zu] effective_tokens[%zu]",
+      field_name_.c_str(), (size_t)effective_total_docs_,
+      (size_t)effective_total_tokens_);
+  return {};
+}
+
+// ============================================================
+// Private: Pass 2 — multi-way merge + streaming encode/flush
+// ============================================================
+
+Result<void> FtsRocksdbReducer::merge_and_flush_postings(
+    const IndexFilter &filter) {
+  struct PostingEntry {
+    uint32_t doc_id;
+    uint32_t tf;
+    uint32_t doc_len;
+  };
+
+  // Open N iterators, one per source segment.
+  struct SegmentCursor {
+    uint32_t segment_index;
+    std::unique_ptr<rocksdb::Iterator> iter;
+    const FtsSegmentStats *stats;
+  };
+  std::vector<SegmentCursor> cursors;
+  cursors.reserve(num_segments_);
+  for (uint32_t i = 0; i < num_segments_; ++i) {
+    auto it = std::unique_ptr<rocksdb::Iterator>(src_ctxs_[i]->db_->NewIterator(
+        src_ctxs_[i]->read_opts_, src_postings_cfs_[i]));
+    it->SeekToFirst();
+    cursors.push_back(SegmentCursor{i, std::move(it), &segment_stats_[i]});
+  }
+
+  // Reusable buffers.
+  std::vector<PostingEntry> term_entries;
+  std::vector<uint32_t> doc_ids_buf, tfs_buf, doc_lens_buf;
+
+  while (true) {
+    // Find the lexicographically smallest current term across all cursors.
+    std::string min_term;
+    bool found = false;
+    for (auto &c : cursors) {
+      if (!c.iter->Valid()) {
+        continue;
+      }
+      const std::string t = c.iter->key().ToString();
+      if (!found || t < min_term) {
+        min_term = t;
+        found = true;
+      }
+    }
+    if (!found) {
+      break;  // All iterators exhausted.
+    }
+
+    // Collect entries for min_term from every cursor that has it.
+    // Process cursors in segment order to maintain doc_id ascending order.
+    term_entries.clear();
+    for (auto &c : cursors) {
+      if (!c.iter->Valid()) {
+        continue;
+      }
+      if (c.iter->key().ToString() != min_term) {
+        continue;
+      }
+
+      const std::string posting_data = c.iter->value().ToString();
+      if (!BitPackedPostingList::is_bitpacked_format(posting_data.data(),
+                                                     posting_data.size())) {
+        return tl::make_unexpected(Status::InternalError(
+            "FtsRocksdbReducer: source postings is not BitPacked. field=",
+            field_name_, " term=", min_term));
+      }
+
+      BitPackedPostingIterator bp_iter;
+      if (bp_iter.open(posting_data.data(), posting_data.size()) != 0) {
+        return tl::make_unexpected(Status::InternalError(
+            "FtsRocksdbReducer: failed to open bitpacked postings. field=",
+            field_name_, " term=", min_term));
+      }
+
+      term_entries.reserve(term_entries.size() + bp_iter.cost());
+      uint32_t local_doc_id = bp_iter.next_doc();
+      while (local_doc_id != BitPackedPostingIterator::NO_MORE_DOCS) {
+        const uint64_t global_doc_id =
+            c.stats->min_doc_id + static_cast<uint64_t>(local_doc_id);
+        if (!filter.is_filtered(global_doc_id)) {
+          const uint32_t new_doc_id =
+              static_cast<uint32_t>(global_doc_id - min_doc_id_);
+          term_entries.push_back(
+              {new_doc_id, bp_iter.term_freq(), bp_iter.doc_len()});
+        }
+        local_doc_id = bp_iter.next_doc();
+      }
+      c.iter->Next();  // Advance past this term in this cursor.
+    }
+
+    if (term_entries.empty()) {
+      continue;
+    }
+
+    // Encode and put immediately — peak memory is one term's entries.
+    doc_ids_buf.clear();
+    tfs_buf.clear();
+    doc_lens_buf.clear();
+    doc_ids_buf.reserve(term_entries.size());
+    tfs_buf.reserve(term_entries.size());
+    doc_lens_buf.reserve(term_entries.size());
+    for (const auto &e : term_entries) {
+      doc_ids_buf.push_back(e.doc_id);
+      tfs_buf.push_back(e.tf);
+      doc_lens_buf.push_back(e.doc_len);
+    }
+
+    std::string packed = BitPackedPostingList::encode(
+        doc_ids_buf.data(), tfs_buf.data(), doc_lens_buf.data(),
+        doc_ids_buf.size(), doc_ids_buf.size(), *scorer_);
+
+    if (!ctx_->db_->Put(ctx_->write_opts_, dst_postings_cf_, min_term, packed)
+             .ok()) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsRocksdbReducer: failed to put bitpacked postings. field=",
+          field_name_));
+    }
+  }
+
+  return {};
+}
+
+Result<void> FtsRocksdbReducer::reduce_positions(uint32_t segment_index,
+                                                 const IndexFilter &filter) {
+  const FtsSegmentStats &stats = segment_stats_[segment_index];
+  auto *src_positions_cf = src_positions_cfs_[segment_index];
+
+  auto iter = std::unique_ptr<rocksdb::Iterator>(
+      src_ctxs_[segment_index]->db_->NewIterator(
+          src_ctxs_[segment_index]->read_opts_, src_positions_cf));
+  iter->SeekToFirst();
+
+  for (; iter->Valid(); iter->Next()) {
+    const std::string key = iter->key().ToString();
+
+    std::string term;
+    uint32_t local_doc_id = 0;
+    if (!parse_doc_term_key(key, &term, &local_doc_id)) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsRocksdbReducer: malformed positions key. field=", field_name_));
+    }
+
+    const uint64_t global_doc_id =
+        stats.min_doc_id + static_cast<uint64_t>(local_doc_id);
+    if (filter.is_filtered(global_doc_id)) {
+      continue;
+    }
+
+    const uint32_t new_doc_id =
+        static_cast<uint32_t>(global_doc_id - min_doc_id_);
+    const std::string new_key = make_doc_term_key(term, new_doc_id);
+
+    if (!ctx_->db_
+             ->Put(ctx_->write_opts_, dst_positions_cf_, new_key,
+                   iter->value().ToString())
+             .ok()) {
+      return tl::make_unexpected(Status::InternalError(
+          "FtsRocksdbReducer: failed to write positions. field=", field_name_));
+    }
+  }
+
+  return {};
+}
+
+Result<void> FtsRocksdbReducer::flush_stat(uint64_t total_docs,
+                                           uint64_t total_tokens) {
+  if (!ctx_->db_
+           ->Put(ctx_->write_opts_, dst_stat_cf_,
+                 make_total_docs_key(field_name_),
+                 encode_uint64_value(total_docs))
+           .ok()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsRocksdbReducer: failed to write total_docs. field=", field_name_));
+  }
+
+  if (!ctx_->db_
+           ->Put(ctx_->write_opts_, dst_stat_cf_,
+                 make_total_tokens_key(field_name_),
+                 encode_uint64_value(total_tokens))
+           .ok()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FtsRocksdbReducer: failed to write total_tokens. field=",
+        field_name_));
+  }
+
+  return {};
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_rocksdb_reducer.h b/src/db/index/column/fts_column/fts_rocksdb_reducer.h
new file mode 100644
index 000000000..389b0d4f2
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_rocksdb_reducer.h
@@ -0,0 +1,159 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <zvec/db/status.h>
+#include "db/common/rocksdb_context.h"
+#include "db/index/column/fts_column/bm25_scorer.h"
+#include "db/index/column/fts_column/fts_types.h"
+
+namespace zvec::fts {
+
+class FtsRocksdbReducer;
+using FtsRocksdbReducerPtr = std::shared_ptr<FtsRocksdbReducer>;
+
+/*! FTS RocksDB segment reducer
+ *  Merges FTS index data from multiple source segments into one destination
+ *  segment, remapping doc_ids and filtering deleted documents.  Reads only
+ *  postings_cf (BitPacked) and positions_cf from each source segment; writes
+ *  only postings_cf, positions_cf, and stat_cf on the destination side.
+ */
+class FtsRocksdbReducer {
+ public:
+  /*! Initialize the reducer with destination column families.
+   *  \param field_name          FTS field name (used for stat_cf keys)
+   *  \param dst_postings_cf     Destination postings CF (BitPacked output)
+   *  \param dst_positions_cf    Destination positions CF (phrase support)
+   *  \param dst_stat_cf         Destination segment-stat CF
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> init(const std::string &field_name, RocksdbContext *ctx,
+                    rocksdb::ColumnFamilyHandle *dst_postings_cf,
+                    rocksdb::ColumnFamilyHandle *dst_positions_cf,
+                    rocksdb::ColumnFamilyHandle *dst_stat_cf);
+
+  /*! Clean up internal state. */
+  Result<void> cleanup();
+
+  /*! Feed a source segment to be merged.
+   *  Segments must be fed in consecutive doc_id order.
+   *  \param segment_stats       Stats of the source segment (min/max doc_id)
+   *  \param src_ctx             RocksdbContext owning the source CFs
+   *  \param src_postings_cf     Source postings CF (must be BitPacked)
+   *  \param src_positions_cf    Source positions CF
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> feed(FtsSegmentStats segment_stats, RocksdbContext *src_ctx,
+                    rocksdb::ColumnFamilyHandle *src_postings_cf,
+                    rocksdb::ColumnFamilyHandle *src_positions_cf);
+
+  /*! Merge all fed segments into the destination store.
+   *  Reads BitPacked posting lists from each source postings_cf, applies
+   *  the delete filter, remaps doc_ids, and emits one merged BitPacked
+   *  posting list per term to dst_postings_cf.  Also accumulates effective
+   *  total_docs / total_tokens from inline doc_len payloads and writes them
+   *  to dst_stat_cf for BM25 IDF / avgdl.
+   *
+   *  \param filter   Returns true for doc_ids that should be filtered out
+   *                  (i.e., deleted documents).
+   *  \return Result<void> on success, or Status on failure
+   */
+  Result<void> reduce(const IndexFilter &filter);
+
+  /*! No-op: FTS data is written directly during reduce(). */
+  Result<void> dump() {
+    return {};
+  }
+
+ private:
+  // Two-pass streaming merge of postings.  Pass 1 collects effective stats
+  // without storing any PostingEntry; Pass 2 does multi-way merge across all
+  // source segment iterators by term (lexicographic order), encodes + puts
+  // each term's merged BitPacked posting list immediately, keeping peak
+  // memory at one term's worth of entries.
+  Result<void> reduce_postings(const IndexFilter &filter);
+
+  // Pass 1: collect effective_total_docs_ / effective_total_tokens_ without
+  // storing any PostingEntry.
+  // - effective_total_docs_ is computed from segment doc_id ranges minus
+  //   filtered docs (includes empty docs, matching mutable indexer semantics).
+  // - effective_total_tokens_ is accumulated from inline doc_len payloads
+  //   of surviving docs seen in postings (empty docs contribute 0).
+  Result<void> collect_effective_stats(const IndexFilter &filter);
+
+  // Pass 2: multi-way merge across all source segment iterators by term
+  // (lexicographic order), accumulate per-term entries, encode + put as
+  // BitPacked into dst_postings_cf_ immediately after each term boundary,
+  // keeping peak memory at one term's worth of entries.
+  Result<void> merge_and_flush_postings(const IndexFilter &filter);
+
+  // Merge positions CF for one source segment: iterate src positions_cf,
+  // drop entries whose doc_id is filtered, remap to the new doc_id space,
+  // and put into dst_positions_cf.  Required for phrase query support.
+  Result<void> reduce_positions(uint32_t segment_index,
+                                const IndexFilter &filter);
+
+  // Write accumulated stats to destination stat CF.
+  Result<void> flush_stat(uint64_t total_docs, uint64_t total_tokens);
+
+ private:
+  enum State {
+    STATE_UNINITED = 0,
+    STATE_INITED = 1,
+    STATE_FEED = 2,
+    STATE_REDUCE = 3,
+  };
+
+  std::string field_name_{};
+
+  // RocksdbContext for CF-level operations (get/put/create_iter)
+  RocksdbContext *ctx_{nullptr};
+
+  // Destination column families (only the 3 active ones are tracked here;
+  // $TF/$MAX_TF/$DOC_LEN dst CFs exist in the RocksDB schema but the reducer
+  // never writes them — they will be empty in the output SST).
+  rocksdb::ColumnFamilyHandle *dst_postings_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *dst_positions_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *dst_stat_cf_{nullptr};
+
+  // Per-segment source RocksdbContexts, column families and stats (only
+  // postings + positions are needed; the empty $TF/$MAX_TF/$DOC_LEN side CFs
+  // are not opened here).
+  std::vector<FtsSegmentStats> segment_stats_{};
+  std::vector<RocksdbContext *> src_ctxs_{};
+  std::vector<rocksdb::ColumnFamilyHandle *> src_postings_cfs_{};
+  std::vector<rocksdb::ColumnFamilyHandle *> src_positions_cfs_{};
+
+  uint32_t num_segments_{0};
+  uint64_t min_doc_id_{0};
+
+  // Effective per-segment statistics accumulated during reduce_postings()
+  // from BitPacked inline doc_len payloads.  Reflect only documents that
+  // survive the filter, and are used both as the truth fed into scorer_ for
+  // block_max_score computation and as the values written into dst stat_cf.
+  uint64_t effective_total_docs_{0};
+  uint64_t effective_total_tokens_{0};
+
+  // BM25 scorer for computing block_max_score during BitPacked encoding.
+  // Initialized inside reduce() once effective stats are known.
+  BM25ScorerPtr scorer_;
+
+  State state_{STATE_UNINITED};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_types.h b/src/db/index/column/fts_column/fts_types.h
new file mode 100644
index 000000000..d085a2d72
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_types.h
@@ -0,0 +1,50 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+#include "db/index/common/index_filter.h"
+
+namespace zvec::fts {
+
+/*! FTS query parameters passed to FtsColumnIndexer::search(). */
+struct FtsQueryParams {
+  uint32_t topk{10};
+  // Optional filter: returns true if a doc should be EXCLUDED.
+  // Wraps zvec::IndexFilter for push-down filtering inside the search loop.
+  IndexFilter::Ptr filter{nullptr};
+  // Candidate-driven (brute-force) mode: ascending segment-local doc_ids;
+  // when non-empty, FtsColumnIndexer restricts evaluation to this set by
+  // AND-ing it with the root iterator. Filled by the planner via
+  // DocFilter::get_bf_by_keys_and_update when an invert result is highly
+  // selective.
+  std::vector<uint64_t> candidate_ids;
+};
+
+/*! Per-segment statistics needed by the FTS reducer for doc_id remapping. */
+struct FtsSegmentStats {
+  uint64_t min_doc_id{0};
+  uint64_t max_doc_id{0};
+};
+
+struct FtsIndexParams {
+  std::string tokenizer_name{"standard"};
+  std::vector<std::string> filters{"lowercase"};
+  std::string extra_params;
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_utils.cc b/src/db/index/column/fts_column/fts_utils.cc
new file mode 100644
index 000000000..7cf8e495c
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_utils.cc
@@ -0,0 +1,38 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_utils.h"
+#include <zvec/ailego/logger/logger.h>
+
+namespace zvec::fts {
+
+bool parse_doc_term_key(const std::string &key, std::string *term_out,
+                        uint32_t *doc_id_out) {
+  // Key format: term + '\0' + doc_id(4B big-endian)
+  // Minimum length: 1 byte term + 1 byte '\0' + 4 bytes doc_id = 6 bytes.
+  if (key.size() < 6) {
+    LOG_WARN("parse_doc_term_key: key too short. size[%zu]", key.size());
+    return false;
+  }
+  const size_t separator_pos = key.size() - sizeof(uint32_t) - 1;
+  if (key[separator_pos] != '\0') {
+    LOG_WARN("parse_doc_term_key: missing separator. size[%zu]", key.size());
+    return false;
+  }
+  *term_out = key.substr(0, separator_pos);
+  *doc_id_out = decode_uint32_big_endian(key.data() + separator_pos + 1);
+  return true;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/fts_utils.h b/src/db/index/column/fts_column/fts_utils.h
new file mode 100644
index 000000000..06fc2c8ff
--- /dev/null
+++ b/src/db/index/column/fts_column/fts_utils.h
@@ -0,0 +1,86 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+namespace zvec::fts {
+
+// Big-endian uint32 encoding/decoding.
+inline uint32_t decode_uint32_big_endian(const char *data) {
+  return (static_cast<uint32_t>(static_cast<uint8_t>(data[0])) << 24) |
+         (static_cast<uint32_t>(static_cast<uint8_t>(data[1])) << 16) |
+         (static_cast<uint32_t>(static_cast<uint8_t>(data[2])) << 8) |
+         static_cast<uint32_t>(static_cast<uint8_t>(data[3]));
+}
+
+inline void encode_uint32_big_endian(uint32_t value, std::string *output) {
+  output->push_back(static_cast<char>((value >> 24) & 0xFF));
+  output->push_back(static_cast<char>((value >> 16) & 0xFF));
+  output->push_back(static_cast<char>((value >> 8) & 0xFF));
+  output->push_back(static_cast<char>(value & 0xFF));
+}
+
+// Doc-term key: term + '\0' + doc_id (4-byte big-endian).
+// Used by postings ($TF/$POS) column families.
+inline std::string make_doc_term_key(const std::string &term, uint32_t doc_id) {
+  std::string key;
+  key.reserve(term.size() + 1 + sizeof(uint32_t));
+  key.append(term);
+  key.push_back('\0');
+  encode_uint32_big_endian(doc_id, &key);
+  return key;
+}
+
+bool parse_doc_term_key(const std::string &key, std::string *term_out,
+                        uint32_t *doc_id_out);
+
+// Per-field segment-stat keys (stat_cf) for BM25 scoring.
+inline std::string make_total_docs_key(const std::string &field_name) {
+  return field_name + "_total_docs";
+}
+
+inline std::string make_total_tokens_key(const std::string &field_name) {
+  return field_name + "_total_tokens";
+}
+
+// uint64 big-endian encoding for stat values.
+inline std::string encode_uint64_value(uint64_t value) {
+  std::string out(sizeof(uint64_t), '\0');
+  out[0] = static_cast<char>((value >> 56) & 0xFF);
+  out[1] = static_cast<char>((value >> 48) & 0xFF);
+  out[2] = static_cast<char>((value >> 40) & 0xFF);
+  out[3] = static_cast<char>((value >> 32) & 0xFF);
+  out[4] = static_cast<char>((value >> 24) & 0xFF);
+  out[5] = static_cast<char>((value >> 16) & 0xFF);
+  out[6] = static_cast<char>((value >> 8) & 0xFF);
+  out[7] = static_cast<char>(value & 0xFF);
+  return out;
+}
+
+inline uint64_t decode_uint64_value(const char *data) {
+  return (static_cast<uint64_t>(static_cast<uint8_t>(data[0])) << 56) |
+         (static_cast<uint64_t>(static_cast<uint8_t>(data[1])) << 48) |
+         (static_cast<uint64_t>(static_cast<uint8_t>(data[2])) << 40) |
+         (static_cast<uint64_t>(static_cast<uint8_t>(data[3])) << 32) |
+         (static_cast<uint64_t>(static_cast<uint8_t>(data[4])) << 24) |
+         (static_cast<uint64_t>(static_cast<uint8_t>(data[5])) << 16) |
+         (static_cast<uint64_t>(static_cast<uint8_t>(data[6])) << 8) |
+         static_cast<uint64_t>(static_cast<uint8_t>(data[7]));
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/gen/FtsLexer.cc b/src/db/index/column/fts_column/gen/FtsLexer.cc
new file mode 100644
index 000000000..0034ad5f8
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsLexer.cc
@@ -0,0 +1,257 @@
+
+// Generated from FtsLexer.g4 by ANTLR 4.8
+
+
+#include "FtsLexer.h"
+
+
+using namespace antlr4;
+
+using namespace antlr4;
+
+FtsLexer::FtsLexer(CharStream *input) : Lexer(input) {
+  _interpreter = new atn::LexerATNSimulator(this, _atn, _decisionToDFA,
+                                            _sharedContextCache);
+}
+
+FtsLexer::~FtsLexer() {
+  delete _interpreter;
+}
+
+std::string FtsLexer::getGrammarFileName() const {
+  return "FtsLexer.g4";
+}
+
+const std::vector<std::string> &FtsLexer::getRuleNames() const {
+  return _ruleNames;
+}
+
+const std::vector<std::string> &FtsLexer::getChannelNames() const {
+  return _channelNames;
+}
+
+const std::vector<std::string> &FtsLexer::getModeNames() const {
+  return _modeNames;
+}
+
+const std::vector<std::string> &FtsLexer::getTokenNames() const {
+  return _tokenNames;
+}
+
+dfa::Vocabulary &FtsLexer::getVocabulary() const {
+  return _vocabulary;
+}
+
+const std::vector<uint16_t> FtsLexer::getSerializedATN() const {
+  return _serializedATN;
+}
+
+const atn::ATN &FtsLexer::getATN() const {
+  return _atn;
+}
+
+
+// Static vars and initialization.
+std::vector<dfa::DFA> FtsLexer::_decisionToDFA;
+atn::PredictionContextCache FtsLexer::_sharedContextCache;
+
+// We own the ATN which in turn owns the ATN states.
+atn::ATN FtsLexer::_atn;
+std::vector<uint16_t> FtsLexer::_serializedATN;
+
+std::vector<std::string> FtsLexer::_ruleNames = {
+    "OR",          "AND",          "NOT",      "PLUS_SIGN",  "MINUS_SIGN",
+    "COLON",       "CARET",        "LP",       "RP",         "DQUOTA_STRING",
+    "ASCII_ALNUM", "ESCAPED_CHAR", "UNI_CHAR", "TERM_START", "TERM_BODY",
+    "REGULAR_ID",  "NUMBER",       "TERM",     "SPACES",     "DEFAULT"};
+
+std::vector<std::string> FtsLexer::_channelNames = {"DEFAULT_TOKEN_CHANNEL",
+                                                    "HIDDEN"};
+
+std::vector<std::string> FtsLexer::_modeNames = {"DEFAULT_MODE"};
+
+std::vector<std::string> FtsLexer::_literalNames = {
+    "", "", "", "", "'+'", "'-'", "':'", "'^'", "'('", "')'"};
+
+std::vector<std::string> FtsLexer::_symbolicNames = {
+    "",       "OR",    "AND",    "NOT",    "PLUS_SIGN",     "MINUS_SIGN",
+    "COLON",  "CARET", "LP",     "RP",     "DQUOTA_STRING", "REGULAR_ID",
+    "NUMBER", "TERM",  "SPACES", "DEFAULT"};
+
+dfa::Vocabulary FtsLexer::_vocabulary(_literalNames, _symbolicNames);
+
+std::vector<std::string> FtsLexer::_tokenNames;
+
+FtsLexer::Initializer::Initializer() {
+  // This code could be in a static initializer lambda, but VS doesn't allow
+  // access to private class members from there.
+  for (size_t i = 0; i < _symbolicNames.size(); ++i) {
+    std::string name = _vocabulary.getLiteralName(i);
+    if (name.empty()) {
+      name = _vocabulary.getSymbolicName(i);
+    }
+
+    if (name.empty()) {
+      _tokenNames.push_back("<INVALID>");
+    } else {
+      _tokenNames.push_back(name);
+    }
+  }
+
+  _serializedATN = {
+      0x3,  0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964,
+      0x2,  0x11,   0x82,   0x8,    0x1,    0x4,    0x2,    0x9,    0x2,
+      0x4,  0x3,    0x9,    0x3,    0x4,    0x4,    0x9,    0x4,    0x4,
+      0x5,  0x9,    0x5,    0x4,    0x6,    0x9,    0x6,    0x4,    0x7,
+      0x9,  0x7,    0x4,    0x8,    0x9,    0x8,    0x4,    0x9,    0x9,
+      0x9,  0x4,    0xa,    0x9,    0xa,    0x4,    0xb,    0x9,    0xb,
+      0x4,  0xc,    0x9,    0xc,    0x4,    0xd,    0x9,    0xd,    0x4,
+      0xe,  0x9,    0xe,    0x4,    0xf,    0x9,    0xf,    0x4,    0x10,
+      0x9,  0x10,   0x4,    0x11,   0x9,    0x11,   0x4,    0x12,   0x9,
+      0x12, 0x4,    0x13,   0x9,    0x13,   0x4,    0x14,   0x9,    0x14,
+      0x4,  0x15,   0x9,    0x15,   0x3,    0x2,    0x3,    0x2,    0x3,
+      0x2,  0x3,    0x3,    0x3,    0x3,    0x3,    0x3,    0x3,    0x3,
+      0x3,  0x4,    0x3,    0x4,    0x3,    0x4,    0x3,    0x4,    0x3,
+      0x5,  0x3,    0x5,    0x3,    0x6,    0x3,    0x6,    0x3,    0x7,
+      0x3,  0x7,    0x3,    0x8,    0x3,    0x8,    0x3,    0x9,    0x3,
+      0x9,  0x3,    0xa,    0x3,    0xa,    0x3,    0xb,    0x3,    0xb,
+      0x3,  0xb,    0x3,    0xb,    0x7,    0xb,    0x47,   0xa,    0xb,
+      0xc,  0xb,    0xe,    0xb,    0x4a,   0xb,    0xb,    0x3,    0xb,
+      0x3,  0xb,    0x3,    0xc,    0x3,    0xc,    0x3,    0xd,    0x3,
+      0xd,  0x3,    0xd,    0x3,    0xe,    0x3,    0xe,    0x3,    0xf,
+      0x3,  0xf,    0x5,    0xf,    0x57,   0xa,    0xf,    0x3,    0x10,
+      0x3,  0x10,   0x3,    0x10,   0x3,    0x10,   0x5,    0x10,   0x5d,
+      0xa,  0x10,   0x3,    0x11,   0x3,    0x11,   0x7,    0x11,   0x61,
+      0xa,  0x11,   0xc,    0x11,   0xe,    0x11,   0x64,   0xb,    0x11,
+      0x3,  0x12,   0x6,    0x12,   0x67,   0xa,    0x12,   0xd,    0x12,
+      0xe,  0x12,   0x68,   0x3,    0x12,   0x3,    0x12,   0x6,    0x12,
+      0x6d, 0xa,    0x12,   0xd,    0x12,   0xe,    0x12,   0x6e,   0x5,
+      0x12, 0x71,   0xa,    0x12,   0x3,    0x13,   0x3,    0x13,   0x7,
+      0x13, 0x75,   0xa,    0x13,   0xc,    0x13,   0xe,    0x13,   0x78,
+      0xb,  0x13,   0x3,    0x14,   0x6,    0x14,   0x7b,   0xa,    0x14,
+      0xd,  0x14,   0xe,    0x14,   0x7c,   0x3,    0x14,   0x3,    0x14,
+      0x3,  0x15,   0x3,    0x15,   0x2,    0x2,    0x16,   0x3,    0x3,
+      0x5,  0x4,    0x7,    0x5,    0x9,    0x6,    0xb,    0x7,    0xd,
+      0x8,  0xf,    0x9,    0x11,   0xa,    0x13,   0xb,    0x15,   0xc,
+      0x17, 0x2,    0x19,   0x2,    0x1b,   0x2,    0x1d,   0x2,    0x1f,
+      0x2,  0x21,   0xd,    0x23,   0xe,    0x25,   0xf,    0x27,   0x10,
+      0x29, 0x11,   0x3,    0x2,    0x11,   0x4,    0x2,    0x51,   0x51,
+      0x71, 0x71,   0x4,    0x2,    0x54,   0x54,   0x74,   0x74,   0x4,
+      0x2,  0x43,   0x43,   0x63,   0x63,   0x4,    0x2,    0x50,   0x50,
+      0x70, 0x70,   0x4,    0x2,    0x46,   0x46,   0x66,   0x66,   0x4,
+      0x2,  0x56,   0x56,   0x76,   0x76,   0x6,    0x2,    0xc,    0xc,
+      0xf,  0xf,    0x24,   0x24,   0x5e,   0x5e,   0x6,    0x2,    0x32,
+      0x3b, 0x43,   0x5c,   0x61,   0x61,   0x63,   0x7c,   0xc,    0x2,
+      0x23, 0x24,   0x28,   0x28,   0x2a,   0x2d,   0x2f,   0x2f,   0x31,
+      0x31, 0x3c,   0x3c,   0x3f,   0x3f,   0x41,   0x41,   0x5d,   0x60,
+      0x7d, 0x80,   0x3,    0x2,    0x82,   0x1,    0x8,    0x2,    0x25,
+      0x25, 0x27,   0x27,   0x29,   0x29,   0x2f,   0x31,   0x42,   0x42,
+      0x61, 0x61,   0x5,    0x2,    0x43,   0x5c,   0x61,   0x61,   0x63,
+      0x7c, 0x7,    0x2,    0x2f,   0x2f,   0x32,   0x3b,   0x43,   0x5c,
+      0x61, 0x61,   0x63,   0x7c,   0x3,    0x2,    0x32,   0x3b,   0x5,
+      0x2,  0xb,    0xc,    0xf,    0xf,    0x22,   0x22,   0x2,    0x88,
+      0x2,  0x3,    0x3,    0x2,    0x2,    0x2,    0x2,    0x5,    0x3,
+      0x2,  0x2,    0x2,    0x2,    0x7,    0x3,    0x2,    0x2,    0x2,
+      0x2,  0x9,    0x3,    0x2,    0x2,    0x2,    0x2,    0xb,    0x3,
+      0x2,  0x2,    0x2,    0x2,    0xd,    0x3,    0x2,    0x2,    0x2,
+      0x2,  0xf,    0x3,    0x2,    0x2,    0x2,    0x2,    0x11,   0x3,
+      0x2,  0x2,    0x2,    0x2,    0x13,   0x3,    0x2,    0x2,    0x2,
+      0x2,  0x15,   0x3,    0x2,    0x2,    0x2,    0x2,    0x21,   0x3,
+      0x2,  0x2,    0x2,    0x2,    0x23,   0x3,    0x2,    0x2,    0x2,
+      0x2,  0x25,   0x3,    0x2,    0x2,    0x2,    0x2,    0x27,   0x3,
+      0x2,  0x2,    0x2,    0x2,    0x29,   0x3,    0x2,    0x2,    0x2,
+      0x3,  0x2b,   0x3,    0x2,    0x2,    0x2,    0x5,    0x2e,   0x3,
+      0x2,  0x2,    0x2,    0x7,    0x32,   0x3,    0x2,    0x2,    0x2,
+      0x9,  0x36,   0x3,    0x2,    0x2,    0x2,    0xb,    0x38,   0x3,
+      0x2,  0x2,    0x2,    0xd,    0x3a,   0x3,    0x2,    0x2,    0x2,
+      0xf,  0x3c,   0x3,    0x2,    0x2,    0x2,    0x11,   0x3e,   0x3,
+      0x2,  0x2,    0x2,    0x13,   0x40,   0x3,    0x2,    0x2,    0x2,
+      0x15, 0x42,   0x3,    0x2,    0x2,    0x2,    0x17,   0x4d,   0x3,
+      0x2,  0x2,    0x2,    0x19,   0x4f,   0x3,    0x2,    0x2,    0x2,
+      0x1b, 0x52,   0x3,    0x2,    0x2,    0x2,    0x1d,   0x56,   0x3,
+      0x2,  0x2,    0x2,    0x1f,   0x5c,   0x3,    0x2,    0x2,    0x2,
+      0x21, 0x5e,   0x3,    0x2,    0x2,    0x2,    0x23,   0x66,   0x3,
+      0x2,  0x2,    0x2,    0x25,   0x72,   0x3,    0x2,    0x2,    0x2,
+      0x27, 0x7a,   0x3,    0x2,    0x2,    0x2,    0x29,   0x80,   0x3,
+      0x2,  0x2,    0x2,    0x2b,   0x2c,   0x9,    0x2,    0x2,    0x2,
+      0x2c, 0x2d,   0x9,    0x3,    0x2,    0x2,    0x2d,   0x4,    0x3,
+      0x2,  0x2,    0x2,    0x2e,   0x2f,   0x9,    0x4,    0x2,    0x2,
+      0x2f, 0x30,   0x9,    0x5,    0x2,    0x2,    0x30,   0x31,   0x9,
+      0x6,  0x2,    0x2,    0x31,   0x6,    0x3,    0x2,    0x2,    0x2,
+      0x32, 0x33,   0x9,    0x5,    0x2,    0x2,    0x33,   0x34,   0x9,
+      0x2,  0x2,    0x2,    0x34,   0x35,   0x9,    0x7,    0x2,    0x2,
+      0x35, 0x8,    0x3,    0x2,    0x2,    0x2,    0x36,   0x37,   0x7,
+      0x2d, 0x2,    0x2,    0x37,   0xa,    0x3,    0x2,    0x2,    0x2,
+      0x38, 0x39,   0x7,    0x2f,   0x2,    0x2,    0x39,   0xc,    0x3,
+      0x2,  0x2,    0x2,    0x3a,   0x3b,   0x7,    0x3c,   0x2,    0x2,
+      0x3b, 0xe,    0x3,    0x2,    0x2,    0x2,    0x3c,   0x3d,   0x7,
+      0x60, 0x2,    0x2,    0x3d,   0x10,   0x3,    0x2,    0x2,    0x2,
+      0x3e, 0x3f,   0x7,    0x2a,   0x2,    0x2,    0x3f,   0x12,   0x3,
+      0x2,  0x2,    0x2,    0x40,   0x41,   0x7,    0x2b,   0x2,    0x2,
+      0x41, 0x14,   0x3,    0x2,    0x2,    0x2,    0x42,   0x48,   0x7,
+      0x24, 0x2,    0x2,    0x43,   0x47,   0xa,    0x8,    0x2,    0x2,
+      0x44, 0x45,   0x7,    0x5e,   0x2,    0x2,    0x45,   0x47,   0xb,
+      0x2,  0x2,    0x2,    0x46,   0x43,   0x3,    0x2,    0x2,    0x2,
+      0x46, 0x44,   0x3,    0x2,    0x2,    0x2,    0x47,   0x4a,   0x3,
+      0x2,  0x2,    0x2,    0x48,   0x46,   0x3,    0x2,    0x2,    0x2,
+      0x48, 0x49,   0x3,    0x2,    0x2,    0x2,    0x49,   0x4b,   0x3,
+      0x2,  0x2,    0x2,    0x4a,   0x48,   0x3,    0x2,    0x2,    0x2,
+      0x4b, 0x4c,   0x7,    0x24,   0x2,    0x2,    0x4c,   0x16,   0x3,
+      0x2,  0x2,    0x2,    0x4d,   0x4e,   0x9,    0x9,    0x2,    0x2,
+      0x4e, 0x18,   0x3,    0x2,    0x2,    0x2,    0x4f,   0x50,   0x7,
+      0x5e, 0x2,    0x2,    0x50,   0x51,   0x9,    0xa,    0x2,    0x2,
+      0x51, 0x1a,   0x3,    0x2,    0x2,    0x2,    0x52,   0x53,   0x9,
+      0xb,  0x2,    0x2,    0x53,   0x1c,   0x3,    0x2,    0x2,    0x2,
+      0x54, 0x57,   0x5,    0x17,   0xc,    0x2,    0x55,   0x57,   0x5,
+      0x1b, 0xe,    0x2,    0x56,   0x54,   0x3,    0x2,    0x2,    0x2,
+      0x56, 0x55,   0x3,    0x2,    0x2,    0x2,    0x57,   0x1e,   0x3,
+      0x2,  0x2,    0x2,    0x58,   0x5d,   0x5,    0x17,   0xc,    0x2,
+      0x59, 0x5d,   0x5,    0x1b,   0xe,    0x2,    0x5a,   0x5d,   0x9,
+      0xc,  0x2,    0x2,    0x5b,   0x5d,   0x5,    0x19,   0xd,    0x2,
+      0x5c, 0x58,   0x3,    0x2,    0x2,    0x2,    0x5c,   0x59,   0x3,
+      0x2,  0x2,    0x2,    0x5c,   0x5a,   0x3,    0x2,    0x2,    0x2,
+      0x5c, 0x5b,   0x3,    0x2,    0x2,    0x2,    0x5d,   0x20,   0x3,
+      0x2,  0x2,    0x2,    0x5e,   0x62,   0x9,    0xd,    0x2,    0x2,
+      0x5f, 0x61,   0x9,    0xe,    0x2,    0x2,    0x60,   0x5f,   0x3,
+      0x2,  0x2,    0x2,    0x61,   0x64,   0x3,    0x2,    0x2,    0x2,
+      0x62, 0x60,   0x3,    0x2,    0x2,    0x2,    0x62,   0x63,   0x3,
+      0x2,  0x2,    0x2,    0x63,   0x22,   0x3,    0x2,    0x2,    0x2,
+      0x64, 0x62,   0x3,    0x2,    0x2,    0x2,    0x65,   0x67,   0x9,
+      0xf,  0x2,    0x2,    0x66,   0x65,   0x3,    0x2,    0x2,    0x2,
+      0x67, 0x68,   0x3,    0x2,    0x2,    0x2,    0x68,   0x66,   0x3,
+      0x2,  0x2,    0x2,    0x68,   0x69,   0x3,    0x2,    0x2,    0x2,
+      0x69, 0x70,   0x3,    0x2,    0x2,    0x2,    0x6a,   0x6c,   0x7,
+      0x30, 0x2,    0x2,    0x6b,   0x6d,   0x9,    0xf,    0x2,    0x2,
+      0x6c, 0x6b,   0x3,    0x2,    0x2,    0x2,    0x6d,   0x6e,   0x3,
+      0x2,  0x2,    0x2,    0x6e,   0x6c,   0x3,    0x2,    0x2,    0x2,
+      0x6e, 0x6f,   0x3,    0x2,    0x2,    0x2,    0x6f,   0x71,   0x3,
+      0x2,  0x2,    0x2,    0x70,   0x6a,   0x3,    0x2,    0x2,    0x2,
+      0x70, 0x71,   0x3,    0x2,    0x2,    0x2,    0x71,   0x24,   0x3,
+      0x2,  0x2,    0x2,    0x72,   0x76,   0x5,    0x1d,   0xf,    0x2,
+      0x73, 0x75,   0x5,    0x1f,   0x10,   0x2,    0x74,   0x73,   0x3,
+      0x2,  0x2,    0x2,    0x75,   0x78,   0x3,    0x2,    0x2,    0x2,
+      0x76, 0x74,   0x3,    0x2,    0x2,    0x2,    0x76,   0x77,   0x3,
+      0x2,  0x2,    0x2,    0x77,   0x26,   0x3,    0x2,    0x2,    0x2,
+      0x78, 0x76,   0x3,    0x2,    0x2,    0x2,    0x79,   0x7b,   0x9,
+      0x10, 0x2,    0x2,    0x7a,   0x79,   0x3,    0x2,    0x2,    0x2,
+      0x7b, 0x7c,   0x3,    0x2,    0x2,    0x2,    0x7c,   0x7a,   0x3,
+      0x2,  0x2,    0x2,    0x7c,   0x7d,   0x3,    0x2,    0x2,    0x2,
+      0x7d, 0x7e,   0x3,    0x2,    0x2,    0x2,    0x7e,   0x7f,   0x8,
+      0x14, 0x2,    0x2,    0x7f,   0x28,   0x3,    0x2,    0x2,    0x2,
+      0x80, 0x81,   0xb,    0x2,    0x2,    0x2,    0x81,   0x2a,   0x3,
+      0x2,  0x2,    0x2,    0xd,    0x2,    0x46,   0x48,   0x56,   0x5c,
+      0x62, 0x68,   0x6e,   0x70,   0x76,   0x7c,   0x3,    0x8,    0x2,
+      0x2,
+  };
+
+  atn::ATNDeserializer deserializer;
+  _atn = deserializer.deserialize(_serializedATN);
+
+  size_t count = _atn.getNumberOfDecisions();
+  _decisionToDFA.reserve(count);
+  for (size_t i = 0; i < count; i++) {
+    _decisionToDFA.emplace_back(_atn.getDecisionState(i), i);
+  }
+}
+
+FtsLexer::Initializer FtsLexer::_init;
diff --git a/src/db/index/column/fts_column/gen/FtsLexer.h b/src/db/index/column/fts_column/gen/FtsLexer.h
new file mode 100644
index 000000000..9843b865e
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsLexer.h
@@ -0,0 +1,73 @@
+
+// Generated from FtsLexer.g4 by ANTLR 4.8
+
+#pragma once
+
+
+#include "antlr4-runtime.h"
+
+
+namespace antlr4 {
+
+
+class FtsLexer : public antlr4::Lexer {
+ public:
+  enum {
+    OR = 1,
+    AND = 2,
+    NOT = 3,
+    PLUS_SIGN = 4,
+    MINUS_SIGN = 5,
+    COLON = 6,
+    CARET = 7,
+    LP = 8,
+    RP = 9,
+    DQUOTA_STRING = 10,
+    REGULAR_ID = 11,
+    NUMBER = 12,
+    TERM = 13,
+    SPACES = 14,
+    DEFAULT = 15
+  };
+
+  FtsLexer(antlr4::CharStream *input);
+  ~FtsLexer();
+
+  virtual std::string getGrammarFileName() const override;
+  virtual const std::vector<std::string> &getRuleNames() const override;
+
+  virtual const std::vector<std::string> &getChannelNames() const override;
+  virtual const std::vector<std::string> &getModeNames() const override;
+  virtual const std::vector<std::string> &getTokenNames()
+      const override;  // deprecated, use vocabulary instead
+  virtual antlr4::dfa::Vocabulary &getVocabulary() const override;
+
+  virtual const std::vector<uint16_t> getSerializedATN() const override;
+  virtual const antlr4::atn::ATN &getATN() const override;
+
+ private:
+  static std::vector<antlr4::dfa::DFA> _decisionToDFA;
+  static antlr4::atn::PredictionContextCache _sharedContextCache;
+  static std::vector<std::string> _ruleNames;
+  static std::vector<std::string> _tokenNames;
+  static std::vector<std::string> _channelNames;
+  static std::vector<std::string> _modeNames;
+
+  static std::vector<std::string> _literalNames;
+  static std::vector<std::string> _symbolicNames;
+  static antlr4::dfa::Vocabulary _vocabulary;
+  static antlr4::atn::ATN _atn;
+  static std::vector<uint16_t> _serializedATN;
+
+
+  // Individual action functions triggered by action() above.
+
+  // Individual semantic predicate functions triggered by sempred() above.
+
+  struct Initializer {
+    Initializer();
+  };
+  static Initializer _init;
+};
+
+}  // namespace antlr4
diff --git a/src/db/index/column/fts_column/gen/FtsLexer.interp b/src/db/index/column/fts_column/gen/FtsLexer.interp
new file mode 100644
index 000000000..384c23305
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsLexer.interp
@@ -0,0 +1,67 @@
+token literal names:
+null
+null
+null
+null
+'+'
+'-'
+':'
+'^'
+'('
+')'
+null
+null
+null
+null
+null
+null
+
+token symbolic names:
+null
+OR
+AND
+NOT
+PLUS_SIGN
+MINUS_SIGN
+COLON
+CARET
+LP
+RP
+DQUOTA_STRING
+REGULAR_ID
+NUMBER
+TERM
+SPACES
+DEFAULT
+
+rule names:
+OR
+AND
+NOT
+PLUS_SIGN
+MINUS_SIGN
+COLON
+CARET
+LP
+RP
+DQUOTA_STRING
+ASCII_ALNUM
+ESCAPED_CHAR
+UNI_CHAR
+TERM_START
+TERM_BODY
+REGULAR_ID
+NUMBER
+TERM
+SPACES
+DEFAULT
+
+channel names:
+DEFAULT_TOKEN_CHANNEL
+HIDDEN
+
+mode names:
+DEFAULT_MODE
+
+atn:
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 17, 130, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 11, 7, 11, 71, 10, 11, 12, 11, 14, 11, 74, 11, 11, 3, 11, 3, 11, 3, 12, 3, 12, 3, 13, 3, 13, 3, 13, 3, 14, 3, 14, 3, 15, 3, 15, 5, 15, 87, 10, 15, 3, 16, 3, 16, 3, 16, 3, 16, 5, 16, 93, 10, 16, 3, 17, 3, 17, 7, 17, 97, 10, 17, 12, 17, 14, 17, 100, 11, 17, 3, 18, 6, 18, 103, 10, 18, 13, 18, 14, 18, 104, 3, 18, 3, 18, 6, 18, 109, 10, 18, 13, 18, 14, 18, 110, 5, 18, 113, 10, 18, 3, 19, 3, 19, 7, 19, 117, 10, 19, 12, 19, 14, 19, 120, 11, 19, 3, 20, 6, 20, 123, 10, 20, 13, 20, 14, 20, 124, 3, 20, 3, 20, 3, 21, 3, 21, 2, 2, 22, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 15, 9, 17, 10, 19, 11, 21, 12, 23, 2, 25, 2, 27, 2, 29, 2, 31, 2, 33, 13, 35, 14, 37, 15, 39, 16, 41, 17, 3, 2, 17, 4, 2, 81, 81, 113, 113, 4, 2, 84, 84, 116, 116, 4, 2, 67, 67, 99, 99, 4, 2, 80, 80, 112, 112, 4, 2, 70, 70, 102, 102, 4, 2, 86, 86, 118, 118, 6, 2, 12, 12, 15, 15, 36, 36, 94, 94, 6, 2, 50, 59, 67, 92, 97, 97, 99, 124, 12, 2, 35, 36, 40, 40, 42, 45, 47, 47, 49, 49, 60, 60, 63, 63, 65, 65, 93, 96, 125, 128, 3, 2, 130, 1, 8, 2, 37, 37, 39, 39, 41, 41, 47, 49, 66, 66, 97, 97, 5, 2, 67, 92, 97, 97, 99, 124, 7, 2, 47, 47, 50, 59, 67, 92, 97, 97, 99, 124, 3, 2, 50, 59, 5, 2, 11, 12, 15, 15, 34, 34, 2, 136, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 2, 15, 3, 2, 2, 2, 2, 17, 3, 2, 2, 2, 2, 19, 3, 2, 2, 2, 2, 21, 3, 2, 2, 2, 2, 33, 3, 2, 2, 2, 2, 35, 3, 2, 2, 2, 2, 37, 3, 2, 2, 2, 2, 39, 3, 2, 2, 2, 2, 41, 3, 2, 2, 2, 3, 43, 3, 2, 2, 2, 5, 46, 3, 2, 2, 2, 7, 50, 3, 2, 2, 2, 9, 54, 3, 2, 2, 2, 11, 56, 3, 2, 2, 2, 13, 58, 3, 2, 2, 2, 15, 60, 3, 2, 2, 2, 17, 62, 3, 2, 2, 2, 19, 64, 3, 2, 2, 2, 21, 66, 3, 2, 2, 2, 23, 77, 3, 2, 2, 2, 25, 79, 3, 2, 2, 2, 27, 82, 3, 2, 2, 2, 29, 86, 3, 2, 2, 2, 31, 92, 3, 2, 2, 2, 33, 94, 3, 2, 2, 2, 35, 102, 3, 2, 2, 2, 37, 114, 3, 2, 2, 2, 39, 122, 3, 2, 2, 2, 41, 128, 3, 2, 2, 2, 43, 44, 9, 2, 2, 2, 44, 45, 9, 3, 2, 2, 45, 4, 3, 2, 2, 2, 46, 47, 9, 4, 2, 2, 47, 48, 9, 5, 2, 2, 48, 49, 9, 6, 2, 2, 49, 6, 3, 2, 2, 2, 50, 51, 9, 5, 2, 2, 51, 52, 9, 2, 2, 2, 52, 53, 9, 7, 2, 2, 53, 8, 3, 2, 2, 2, 54, 55, 7, 45, 2, 2, 55, 10, 3, 2, 2, 2, 56, 57, 7, 47, 2, 2, 57, 12, 3, 2, 2, 2, 58, 59, 7, 60, 2, 2, 59, 14, 3, 2, 2, 2, 60, 61, 7, 96, 2, 2, 61, 16, 3, 2, 2, 2, 62, 63, 7, 42, 2, 2, 63, 18, 3, 2, 2, 2, 64, 65, 7, 43, 2, 2, 65, 20, 3, 2, 2, 2, 66, 72, 7, 36, 2, 2, 67, 71, 10, 8, 2, 2, 68, 69, 7, 94, 2, 2, 69, 71, 11, 2, 2, 2, 70, 67, 3, 2, 2, 2, 70, 68, 3, 2, 2, 2, 71, 74, 3, 2, 2, 2, 72, 70, 3, 2, 2, 2, 72, 73, 3, 2, 2, 2, 73, 75, 3, 2, 2, 2, 74, 72, 3, 2, 2, 2, 75, 76, 7, 36, 2, 2, 76, 22, 3, 2, 2, 2, 77, 78, 9, 9, 2, 2, 78, 24, 3, 2, 2, 2, 79, 80, 7, 94, 2, 2, 80, 81, 9, 10, 2, 2, 81, 26, 3, 2, 2, 2, 82, 83, 9, 11, 2, 2, 83, 28, 3, 2, 2, 2, 84, 87, 5, 23, 12, 2, 85, 87, 5, 27, 14, 2, 86, 84, 3, 2, 2, 2, 86, 85, 3, 2, 2, 2, 87, 30, 3, 2, 2, 2, 88, 93, 5, 23, 12, 2, 89, 93, 5, 27, 14, 2, 90, 93, 9, 12, 2, 2, 91, 93, 5, 25, 13, 2, 92, 88, 3, 2, 2, 2, 92, 89, 3, 2, 2, 2, 92, 90, 3, 2, 2, 2, 92, 91, 3, 2, 2, 2, 93, 32, 3, 2, 2, 2, 94, 98, 9, 13, 2, 2, 95, 97, 9, 14, 2, 2, 96, 95, 3, 2, 2, 2, 97, 100, 3, 2, 2, 2, 98, 96, 3, 2, 2, 2, 98, 99, 3, 2, 2, 2, 99, 34, 3, 2, 2, 2, 100, 98, 3, 2, 2, 2, 101, 103, 9, 15, 2, 2, 102, 101, 3, 2, 2, 2, 103, 104, 3, 2, 2, 2, 104, 102, 3, 2, 2, 2, 104, 105, 3, 2, 2, 2, 105, 112, 3, 2, 2, 2, 106, 108, 7, 48, 2, 2, 107, 109, 9, 15, 2, 2, 108, 107, 3, 2, 2, 2, 109, 110, 3, 2, 2, 2, 110, 108, 3, 2, 2, 2, 110, 111, 3, 2, 2, 2, 111, 113, 3, 2, 2, 2, 112, 106, 3, 2, 2, 2, 112, 113, 3, 2, 2, 2, 113, 36, 3, 2, 2, 2, 114, 118, 5, 29, 15, 2, 115, 117, 5, 31, 16, 2, 116, 115, 3, 2, 2, 2, 117, 120, 3, 2, 2, 2, 118, 116, 3, 2, 2, 2, 118, 119, 3, 2, 2, 2, 119, 38, 3, 2, 2, 2, 120, 118, 3, 2, 2, 2, 121, 123, 9, 16, 2, 2, 122, 121, 3, 2, 2, 2, 123, 124, 3, 2, 2, 2, 124, 122, 3, 2, 2, 2, 124, 125, 3, 2, 2, 2, 125, 126, 3, 2, 2, 2, 126, 127, 8, 20, 2, 2, 127, 40, 3, 2, 2, 2, 128, 129, 11, 2, 2, 2, 129, 42, 3, 2, 2, 2, 13, 2, 70, 72, 86, 92, 98, 104, 110, 112, 118, 124, 3, 8, 2, 2]
diff --git a/src/db/index/column/fts_column/gen/FtsLexer.tokens b/src/db/index/column/fts_column/gen/FtsLexer.tokens
new file mode 100644
index 000000000..cd6e2db20
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsLexer.tokens
@@ -0,0 +1,21 @@
+OR=1
+AND=2
+NOT=3
+PLUS_SIGN=4
+MINUS_SIGN=5
+COLON=6
+CARET=7
+LP=8
+RP=9
+DQUOTA_STRING=10
+REGULAR_ID=11
+NUMBER=12
+TERM=13
+SPACES=14
+DEFAULT=15
+'+'=4
+'-'=5
+':'=6
+'^'=7
+'('=8
+')'=9
diff --git a/src/db/index/column/fts_column/gen/FtsParser.cc b/src/db/index/column/fts_column/gen/FtsParser.cc
new file mode 100644
index 000000000..8fc31950b
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParser.cc
@@ -0,0 +1,1116 @@
+
+// Generated from FtsParser.g4 by ANTLR 4.8
+
+
+#include "FtsParser.h"
+#include "FtsParserListener.h"
+
+
+using namespace antlrcpp;
+using namespace antlr4;
+using namespace antlr4;
+
+FtsParser::FtsParser(TokenStream *input) : Parser(input) {
+  _interpreter = new atn::ParserATNSimulator(this, _atn, _decisionToDFA,
+                                             _sharedContextCache);
+}
+
+FtsParser::~FtsParser() {
+  delete _interpreter;
+}
+
+std::string FtsParser::getGrammarFileName() const {
+  return "FtsParser.g4";
+}
+
+const std::vector<std::string> &FtsParser::getRuleNames() const {
+  return _ruleNames;
+}
+
+dfa::Vocabulary &FtsParser::getVocabulary() const {
+  return _vocabulary;
+}
+
+
+//----------------- Fts_query_unitContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_query_unitContext::Fts_query_unitContext(
+    ParserRuleContext *parent_ctx, size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+FtsParser::Fts_or_exprContext *FtsParser::Fts_query_unitContext::fts_or_expr() {
+  return getRuleContext<FtsParser::Fts_or_exprContext>(0);
+}
+
+tree::TerminalNode *FtsParser::Fts_query_unitContext::EOF() {
+  return getToken(FtsParser::EOF, 0);
+}
+
+
+size_t FtsParser::Fts_query_unitContext::getRuleIndex() const {
+  return FtsParser::RuleFts_query_unit;
+}
+
+void FtsParser::Fts_query_unitContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_query_unit(this);
+}
+
+void FtsParser::Fts_query_unitContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_query_unit(this);
+}
+
+FtsParser::Fts_query_unitContext *FtsParser::fts_query_unit() {
+  Fts_query_unitContext *_localctx =
+      _tracker.createInstance<Fts_query_unitContext>(_ctx, getState());
+  enterRule(_localctx, 0, FtsParser::RuleFts_query_unit);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(24);
+    fts_or_expr();
+    setState(25);
+    match(FtsParser::EOF);
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_or_exprContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_or_exprContext::Fts_or_exprContext(ParserRuleContext *parent_ctx,
+                                                  size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+std::vector<FtsParser::Fts_and_exprContext *>
+FtsParser::Fts_or_exprContext::fts_and_expr() {
+  return getRuleContexts<FtsParser::Fts_and_exprContext>();
+}
+
+FtsParser::Fts_and_exprContext *FtsParser::Fts_or_exprContext::fts_and_expr(
+    size_t i) {
+  return getRuleContext<FtsParser::Fts_and_exprContext>(i);
+}
+
+std::vector<tree::TerminalNode *> FtsParser::Fts_or_exprContext::OR() {
+  return getTokens(FtsParser::OR);
+}
+
+tree::TerminalNode *FtsParser::Fts_or_exprContext::OR(size_t i) {
+  return getToken(FtsParser::OR, i);
+}
+
+
+size_t FtsParser::Fts_or_exprContext::getRuleIndex() const {
+  return FtsParser::RuleFts_or_expr;
+}
+
+void FtsParser::Fts_or_exprContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_or_expr(this);
+}
+
+void FtsParser::Fts_or_exprContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_or_expr(this);
+}
+
+FtsParser::Fts_or_exprContext *FtsParser::fts_or_expr() {
+  Fts_or_exprContext *_localctx =
+      _tracker.createInstance<Fts_or_exprContext>(_ctx, getState());
+  enterRule(_localctx, 2, FtsParser::RuleFts_or_expr);
+  size_t _la = 0;
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(27);
+    fts_and_expr();
+    setState(32);
+    _errHandler->sync(this);
+    _la = _input->LA(1);
+    while (_la == FtsParser::OR) {
+      setState(28);
+      match(FtsParser::OR);
+      setState(29);
+      fts_and_expr();
+      setState(34);
+      _errHandler->sync(this);
+      _la = _input->LA(1);
+    }
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_and_exprContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_and_exprContext::Fts_and_exprContext(
+    ParserRuleContext *parent_ctx, size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+std::vector<FtsParser::Fts_seq_exprContext *>
+FtsParser::Fts_and_exprContext::fts_seq_expr() {
+  return getRuleContexts<FtsParser::Fts_seq_exprContext>();
+}
+
+FtsParser::Fts_seq_exprContext *FtsParser::Fts_and_exprContext::fts_seq_expr(
+    size_t i) {
+  return getRuleContext<FtsParser::Fts_seq_exprContext>(i);
+}
+
+std::vector<tree::TerminalNode *> FtsParser::Fts_and_exprContext::AND() {
+  return getTokens(FtsParser::AND);
+}
+
+tree::TerminalNode *FtsParser::Fts_and_exprContext::AND(size_t i) {
+  return getToken(FtsParser::AND, i);
+}
+
+std::vector<tree::TerminalNode *> FtsParser::Fts_and_exprContext::NOT() {
+  return getTokens(FtsParser::NOT);
+}
+
+tree::TerminalNode *FtsParser::Fts_and_exprContext::NOT(size_t i) {
+  return getToken(FtsParser::NOT, i);
+}
+
+
+size_t FtsParser::Fts_and_exprContext::getRuleIndex() const {
+  return FtsParser::RuleFts_and_expr;
+}
+
+void FtsParser::Fts_and_exprContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_and_expr(this);
+}
+
+void FtsParser::Fts_and_exprContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_and_expr(this);
+}
+
+FtsParser::Fts_and_exprContext *FtsParser::fts_and_expr() {
+  Fts_and_exprContext *_localctx =
+      _tracker.createInstance<Fts_and_exprContext>(_ctx, getState());
+  enterRule(_localctx, 4, FtsParser::RuleFts_and_expr);
+  size_t _la = 0;
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(35);
+    fts_seq_expr();
+    setState(46);
+    _errHandler->sync(this);
+    _la = _input->LA(1);
+    while (_la == FtsParser::AND
+
+           || _la == FtsParser::NOT) {
+      setState(41);
+      _errHandler->sync(this);
+      switch (_input->LA(1)) {
+        case FtsParser::AND: {
+          setState(36);
+          match(FtsParser::AND);
+          setState(38);
+          _errHandler->sync(this);
+
+          _la = _input->LA(1);
+          if (_la == FtsParser::NOT) {
+            setState(37);
+            match(FtsParser::NOT);
+          }
+          break;
+        }
+
+        case FtsParser::NOT: {
+          setState(40);
+          match(FtsParser::NOT);
+          break;
+        }
+
+        default:
+          throw NoViableAltException(this);
+      }
+      setState(43);
+      fts_seq_expr();
+      setState(48);
+      _errHandler->sync(this);
+      _la = _input->LA(1);
+    }
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_seq_exprContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_seq_exprContext::Fts_seq_exprContext(
+    ParserRuleContext *parent_ctx, size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+std::vector<FtsParser::Fts_unaryContext *>
+FtsParser::Fts_seq_exprContext::fts_unary() {
+  return getRuleContexts<FtsParser::Fts_unaryContext>();
+}
+
+FtsParser::Fts_unaryContext *FtsParser::Fts_seq_exprContext::fts_unary(
+    size_t i) {
+  return getRuleContext<FtsParser::Fts_unaryContext>(i);
+}
+
+
+size_t FtsParser::Fts_seq_exprContext::getRuleIndex() const {
+  return FtsParser::RuleFts_seq_expr;
+}
+
+void FtsParser::Fts_seq_exprContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_seq_expr(this);
+}
+
+void FtsParser::Fts_seq_exprContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_seq_expr(this);
+}
+
+FtsParser::Fts_seq_exprContext *FtsParser::fts_seq_expr() {
+  Fts_seq_exprContext *_localctx =
+      _tracker.createInstance<Fts_seq_exprContext>(_ctx, getState());
+  enterRule(_localctx, 6, FtsParser::RuleFts_seq_expr);
+  size_t _la = 0;
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(50);
+    _errHandler->sync(this);
+    _la = _input->LA(1);
+    do {
+      setState(49);
+      fts_unary();
+      setState(52);
+      _errHandler->sync(this);
+      _la = _input->LA(1);
+    } while (
+        (((_la & ~0x3fULL) == 0) &&
+         ((1ULL << _la) &
+          ((1ULL << FtsParser::PLUS_SIGN) | (1ULL << FtsParser::MINUS_SIGN) |
+           (1ULL << FtsParser::LP) | (1ULL << FtsParser::DQUOTA_STRING) |
+           (1ULL << FtsParser::REGULAR_ID) | (1ULL << FtsParser::NUMBER) |
+           (1ULL << FtsParser::TERM) | (1ULL << FtsParser::DEFAULT))) != 0));
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_unaryContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_unaryContext::Fts_unaryContext(ParserRuleContext *parent_ctx,
+                                              size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+
+size_t FtsParser::Fts_unaryContext::getRuleIndex() const {
+  return FtsParser::RuleFts_unary;
+}
+
+void FtsParser::Fts_unaryContext::copyFrom(Fts_unaryContext *ctx) {
+  ParserRuleContext::copyFrom(ctx);
+}
+
+//----------------- Must_not_atomContext
+//------------------------------------------------------------------
+
+tree::TerminalNode *FtsParser::Must_not_atomContext::MINUS_SIGN() {
+  return getToken(FtsParser::MINUS_SIGN, 0);
+}
+
+FtsParser::Fts_atomContext *FtsParser::Must_not_atomContext::fts_atom() {
+  return getRuleContext<FtsParser::Fts_atomContext>(0);
+}
+
+FtsParser::Must_not_atomContext::Must_not_atomContext(Fts_unaryContext *ctx) {
+  copyFrom(ctx);
+}
+
+void FtsParser::Must_not_atomContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterMust_not_atom(this);
+}
+void FtsParser::Must_not_atomContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitMust_not_atom(this);
+}
+//----------------- Must_atomContext
+//------------------------------------------------------------------
+
+tree::TerminalNode *FtsParser::Must_atomContext::PLUS_SIGN() {
+  return getToken(FtsParser::PLUS_SIGN, 0);
+}
+
+FtsParser::Fts_atomContext *FtsParser::Must_atomContext::fts_atom() {
+  return getRuleContext<FtsParser::Fts_atomContext>(0);
+}
+
+FtsParser::Must_atomContext::Must_atomContext(Fts_unaryContext *ctx) {
+  copyFrom(ctx);
+}
+
+void FtsParser::Must_atomContext::enterRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterMust_atom(this);
+}
+void FtsParser::Must_atomContext::exitRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitMust_atom(this);
+}
+//----------------- Plain_atomContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_atomContext *FtsParser::Plain_atomContext::fts_atom() {
+  return getRuleContext<FtsParser::Fts_atomContext>(0);
+}
+
+FtsParser::Plain_atomContext::Plain_atomContext(Fts_unaryContext *ctx) {
+  copyFrom(ctx);
+}
+
+void FtsParser::Plain_atomContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterPlain_atom(this);
+}
+void FtsParser::Plain_atomContext::exitRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitPlain_atom(this);
+}
+FtsParser::Fts_unaryContext *FtsParser::fts_unary() {
+  Fts_unaryContext *_localctx =
+      _tracker.createInstance<Fts_unaryContext>(_ctx, getState());
+  enterRule(_localctx, 8, FtsParser::RuleFts_unary);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    setState(59);
+    _errHandler->sync(this);
+    switch (_input->LA(1)) {
+      case FtsParser::PLUS_SIGN: {
+        _localctx = dynamic_cast<Fts_unaryContext *>(
+            _tracker.createInstance<FtsParser::Must_atomContext>(_localctx));
+        enterOuterAlt(_localctx, 1);
+        setState(54);
+        match(FtsParser::PLUS_SIGN);
+        setState(55);
+        fts_atom();
+        break;
+      }
+
+      case FtsParser::MINUS_SIGN: {
+        _localctx = dynamic_cast<Fts_unaryContext *>(
+            _tracker.createInstance<FtsParser::Must_not_atomContext>(
+                _localctx));
+        enterOuterAlt(_localctx, 2);
+        setState(56);
+        match(FtsParser::MINUS_SIGN);
+        setState(57);
+        fts_atom();
+        break;
+      }
+
+      case FtsParser::LP:
+      case FtsParser::DQUOTA_STRING:
+      case FtsParser::REGULAR_ID:
+      case FtsParser::NUMBER:
+      case FtsParser::TERM:
+      case FtsParser::DEFAULT: {
+        _localctx = dynamic_cast<Fts_unaryContext *>(
+            _tracker.createInstance<FtsParser::Plain_atomContext>(_localctx));
+        enterOuterAlt(_localctx, 3);
+        setState(58);
+        fts_atom();
+        break;
+      }
+
+      default:
+        throw NoViableAltException(this);
+    }
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_atomContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_atomContext::Fts_atomContext(ParserRuleContext *parent_ctx,
+                                            size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+FtsParser::Fts_primaryContext *FtsParser::Fts_atomContext::fts_primary() {
+  return getRuleContext<FtsParser::Fts_primaryContext>(0);
+}
+
+FtsParser::Fts_field_prefixContext *
+FtsParser::Fts_atomContext::fts_field_prefix() {
+  return getRuleContext<FtsParser::Fts_field_prefixContext>(0);
+}
+
+FtsParser::Fts_boostContext *FtsParser::Fts_atomContext::fts_boost() {
+  return getRuleContext<FtsParser::Fts_boostContext>(0);
+}
+
+
+size_t FtsParser::Fts_atomContext::getRuleIndex() const {
+  return FtsParser::RuleFts_atom;
+}
+
+void FtsParser::Fts_atomContext::enterRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_atom(this);
+}
+
+void FtsParser::Fts_atomContext::exitRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_atom(this);
+}
+
+FtsParser::Fts_atomContext *FtsParser::fts_atom() {
+  Fts_atomContext *_localctx =
+      _tracker.createInstance<Fts_atomContext>(_ctx, getState());
+  enterRule(_localctx, 10, FtsParser::RuleFts_atom);
+  size_t _la = 0;
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(62);
+    _errHandler->sync(this);
+
+    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(
+        _input, 6, _ctx)) {
+      case 1: {
+        setState(61);
+        fts_field_prefix();
+        break;
+      }
+    }
+    setState(64);
+    fts_primary();
+    setState(66);
+    _errHandler->sync(this);
+
+    _la = _input->LA(1);
+    if (_la == FtsParser::CARET) {
+      setState(65);
+      fts_boost();
+    }
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_field_prefixContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_field_prefixContext::Fts_field_prefixContext(
+    ParserRuleContext *parent_ctx, size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+tree::TerminalNode *FtsParser::Fts_field_prefixContext::REGULAR_ID() {
+  return getToken(FtsParser::REGULAR_ID, 0);
+}
+
+tree::TerminalNode *FtsParser::Fts_field_prefixContext::COLON() {
+  return getToken(FtsParser::COLON, 0);
+}
+
+
+size_t FtsParser::Fts_field_prefixContext::getRuleIndex() const {
+  return FtsParser::RuleFts_field_prefix;
+}
+
+void FtsParser::Fts_field_prefixContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_field_prefix(this);
+}
+
+void FtsParser::Fts_field_prefixContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_field_prefix(this);
+}
+
+FtsParser::Fts_field_prefixContext *FtsParser::fts_field_prefix() {
+  Fts_field_prefixContext *_localctx =
+      _tracker.createInstance<Fts_field_prefixContext>(_ctx, getState());
+  enterRule(_localctx, 12, FtsParser::RuleFts_field_prefix);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(68);
+    match(FtsParser::REGULAR_ID);
+    setState(69);
+    match(FtsParser::COLON);
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_primaryContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_primaryContext::Fts_primaryContext(ParserRuleContext *parent_ctx,
+                                                  size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+FtsParser::Fts_termContext *FtsParser::Fts_primaryContext::fts_term() {
+  return getRuleContext<FtsParser::Fts_termContext>(0);
+}
+
+FtsParser::Fts_phraseContext *FtsParser::Fts_primaryContext::fts_phrase() {
+  return getRuleContext<FtsParser::Fts_phraseContext>(0);
+}
+
+tree::TerminalNode *FtsParser::Fts_primaryContext::LP() {
+  return getToken(FtsParser::LP, 0);
+}
+
+FtsParser::Fts_or_exprContext *FtsParser::Fts_primaryContext::fts_or_expr() {
+  return getRuleContext<FtsParser::Fts_or_exprContext>(0);
+}
+
+tree::TerminalNode *FtsParser::Fts_primaryContext::RP() {
+  return getToken(FtsParser::RP, 0);
+}
+
+
+size_t FtsParser::Fts_primaryContext::getRuleIndex() const {
+  return FtsParser::RuleFts_primary;
+}
+
+void FtsParser::Fts_primaryContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_primary(this);
+}
+
+void FtsParser::Fts_primaryContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_primary(this);
+}
+
+FtsParser::Fts_primaryContext *FtsParser::fts_primary() {
+  Fts_primaryContext *_localctx =
+      _tracker.createInstance<Fts_primaryContext>(_ctx, getState());
+  enterRule(_localctx, 14, FtsParser::RuleFts_primary);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    setState(77);
+    _errHandler->sync(this);
+    switch (_input->LA(1)) {
+      case FtsParser::REGULAR_ID:
+      case FtsParser::NUMBER:
+      case FtsParser::TERM:
+      case FtsParser::DEFAULT: {
+        enterOuterAlt(_localctx, 1);
+        setState(71);
+        fts_term();
+        break;
+      }
+
+      case FtsParser::DQUOTA_STRING: {
+        enterOuterAlt(_localctx, 2);
+        setState(72);
+        fts_phrase();
+        break;
+      }
+
+      case FtsParser::LP: {
+        enterOuterAlt(_localctx, 3);
+        setState(73);
+        match(FtsParser::LP);
+        setState(74);
+        fts_or_expr();
+        setState(75);
+        match(FtsParser::RP);
+        break;
+      }
+
+      default:
+        throw NoViableAltException(this);
+    }
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_boostContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_boostContext::Fts_boostContext(ParserRuleContext *parent_ctx,
+                                              size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+tree::TerminalNode *FtsParser::Fts_boostContext::CARET() {
+  return getToken(FtsParser::CARET, 0);
+}
+
+tree::TerminalNode *FtsParser::Fts_boostContext::NUMBER() {
+  return getToken(FtsParser::NUMBER, 0);
+}
+
+
+size_t FtsParser::Fts_boostContext::getRuleIndex() const {
+  return FtsParser::RuleFts_boost;
+}
+
+void FtsParser::Fts_boostContext::enterRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_boost(this);
+}
+
+void FtsParser::Fts_boostContext::exitRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_boost(this);
+}
+
+FtsParser::Fts_boostContext *FtsParser::fts_boost() {
+  Fts_boostContext *_localctx =
+      _tracker.createInstance<Fts_boostContext>(_ctx, getState());
+  enterRule(_localctx, 16, FtsParser::RuleFts_boost);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(79);
+    match(FtsParser::CARET);
+    setState(80);
+    match(FtsParser::NUMBER);
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_natural_termContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_natural_termContext::Fts_natural_termContext(
+    ParserRuleContext *parent_ctx, size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+std::vector<tree::TerminalNode *>
+FtsParser::Fts_natural_termContext::DEFAULT() {
+  return getTokens(FtsParser::DEFAULT);
+}
+
+tree::TerminalNode *FtsParser::Fts_natural_termContext::DEFAULT(size_t i) {
+  return getToken(FtsParser::DEFAULT, i);
+}
+
+
+size_t FtsParser::Fts_natural_termContext::getRuleIndex() const {
+  return FtsParser::RuleFts_natural_term;
+}
+
+void FtsParser::Fts_natural_termContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_natural_term(this);
+}
+
+void FtsParser::Fts_natural_termContext::exitRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_natural_term(this);
+}
+
+FtsParser::Fts_natural_termContext *FtsParser::fts_natural_term() {
+  Fts_natural_termContext *_localctx =
+      _tracker.createInstance<Fts_natural_termContext>(_ctx, getState());
+  enterRule(_localctx, 18, FtsParser::RuleFts_natural_term);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    size_t alt;
+    enterOuterAlt(_localctx, 1);
+    setState(83);
+    _errHandler->sync(this);
+    alt = 1;
+    do {
+      switch (alt) {
+        case 1: {
+          setState(82);
+          match(FtsParser::DEFAULT);
+          break;
+        }
+
+        default:
+          throw NoViableAltException(this);
+      }
+      setState(85);
+      _errHandler->sync(this);
+      alt = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input,
+                                                                       9, _ctx);
+    } while (alt != 2 && alt != atn::ATN::INVALID_ALT_NUMBER);
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_termContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_termContext::Fts_termContext(ParserRuleContext *parent_ctx,
+                                            size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+tree::TerminalNode *FtsParser::Fts_termContext::TERM() {
+  return getToken(FtsParser::TERM, 0);
+}
+
+tree::TerminalNode *FtsParser::Fts_termContext::REGULAR_ID() {
+  return getToken(FtsParser::REGULAR_ID, 0);
+}
+
+tree::TerminalNode *FtsParser::Fts_termContext::NUMBER() {
+  return getToken(FtsParser::NUMBER, 0);
+}
+
+FtsParser::Fts_natural_termContext *
+FtsParser::Fts_termContext::fts_natural_term() {
+  return getRuleContext<FtsParser::Fts_natural_termContext>(0);
+}
+
+
+size_t FtsParser::Fts_termContext::getRuleIndex() const {
+  return FtsParser::RuleFts_term;
+}
+
+void FtsParser::Fts_termContext::enterRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_term(this);
+}
+
+void FtsParser::Fts_termContext::exitRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_term(this);
+}
+
+FtsParser::Fts_termContext *FtsParser::fts_term() {
+  Fts_termContext *_localctx =
+      _tracker.createInstance<Fts_termContext>(_ctx, getState());
+  enterRule(_localctx, 20, FtsParser::RuleFts_term);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    setState(91);
+    _errHandler->sync(this);
+    switch (_input->LA(1)) {
+      case FtsParser::TERM: {
+        enterOuterAlt(_localctx, 1);
+        setState(87);
+        match(FtsParser::TERM);
+        break;
+      }
+
+      case FtsParser::REGULAR_ID: {
+        enterOuterAlt(_localctx, 2);
+        setState(88);
+        match(FtsParser::REGULAR_ID);
+        break;
+      }
+
+      case FtsParser::NUMBER: {
+        enterOuterAlt(_localctx, 3);
+        setState(89);
+        match(FtsParser::NUMBER);
+        break;
+      }
+
+      case FtsParser::DEFAULT: {
+        enterOuterAlt(_localctx, 4);
+        setState(90);
+        fts_natural_term();
+        break;
+      }
+
+      default:
+        throw NoViableAltException(this);
+    }
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+//----------------- Fts_phraseContext
+//------------------------------------------------------------------
+
+FtsParser::Fts_phraseContext::Fts_phraseContext(ParserRuleContext *parent_ctx,
+                                                size_t invoking_state)
+    : ParserRuleContext(parent_ctx, invoking_state) {}
+
+tree::TerminalNode *FtsParser::Fts_phraseContext::DQUOTA_STRING() {
+  return getToken(FtsParser::DQUOTA_STRING, 0);
+}
+
+
+size_t FtsParser::Fts_phraseContext::getRuleIndex() const {
+  return FtsParser::RuleFts_phrase;
+}
+
+void FtsParser::Fts_phraseContext::enterRule(
+    tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->enterFts_phrase(this);
+}
+
+void FtsParser::Fts_phraseContext::exitRule(tree::ParseTreeListener *listener) {
+  auto parserListener = dynamic_cast<FtsParserListener *>(listener);
+  if (parserListener != nullptr) parserListener->exitFts_phrase(this);
+}
+
+FtsParser::Fts_phraseContext *FtsParser::fts_phrase() {
+  Fts_phraseContext *_localctx =
+      _tracker.createInstance<Fts_phraseContext>(_ctx, getState());
+  enterRule(_localctx, 22, FtsParser::RuleFts_phrase);
+
+  auto onExit = finally([=] { exitRule(); });
+  try {
+    enterOuterAlt(_localctx, 1);
+    setState(93);
+    match(FtsParser::DQUOTA_STRING);
+
+  } catch (RecognitionException &e) {
+    _errHandler->reportError(this, e);
+    _localctx->exception = std::current_exception();
+    _errHandler->recover(this, _localctx->exception);
+  }
+
+  return _localctx;
+}
+
+// Static vars and initialization.
+std::vector<dfa::DFA> FtsParser::_decisionToDFA;
+atn::PredictionContextCache FtsParser::_sharedContextCache;
+
+// We own the ATN which in turn owns the ATN states.
+atn::ATN FtsParser::_atn;
+std::vector<uint16_t> FtsParser::_serializedATN;
+
+std::vector<std::string> FtsParser::_ruleNames = {
+    "fts_query_unit", "fts_or_expr",      "fts_and_expr",     "fts_seq_expr",
+    "fts_unary",      "fts_atom",         "fts_field_prefix", "fts_primary",
+    "fts_boost",      "fts_natural_term", "fts_term",         "fts_phrase"};
+
+std::vector<std::string> FtsParser::_literalNames = {
+    "", "", "", "", "'+'", "'-'", "':'", "'^'", "'('", "')'"};
+
+std::vector<std::string> FtsParser::_symbolicNames = {
+    "",       "OR",    "AND",    "NOT",    "PLUS_SIGN",     "MINUS_SIGN",
+    "COLON",  "CARET", "LP",     "RP",     "DQUOTA_STRING", "REGULAR_ID",
+    "NUMBER", "TERM",  "SPACES", "DEFAULT"};
+
+dfa::Vocabulary FtsParser::_vocabulary(_literalNames, _symbolicNames);
+
+std::vector<std::string> FtsParser::_tokenNames;
+
+FtsParser::Initializer::Initializer() {
+  for (size_t i = 0; i < _symbolicNames.size(); ++i) {
+    std::string name = _vocabulary.getLiteralName(i);
+    if (name.empty()) {
+      name = _vocabulary.getSymbolicName(i);
+    }
+
+    if (name.empty()) {
+      _tokenNames.push_back("<INVALID>");
+    } else {
+      _tokenNames.push_back(name);
+    }
+  }
+
+  _serializedATN = {
+      0x3,  0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964,
+      0x3,  0x11,   0x62,   0x4,    0x2,    0x9,    0x2,    0x4,    0x3,
+      0x9,  0x3,    0x4,    0x4,    0x9,    0x4,    0x4,    0x5,    0x9,
+      0x5,  0x4,    0x6,    0x9,    0x6,    0x4,    0x7,    0x9,    0x7,
+      0x4,  0x8,    0x9,    0x8,    0x4,    0x9,    0x9,    0x9,    0x4,
+      0xa,  0x9,    0xa,    0x4,    0xb,    0x9,    0xb,    0x4,    0xc,
+      0x9,  0xc,    0x4,    0xd,    0x9,    0xd,    0x3,    0x2,    0x3,
+      0x2,  0x3,    0x2,    0x3,    0x3,    0x3,    0x3,    0x3,    0x3,
+      0x7,  0x3,    0x21,   0xa,    0x3,    0xc,    0x3,    0xe,    0x3,
+      0x24, 0xb,    0x3,    0x3,    0x4,    0x3,    0x4,    0x3,    0x4,
+      0x5,  0x4,    0x29,   0xa,    0x4,    0x3,    0x4,    0x5,    0x4,
+      0x2c, 0xa,    0x4,    0x3,    0x4,    0x7,    0x4,    0x2f,   0xa,
+      0x4,  0xc,    0x4,    0xe,    0x4,    0x32,   0xb,    0x4,    0x3,
+      0x5,  0x6,    0x5,    0x35,   0xa,    0x5,    0xd,    0x5,    0xe,
+      0x5,  0x36,   0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,
+      0x6,  0x3,    0x6,    0x5,    0x6,    0x3e,   0xa,    0x6,    0x3,
+      0x7,  0x5,    0x7,    0x41,   0xa,    0x7,    0x3,    0x7,    0x3,
+      0x7,  0x5,    0x7,    0x45,   0xa,    0x7,    0x3,    0x8,    0x3,
+      0x8,  0x3,    0x8,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,
+      0x3,  0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,    0x50,
+      0xa,  0x9,    0x3,    0xa,    0x3,    0xa,    0x3,    0xa,    0x3,
+      0xb,  0x6,    0xb,    0x56,   0xa,    0xb,    0xd,    0xb,    0xe,
+      0xb,  0x57,   0x3,    0xc,    0x3,    0xc,    0x3,    0xc,    0x3,
+      0xc,  0x5,    0xc,    0x5e,   0xa,    0xc,    0x3,    0xd,    0x3,
+      0xd,  0x3,    0xd,    0x2,    0x2,    0xe,    0x2,    0x4,    0x6,
+      0x8,  0xa,    0xc,    0xe,    0x10,   0x12,   0x14,   0x16,   0x18,
+      0x2,  0x2,    0x2,    0x64,   0x2,    0x1a,   0x3,    0x2,    0x2,
+      0x2,  0x4,    0x1d,   0x3,    0x2,    0x2,    0x2,    0x6,    0x25,
+      0x3,  0x2,    0x2,    0x2,    0x8,    0x34,   0x3,    0x2,    0x2,
+      0x2,  0xa,    0x3d,   0x3,    0x2,    0x2,    0x2,    0xc,    0x40,
+      0x3,  0x2,    0x2,    0x2,    0xe,    0x46,   0x3,    0x2,    0x2,
+      0x2,  0x10,   0x4f,   0x3,    0x2,    0x2,    0x2,    0x12,   0x51,
+      0x3,  0x2,    0x2,    0x2,    0x14,   0x55,   0x3,    0x2,    0x2,
+      0x2,  0x16,   0x5d,   0x3,    0x2,    0x2,    0x2,    0x18,   0x5f,
+      0x3,  0x2,    0x2,    0x2,    0x1a,   0x1b,   0x5,    0x4,    0x3,
+      0x2,  0x1b,   0x1c,   0x7,    0x2,    0x2,    0x3,    0x1c,   0x3,
+      0x3,  0x2,    0x2,    0x2,    0x1d,   0x22,   0x5,    0x6,    0x4,
+      0x2,  0x1e,   0x1f,   0x7,    0x3,    0x2,    0x2,    0x1f,   0x21,
+      0x5,  0x6,    0x4,    0x2,    0x20,   0x1e,   0x3,    0x2,    0x2,
+      0x2,  0x21,   0x24,   0x3,    0x2,    0x2,    0x2,    0x22,   0x20,
+      0x3,  0x2,    0x2,    0x2,    0x22,   0x23,   0x3,    0x2,    0x2,
+      0x2,  0x23,   0x5,    0x3,    0x2,    0x2,    0x2,    0x24,   0x22,
+      0x3,  0x2,    0x2,    0x2,    0x25,   0x30,   0x5,    0x8,    0x5,
+      0x2,  0x26,   0x28,   0x7,    0x4,    0x2,    0x2,    0x27,   0x29,
+      0x7,  0x5,    0x2,    0x2,    0x28,   0x27,   0x3,    0x2,    0x2,
+      0x2,  0x28,   0x29,   0x3,    0x2,    0x2,    0x2,    0x29,   0x2c,
+      0x3,  0x2,    0x2,    0x2,    0x2a,   0x2c,   0x7,    0x5,    0x2,
+      0x2,  0x2b,   0x26,   0x3,    0x2,    0x2,    0x2,    0x2b,   0x2a,
+      0x3,  0x2,    0x2,    0x2,    0x2c,   0x2d,   0x3,    0x2,    0x2,
+      0x2,  0x2d,   0x2f,   0x5,    0x8,    0x5,    0x2,    0x2e,   0x2b,
+      0x3,  0x2,    0x2,    0x2,    0x2f,   0x32,   0x3,    0x2,    0x2,
+      0x2,  0x30,   0x2e,   0x3,    0x2,    0x2,    0x2,    0x30,   0x31,
+      0x3,  0x2,    0x2,    0x2,    0x31,   0x7,    0x3,    0x2,    0x2,
+      0x2,  0x32,   0x30,   0x3,    0x2,    0x2,    0x2,    0x33,   0x35,
+      0x5,  0xa,    0x6,    0x2,    0x34,   0x33,   0x3,    0x2,    0x2,
+      0x2,  0x35,   0x36,   0x3,    0x2,    0x2,    0x2,    0x36,   0x34,
+      0x3,  0x2,    0x2,    0x2,    0x36,   0x37,   0x3,    0x2,    0x2,
+      0x2,  0x37,   0x9,    0x3,    0x2,    0x2,    0x2,    0x38,   0x39,
+      0x7,  0x6,    0x2,    0x2,    0x39,   0x3e,   0x5,    0xc,    0x7,
+      0x2,  0x3a,   0x3b,   0x7,    0x7,    0x2,    0x2,    0x3b,   0x3e,
+      0x5,  0xc,    0x7,    0x2,    0x3c,   0x3e,   0x5,    0xc,    0x7,
+      0x2,  0x3d,   0x38,   0x3,    0x2,    0x2,    0x2,    0x3d,   0x3a,
+      0x3,  0x2,    0x2,    0x2,    0x3d,   0x3c,   0x3,    0x2,    0x2,
+      0x2,  0x3e,   0xb,    0x3,    0x2,    0x2,    0x2,    0x3f,   0x41,
+      0x5,  0xe,    0x8,    0x2,    0x40,   0x3f,   0x3,    0x2,    0x2,
+      0x2,  0x40,   0x41,   0x3,    0x2,    0x2,    0x2,    0x41,   0x42,
+      0x3,  0x2,    0x2,    0x2,    0x42,   0x44,   0x5,    0x10,   0x9,
+      0x2,  0x43,   0x45,   0x5,    0x12,   0xa,    0x2,    0x44,   0x43,
+      0x3,  0x2,    0x2,    0x2,    0x44,   0x45,   0x3,    0x2,    0x2,
+      0x2,  0x45,   0xd,    0x3,    0x2,    0x2,    0x2,    0x46,   0x47,
+      0x7,  0xd,    0x2,    0x2,    0x47,   0x48,   0x7,    0x8,    0x2,
+      0x2,  0x48,   0xf,    0x3,    0x2,    0x2,    0x2,    0x49,   0x50,
+      0x5,  0x16,   0xc,    0x2,    0x4a,   0x50,   0x5,    0x18,   0xd,
+      0x2,  0x4b,   0x4c,   0x7,    0xa,    0x2,    0x2,    0x4c,   0x4d,
+      0x5,  0x4,    0x3,    0x2,    0x4d,   0x4e,   0x7,    0xb,    0x2,
+      0x2,  0x4e,   0x50,   0x3,    0x2,    0x2,    0x2,    0x4f,   0x49,
+      0x3,  0x2,    0x2,    0x2,    0x4f,   0x4a,   0x3,    0x2,    0x2,
+      0x2,  0x4f,   0x4b,   0x3,    0x2,    0x2,    0x2,    0x50,   0x11,
+      0x3,  0x2,    0x2,    0x2,    0x51,   0x52,   0x7,    0x9,    0x2,
+      0x2,  0x52,   0x53,   0x7,    0xe,    0x2,    0x2,    0x53,   0x13,
+      0x3,  0x2,    0x2,    0x2,    0x54,   0x56,   0x7,    0x11,   0x2,
+      0x2,  0x55,   0x54,   0x3,    0x2,    0x2,    0x2,    0x56,   0x57,
+      0x3,  0x2,    0x2,    0x2,    0x57,   0x55,   0x3,    0x2,    0x2,
+      0x2,  0x57,   0x58,   0x3,    0x2,    0x2,    0x2,    0x58,   0x15,
+      0x3,  0x2,    0x2,    0x2,    0x59,   0x5e,   0x7,    0xf,    0x2,
+      0x2,  0x5a,   0x5e,   0x7,    0xd,    0x2,    0x2,    0x5b,   0x5e,
+      0x7,  0xe,    0x2,    0x2,    0x5c,   0x5e,   0x5,    0x14,   0xb,
+      0x2,  0x5d,   0x59,   0x3,    0x2,    0x2,    0x2,    0x5d,   0x5a,
+      0x3,  0x2,    0x2,    0x2,    0x5d,   0x5b,   0x3,    0x2,    0x2,
+      0x2,  0x5d,   0x5c,   0x3,    0x2,    0x2,    0x2,    0x5e,   0x17,
+      0x3,  0x2,    0x2,    0x2,    0x5f,   0x60,   0x7,    0xc,    0x2,
+      0x2,  0x60,   0x19,   0x3,    0x2,    0x2,    0x2,    0xd,    0x22,
+      0x28, 0x2b,   0x30,   0x36,   0x3d,   0x40,   0x44,   0x4f,   0x57,
+      0x5d,
+  };
+
+  atn::ATNDeserializer deserializer;
+  _atn = deserializer.deserialize(_serializedATN);
+
+  size_t count = _atn.getNumberOfDecisions();
+  _decisionToDFA.reserve(count);
+  for (size_t i = 0; i < count; i++) {
+    _decisionToDFA.emplace_back(_atn.getDecisionState(i), i);
+  }
+}
+
+FtsParser::Initializer FtsParser::_init;
diff --git a/src/db/index/column/fts_column/gen/FtsParser.h b/src/db/index/column/fts_column/gen/FtsParser.h
new file mode 100644
index 000000000..3f291557b
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParser.h
@@ -0,0 +1,303 @@
+
+// Generated from FtsParser.g4 by ANTLR 4.8
+
+#pragma once
+
+
+#include "antlr4-runtime.h"
+
+
+namespace antlr4 {
+
+
+class FtsParser : public antlr4::Parser {
+ public:
+  enum {
+    OR = 1,
+    AND = 2,
+    NOT = 3,
+    PLUS_SIGN = 4,
+    MINUS_SIGN = 5,
+    COLON = 6,
+    CARET = 7,
+    LP = 8,
+    RP = 9,
+    DQUOTA_STRING = 10,
+    REGULAR_ID = 11,
+    NUMBER = 12,
+    TERM = 13,
+    SPACES = 14,
+    DEFAULT = 15
+  };
+
+  enum {
+    RuleFts_query_unit = 0,
+    RuleFts_or_expr = 1,
+    RuleFts_and_expr = 2,
+    RuleFts_seq_expr = 3,
+    RuleFts_unary = 4,
+    RuleFts_atom = 5,
+    RuleFts_field_prefix = 6,
+    RuleFts_primary = 7,
+    RuleFts_boost = 8,
+    RuleFts_natural_term = 9,
+    RuleFts_term = 10,
+    RuleFts_phrase = 11
+  };
+
+  FtsParser(antlr4::TokenStream *input);
+  ~FtsParser();
+
+  virtual std::string getGrammarFileName() const override;
+  virtual const antlr4::atn::ATN &getATN() const override {
+    return _atn;
+  };
+  virtual const std::vector<std::string> &getTokenNames() const override {
+    return _tokenNames;
+  };  // deprecated: use vocabulary instead.
+  virtual const std::vector<std::string> &getRuleNames() const override;
+  virtual antlr4::dfa::Vocabulary &getVocabulary() const override;
+
+
+  class Fts_query_unitContext;
+  class Fts_or_exprContext;
+  class Fts_and_exprContext;
+  class Fts_seq_exprContext;
+  class Fts_unaryContext;
+  class Fts_atomContext;
+  class Fts_field_prefixContext;
+  class Fts_primaryContext;
+  class Fts_boostContext;
+  class Fts_natural_termContext;
+  class Fts_termContext;
+  class Fts_phraseContext;
+
+  class Fts_query_unitContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_query_unitContext(antlr4::ParserRuleContext *parent_ctx,
+                          size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    Fts_or_exprContext *fts_or_expr();
+    antlr4::tree::TerminalNode *EOF();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_query_unitContext *fts_query_unit();
+
+  class Fts_or_exprContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_or_exprContext(antlr4::ParserRuleContext *parent_ctx,
+                       size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    std::vector<Fts_and_exprContext *> fts_and_expr();
+    Fts_and_exprContext *fts_and_expr(size_t i);
+    std::vector<antlr4::tree::TerminalNode *> OR();
+    antlr4::tree::TerminalNode *OR(size_t i);
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_or_exprContext *fts_or_expr();
+
+  class Fts_and_exprContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_and_exprContext(antlr4::ParserRuleContext *parent_ctx,
+                        size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    std::vector<Fts_seq_exprContext *> fts_seq_expr();
+    Fts_seq_exprContext *fts_seq_expr(size_t i);
+    std::vector<antlr4::tree::TerminalNode *> AND();
+    antlr4::tree::TerminalNode *AND(size_t i);
+    std::vector<antlr4::tree::TerminalNode *> NOT();
+    antlr4::tree::TerminalNode *NOT(size_t i);
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_and_exprContext *fts_and_expr();
+
+  class Fts_seq_exprContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_seq_exprContext(antlr4::ParserRuleContext *parent_ctx,
+                        size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    std::vector<Fts_unaryContext *> fts_unary();
+    Fts_unaryContext *fts_unary(size_t i);
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_seq_exprContext *fts_seq_expr();
+
+  class Fts_unaryContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_unaryContext(antlr4::ParserRuleContext *parent_ctx,
+                     size_t invoking_state);
+
+    Fts_unaryContext() = default;
+    void copyFrom(Fts_unaryContext *context);
+    using antlr4::ParserRuleContext::copyFrom;
+
+    virtual size_t getRuleIndex() const override;
+  };
+
+  class Must_not_atomContext : public Fts_unaryContext {
+   public:
+    Must_not_atomContext(Fts_unaryContext *ctx);
+
+    antlr4::tree::TerminalNode *MINUS_SIGN();
+    Fts_atomContext *fts_atom();
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  class Must_atomContext : public Fts_unaryContext {
+   public:
+    Must_atomContext(Fts_unaryContext *ctx);
+
+    antlr4::tree::TerminalNode *PLUS_SIGN();
+    Fts_atomContext *fts_atom();
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  class Plain_atomContext : public Fts_unaryContext {
+   public:
+    Plain_atomContext(Fts_unaryContext *ctx);
+
+    Fts_atomContext *fts_atom();
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_unaryContext *fts_unary();
+
+  class Fts_atomContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_atomContext(antlr4::ParserRuleContext *parent_ctx,
+                    size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    Fts_primaryContext *fts_primary();
+    Fts_field_prefixContext *fts_field_prefix();
+    Fts_boostContext *fts_boost();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_atomContext *fts_atom();
+
+  class Fts_field_prefixContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_field_prefixContext(antlr4::ParserRuleContext *parent_ctx,
+                            size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    antlr4::tree::TerminalNode *REGULAR_ID();
+    antlr4::tree::TerminalNode *COLON();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_field_prefixContext *fts_field_prefix();
+
+  class Fts_primaryContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_primaryContext(antlr4::ParserRuleContext *parent_ctx,
+                       size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    Fts_termContext *fts_term();
+    Fts_phraseContext *fts_phrase();
+    antlr4::tree::TerminalNode *LP();
+    Fts_or_exprContext *fts_or_expr();
+    antlr4::tree::TerminalNode *RP();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_primaryContext *fts_primary();
+
+  class Fts_boostContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_boostContext(antlr4::ParserRuleContext *parent_ctx,
+                     size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    antlr4::tree::TerminalNode *CARET();
+    antlr4::tree::TerminalNode *NUMBER();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_boostContext *fts_boost();
+
+  class Fts_natural_termContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_natural_termContext(antlr4::ParserRuleContext *parent_ctx,
+                            size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    std::vector<antlr4::tree::TerminalNode *> DEFAULT();
+    antlr4::tree::TerminalNode *DEFAULT(size_t i);
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_natural_termContext *fts_natural_term();
+
+  class Fts_termContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_termContext(antlr4::ParserRuleContext *parent_ctx,
+                    size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    antlr4::tree::TerminalNode *TERM();
+    antlr4::tree::TerminalNode *REGULAR_ID();
+    antlr4::tree::TerminalNode *NUMBER();
+    Fts_natural_termContext *fts_natural_term();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_termContext *fts_term();
+
+  class Fts_phraseContext : public antlr4::ParserRuleContext {
+   public:
+    Fts_phraseContext(antlr4::ParserRuleContext *parent_ctx,
+                      size_t invoking_state);
+    virtual size_t getRuleIndex() const override;
+    antlr4::tree::TerminalNode *DQUOTA_STRING();
+
+    virtual void enterRule(antlr4::tree::ParseTreeListener *listener) override;
+    virtual void exitRule(antlr4::tree::ParseTreeListener *listener) override;
+  };
+
+  Fts_phraseContext *fts_phrase();
+
+
+ private:
+  static std::vector<antlr4::dfa::DFA> _decisionToDFA;
+  static antlr4::atn::PredictionContextCache _sharedContextCache;
+  static std::vector<std::string> _ruleNames;
+  static std::vector<std::string> _tokenNames;
+
+  static std::vector<std::string> _literalNames;
+  static std::vector<std::string> _symbolicNames;
+  static antlr4::dfa::Vocabulary _vocabulary;
+  static antlr4::atn::ATN _atn;
+  static std::vector<uint16_t> _serializedATN;
+
+
+  struct Initializer {
+    Initializer();
+  };
+  static Initializer _init;
+};
+
+}  // namespace antlr4
diff --git a/src/db/index/column/fts_column/gen/FtsParser.interp b/src/db/index/column/fts_column/gen/FtsParser.interp
new file mode 100644
index 000000000..88d3cfe81
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParser.interp
@@ -0,0 +1,53 @@
+token literal names:
+null
+null
+null
+null
+'+'
+'-'
+':'
+'^'
+'('
+')'
+null
+null
+null
+null
+null
+null
+
+token symbolic names:
+null
+OR
+AND
+NOT
+PLUS_SIGN
+MINUS_SIGN
+COLON
+CARET
+LP
+RP
+DQUOTA_STRING
+REGULAR_ID
+NUMBER
+TERM
+SPACES
+DEFAULT
+
+rule names:
+fts_query_unit
+fts_or_expr
+fts_and_expr
+fts_seq_expr
+fts_unary
+fts_atom
+fts_field_prefix
+fts_primary
+fts_boost
+fts_natural_term
+fts_term
+fts_phrase
+
+
+atn:
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 17, 98, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 7, 3, 33, 10, 3, 12, 3, 14, 3, 36, 11, 3, 3, 4, 3, 4, 3, 4, 5, 4, 41, 10, 4, 3, 4, 5, 4, 44, 10, 4, 3, 4, 7, 4, 47, 10, 4, 12, 4, 14, 4, 50, 11, 4, 3, 5, 6, 5, 53, 10, 5, 13, 5, 14, 5, 54, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 5, 6, 62, 10, 6, 3, 7, 5, 7, 65, 10, 7, 3, 7, 3, 7, 5, 7, 69, 10, 7, 3, 8, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 80, 10, 9, 3, 10, 3, 10, 3, 10, 3, 11, 6, 11, 86, 10, 11, 13, 11, 14, 11, 87, 3, 12, 3, 12, 3, 12, 3, 12, 5, 12, 94, 10, 12, 3, 13, 3, 13, 3, 13, 2, 2, 14, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2, 2, 2, 100, 2, 26, 3, 2, 2, 2, 4, 29, 3, 2, 2, 2, 6, 37, 3, 2, 2, 2, 8, 52, 3, 2, 2, 2, 10, 61, 3, 2, 2, 2, 12, 64, 3, 2, 2, 2, 14, 70, 3, 2, 2, 2, 16, 79, 3, 2, 2, 2, 18, 81, 3, 2, 2, 2, 20, 85, 3, 2, 2, 2, 22, 93, 3, 2, 2, 2, 24, 95, 3, 2, 2, 2, 26, 27, 5, 4, 3, 2, 27, 28, 7, 2, 2, 3, 28, 3, 3, 2, 2, 2, 29, 34, 5, 6, 4, 2, 30, 31, 7, 3, 2, 2, 31, 33, 5, 6, 4, 2, 32, 30, 3, 2, 2, 2, 33, 36, 3, 2, 2, 2, 34, 32, 3, 2, 2, 2, 34, 35, 3, 2, 2, 2, 35, 5, 3, 2, 2, 2, 36, 34, 3, 2, 2, 2, 37, 48, 5, 8, 5, 2, 38, 40, 7, 4, 2, 2, 39, 41, 7, 5, 2, 2, 40, 39, 3, 2, 2, 2, 40, 41, 3, 2, 2, 2, 41, 44, 3, 2, 2, 2, 42, 44, 7, 5, 2, 2, 43, 38, 3, 2, 2, 2, 43, 42, 3, 2, 2, 2, 44, 45, 3, 2, 2, 2, 45, 47, 5, 8, 5, 2, 46, 43, 3, 2, 2, 2, 47, 50, 3, 2, 2, 2, 48, 46, 3, 2, 2, 2, 48, 49, 3, 2, 2, 2, 49, 7, 3, 2, 2, 2, 50, 48, 3, 2, 2, 2, 51, 53, 5, 10, 6, 2, 52, 51, 3, 2, 2, 2, 53, 54, 3, 2, 2, 2, 54, 52, 3, 2, 2, 2, 54, 55, 3, 2, 2, 2, 55, 9, 3, 2, 2, 2, 56, 57, 7, 6, 2, 2, 57, 62, 5, 12, 7, 2, 58, 59, 7, 7, 2, 2, 59, 62, 5, 12, 7, 2, 60, 62, 5, 12, 7, 2, 61, 56, 3, 2, 2, 2, 61, 58, 3, 2, 2, 2, 61, 60, 3, 2, 2, 2, 62, 11, 3, 2, 2, 2, 63, 65, 5, 14, 8, 2, 64, 63, 3, 2, 2, 2, 64, 65, 3, 2, 2, 2, 65, 66, 3, 2, 2, 2, 66, 68, 5, 16, 9, 2, 67, 69, 5, 18, 10, 2, 68, 67, 3, 2, 2, 2, 68, 69, 3, 2, 2, 2, 69, 13, 3, 2, 2, 2, 70, 71, 7, 13, 2, 2, 71, 72, 7, 8, 2, 2, 72, 15, 3, 2, 2, 2, 73, 80, 5, 22, 12, 2, 74, 80, 5, 24, 13, 2, 75, 76, 7, 10, 2, 2, 76, 77, 5, 4, 3, 2, 77, 78, 7, 11, 2, 2, 78, 80, 3, 2, 2, 2, 79, 73, 3, 2, 2, 2, 79, 74, 3, 2, 2, 2, 79, 75, 3, 2, 2, 2, 80, 17, 3, 2, 2, 2, 81, 82, 7, 9, 2, 2, 82, 83, 7, 14, 2, 2, 83, 19, 3, 2, 2, 2, 84, 86, 7, 17, 2, 2, 85, 84, 3, 2, 2, 2, 86, 87, 3, 2, 2, 2, 87, 85, 3, 2, 2, 2, 87, 88, 3, 2, 2, 2, 88, 21, 3, 2, 2, 2, 89, 94, 7, 15, 2, 2, 90, 94, 7, 13, 2, 2, 91, 94, 7, 14, 2, 2, 92, 94, 5, 20, 11, 2, 93, 89, 3, 2, 2, 2, 93, 90, 3, 2, 2, 2, 93, 91, 3, 2, 2, 2, 93, 92, 3, 2, 2, 2, 94, 23, 3, 2, 2, 2, 95, 96, 7, 12, 2, 2, 96, 25, 3, 2, 2, 2, 13, 34, 40, 43, 48, 54, 61, 64, 68, 79, 87, 93]
diff --git a/src/db/index/column/fts_column/gen/FtsParser.tokens b/src/db/index/column/fts_column/gen/FtsParser.tokens
new file mode 100644
index 000000000..cd6e2db20
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParser.tokens
@@ -0,0 +1,21 @@
+OR=1
+AND=2
+NOT=3
+PLUS_SIGN=4
+MINUS_SIGN=5
+COLON=6
+CARET=7
+LP=8
+RP=9
+DQUOTA_STRING=10
+REGULAR_ID=11
+NUMBER=12
+TERM=13
+SPACES=14
+DEFAULT=15
+'+'=4
+'-'=5
+':'=6
+'^'=7
+'('=8
+')'=9
diff --git a/src/db/index/column/fts_column/gen/FtsParserBaseListener.cc b/src/db/index/column/fts_column/gen/FtsParserBaseListener.cc
new file mode 100644
index 000000000..a78804a3a
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParserBaseListener.cc
@@ -0,0 +1,8 @@
+
+// Generated from FtsParser.g4 by ANTLR 4.8
+
+
+#include "FtsParserBaseListener.h"
+
+
+using namespace antlr4;
diff --git a/src/db/index/column/fts_column/gen/FtsParserBaseListener.h b/src/db/index/column/fts_column/gen/FtsParserBaseListener.h
new file mode 100644
index 000000000..e88465570
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParserBaseListener.h
@@ -0,0 +1,89 @@
+
+// Generated from FtsParser.g4 by ANTLR 4.8
+
+#pragma once
+
+
+#include "FtsParserListener.h"
+#include "antlr4-runtime.h"
+
+
+namespace antlr4 {
+
+/**
+ * This class provides an empty implementation of FtsParserListener,
+ * which can be extended to create a listener which only needs to handle a
+ * subset of the available methods.
+ */
+class FtsParserBaseListener : public FtsParserListener {
+ public:
+  virtual void enterFts_query_unit(
+      FtsParser::Fts_query_unitContext * /*ctx*/) override {}
+  virtual void exitFts_query_unit(
+      FtsParser::Fts_query_unitContext * /*ctx*/) override {}
+
+  virtual void enterFts_or_expr(
+      FtsParser::Fts_or_exprContext * /*ctx*/) override {}
+  virtual void exitFts_or_expr(
+      FtsParser::Fts_or_exprContext * /*ctx*/) override {}
+
+  virtual void enterFts_and_expr(
+      FtsParser::Fts_and_exprContext * /*ctx*/) override {}
+  virtual void exitFts_and_expr(
+      FtsParser::Fts_and_exprContext * /*ctx*/) override {}
+
+  virtual void enterFts_seq_expr(
+      FtsParser::Fts_seq_exprContext * /*ctx*/) override {}
+  virtual void exitFts_seq_expr(
+      FtsParser::Fts_seq_exprContext * /*ctx*/) override {}
+
+  virtual void enterMust_atom(FtsParser::Must_atomContext * /*ctx*/) override {}
+  virtual void exitMust_atom(FtsParser::Must_atomContext * /*ctx*/) override {}
+
+  virtual void enterMust_not_atom(
+      FtsParser::Must_not_atomContext * /*ctx*/) override {}
+  virtual void exitMust_not_atom(
+      FtsParser::Must_not_atomContext * /*ctx*/) override {}
+
+  virtual void enterPlain_atom(
+      FtsParser::Plain_atomContext * /*ctx*/) override {}
+  virtual void exitPlain_atom(FtsParser::Plain_atomContext * /*ctx*/) override {
+  }
+
+  virtual void enterFts_atom(FtsParser::Fts_atomContext * /*ctx*/) override {}
+  virtual void exitFts_atom(FtsParser::Fts_atomContext * /*ctx*/) override {}
+
+  virtual void enterFts_field_prefix(
+      FtsParser::Fts_field_prefixContext * /*ctx*/) override {}
+  virtual void exitFts_field_prefix(
+      FtsParser::Fts_field_prefixContext * /*ctx*/) override {}
+
+  virtual void enterFts_primary(
+      FtsParser::Fts_primaryContext * /*ctx*/) override {}
+  virtual void exitFts_primary(
+      FtsParser::Fts_primaryContext * /*ctx*/) override {}
+
+  virtual void enterFts_boost(FtsParser::Fts_boostContext * /*ctx*/) override {}
+  virtual void exitFts_boost(FtsParser::Fts_boostContext * /*ctx*/) override {}
+
+  virtual void enterFts_natural_term(
+      FtsParser::Fts_natural_termContext * /*ctx*/) override {}
+  virtual void exitFts_natural_term(
+      FtsParser::Fts_natural_termContext * /*ctx*/) override {}
+
+  virtual void enterFts_term(FtsParser::Fts_termContext * /*ctx*/) override {}
+  virtual void exitFts_term(FtsParser::Fts_termContext * /*ctx*/) override {}
+
+  virtual void enterFts_phrase(
+      FtsParser::Fts_phraseContext * /*ctx*/) override {}
+  virtual void exitFts_phrase(FtsParser::Fts_phraseContext * /*ctx*/) override {
+  }
+
+
+  virtual void enterEveryRule(antlr4::ParserRuleContext * /*ctx*/) override {}
+  virtual void exitEveryRule(antlr4::ParserRuleContext * /*ctx*/) override {}
+  virtual void visitTerminal(antlr4::tree::TerminalNode * /*node*/) override {}
+  virtual void visitErrorNode(antlr4::tree::ErrorNode * /*node*/) override {}
+};
+
+}  // namespace antlr4
diff --git a/src/db/index/column/fts_column/gen/FtsParserListener.cc b/src/db/index/column/fts_column/gen/FtsParserListener.cc
new file mode 100644
index 000000000..b794fd4db
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParserListener.cc
@@ -0,0 +1,8 @@
+
+// Generated from FtsParser.g4 by ANTLR 4.8
+
+
+#include "FtsParserListener.h"
+
+
+using namespace antlr4;
diff --git a/src/db/index/column/fts_column/gen/FtsParserListener.h b/src/db/index/column/fts_column/gen/FtsParserListener.h
new file mode 100644
index 000000000..71be04b8a
--- /dev/null
+++ b/src/db/index/column/fts_column/gen/FtsParserListener.h
@@ -0,0 +1,66 @@
+
+// Generated from FtsParser.g4 by ANTLR 4.8
+
+#pragma once
+
+
+#include "FtsParser.h"
+#include "antlr4-runtime.h"
+
+
+namespace antlr4 {
+
+/**
+ * This interface defines an abstract listener for a parse tree produced by
+ * FtsParser.
+ */
+class FtsParserListener : public antlr4::tree::ParseTreeListener {
+ public:
+  virtual void enterFts_query_unit(FtsParser::Fts_query_unitContext *ctx) = 0;
+  virtual void exitFts_query_unit(FtsParser::Fts_query_unitContext *ctx) = 0;
+
+  virtual void enterFts_or_expr(FtsParser::Fts_or_exprContext *ctx) = 0;
+  virtual void exitFts_or_expr(FtsParser::Fts_or_exprContext *ctx) = 0;
+
+  virtual void enterFts_and_expr(FtsParser::Fts_and_exprContext *ctx) = 0;
+  virtual void exitFts_and_expr(FtsParser::Fts_and_exprContext *ctx) = 0;
+
+  virtual void enterFts_seq_expr(FtsParser::Fts_seq_exprContext *ctx) = 0;
+  virtual void exitFts_seq_expr(FtsParser::Fts_seq_exprContext *ctx) = 0;
+
+  virtual void enterMust_atom(FtsParser::Must_atomContext *ctx) = 0;
+  virtual void exitMust_atom(FtsParser::Must_atomContext *ctx) = 0;
+
+  virtual void enterMust_not_atom(FtsParser::Must_not_atomContext *ctx) = 0;
+  virtual void exitMust_not_atom(FtsParser::Must_not_atomContext *ctx) = 0;
+
+  virtual void enterPlain_atom(FtsParser::Plain_atomContext *ctx) = 0;
+  virtual void exitPlain_atom(FtsParser::Plain_atomContext *ctx) = 0;
+
+  virtual void enterFts_atom(FtsParser::Fts_atomContext *ctx) = 0;
+  virtual void exitFts_atom(FtsParser::Fts_atomContext *ctx) = 0;
+
+  virtual void enterFts_field_prefix(
+      FtsParser::Fts_field_prefixContext *ctx) = 0;
+  virtual void exitFts_field_prefix(
+      FtsParser::Fts_field_prefixContext *ctx) = 0;
+
+  virtual void enterFts_primary(FtsParser::Fts_primaryContext *ctx) = 0;
+  virtual void exitFts_primary(FtsParser::Fts_primaryContext *ctx) = 0;
+
+  virtual void enterFts_boost(FtsParser::Fts_boostContext *ctx) = 0;
+  virtual void exitFts_boost(FtsParser::Fts_boostContext *ctx) = 0;
+
+  virtual void enterFts_natural_term(
+      FtsParser::Fts_natural_termContext *ctx) = 0;
+  virtual void exitFts_natural_term(
+      FtsParser::Fts_natural_termContext *ctx) = 0;
+
+  virtual void enterFts_term(FtsParser::Fts_termContext *ctx) = 0;
+  virtual void exitFts_term(FtsParser::Fts_termContext *ctx) = 0;
+
+  virtual void enterFts_phrase(FtsParser::Fts_phraseContext *ctx) = 0;
+  virtual void exitFts_phrase(FtsParser::Fts_phraseContext *ctx) = 0;
+};
+
+}  // namespace antlr4
diff --git a/src/db/index/column/fts_column/gen_parser.sh b/src/db/index/column/fts_column/gen_parser.sh
new file mode 100644
index 000000000..8797a4d5e
--- /dev/null
+++ b/src/db/index/column/fts_column/gen_parser.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+#****************************************************************#
+# ScriptName: gen_parser.sh
+# Author: fancy.lf
+# Function: command to generate antlr sql parser code in se directory
+#***************************************************************#
+
+java -jar ../../../../deps/thirdparty/antlr/antlr-4.8-complete.jar -Dlanguage=Cpp -package antlr4 FtsLexer.g4 FtsParser.g4 -o gen
+sed -i 's/\bu8"/"/g' gen/*.cc
diff --git a/src/db/index/column/fts_column/iterator/fts_candidate_iterator.cc b/src/db/index/column/fts_column/iterator/fts_candidate_iterator.cc
new file mode 100644
index 000000000..5b1d3687d
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_candidate_iterator.cc
@@ -0,0 +1,53 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_candidate_iterator.h"
+#include <algorithm>
+
+namespace zvec::fts {
+
+CandidateDocIterator::CandidateDocIterator(
+    const std::vector<uint64_t> &sorted_local_ids) {
+  ids_.reserve(sorted_local_ids.size());
+  for (uint64_t id : sorted_local_ids) {
+    ids_.push_back(static_cast<uint32_t>(id));
+  }
+  cached_max_score_ = 0.0f;
+}
+
+
+uint32_t CandidateDocIterator::next_doc() {
+  if (pos_ >= ids_.size()) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  cached_doc_id_ = ids_[pos_++];
+  return cached_doc_id_;
+}
+
+uint32_t CandidateDocIterator::advance(uint32_t target) {
+  // Start from pos_: everything before it is already consumed.
+  auto begin = ids_.begin() + pos_;
+  auto it = std::lower_bound(begin, ids_.end(), target);
+  if (it == ids_.end()) {
+    pos_ = ids_.size();
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  pos_ = static_cast<size_t>(it - ids_.begin()) + 1;
+  cached_doc_id_ = *it;
+  return cached_doc_id_;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_candidate_iterator.h b/src/db/index/column/fts_column/iterator/fts_candidate_iterator.h
new file mode 100644
index 000000000..5f7cce1dd
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_candidate_iterator.h
@@ -0,0 +1,55 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include "fts_doc_iterator.h"
+
+namespace zvec::fts {
+
+/*! Candidate-driven document iterator.
+ *
+ *  AND-ed with an FTS iterator tree under ConjunctionIterator: since cost()
+ *  returns the (small) candidate count, this iterator becomes the lead and
+ *  the FTS tree is only asked to advance() to each candidate — reusing the
+ *  existing BM25 / matches / filter-pushdown machinery.
+ *
+ *  Input MUST be ascending segment-local doc_ids (the space TermDocIterator
+ *  uses; no GLOBAL→LOCAL translation needed in zvec).
+ */
+class CandidateDocIterator : public DocIterator {
+ public:
+  explicit CandidateDocIterator(const std::vector<uint64_t> &sorted_local_ids);
+
+  uint32_t next_doc() override;
+  uint32_t advance(uint32_t target) override;
+
+  float score() override {
+    return 0.0f;
+  }
+  uint64_t cost() const override {
+    return ids_.size();
+  }
+  float max_score() const override {
+    return 0.0f;
+  }
+
+ private:
+  std::vector<uint32_t> ids_;  // ascending segment-local doc_ids
+  size_t pos_{0};              // index of next element to return
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc
new file mode 100644
index 000000000..51e92c44c
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc
@@ -0,0 +1,187 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_conjunction_iterator.h"
+#include <algorithm>
+
+namespace zvec::fts {
+
+ConjunctionIterator::ConjunctionIterator(
+    std::vector<DocIteratorPtr> must_iterators,
+    std::vector<DocIteratorPtr> must_not_iterators)
+    : must_iterators_(std::move(must_iterators)),
+      must_not_iterators_(std::move(must_not_iterators)) {
+  // Sort must iterators by cost (ascending) so the cheapest leads
+  std::sort(must_iterators_.begin(), must_iterators_.end(),
+            [](const DocIteratorPtr &a, const DocIteratorPtr &b) {
+              return a->cost() < b->cost();
+            });
+  // Compute and cache max_score in base class field
+  float total = 0.0f;
+  for (auto &iter : must_iterators_) {
+    total += iter->cached_max_score_;
+  }
+  cached_max_score_ = total;
+}
+
+uint32_t ConjunctionIterator::next_doc() {
+  if (must_iterators_.empty()) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // MaxScore pruning: If the maximum possible score of this AND node
+  // cannot beat the threshold, terminate iteration early.
+  if (min_competitive_score_ > 0.0f && max_score() < min_competitive_score_) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // Advance the lead iterator and try to find agreement
+  uint32_t candidate = must_iterators_[0]->next_doc();
+  cached_doc_id_ = do_next(candidate);
+  return cached_doc_id_;
+}
+
+uint32_t ConjunctionIterator::next_doc(const zvec::IndexFilter *filter) {
+  if (must_iterators_.empty()) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // MaxScore pruning
+  if (min_competitive_score_ > 0.0f && max_score() < min_competitive_score_) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // Lead iterator advances with filter-awareness so filtered docs never
+  // reach do_next() alignment.
+  uint32_t candidate = must_iterators_[0]->next_doc(filter);
+  while (candidate != NO_MORE_DOCS) {
+    candidate = do_next(candidate);
+    if (candidate == NO_MORE_DOCS || !filter->is_filtered(candidate)) {
+      break;
+    }
+    // do_next may have re-anchored the lead onto a filtered doc; advance
+    // the lead past it (still filter-aware) and try again.
+    candidate = must_iterators_[0]->next_doc(filter);
+  }
+  cached_doc_id_ = candidate;
+  return candidate;
+}
+
+uint32_t ConjunctionIterator::advance(uint32_t target) {
+  if (must_iterators_.empty()) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // MaxScore pruning
+  if (min_competitive_score_ > 0.0f && max_score() < min_competitive_score_) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  uint32_t candidate = must_iterators_[0]->advance(target);
+  cached_doc_id_ = do_next(candidate);
+  return cached_doc_id_;
+}
+
+uint32_t ConjunctionIterator::do_next(uint32_t candidate) {
+  if (candidate == NO_MORE_DOCS) {
+    return NO_MORE_DOCS;
+  }
+
+  while (true) {
+    // Try to advance all other must iterators to the candidate
+    bool all_match = true;
+    for (size_t i = 1; i < must_iterators_.size(); ++i) {
+      uint32_t other_doc = must_iterators_[i]->advance(candidate);
+      if (other_doc == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+      if (other_doc != candidate) {
+        // Mismatch: use the higher doc_id as the new candidate
+        // and re-advance the lead iterator
+        candidate = must_iterators_[0]->advance(other_doc);
+        if (candidate == NO_MORE_DOCS) {
+          return NO_MORE_DOCS;
+        }
+        all_match = false;
+        break;
+      }
+    }
+
+    if (all_match) {
+      // All must iterators agree on this candidate
+      // Check must_not exclusion
+      if (!is_excluded(candidate)) {
+        return candidate;
+      }
+      // Excluded by must_not, advance lead to next doc
+      candidate = must_iterators_[0]->next_doc();
+      if (candidate == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+    }
+  }
+}
+
+bool ConjunctionIterator::is_excluded(uint32_t candidate) {
+  for (auto &not_iter : must_not_iterators_) {
+    uint32_t not_doc = not_iter->advance(candidate);
+    if (not_doc == candidate) {
+      // This document is excluded by a must_not clause
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ConjunctionIterator::matches() {
+  // Phase-2 verification: all must sub-iterators must pass matches()
+  for (auto &iter : must_iterators_) {
+    if (!iter->matches()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+float ConjunctionIterator::score() {
+  float total = 0.0f;
+  for (auto &iter : must_iterators_) {
+    total += iter->score();
+  }
+  return total;
+}
+
+uint64_t ConjunctionIterator::cost() const {
+  if (must_iterators_.empty()) {
+    return 0;
+  }
+  // Cost is determined by the shortest (lead) iterator
+  return must_iterators_[0]->cost();
+}
+
+float ConjunctionIterator::max_score() const {
+  float total = 0.0f;
+  for (auto &iter : must_iterators_) {
+    total += iter->max_score();
+  }
+  return total;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h
new file mode 100644
index 000000000..561fa8f07
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h
@@ -0,0 +1,69 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+#include "fts_doc_iterator.h"
+
+namespace zvec::fts {
+
+/*! Conjunction (AND) document iterator
+ *
+ *  Implements multi-way intersection of must sub-iterators with must_not
+ *  exclusion filtering. The lead iterator (lowest cost) drives the iteration;
+ *  other iterators are advanced to match the lead's current doc_id.
+ */
+class ConjunctionIterator : public DocIterator {
+ public:
+  /*! Construct a conjunction iterator.
+   *  \param must_iterators      Sub-iterators that must all match (AND)
+   *  \param must_not_iterators  Sub-iterators whose matches are excluded (NOT)
+   */
+  ConjunctionIterator(std::vector<DocIteratorPtr> must_iterators,
+                      std::vector<DocIteratorPtr> must_not_iterators);
+
+  uint32_t next_doc() override;
+  //! Internal-driven filter skip: pushes filter into the lead iterator so
+  //! filtered candidates never trigger the do_next alignment cascade.
+  uint32_t next_doc(const zvec::IndexFilter *filter) override;
+  uint32_t advance(uint32_t target) override;
+  bool matches() override;
+  float score() override;
+  uint64_t cost() const override;
+  float max_score() const override;
+
+  void set_min_competitive_score(float min_score) override {
+    min_competitive_score_ = min_score;
+  }
+
+ private:
+  // Try to find the next doc_id where all must iterators agree,
+  // starting from the lead iterator's current position.
+  // Returns NO_MORE_DOCS if no such document exists.
+  uint32_t do_next(uint32_t candidate);
+
+  // Check if candidate doc_id is excluded by any must_not iterator
+  bool is_excluded(uint32_t candidate);
+
+ private:
+  // must_iterators_[0] is the lead (lowest cost)
+  std::vector<DocIteratorPtr> must_iterators_;
+  std::vector<DocIteratorPtr> must_not_iterators_;
+  float min_competitive_score_{0.0f};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc b/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc
new file mode 100644
index 000000000..8a23eb790
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc
@@ -0,0 +1,259 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_disjunction_iterator.h"
+#include <algorithm>
+
+namespace zvec::fts {
+
+namespace {
+
+// Move element at `idx` forward (toward higher indices) to restore sorted
+// order. Only the element at `idx` may be out of place; all other elements
+// must already be sorted.
+inline void sift_forward(std::vector<DocIterator *> &vec, size_t idx) {
+  DocIterator *elem = vec[idx];
+  uint32_t elem_doc = elem->cached_doc_id_;
+  size_t pos = idx;
+  size_t end = vec.size();
+  while (pos + 1 < end && vec[pos + 1]->cached_doc_id_ < elem_doc) {
+    vec[pos] = vec[pos + 1];
+    ++pos;
+  }
+  vec[pos] = elem;
+}
+
+}  // namespace
+
+DisjunctionIterator::DisjunctionIterator(
+    std::vector<DocIteratorPtr> sub_iterators)
+    : sub_iterators_(std::move(sub_iterators)) {
+  // Initialize each sub-iterator to its first doc and prepare postings array
+  total_cost_ = 0;
+  total_max_score_ = 0.0f;
+  for (auto &iter : sub_iterators_) {
+    total_cost_ += iter->cost();
+    total_max_score_ += iter->cached_max_score_;
+    iter->next_doc();
+    postings_.push_back(iter.get());
+  }
+  // Initial sort to establish sorted order
+  resort_postings();
+  cached_max_score_ = total_max_score_;
+}
+
+void DisjunctionIterator::set_min_competitive_score(float min_score) {
+  min_competitive_score_ = min_score;
+}
+
+// Re-establish sorted order of postings_ by cached_doc_id_ ascending.
+// Called when multiple iterators may have changed position.
+void DisjunctionIterator::resort_postings() {
+  std::sort(postings_.begin(), postings_.end(),
+            [](const DocIterator *a, const DocIterator *b) {
+              return a->cached_doc_id_ < b->cached_doc_id_;
+            });
+}
+
+uint32_t DisjunctionIterator::next_doc() {
+  return next_doc_impl(nullptr);
+}
+
+uint32_t DisjunctionIterator::next_doc(const zvec::IndexFilter *filter) {
+  return next_doc_impl(filter);
+}
+
+uint32_t DisjunctionIterator::next_doc_impl(const zvec::IndexFilter *filter) {
+  // Advance matched from the previous document
+  for (auto *iter : matching_iterators_) {
+    iter->next_doc();
+  }
+  matching_iterators_.clear();
+
+  // Restore sorted order — multiple iterators may have changed
+  resort_postings();
+
+  while (true) {
+    // 1. postings_ is maintained in sorted order
+
+    if (postings_.empty() || postings_[0]->cached_doc_id_ == NO_MORE_DOCS) {
+      cached_doc_id_ = NO_MORE_DOCS;
+      return NO_MORE_DOCS;
+    }
+
+    // 2. Find Pivot: accumulate max_score until it reaches the threshold
+    float partial_max_score = 0.0f;
+    size_t pivot_idx = 0;
+    bool found_pivot = false;
+    for (; pivot_idx < postings_.size(); ++pivot_idx) {
+      if (postings_[pivot_idx]->cached_doc_id_ == NO_MORE_DOCS) {
+        break;
+      }
+      partial_max_score += postings_[pivot_idx]->cached_max_score_;
+      if (partial_max_score >= min_competitive_score_) {
+        found_pivot = true;
+        break;
+      }
+    }
+
+    if (!found_pivot) {
+      // If all remaining iterators' max_score sum is less than threshold,
+      // no more competitive documents can be produced.
+      cached_doc_id_ = NO_MORE_DOCS;
+      return NO_MORE_DOCS;
+    }
+
+    uint32_t pivot_doc = postings_[pivot_idx]->cached_doc_id_;
+
+    // 3. Check alignment
+    if (postings_[0]->cached_doc_id_ == pivot_doc) {
+      // 3.1 Filter pushdown: if pivot_doc is filtered, skip it before paying
+      //     for block-max accumulation, matches(), or score(). Advance every
+      //     posting currently sitting at pivot_doc past it, then resort.
+      if (filter && filter->is_filtered(pivot_doc)) {
+        for (size_t i = 0; i < postings_.size(); ++i) {
+          if (postings_[i]->cached_doc_id_ == pivot_doc) {
+            postings_[i]->next_doc();
+          } else {
+            break;  // postings_ is sorted; rest are > pivot_doc
+          }
+        }
+        resort_postings();
+        continue;
+      }
+
+      // 3.5 Block-Max WAND pruning (Ding & Suel 2011).
+      //     First accumulate block_max_scores from [0..pivot_idx].
+      //     If already >= threshold, skip the pruning check (fast path).
+      //     Otherwise, lazily include iterators beyond pivot_idx whose
+      //     posting lists may also contain pivot_doc — their block_max_score
+      //     contributions must be counted to avoid underestimating the
+      //     potential score and incorrectly skipping TopK documents.
+      if (min_competitive_score_ > 0.0f) {
+        float block_score_sum = 0.0f;
+        uint32_t min_block_end = NO_MORE_DOCS;
+        bool can_skip = true;
+
+        // Phase 1: accumulate [0..pivot_idx] (always needed)
+        for (size_t i = 0; i <= pivot_idx; ++i) {
+          auto info = postings_[i]->block_max_info_for(pivot_doc);
+          block_score_sum += info.block_max_score;
+          if (info.block_last_doc < min_block_end) {
+            min_block_end = info.block_last_doc;
+          }
+        }
+
+        // Phase 2: if [0..pivot_idx] sum is already sufficient, no pruning
+        if (block_score_sum >= min_competitive_score_) {
+          can_skip = false;
+        } else {
+          // Lazily accumulate remaining iterators beyond pivot_idx.
+          // They may also contribute scores for pivot_doc.
+          for (size_t i = pivot_idx + 1; i < postings_.size(); ++i) {
+            if (postings_[i]->cached_doc_id_ == NO_MORE_DOCS) {
+              break;
+            }
+            auto info = postings_[i]->block_max_info_for(pivot_doc);
+            block_score_sum += info.block_max_score;
+            if (info.block_last_doc < min_block_end) {
+              min_block_end = info.block_last_doc;
+            }
+            if (block_score_sum >= min_competitive_score_) {
+              can_skip = false;
+              break;
+            }
+          }
+        }
+
+        if (can_skip && block_score_sum < min_competitive_score_ &&
+            min_block_end != NO_MORE_DOCS) {
+          // All iterators' blocks containing pivot_doc cannot produce a
+          // competitive score. Advance ALL iterators in [0..pivot_idx] past
+          // the smallest block boundary to maximize the jump distance.
+          uint32_t skip_target = min_block_end + 1;
+          for (size_t i = 0; i <= pivot_idx; ++i) {
+            if (postings_[i]->cached_doc_id_ < skip_target) {
+              postings_[i]->advance(skip_target);
+            }
+          }
+          // Multiple iterators changed — full resort
+          resort_postings();
+          continue;
+        }
+      }
+
+      // Candidate doc passed block-level check. Collect all matching iterators.
+      for (size_t i = 0; i < postings_.size(); ++i) {
+        if (postings_[i]->cached_doc_id_ == pivot_doc) {
+          matching_iterators_.push_back(postings_[i]);
+        } else {
+          break;  // because postings_ is sorted by cached_doc_id_
+        }
+      }
+      cached_doc_id_ = pivot_doc;
+      cached_doc_id_ = pivot_doc;
+      return pivot_doc;
+    } else {
+      // 4. Iterator Jumping: advance the iterator with the smallest doc_id
+      // to at least the pivot's doc_id. This bypasses scoring and checking
+      // for all documents smaller than pivot_doc!
+      // Only postings_[0] changed — use sift_forward instead of full sort.
+      postings_[0]->advance(pivot_doc);
+      sift_forward(postings_, 0);
+    }
+  }
+}
+
+uint32_t DisjunctionIterator::advance(uint32_t target) {
+  // Clear pending matches as they will be re-advanced below
+  matching_iterators_.clear();
+
+  for (auto *iter : postings_) {
+    if (iter->cached_doc_id_ < target) {
+      iter->advance(target);
+    }
+  }
+  return next_doc();
+}
+
+bool DisjunctionIterator::matches() {
+  // At least one matching sub-iterator must pass phase-2 verification
+  for (DocIterator *iter : matching_iterators_) {
+    if (iter->matches()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+float DisjunctionIterator::score() {
+  // Sum scores of all matching sub-iterators that pass phase-2 verification
+  float total = 0.0f;
+  for (DocIterator *iter : matching_iterators_) {
+    if (iter->matches()) {
+      total += iter->score();
+    }
+  }
+  return total;
+}
+
+uint64_t DisjunctionIterator::cost() const {
+  return total_cost_;
+}
+
+float DisjunctionIterator::max_score() const {
+  return total_max_score_;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.h b/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.h
new file mode 100644
index 000000000..41fe55ae7
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.h
@@ -0,0 +1,63 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+#include "fts_doc_iterator.h"
+
+namespace zvec::fts {
+
+/*! Disjunction (OR) document iterator with WAND pruning
+ */
+class DisjunctionIterator : public DocIterator {
+ public:
+  /*! Construct a disjunction iterator.
+   *  \param sub_iterators  Sub-iterators to merge (OR semantics)
+   */
+  explicit DisjunctionIterator(std::vector<DocIteratorPtr> sub_iterators);
+
+  uint32_t next_doc() override;
+  //! Internal-driven filter skip: checks filter inside the WAND loop after
+  //! pivot alignment, before block-max accumulation and resort overhead.
+  uint32_t next_doc(const zvec::IndexFilter *filter) override;
+  uint32_t advance(uint32_t target) override;
+  bool matches() override;
+  float score() override;
+  uint64_t cost() const override;
+  float max_score() const override;
+
+  //! Update the minimum competitive score threshold for WAND pruning.
+  //! Documents whose total max_score sum falls below this threshold
+  //! are skipped without exact scoring.
+  void set_min_competitive_score(float min_score) override;
+
+ private:
+  void resort_postings();
+
+  //! Unified WAND loop body. \p filter may be null (no-filter fast path).
+  uint32_t next_doc_impl(const zvec::IndexFilter *filter);
+
+ private:
+  std::vector<DocIteratorPtr> sub_iterators_;  // Owns the sub-iterators
+  std::vector<DocIterator *> postings_;  // Pointers for fast sorting (WAND)
+  std::vector<DocIterator *> matching_iterators_;  // Current doc matches
+  float min_competitive_score_{0.0f};
+  uint64_t total_cost_{0};
+  float total_max_score_{0.0f};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_doc_iterator.h b/src/db/index/column/fts_column/iterator/fts_doc_iterator.h
new file mode 100644
index 000000000..58f0782c0
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_doc_iterator.h
@@ -0,0 +1,123 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include "db/index/common/index_filter.h"
+
+namespace zvec::fts {
+
+/*! Abstract base class for FTS document iterators.
+ *
+ *  All query nodes (Term, Phrase, AND, OR) implement this interface to form
+ *  a composable iterator tree. The iterator produces matching documents in
+ *  ascending doc_id order.
+ *
+ *  Two-phase iteration:
+ *    Phase 1: next_doc() / advance() locate candidate documents using only
+ *             doc_id information (cheap).
+ *    Phase 2: matches() performs exact verification (e.g. position check for
+ *             phrase queries). Only called after Phase 1 succeeds.
+ */
+class DocIterator {
+ public:
+  virtual ~DocIterator() = default;
+
+  //! Sentinel value indicating no more matching documents
+  static constexpr uint32_t NO_MORE_DOCS = UINT32_MAX;
+
+  //! Cached doc_id for hot-path access without virtual dispatch.
+  //! Sub-classes MUST update this in next_doc() / advance() before returning.
+  uint32_t cached_doc_id_{NO_MORE_DOCS};
+
+  //! Cached max_score for hot-path access without virtual dispatch.
+  //! Sub-classes MUST set this in constructors (and update if max_score
+  //! changes, which is rare for most iterators).
+  float cached_max_score_{0.0f};
+
+  //! Advance to the next matching document.
+  //! \return doc_id of the next match, or NO_MORE_DOCS if exhausted.
+  virtual uint32_t next_doc() = 0;
+
+  //! Filter-aware next_doc. Composite iterators (Disjunction/Conjunction/
+  //! Phrase) override to check the filter at the optimal point inside their
+  //! loops — before block-max binary search, do_next alignment, or phase-2
+  //! position verification — so filtered docs do not pay that cost.
+  //! Default implementation just loops over next_doc() and skips filtered
+  //! docs (functionally equivalent to a caller-side post-filter check).
+  //! \param filter Must be non-null; true means SKIP the doc.
+  virtual uint32_t next_doc(const zvec::IndexFilter *filter) {
+    uint32_t doc = next_doc();
+    while (doc != NO_MORE_DOCS && filter->is_filtered(doc)) {
+      doc = next_doc();
+    }
+    return doc;
+  }
+
+  //! Advance to the first matching document with doc_id >= target.
+  //! \param target  Minimum doc_id to seek to.
+  //! \return doc_id of the match (>= target), or NO_MORE_DOCS if exhausted.
+  virtual uint32_t advance(uint32_t target) = 0;
+
+  //! Return the current document ID.
+  //! Undefined before the first call to next_doc() or advance().
+  uint32_t doc_id() const {
+    return cached_doc_id_;
+  }
+
+  //! Phase-2 exact verification for the current document.
+  //! For most iterators this is a no-op (returns true).
+  //! PhraseDocIterator overrides this to check position adjacency.
+  //! \return true if the current document truly matches.
+  virtual bool matches() {
+    return true;
+  }
+
+  //! Compute the BM25 score of the current document.
+  //! Must only be called after matches() returns true.
+  virtual float score() = 0;
+
+  //! Estimated cost of this iterator (e.g. posting list length).
+  //! Used to order sub-iterators in ConjunctionIterator (shortest first).
+  virtual uint64_t cost() const = 0;
+
+  //! Upper bound on the score this iterator can produce for any document.
+  //! Used by WAND pruning in DisjunctionIterator.
+  virtual float max_score() const {
+    return std::numeric_limits<float>::max();
+  }
+
+  //! Update the minimum competitive score threshold for WAND pruning.
+  //! Only DisjunctionIterator implements meaningful behavior; other iterators
+  //! ignore this call.
+  //! \param min_score  Current minimum score needed to enter the TopK heap.
+  virtual void set_min_competitive_score(float /*min_score*/) {}
+
+  //! Block-Max WAND support: return both block_max_score and max_doc_id
+  //! for the block containing \p target in a single skip list binary search.
+  struct BlockMaxInfo {
+    float block_max_score{0.0f};
+    uint32_t block_last_doc{NO_MORE_DOCS};
+  };
+  virtual BlockMaxInfo block_max_info_for(uint32_t /*target*/) const {
+    return {max_score(), NO_MORE_DOCS};
+  }
+};
+
+using DocIteratorPtr = std::unique_ptr<DocIterator>;
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_phrase_iterator.cc b/src/db/index/column/fts_column/iterator/fts_phrase_iterator.cc
new file mode 100644
index 000000000..565bd6024
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_phrase_iterator.cc
@@ -0,0 +1,142 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_phrase_iterator.h"
+#include <algorithm>
+#include <cstring>
+#include "../fts_utils.h"
+
+namespace zvec::fts {
+
+PhraseDocIterator::PhraseDocIterator(DocIteratorPtr conjunction,
+                                     std::vector<std::string> terms,
+                                     RocksdbContext *ctx,
+                                     rocksdb::ColumnFamilyHandle *positions_cf)
+    : conjunction_(std::move(conjunction)),
+      terms_(std::move(terms)),
+      ctx_(ctx),
+      positions_cf_(positions_cf) {
+  cached_max_score_ = conjunction_->cached_max_score_;
+}
+
+uint32_t PhraseDocIterator::next_doc() {
+  cached_doc_id_ = conjunction_->next_doc();
+  return cached_doc_id_;
+}
+
+uint32_t PhraseDocIterator::next_doc(const zvec::IndexFilter *filter) {
+  cached_doc_id_ = conjunction_->next_doc(filter);
+  return cached_doc_id_;
+}
+
+uint32_t PhraseDocIterator::advance(uint32_t target) {
+  cached_doc_id_ = conjunction_->advance(target);
+  return cached_doc_id_;
+}
+
+bool PhraseDocIterator::matches() {
+  if (cached_doc_id_ == NO_MORE_DOCS) {
+    return false;
+  }
+  // Phase 2: verify position adjacency (deferred IO)
+  return verify_phrase_positions(cached_doc_id_);
+}
+
+float PhraseDocIterator::score() {
+  return conjunction_->score();
+}
+
+uint64_t PhraseDocIterator::cost() const {
+  return conjunction_->cost();
+}
+
+float PhraseDocIterator::max_score() const {
+  return conjunction_->max_score();
+}
+
+bool PhraseDocIterator::verify_phrase_positions(uint32_t doc_id) const {
+  if (terms_.empty()) {
+    return false;
+  }
+
+  // Read position list of first term as anchor.
+  // Empty anchor means the term has no position record for this doc — this is
+  // normal for non-matching docs filtered through the conjunction without a
+  // position-CF entry, so do NOT log here.
+  std::vector<uint32_t> anchor_positions = read_positions(terms_[0], doc_id);
+  if (anchor_positions.empty()) {
+    return false;
+  }
+
+  // For each anchor position, verify if subsequent terms appear at consecutive
+  // positions
+  for (uint32_t anchor_pos : anchor_positions) {
+    bool phrase_matched = true;
+    for (size_t term_index = 1; term_index < terms_.size(); ++term_index) {
+      const uint32_t expected_pos =
+          anchor_pos + static_cast<uint32_t>(term_index);
+      std::vector<uint32_t> positions =
+          read_positions(terms_[term_index], doc_id);
+      bool found =
+          std::binary_search(positions.begin(), positions.end(), expected_pos);
+      if (!found) {
+        phrase_matched = false;
+        break;
+      }
+    }
+    if (phrase_matched) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+std::vector<uint32_t> PhraseDocIterator::read_positions(const std::string &term,
+                                                        uint32_t doc_id) const {
+  const std::string key = fts::make_doc_term_key(term, doc_id);
+  std::string value;
+  if (!ctx_->db_->Get(ctx_->read_opts_, positions_cf_, key, &value).ok() ||
+      value.empty()) {
+    return {};
+  }
+  return decode_positions(value);
+}
+
+std::vector<uint32_t> PhraseDocIterator::decode_positions(
+    const std::string &data) {
+  std::vector<uint32_t> positions;
+  size_t index = 0;
+  uint32_t current_position = 0;
+
+  while (index < data.size()) {
+    // Decode varint
+    uint32_t delta = 0;
+    uint32_t shift = 0;
+    while (index < data.size()) {
+      const uint8_t byte = static_cast<uint8_t>(data[index++]);
+      delta |= static_cast<uint32_t>(byte & 0x7F) << shift;
+      shift += 7;
+      if ((byte & 0x80) == 0) {
+        break;
+      }
+    }
+    current_position += delta;
+    positions.push_back(current_position);
+  }
+
+  return positions;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h b/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h
new file mode 100644
index 000000000..6222c6547
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h
@@ -0,0 +1,77 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+#include "db/common/rocksdb_context.h"
+#include "fts_conjunction_iterator.h"
+#include "fts_doc_iterator.h"
+#include "../bm25_scorer.h"
+
+namespace zvec::fts {
+
+/*! Phrase document iterator (two-phase)
+ *
+ *  Internally wraps a ConjunctionIterator for phase-1 doc_id intersection.
+ *  Phase-2 matches() reads position payloads and checks adjacency.
+ */
+class PhraseDocIterator : public DocIterator {
+ public:
+  /*! Construct a phrase iterator.
+   *  \param conjunction   ConjunctionIterator over all terms in the phrase
+   *  \param terms         Processed (tokenized) term strings in phrase order
+   *  \param positions_cf  $POS column family for reading position lists
+   */
+  PhraseDocIterator(DocIteratorPtr conjunction, std::vector<std::string> terms,
+                    RocksdbContext *ctx,
+                    rocksdb::ColumnFamilyHandle *positions_cf);
+
+  uint32_t next_doc() override;
+  //! Internal-driven filter skip: delegates to the inner conjunction so the
+  //! expensive phase-2 verify_phrase_positions() ($POS CF reads) is never
+  //! run on filtered docs.
+  uint32_t next_doc(const zvec::IndexFilter *filter) override;
+  uint32_t advance(uint32_t target) override;
+
+  //! Phase-2: verify position adjacency for the current document.
+  //! Reads position lists from $POS CF (deferred IO).
+  bool matches() override;
+
+  float score() override;
+  uint64_t cost() const override;
+  float max_score() const override;
+
+ private:
+  // Read position list for a term in a specific document
+  std::vector<uint32_t> read_positions(const std::string &term,
+                                       uint32_t doc_id) const;
+
+  // Verify that terms appear at consecutive positions in the document
+  bool verify_phrase_positions(uint32_t doc_id) const;
+
+  // Decode varint delta-encoded position list
+  static std::vector<uint32_t> decode_positions(const std::string &data);
+
+ private:
+  DocIteratorPtr conjunction_;
+  std::vector<std::string> terms_;
+  RocksdbContext *ctx_;
+  rocksdb::ColumnFamilyHandle *positions_cf_;
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_term_iterator.cc b/src/db/index/column/fts_column/iterator/fts_term_iterator.cc
new file mode 100644
index 000000000..cc500b681
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_term_iterator.cc
@@ -0,0 +1,200 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_term_iterator.h"
+#include <cstring>
+#include <roaring/roaring.h>
+#include <zvec/ailego/logger/logger.h>
+#include "../fts_utils.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// Constructors
+// ============================================================
+
+// Roaring Bitmap mode — takes ownership of bitmap, iterates lazily.
+TermDocIterator::TermDocIterator(std::string term, roaring_bitmap_t *bitmap,
+                                 uint64_t df, BM25ScorerPtr scorer,
+                                 float max_score_val, RocksdbContext *ctx,
+                                 rocksdb::ColumnFamilyHandle *term_freq_cf,
+                                 rocksdb::ColumnFamilyHandle *doc_len_cf,
+                                 std::atomic<int> *cf_counter)
+    : mode_(Mode::ROARING),
+      term_(std::move(term)),
+      df_(df),
+      scorer_(std::move(scorer)),
+      max_score_val_(max_score_val),
+      bitmap_(bitmap),
+      ctx_(ctx),
+      term_freq_cf_(term_freq_cf),
+      doc_len_cf_(doc_len_cf),
+      cf_counter_(cf_counter) {
+  roaring_init_iterator(bitmap_, &roaring_iter_);
+  cached_max_score_ = max_score_val_;
+  idf_weight_ = scorer_->idf(df_);
+}
+
+TermDocIterator::~TermDocIterator() {
+  if (bitmap_) {
+    roaring_bitmap_free(bitmap_);
+    bitmap_ = nullptr;
+  }
+  if (cf_counter_) {
+    --*cf_counter_;
+  }
+}
+
+// BitPacked mode
+TermDocIterator::TermDocIterator(std::string term,
+                                 rocksdb::PinnableSlice packed_data,
+                                 uint64_t df, BM25ScorerPtr scorer,
+                                 float max_score_val)
+    : mode_(Mode::BITPACKED),
+      term_(std::move(term)),
+      df_(df),
+      scorer_(std::move(scorer)),
+      max_score_val_(max_score_val),
+      packed_data_(std::move(packed_data)) {
+  // Failure here means the term will produce no docs (next_doc returns
+  // NO_MORE_DOCS). bp_iter_.open() already logs the underlying parse error;
+  // surface it once more here with the term context for easier triage.
+  if (bp_iter_.open(packed_data_.data(), packed_data_.size()) != 0) {
+    LOG_ERROR(
+        "TermDocIterator: failed to open bitpacked posting for term[%s], "
+        "iterator will yield no documents",
+        term_.c_str());
+  }
+  cached_max_score_ = max_score_val_;
+  idf_weight_ = scorer_->idf(df_);
+}
+
+// ============================================================
+// Iterator interface
+// ============================================================
+
+uint32_t TermDocIterator::next_doc() {
+  if (mode_ == Mode::BITPACKED) {
+    cached_doc_id_ = bp_iter_.next_doc();
+    return cached_doc_id_;
+  }
+
+  // Roaring mode: stream via roaring_uint32_iterator_t
+  if (!roaring_iter_started_) {
+    // First call: iterator already points at the first element after
+    // roaring_init_iterator in the constructor.
+    roaring_iter_started_ = true;
+  } else {
+    roaring_advance_uint32_iterator(&roaring_iter_);
+  }
+  if (!roaring_iter_.has_value) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  cached_doc_id_ = roaring_iter_.current_value;
+  return cached_doc_id_;
+}
+
+uint32_t TermDocIterator::advance(uint32_t target) {
+  if (mode_ == Mode::BITPACKED) {
+    cached_doc_id_ = bp_iter_.advance(target);
+    return cached_doc_id_;
+  }
+
+  // Roaring mode: skip to the first doc_id >= target
+  roaring_iter_started_ = true;
+  if (!roaring_move_uint32_iterator_equalorlarger(&roaring_iter_, target)) {
+    cached_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  cached_doc_id_ = roaring_iter_.current_value;
+  return cached_doc_id_;
+}
+
+float TermDocIterator::score() {
+  if (cached_doc_id_ == NO_MORE_DOCS) {
+    return 0.0f;
+  }
+
+  if (mode_ == Mode::BITPACKED) {
+    // Fast path: read tf/doc_len from inline payload (zero I/O)
+    const uint32_t tf = bp_iter_.term_freq();
+    const uint32_t dl = bp_iter_.doc_len();
+    return scorer_->score_with_idf(idf_weight_, tf, dl);
+  }
+
+  // Roaring mode: read from RocksDB
+  const uint32_t tf = read_term_freq(cached_doc_id_);
+  const uint32_t doc_len = read_doc_len(cached_doc_id_);
+  return scorer_->score_with_idf(idf_weight_, tf, doc_len);
+}
+
+uint64_t TermDocIterator::cost() const {
+  if (mode_ == Mode::BITPACKED) {
+    return bp_iter_.cost();
+  }
+  return df_;
+}
+
+// ============================================================
+// Block-Max WAND support
+// ============================================================
+
+DocIterator::BlockMaxInfo TermDocIterator::block_max_info_for(
+    uint32_t target) const {
+  if (mode_ == Mode::BITPACKED) {
+    auto info = bp_iter_.block_max_info_for(target);
+    return {info.block_max_score, info.block_last_doc};
+  }
+  // Roaring mode: fall back to global max_score, no block structure
+  return {max_score_val_, NO_MORE_DOCS};
+}
+
+// ============================================================
+// Roaring mode helpers
+// ============================================================
+
+uint32_t TermDocIterator::read_term_freq(uint32_t doc_id) const {
+  if (!term_freq_cf_) {
+    return 1;  // CF dropped after convert_postings_to_bitpacked
+  }
+  const std::string key = fts::make_doc_term_key(term_, doc_id);
+  std::string value;
+  if (!ctx_->db_->Get(ctx_->read_opts_, term_freq_cf_, key, &value).ok() ||
+      value.size() < sizeof(uint32_t)) {
+    return 1;  // Default term frequency is 1
+  }
+  uint32_t tf = 0;
+  std::memcpy(&tf, value.data(), sizeof(uint32_t));
+  return tf;
+}
+
+uint32_t TermDocIterator::read_doc_len(uint32_t doc_id) const {
+  if (!doc_len_cf_) {
+    return 1;  // CF dropped after convert_postings_to_bitpacked
+  }
+  std::string doc_id_key(sizeof(uint32_t), '\0');
+  std::memcpy(doc_id_key.data(), &doc_id, sizeof(uint32_t));
+
+  std::string value;
+  if (!ctx_->db_->Get(ctx_->read_opts_, doc_len_cf_, doc_id_key, &value).ok() ||
+      value.size() < sizeof(uint32_t)) {
+    return 1;  // Default document length is 1
+  }
+  uint32_t doc_len = 0;
+  std::memcpy(&doc_len, value.data(), sizeof(uint32_t));
+  return doc_len;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/iterator/fts_term_iterator.h b/src/db/index/column/fts_column/iterator/fts_term_iterator.h
new file mode 100644
index 000000000..515675832
--- /dev/null
+++ b/src/db/index/column/fts_column/iterator/fts_term_iterator.h
@@ -0,0 +1,127 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <roaring/roaring.h>
+#include <rocksdb/slice.h>
+#include "db/common/rocksdb_context.h"
+#include "fts_doc_iterator.h"
+#include "../bm25_scorer.h"
+#include "../posting/bitpacked_posting_list.h"
+
+namespace zvec::fts {
+
+/*! Term document iterator
+ *  Supports two internal modes:
+ *    1. Roaring mode: sorted doc_id array + RocksDB Get for tf/doc_len
+ *    2. BitPacked mode: inline payloads, zero RocksDB I/O for score()
+ */
+class TermDocIterator : public DocIterator {
+ public:
+  /*! Roaring Bitmap mode constructor.
+   *  Takes ownership of the bitmap and iterates lazily via
+   *  roaring_uint32_iterator_t — no N×4-byte doc_id array is materialised.
+   *
+   *  \param term           Processed (tokenized) term string
+   *  \param bitmap         Deserialized Roaring bitmap (ownership transferred)
+   *  \param df             Document frequency of this term in the segment
+   *  \param scorer         BM25 scorer (with segment stats loaded)
+   *  \param max_score_val  Precomputed WAND upper bound score for this term
+   *  \param term_freq_cf   $TF column family for reading per-doc term freq
+   *  \param doc_len_cf     $DOC_LEN column family for reading doc length
+   *  \param cf_counter     CF reference counter for term_freq_cf and doc_len_cf
+   */
+  TermDocIterator(std::string term, roaring_bitmap_t *bitmap, uint64_t df,
+                  BM25ScorerPtr scorer, float max_score_val,
+                  RocksdbContext *ctx,
+                  rocksdb::ColumnFamilyHandle *term_freq_cf,
+                  rocksdb::ColumnFamilyHandle *doc_len_cf,
+                  std::atomic<int> *cf_counter);
+
+  ~TermDocIterator() override;
+
+  /*! BitPacked mode constructor.
+   *  All payloads (tf, doc_len, per-block max_score, global max_score) are
+   *  embedded inline in packed_data, so this iterator is completely
+   *  self-contained on the read path:
+   *    - score() reads tf/doc_len from bp_iter_ — zero RocksDB I/O.
+   *    - block_max_info_for() / max_score() all read from the BitPacked
+   *      skip-list / block headers — no $MAX_TF lookup needed.
+   *  Construction takes neither $TF, $DOC_LEN, nor $MAX_TF column families:
+   *  the immutable segment SST may have these CFs entirely empty (cleared
+   *  by FtsColumnIndexer::convert_postings_to_bitpacked at dump time) and
+   *  this iterator still works correctly.
+   *
+   *  \param term           Processed (tokenized) term string
+   *  \param packed_data    Serialized BitPacked posting list (ownership taken)
+   *  \param df             Document frequency of this term in the segment
+   *  \param scorer         BM25 scorer (with segment stats loaded)
+   *  \param max_score_val  Precomputed WAND upper bound score for this term
+   */
+  TermDocIterator(std::string term, rocksdb::PinnableSlice packed_data,
+                  uint64_t df, BM25ScorerPtr scorer, float max_score_val);
+
+  // Prevent move/copy: bp_iter_ holds a raw pointer into packed_data_'s
+  // buffer, so moving would create a dangling pointer.
+  TermDocIterator(const TermDocIterator &) = delete;
+  TermDocIterator &operator=(const TermDocIterator &) = delete;
+  TermDocIterator(TermDocIterator &&) = delete;
+  TermDocIterator &operator=(TermDocIterator &&) = delete;
+
+  uint32_t next_doc() override;
+  uint32_t advance(uint32_t target) override;
+  float score() override;
+  uint64_t cost() const override;
+  float max_score() const override {
+    return max_score_val_;
+  }
+
+  // Block-Max WAND support (only effective in BitPacked mode)
+  BlockMaxInfo block_max_info_for(uint32_t target) const override;
+
+ private:
+  // Read term frequency for the current document (Roaring mode only)
+  uint32_t read_term_freq(uint32_t doc_id) const;
+
+  // Read document length for the current document (Roaring mode only)
+  uint32_t read_doc_len(uint32_t doc_id) const;
+
+ private:
+  enum class Mode { ROARING, BITPACKED };
+  Mode mode_;
+
+  std::string term_;
+  uint64_t df_;
+  BM25ScorerPtr scorer_;
+  float max_score_val_;
+  float idf_weight_{0.0f};  // Pre-computed IDF to avoid log() per score()
+
+  // Roaring mode state (owns the bitmap; iterator is stack-allocated)
+  roaring_bitmap_t *bitmap_{nullptr};
+  roaring_uint32_iterator_t roaring_iter_{};
+  bool roaring_iter_started_{false};  // tracks whether first next_doc called
+  RocksdbContext *ctx_{nullptr};
+  rocksdb::ColumnFamilyHandle *term_freq_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *doc_len_cf_{nullptr};
+  std::atomic<int> *cf_counter_{nullptr};
+
+  // BitPacked mode state
+  rocksdb::PinnableSlice packed_data_;  // owns the serialized data (zero-copy)
+  BitPackedPostingIterator bp_iter_;    // zero-copy iterator over packed_data_
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/parser/fts_query_parser.cc b/src/db/index/column/fts_column/parser/fts_query_parser.cc
new file mode 100644
index 000000000..9ad0d394f
--- /dev/null
+++ b/src/db/index/column/fts_column/parser/fts_query_parser.cc
@@ -0,0 +1,368 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fts_query_parser.h"
+#include <cctype>
+#include <zvec/ailego/utility/string_helper.h>
+#include "db/index/column/fts_column/gen/FtsLexer.h"
+#include "db/index/column/fts_column/gen/FtsParser.h"
+#include "antlr4-runtime.h"
+
+using namespace antlr4;
+
+namespace zvec::fts {
+
+// ============================================================
+// Error listener that captures the first error message
+// ============================================================
+
+class FtsErrorListener : public BaseErrorListener {
+ public:
+  void syntaxError(Recognizer * /*recognizer*/, Token * /*offending_symbol*/,
+                   size_t line, size_t char_position_in_line,
+                   const std::string &msg,
+                   std::exception_ptr /*exception*/) override {
+    if (err_msg_.empty()) {
+      err_msg_ = ailego::StringHelper::Concat(
+          "[", line, " ", char_position_in_line, " ", msg, "]");
+    }
+  }
+
+  const std::string &err_msg() const {
+    return err_msg_;
+  }
+
+ private:
+  std::string err_msg_;
+};
+
+// ============================================================
+// AST builder helpers (anonymous namespace)
+// ============================================================
+
+namespace {
+
+// Forward declaration
+FtsAstNodePtr build_fts_or_expr(FtsParser::Fts_or_exprContext *or_ctx,
+                                FtsDefaultOperator default_op,
+                                std::string *err_msg);
+
+// Strip surrounding single or double quotes from a quoted string token.
+std::string strip_quotes(const std::string &quoted) {
+  if (quoted.size() >= 2 &&
+      ((quoted.front() == '\'' && quoted.back() == '\'') ||
+       (quoted.front() == '"' && quoted.back() == '"'))) {
+    return quoted.substr(1, quoted.size() - 2);
+  }
+  return quoted;
+}
+
+// Split a phrase string (already stripped of quotes) into individual words.
+// Words are separated by ASCII whitespace.
+std::vector<std::string> split_phrase_words(const std::string &phrase) {
+  std::vector<std::string> words;
+  size_t start = 0;
+  while (start < phrase.size()) {
+    while (start < phrase.size() &&
+           std::isspace(static_cast<unsigned char>(phrase[start]))) {
+      ++start;
+    }
+    size_t end = start;
+    while (end < phrase.size() &&
+           !std::isspace(static_cast<unsigned char>(phrase[end]))) {
+      ++end;
+    }
+    if (end > start) {
+      words.push_back(phrase.substr(start, end - start));
+    }
+    start = end;
+  }
+  return words;
+}
+
+// Propagate must/must_not modifier to the root of an already-built AST node.
+// Now that must/must_not live on the FtsAstNode base class, this works
+// uniformly for terms, phrases and composite (AND/OR) sub-expressions.
+void apply_modifier(FtsAstNode *node, bool is_must, bool is_must_not) {
+  if (!node || (!is_must && !is_must_not)) {
+    return;
+  }
+  node->must = is_must;
+  node->must_not = is_must_not;
+}
+
+// atom: fts_field_prefix? fts_primary fts_boost?
+//
+// fts_field_prefix (e.g. "title:") and fts_boost (e.g. "^2") are parsed by
+// the grammar but not supported at query execution time — return an error.
+//
+// fts_primary: fts_term | fts_phrase | LP fts_or_expr RP
+FtsAstNodePtr build_fts_atom(FtsParser::Fts_atomContext *atom_ctx, bool is_must,
+                             bool is_must_not, FtsDefaultOperator default_op,
+                             std::string *err_msg) {
+  // Reject field-prefixed queries (e.g. "title:cancer")
+  if (atom_ctx->fts_field_prefix() != nullptr) {
+    if (err_msg) {
+      *err_msg = "field-prefixed queries are not supported";
+    }
+    return nullptr;
+  }
+
+  // Reject boosted queries (e.g. "term^2")
+  if (atom_ctx->fts_boost() != nullptr) {
+    if (err_msg) {
+      *err_msg = "boost queries are not supported";
+    }
+    return nullptr;
+  }
+
+  FtsParser::Fts_primaryContext *primary_ctx = atom_ctx->fts_primary();
+  if (primary_ctx == nullptr) {
+    return nullptr;
+  }
+
+  if (primary_ctx->fts_term() != nullptr) {
+    std::string term_text = primary_ctx->fts_term()->getText();
+    return std::make_unique<TermNode>(std::move(term_text), is_must,
+                                      is_must_not);
+  }
+
+  if (primary_ctx->fts_phrase() != nullptr) {
+    std::string raw = primary_ctx->fts_phrase()->getText();
+    std::string phrase_text = strip_quotes(raw);
+    auto phrase_node = std::make_unique<PhraseNode>();
+    phrase_node->must = is_must;
+    phrase_node->must_not = is_must_not;
+    phrase_node->terms = split_phrase_words(phrase_text);
+    return phrase_node;
+  }
+
+  if (primary_ctx->fts_or_expr() != nullptr) {
+    // Parenthesised sub-expression — propagate default_op so that adjacent
+    // bare terms inside the parentheses share the same implicit semantics.
+    auto inner =
+        build_fts_or_expr(primary_ctx->fts_or_expr(), default_op, err_msg);
+    apply_modifier(inner.get(), is_must, is_must_not);
+    return inner;
+  }
+
+  return nullptr;
+}
+
+// unary: (PLUS_SIGN | MINUS_SIGN)? atom
+// NOT is no longer a unary modifier — it is handled as a binary operator in
+// build_fts_and_expr. antlr4 generates separate subclasses for each labeled
+// alternative.
+FtsAstNodePtr build_fts_unary(FtsParser::Fts_unaryContext *unary_ctx,
+                              FtsDefaultOperator default_op,
+                              std::string *err_msg) {
+  if (auto *must_ctx = dynamic_cast<FtsParser::Must_atomContext *>(unary_ctx)) {
+    return build_fts_atom(must_ctx->fts_atom(), /*is_must=*/true,
+                          /*is_must_not=*/false, default_op, err_msg);
+  }
+  if (auto *must_not_ctx =
+          dynamic_cast<FtsParser::Must_not_atomContext *>(unary_ctx)) {
+    return build_fts_atom(must_not_ctx->fts_atom(), /*is_must=*/false,
+                          /*is_must_not=*/true, default_op, err_msg);
+  }
+  // Plain_atomContext (no modifier)
+  if (auto *plain_ctx =
+          dynamic_cast<FtsParser::Plain_atomContext *>(unary_ctx)) {
+    return build_fts_atom(plain_ctx->fts_atom(), /*is_must=*/false,
+                          /*is_must_not=*/false, default_op, err_msg);
+  }
+  return nullptr;
+}
+
+// seqExpr: unary+
+// Adjacent terms use the implicit default operator passed in (OR or AND).
+// This is the only place where FtsDefaultOperator actually changes the AST
+// structure; all other build_* helpers simply propagate the value.
+FtsAstNodePtr build_fts_seq_expr(FtsParser::Fts_seq_exprContext *seq_ctx,
+                                 FtsDefaultOperator default_op,
+                                 std::string *err_msg) {
+  auto unary_list = seq_ctx->fts_unary();
+  if (unary_list.size() == 1) {
+    return build_fts_unary(unary_list[0], default_op, err_msg);
+  }
+
+  // Parse all children first
+  std::vector<FtsAstNodePtr> children;
+  for (auto *unary_ctx : unary_list) {
+    auto child = build_fts_unary(unary_ctx, default_op, err_msg);
+    if (!child) {
+      if (err_msg && !err_msg->empty()) {
+        return nullptr;
+      }
+      continue;
+    }
+    children.push_back(std::move(child));
+  }
+  if (children.size() == 1) {
+    return std::move(children[0]);
+  }
+
+  // Assign children to the appropriate node type
+  if (default_op == FtsDefaultOperator::AND) {
+    auto and_node = std::make_unique<AndNode>();
+    and_node->children = std::move(children);
+    return and_node;
+  }
+  auto or_node = std::make_unique<OrNode>();
+  or_node->children = std::move(children);
+  return or_node;
+}
+
+// andExpr: seqExpr ((AND | NOT) seqExpr)*
+//
+// NOT shares the same precedence as AND. Each `NOT seqExpr` on the right of
+// the operator marks the produced child as must_not, then the whole
+// sub-expression collapses into a single AndNode. Example:
+//   `a NOT b`        => And[a, b{must_not}]
+//   `a AND b NOT c`  => And[a, b, c{must_not}]
+FtsAstNodePtr build_fts_and_expr(FtsParser::Fts_and_exprContext *and_ctx,
+                                 FtsDefaultOperator default_op,
+                                 std::string *err_msg) {
+  auto and_node = std::make_unique<AndNode>();
+  bool next_is_not = false;
+  for (auto *raw : and_ctx->children) {
+    if (auto *term = dynamic_cast<antlr4::tree::TerminalNode *>(raw)) {
+      const auto token_type = term->getSymbol()->getType();
+      if (token_type == FtsParser::AND) {
+        next_is_not = false;
+      } else if (token_type == FtsParser::NOT) {
+        next_is_not = true;
+      }
+      continue;
+    }
+    auto *seq_ctx = dynamic_cast<FtsParser::Fts_seq_exprContext *>(raw);
+    if (seq_ctx == nullptr) {
+      continue;
+    }
+    auto child = build_fts_seq_expr(seq_ctx, default_op, err_msg);
+    bool is_not_for_this_child = next_is_not;
+    next_is_not = false;
+    if (!child) {
+      if (err_msg && !err_msg->empty()) {
+        return nullptr;
+      }
+      continue;
+    }
+    if (is_not_for_this_child) {
+      apply_modifier(child.get(), /*is_must=*/false, /*is_must_not=*/true);
+    }
+    and_node->children.push_back(std::move(child));
+  }
+  if (and_node->children.empty()) {
+    return nullptr;
+  }
+  if (and_node->children.size() == 1) {
+    return std::move(and_node->children[0]);
+  }
+  return and_node;
+}
+
+// orExpr: andExpr (OR andExpr)*
+FtsAstNodePtr build_fts_or_expr(FtsParser::Fts_or_exprContext *or_ctx,
+                                FtsDefaultOperator default_op,
+                                std::string *err_msg) {
+  auto and_list = or_ctx->fts_and_expr();
+  if (and_list.size() == 1) {
+    return build_fts_and_expr(and_list[0], default_op, err_msg);
+  }
+  auto or_node = std::make_unique<OrNode>();
+  for (auto *and_ctx : and_list) {
+    auto child = build_fts_and_expr(and_ctx, default_op, err_msg);
+    if (!child) {
+      if (err_msg && !err_msg->empty()) {
+        return nullptr;
+      }
+      continue;
+    }
+    or_node->children.push_back(std::move(child));
+  }
+  if (or_node->children.size() == 1) {
+    return std::move(or_node->children[0]);
+  }
+  return or_node;
+}
+
+}  // anonymous namespace
+
+// ============================================================
+// FtsQueryParser::parse()
+// ============================================================
+
+FtsAstNodePtr FtsQueryParser::parse(const std::string &query,
+                                    FtsDefaultOperator default_op) {
+  err_msg_.clear();
+
+  try {
+    ANTLRInputStream input(query);
+    FtsLexer lexer(&input);
+
+    FtsErrorListener lexer_error_listener;
+    lexer.removeErrorListeners();
+    lexer.addErrorListener(&lexer_error_listener);
+
+    CommonTokenStream tokens(&lexer);
+
+    FtsParser parser(&tokens);
+
+    FtsErrorListener parser_error_listener;
+    parser.removeErrorListeners();
+    parser.addErrorListener(&parser_error_listener);
+
+    // First attempt with SLL prediction mode (fast path)
+    parser.getInterpreter<atn::ParserATNSimulator>()->setPredictionMode(
+        atn::PredictionMode::SLL);
+    FtsParser::Fts_query_unitContext *tree = parser.fts_query_unit();
+
+    // Fall back to full LL mode if SLL produced errors
+    if (lexer.getNumberOfSyntaxErrors() > 0 ||
+        parser.getNumberOfSyntaxErrors() > 0) {
+      tokens.reset();
+      parser.reset();
+      parser.getInterpreter<atn::ParserATNSimulator>()->setPredictionMode(
+          atn::PredictionMode::LL);
+      tree = parser.fts_query_unit();
+    }
+
+    if (lexer.getNumberOfSyntaxErrors() > 0) {
+      err_msg_ = "fts lexer error " + lexer_error_listener.err_msg();
+      return nullptr;
+    }
+    if (parser.getNumberOfSyntaxErrors() > 0) {
+      err_msg_ = "fts syntax error " + parser_error_listener.err_msg();
+      return nullptr;
+    }
+
+    if (tree == nullptr || tree->fts_or_expr() == nullptr) {
+      err_msg_ = "fts parse error: empty or invalid query";
+      return nullptr;
+    }
+
+    auto result = build_fts_or_expr(tree->fts_or_expr(), default_op, &err_msg_);
+    if (!result && !err_msg_.empty()) {
+      return nullptr;
+    }
+    return result;
+
+  } catch (const std::exception &exception) {
+    err_msg_ = "fts parse exception: " + std::string(exception.what());
+    return nullptr;
+  }
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/parser/fts_query_parser.h b/src/db/index/column/fts_column/parser/fts_query_parser.h
new file mode 100644
index 000000000..6ea1418ec
--- /dev/null
+++ b/src/db/index/column/fts_column/parser/fts_query_parser.h
@@ -0,0 +1,62 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include "db/index/column/fts_column/fts_query_ast.h"
+
+namespace zvec::fts {
+
+/*! Default boolean operator applied to adjacent bare terms that are not
+ *  separated by an explicit operator (AND / OR / + / -).
+ *  This is equivalent to Lucene/Elasticsearch's `default_operator` semantics.
+ */
+enum class FtsDefaultOperator {
+  OR,   // Adjacent bare terms are combined with OR (historical default).
+  AND,  // Adjacent bare terms are combined with AND.
+};
+
+/*! FTS query parser
+ *  Thread-compatible but not thread-safe: create one instance per parse call
+ *  or protect with a mutex.
+ */
+class FtsQueryParser {
+ public:
+  FtsQueryParser() = default;
+
+  /*! Parse an FTS query expression string into an AST.
+   *  \param query        Query string, e.g. '+vector -slow "exact phrase" 中文
+   *                      AND 分词'
+   *  \param default_op   Default operator for adjacent bare terms with no
+   *                      explicit operator. Defaults to OR for backward
+   *                      compatibility. Does not change the semantics of
+   *                      explicit AND / OR / + / - usages.
+   *  \return Root AST node, or nullptr on parse failure. Call err_msg() to
+   *          retrieve the error description.
+   */
+  FtsAstNodePtr parse(const std::string &query,
+                      FtsDefaultOperator default_op = FtsDefaultOperator::OR);
+
+  /*! Return the error message from the most recent failed parse() call. */
+  const std::string &err_msg() const {
+    return err_msg_;
+  }
+
+ private:
+  std::string err_msg_;
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/posting/bitpacked_posting_list.cc b/src/db/index/column/fts_column/posting/bitpacked_posting_list.cc
new file mode 100644
index 000000000..c085681cc
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_posting_list.cc
@@ -0,0 +1,704 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bitpacked_posting_list.h"
+#include <bitpackinghelpers.h>
+#include <cstdlib>
+#include <memory>
+#include <zvec/ailego/logger/logger.h>
+#include "bitpacked_simd_dispatch.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#include <malloc.h>
+#endif
+
+namespace zvec::fts {
+
+// ============================================================
+// BitPacked Posting List on-disk format
+// ============================================================
+//
+// Encodes doc_id deltas, term frequencies, and document lengths using
+// per-block bitpacking.  Each block stores up to 128 entries and carries
+// a precomputed BM25 score upper bound to support Block-Max WAND pruning.
+//
+// File layout:
+//   [Header 16B] [SkipList N*12B] [Block0] [Block1] ...
+//
+// Block layout:
+//   [BlockHeader 12B] [packed_deltas] [packed_tfs] [packed_dlens]
+
+namespace {
+
+/// Round up \p value to the next multiple of \p alignment.
+constexpr size_t align_up(size_t value, size_t alignment) {
+  return (value + alignment - 1) & ~(alignment - 1);
+}
+
+/// Allocate 16-byte-aligned memory for \p count uint32_t values, returned as
+/// a unique_ptr with a custom deleter.
+inline auto make_aligned_uint32_array(size_t count) {
+  const size_t num_bytes = align_up(count * sizeof(uint32_t), 16);
+#ifdef _MSC_VER
+  auto *ptr = static_cast<uint32_t *>(_aligned_malloc(num_bytes, 16));
+  return std::unique_ptr<uint32_t[], decltype(&_aligned_free)>(ptr,
+                                                               _aligned_free);
+#else
+  auto *ptr = static_cast<uint32_t *>(std::aligned_alloc(16, num_bytes));
+  return std::unique_ptr<uint32_t[], decltype(&std::free)>(ptr, std::free);
+#endif
+}
+
+}  // namespace
+
+// ============================================================
+// Low-level bitpacking primitives
+// ============================================================
+
+uint8_t BitPackedPostingList::bits_needed(uint32_t max_value) {
+  if (max_value == 0) return 0;
+#ifdef _MSC_VER
+  unsigned long index = 0;
+  _BitScanReverse(&index, max_value);
+  return static_cast<uint8_t>(index + 1);
+#else
+  return static_cast<uint8_t>(32 - __builtin_clz(max_value));
+#endif
+}
+
+void BitPackedPostingList::pack_uint32(const uint32_t *in, uint8_t bitwidth,
+                                       uint32_t count, uint8_t *out) {
+  if (bitwidth == 0 || count == 0) return;
+
+  // Full block path: 128 values at once via dispatch (SIMD or scalar)
+  if (count == DOCS_PER_BLOCK) {
+    simd::get_dispatch().pack_uint32_128(in, bitwidth, out);
+    return;
+  }
+
+  // Tail block path (count < 128): use scalar fastpack, 32 at a time
+  const size_t total_bytes = packed_byte_size(bitwidth, count);
+  std::memset(out, 0, total_bytes);
+
+  uint32_t *out32 = reinterpret_cast<uint32_t *>(out);
+  uint32_t offset = 0;
+
+  while (offset + 32 <= count) {
+    FastPForLib::fastpackwithoutmask(in + offset, out32, bitwidth);
+    out32 += bitwidth;
+    offset += 32;
+  }
+
+  // Tail: fewer than 32 integers
+  if (offset < count) {
+    alignas(16) uint32_t padded_in[32] = {};
+    std::memcpy(padded_in, in + offset, (count - offset) * sizeof(uint32_t));
+    alignas(16) uint32_t padded_out[32] = {};
+    FastPForLib::fastpackwithoutmask(padded_in, padded_out, bitwidth);
+    size_t tail_bytes = packed_byte_size(bitwidth, count - offset);
+    std::memcpy(out32, padded_out, tail_bytes);
+  }
+}
+
+void BitPackedPostingList::unpack_uint32(const uint8_t *in, uint8_t bitwidth,
+                                         uint32_t count, uint32_t *out) {
+  if (bitwidth == 0 || count == 0) {
+    for (uint32_t i = 0; i < count; ++i) {
+      out[i] = 0;
+    }
+    return;
+  }
+
+  // Full block path: 128 values at once via dispatch (SIMD or scalar)
+  if (count == DOCS_PER_BLOCK) {
+    simd::get_dispatch().unpack_uint32_128(in, bitwidth, out);
+    return;
+  }
+
+  // Tail block path (count < 128): use scalar fastunpack, 32 at a time
+  const uint32_t *in32 = reinterpret_cast<const uint32_t *>(in);
+  uint32_t offset = 0;
+
+  while (offset + 32 <= count) {
+    FastPForLib::fastunpack(in32, out + offset, bitwidth);
+    in32 += bitwidth;
+    offset += 32;
+  }
+
+  // Tail: fewer than 32 integers
+  if (offset < count) {
+    const size_t tail_bytes = packed_byte_size(bitwidth, count - offset);
+    alignas(16) uint32_t padded_in[32] = {};
+    std::memcpy(padded_in, in32, tail_bytes);
+    alignas(16) uint32_t padded_out[32] = {};
+    FastPForLib::fastunpack(padded_in, padded_out, bitwidth);
+    std::memcpy(out + offset, padded_out, (count - offset) * sizeof(uint32_t));
+  }
+}
+
+// ============================================================
+// Encoder
+// ============================================================
+
+std::string BitPackedPostingList::encode(const uint32_t *doc_ids,
+                                         const uint32_t *tfs,
+                                         const uint32_t *doc_lens, size_t count,
+                                         uint64_t df,
+                                         const BM25Scorer &scorer) {
+  if (count == 0) {
+    // Encode an empty posting list (just the header)
+    Header hdr{};
+    hdr.magic = MAGIC;
+    hdr.version = VERSION;
+    hdr.num_docs = 0;
+    hdr.num_blocks = 0;
+    std::string result(HEADER_SIZE, '\0');
+    std::memcpy(result.data(), &hdr, HEADER_SIZE);
+    return result;
+  }
+
+  const uint32_t num_blocks =
+      static_cast<uint32_t>((count + DOCS_PER_BLOCK - 1) / DOCS_PER_BLOCK);
+
+  // ---- Phase 1: Compute delta-encoded doc_ids ----
+  // Use 16-byte-aligned allocation so SIMD pack/max paths can use aligned loads
+  auto deltas = make_aligned_uint32_array(count);
+  deltas[0] = doc_ids[0];
+  for (size_t i = 1; i < count; ++i) {
+    deltas[i] = doc_ids[i] - doc_ids[i - 1];
+  }
+
+  // ---- Phase 2: Compute per-block metadata and packed sizes ----
+  struct BlockInfo {
+    size_t start;        // index into the arrays
+    uint32_t num_docs;   // number of docs in this block
+    uint8_t bw_id;       // bitwidth for doc_id deltas
+    uint8_t bw_tf;       // bitwidth for tfs
+    uint8_t bw_dl;       // bitwidth for doc_lens
+    float max_score;     // block max BM25 score
+    size_t packed_size;  // total packed data size for this block
+  };
+
+  std::vector<BlockInfo> blocks(num_blocks);
+
+  for (uint32_t b = 0; b < num_blocks; ++b) {
+    const size_t start = static_cast<size_t>(b) * DOCS_PER_BLOCK;
+    const uint32_t num_docs = static_cast<uint32_t>(
+        std::min(static_cast<size_t>(DOCS_PER_BLOCK), count - start));
+
+    // Find max values in block for bitwidth computation
+    uint32_t max_delta = 0, max_tf = 0, max_dl = 0;
+    float block_max = 0.0f;
+
+    if (num_docs == DOCS_PER_BLOCK) {
+      // Dispatch max for full blocks (SSE4.1 or scalar fallback)
+      simd::get_dispatch().max_128(deltas.get(), tfs, doc_lens, start,
+                                   DOCS_PER_BLOCK, max_delta, max_tf, max_dl);
+      // block_max_score still needs scalar loop (float BM25 scoring)
+      for (uint32_t i = 0; i < DOCS_PER_BLOCK; ++i) {
+        float s = scorer.score(df, tfs[start + i], doc_lens[start + i]);
+        block_max = std::max(block_max, s);
+      }
+    } else {
+      // Scalar path for tail blocks
+      for (uint32_t i = 0; i < num_docs; ++i) {
+        max_delta = std::max(max_delta, deltas[start + i]);
+        max_tf = std::max(max_tf, tfs[start + i]);
+        max_dl = std::max(max_dl, doc_lens[start + i]);
+        float s = scorer.score(df, tfs[start + i], doc_lens[start + i]);
+        block_max = std::max(block_max, s);
+      }
+    }
+
+    blocks[b].start = start;
+    blocks[b].num_docs = num_docs;
+    blocks[b].bw_id = bits_needed(max_delta);
+    blocks[b].bw_tf = bits_needed(max_tf);
+    blocks[b].bw_dl = bits_needed(max_dl);
+    blocks[b].max_score = block_max;
+    // Full block (128 values): use SIMD packed size; tail block: use scalar
+    if (num_docs == DOCS_PER_BLOCK) {
+      blocks[b].packed_size = simd_packed_byte_size(blocks[b].bw_id) +
+                              simd_packed_byte_size(blocks[b].bw_tf) +
+                              simd_packed_byte_size(blocks[b].bw_dl);
+    } else {
+      blocks[b].packed_size = packed_byte_size(blocks[b].bw_id, num_docs) +
+                              packed_byte_size(blocks[b].bw_tf, num_docs) +
+                              packed_byte_size(blocks[b].bw_dl, num_docs);
+    }
+  }
+
+  // ---- Phase 3: Compute total size and block offsets ----
+  const size_t skip_list_size = num_blocks * sizeof(BlockMeta);
+  const size_t block_header_size = sizeof(BlockHeader);
+
+  // Compute block offsets, aligning each block start to a 16-byte boundary
+  // so that SIMD decode paths can use aligned loads on the packed data.
+  size_t current_offset = align_up(HEADER_SIZE + skip_list_size, 16);
+  std::vector<uint32_t> block_offsets(num_blocks);
+  for (uint32_t b = 0; b < num_blocks; ++b) {
+    block_offsets[b] = static_cast<uint32_t>(current_offset);
+    current_offset = align_up(
+        current_offset + block_header_size + blocks[b].packed_size, 16);
+  }
+
+  const size_t total_size = current_offset;
+
+  // ---- Phase 4: Serialize ----
+  std::string result(total_size, '\0');
+  char *buf = result.data();
+
+  // File Header
+  Header hdr{};
+  hdr.magic = MAGIC;
+  hdr.version = VERSION;
+  hdr.num_docs = static_cast<uint32_t>(count);
+  hdr.num_blocks = num_blocks;
+  std::memcpy(buf, &hdr, HEADER_SIZE);
+
+  // Skip List
+  BlockMeta *skip = reinterpret_cast<BlockMeta *>(buf + HEADER_SIZE);
+  for (uint32_t b = 0; b < num_blocks; ++b) {
+    const size_t last_idx = blocks[b].start + blocks[b].num_docs - 1;
+    skip[b].max_doc_id = doc_ids[last_idx];
+    skip[b].block_offset = block_offsets[b];
+    skip[b].block_max_score = blocks[b].max_score;
+  }
+
+  // Blocks
+  for (uint32_t b = 0; b < num_blocks; ++b) {
+    char *block_ptr = buf + block_offsets[b];
+
+    // Block Header
+    BlockHeader bhdr{};
+    bhdr.min_doc_id = doc_ids[blocks[b].start];
+    bhdr.bitwidth_id = blocks[b].bw_id;
+    bhdr.bitwidth_tf = blocks[b].bw_tf;
+    bhdr.bitwidth_dl = blocks[b].bw_dl;
+    bhdr.num_docs = static_cast<uint8_t>(blocks[b].num_docs);
+    bhdr.block_max_score = blocks[b].max_score;
+    std::memcpy(block_ptr, &bhdr, sizeof(BlockHeader));
+
+    uint8_t *packed_ptr =
+        reinterpret_cast<uint8_t *>(block_ptr + sizeof(BlockHeader));
+
+    const bool is_full_block = (blocks[b].num_docs == DOCS_PER_BLOCK);
+
+    // Pack doc_id deltas
+    const size_t id_bytes =
+        is_full_block ? simd_packed_byte_size(blocks[b].bw_id)
+                      : packed_byte_size(blocks[b].bw_id, blocks[b].num_docs);
+    pack_uint32(&deltas[blocks[b].start], blocks[b].bw_id, blocks[b].num_docs,
+                packed_ptr);
+    packed_ptr += id_bytes;
+
+    // Pack term frequencies
+    const size_t tf_bytes =
+        is_full_block ? simd_packed_byte_size(blocks[b].bw_tf)
+                      : packed_byte_size(blocks[b].bw_tf, blocks[b].num_docs);
+    pack_uint32(&tfs[blocks[b].start], blocks[b].bw_tf, blocks[b].num_docs,
+                packed_ptr);
+    packed_ptr += tf_bytes;
+
+    // Pack document lengths
+    pack_uint32(&doc_lens[blocks[b].start], blocks[b].bw_dl, blocks[b].num_docs,
+                packed_ptr);
+  }
+
+  return result;
+}
+
+// ============================================================
+// Iterator
+// ============================================================
+
+int BitPackedPostingIterator::open(const char *data, size_t size) {
+  if (!data || size < BitPackedPostingList::HEADER_SIZE) {
+    LOG_ERROR(
+        "BitPackedPostingIterator open failed: truncated data, "
+        "size[%zu] expected_min[%zu]",
+        size, BitPackedPostingList::HEADER_SIZE);
+    return -1;
+  }
+
+  // Parse file header
+  BitPackedPostingList::Header hdr{};
+  std::memcpy(&hdr, data, sizeof(hdr));
+
+  if (hdr.magic != BitPackedPostingList::MAGIC) {
+    LOG_ERROR(
+        "BitPackedPostingIterator open failed: bad magic, "
+        "got[0x%x] expected[0x%x]",
+        hdr.magic, BitPackedPostingList::MAGIC);
+    return -1;
+  }
+  if (hdr.version != BitPackedPostingList::VERSION) {
+    LOG_ERROR(
+        "BitPackedPostingIterator open failed: unsupported version, "
+        "got[%u] expected[%u]",
+        hdr.version, BitPackedPostingList::VERSION);
+    return -1;
+  }
+
+  num_docs_ = hdr.num_docs;
+  num_blocks_ = hdr.num_blocks;
+  data_ = data;
+  data_size_ = size;
+
+  if (num_docs_ == 0) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return 0;
+  }
+
+  // Validate skip list fits
+  const size_t skip_list_offset = BitPackedPostingList::HEADER_SIZE;
+  const size_t skip_list_size =
+      num_blocks_ * sizeof(BitPackedPostingList::BlockMeta);
+  if (skip_list_offset + skip_list_size > size) {
+    LOG_ERROR(
+        "BitPackedPostingIterator open failed: skip list overruns buffer, "
+        "num_blocks[%u] data_size[%zu] need[%zu]",
+        num_blocks_, size, skip_list_offset + skip_list_size);
+    return -1;
+  }
+
+  skip_list_ = reinterpret_cast<const BitPackedPostingList::BlockMeta *>(
+      data + skip_list_offset);
+
+  // Compute global max score
+  global_max_score_ = 0.0f;
+  for (uint32_t b = 0; b < num_blocks_; ++b) {
+    global_max_score_ =
+        std::max(global_max_score_, skip_list_[b].block_max_score);
+  }
+
+  // Initialize to before-first-block state
+  current_block_idx_ = 0;
+  in_block_pos_ = 0;
+  current_block_size_ = 0;
+  block_decoded_ = false;
+  current_doc_id_ = NO_MORE_DOCS;
+
+  // Cache SIMD dispatch function pointers to avoid PLT overhead on hot path
+  const auto &dispatch = simd::get_dispatch();
+  prefix_sum_fn_ = dispatch.prefix_sum_128;
+  find_first_ge_fn_ = dispatch.find_first_ge;
+  unpack_fn_ = dispatch.unpack_uint32_128;
+
+  return 0;
+}
+
+void BitPackedPostingIterator::decode_block(size_t block_idx) {
+  if (block_idx >= num_blocks_) {
+    LOG_WARN(
+        "BitPackedPostingIterator decode_block out of range: "
+        "block_idx[%zu] num_blocks[%u]",
+        block_idx, num_blocks_);
+    current_block_size_ = 0;
+    block_decoded_ = false;
+    return;
+  }
+
+  const auto &meta = skip_list_[block_idx];
+  const char *block_ptr = data_ + meta.block_offset;
+
+  // Parse block header
+  BitPackedPostingList::BlockHeader bhdr{};
+  std::memcpy(&bhdr, block_ptr, sizeof(bhdr));
+
+  current_block_size_ = bhdr.num_docs;
+  current_block_idx_ = block_idx;
+  in_block_pos_ = 0;
+
+  const uint8_t *packed_ptr =
+      reinterpret_cast<const uint8_t *>(block_ptr + sizeof(bhdr));
+
+  const bool is_full_block =
+      (bhdr.num_docs == BitPackedPostingList::DOCS_PER_BLOCK);
+
+  // Unpack doc_id deltas
+  const size_t id_bytes =
+      is_full_block
+          ? BitPackedPostingList::simd_packed_byte_size(bhdr.bitwidth_id)
+          : BitPackedPostingList::packed_byte_size(bhdr.bitwidth_id,
+                                                   bhdr.num_docs);
+  alignas(16) uint32_t deltas[BitPackedPostingList::DOCS_PER_BLOCK];
+  if (is_full_block) {
+    // Fast path: use cached function pointer directly for full blocks
+    unpack_fn_(packed_ptr, bhdr.bitwidth_id, deltas);
+  } else {
+    BitPackedPostingList::unpack_uint32(packed_ptr, bhdr.bitwidth_id,
+                                        bhdr.num_docs, deltas);
+  }
+  packed_ptr += id_bytes;
+
+  // Reconstruct absolute doc_ids from deltas using prefix-sum
+  if (is_full_block) {
+    prefix_sum_fn_(deltas, bhdr.min_doc_id,
+                   BitPackedPostingList::DOCS_PER_BLOCK, block_doc_ids_);
+  } else {
+    // Scalar prefix-sum for tail block
+    block_doc_ids_[0] = bhdr.min_doc_id;
+    for (uint32_t i = 1; i < bhdr.num_docs; ++i) {
+      block_doc_ids_[i] = block_doc_ids_[i - 1] + deltas[i];
+    }
+  }
+
+  // Lazy decode: record packed data pointers and bitwidths for tf/doc_len.
+  // Actual decoding is deferred until term_freq() or doc_len() is called.
+  const size_t tf_bytes =
+      is_full_block
+          ? BitPackedPostingList::simd_packed_byte_size(bhdr.bitwidth_tf)
+          : BitPackedPostingList::packed_byte_size(bhdr.bitwidth_tf,
+                                                   bhdr.num_docs);
+  packed_tf_ptr_ = packed_ptr;
+  current_bitwidth_tf_ = bhdr.bitwidth_tf;
+  packed_ptr += tf_bytes;
+
+  packed_dl_ptr_ = packed_ptr;
+  current_bitwidth_dl_ = bhdr.bitwidth_dl;
+
+  current_block_num_docs_ = bhdr.num_docs;
+  current_block_is_full_ = is_full_block;
+
+  // Reset lazy decode flags
+  tf_decoded_ = false;
+  dl_decoded_ = false;
+
+  block_decoded_ = true;
+}
+
+uint32_t BitPackedPostingIterator::next_doc() {
+  if (num_docs_ == 0) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // If no block is decoded yet, decode the first block
+  if (!block_decoded_) {
+    decode_block(0);
+    if (current_block_size_ == 0) {
+      current_doc_id_ = NO_MORE_DOCS;
+      return NO_MORE_DOCS;
+    }
+    current_doc_id_ = block_doc_ids_[0];
+    in_block_pos_ = 0;
+    return current_doc_id_;
+  }
+
+  // Advance within current block
+  ++in_block_pos_;
+  if (in_block_pos_ < current_block_size_) {
+    current_doc_id_ = block_doc_ids_[in_block_pos_];
+    return current_doc_id_;
+  }
+
+  // Move to next block
+  size_t next_block = current_block_idx_ + 1;
+  if (next_block >= num_blocks_) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  decode_block(next_block);
+  if (current_block_size_ == 0) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  current_doc_id_ = block_doc_ids_[0];
+  in_block_pos_ = 0;
+  return current_doc_id_;
+}
+
+size_t BitPackedPostingIterator::simd_find_first_ge(uint32_t target,
+                                                    size_t start) const {
+  return find_first_ge_fn_(block_doc_ids_, current_block_size_, target, start);
+}
+
+uint32_t BitPackedPostingIterator::advance(uint32_t target) {
+  if (num_docs_ == 0) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // If current doc_id already >= target, return it
+  if (current_doc_id_ != NO_MORE_DOCS && current_doc_id_ >= target) {
+    return current_doc_id_;
+  }
+
+  // Use skip list to find the target block via binary search.
+  // Find the first block whose max_doc_id >= target.
+  size_t lo = 0, hi = num_blocks_;
+
+  // If we have a current block and its max_doc_id >= target,
+  // we can search within the current block first.
+  if (block_decoded_ && current_block_idx_ < num_blocks_ &&
+      skip_list_[current_block_idx_].max_doc_id >= target) {
+    // Target might be in current block - SIMD scan from current position
+    {
+      size_t pos = simd_find_first_ge(target, in_block_pos_);
+      if (pos < current_block_size_) {
+        in_block_pos_ = pos;
+        current_doc_id_ = block_doc_ids_[pos];
+        return current_doc_id_;
+      }
+    }
+    // Not found in current block (shouldn't happen if skip list is correct)
+    lo = current_block_idx_ + 1;
+  } else if (block_decoded_) {
+    // Current block's max_doc_id < target, start search from next block
+    lo = current_block_idx_ + 1;
+  }
+
+  // Binary search in skip list for the first block with max_doc_id >= target
+  size_t target_block = hi;  // sentinel: no block found
+  while (lo < hi) {
+    size_t mid = lo + (hi - lo) / 2;
+    if (skip_list_[mid].max_doc_id >= target) {
+      target_block = mid;
+      hi = mid;
+    } else {
+      lo = mid + 1;
+    }
+  }
+
+  if (target_block >= num_blocks_) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // Decode the target block
+  decode_block(target_block);
+  if (current_block_size_ == 0) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+
+  // SIMD scan within the block for the first doc_id >= target
+  {
+    size_t pos = simd_find_first_ge(target, 0);
+    if (pos < current_block_size_) {
+      in_block_pos_ = pos;
+      current_doc_id_ = block_doc_ids_[pos];
+      return current_doc_id_;
+    }
+  }
+
+  // All docs in this block are < target (shouldn't happen with correct skip
+  // list), try next block
+  size_t next = target_block + 1;
+  if (next >= num_blocks_) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  decode_block(next);
+  if (current_block_size_ == 0) {
+    current_doc_id_ = NO_MORE_DOCS;
+    return NO_MORE_DOCS;
+  }
+  {
+    size_t pos = simd_find_first_ge(target, 0);
+    if (pos < current_block_size_) {
+      in_block_pos_ = pos;
+      current_doc_id_ = block_doc_ids_[pos];
+      return current_doc_id_;
+    }
+  }
+  current_doc_id_ = NO_MORE_DOCS;
+  return NO_MORE_DOCS;
+}
+
+void BitPackedPostingIterator::ensure_tf_decoded() {
+  if (tf_decoded_) {
+    return;
+  }
+  if (current_block_is_full_) {
+    unpack_fn_(packed_tf_ptr_, current_bitwidth_tf_, block_tfs_);
+  } else {
+    BitPackedPostingList::unpack_uint32(packed_tf_ptr_, current_bitwidth_tf_,
+                                        current_block_num_docs_, block_tfs_);
+  }
+  tf_decoded_ = true;
+}
+
+void BitPackedPostingIterator::ensure_dl_decoded() {
+  if (dl_decoded_) {
+    return;
+  }
+  if (current_block_is_full_) {
+    unpack_fn_(packed_dl_ptr_, current_bitwidth_dl_, block_doc_lens_);
+  } else {
+    BitPackedPostingList::unpack_uint32(packed_dl_ptr_, current_bitwidth_dl_,
+                                        current_block_num_docs_,
+                                        block_doc_lens_);
+  }
+  dl_decoded_ = true;
+}
+
+uint32_t BitPackedPostingIterator::term_freq() {
+  if (!block_decoded_ || in_block_pos_ >= current_block_size_) {
+    return 0;
+  }
+  ensure_tf_decoded();
+  return block_tfs_[in_block_pos_];
+}
+
+uint32_t BitPackedPostingIterator::doc_len() {
+  if (!block_decoded_ || in_block_pos_ >= current_block_size_) {
+    return 1;
+  }
+  ensure_dl_decoded();
+  return block_doc_lens_[in_block_pos_];
+}
+
+BitPackedPostingIterator::BlockMaxInfo
+BitPackedPostingIterator::block_max_info_for(uint32_t target) const {
+  if (num_blocks_ == 0 || skip_list_ == nullptr) {
+    return {0.0f, NO_MORE_DOCS};
+  }
+
+  // Fast path: check if target falls within the previously cached block
+  if (cached_bmi_valid_ && target <= cached_bmi_last_doc_) {
+    // target is in the same or earlier block as last query.
+    // Check if it's still in the same block (block_idx is correct).
+    if (cached_bmi_block_idx_ == 0 ||
+        target > skip_list_[cached_bmi_block_idx_ - 1].max_doc_id) {
+      return {cached_bmi_score_, cached_bmi_last_doc_};
+    }
+  }
+
+  size_t lo = 0, hi = num_blocks_;
+  while (lo < hi) {
+    size_t mid = lo + (hi - lo) / 2;
+    if (skip_list_[mid].max_doc_id >= target) {
+      hi = mid;
+    } else {
+      lo = mid + 1;
+    }
+  }
+  if (lo >= num_blocks_) {
+    return {0.0f, NO_MORE_DOCS};
+  }
+
+  // Update cache
+  cached_bmi_block_idx_ = lo;
+  cached_bmi_last_doc_ = skip_list_[lo].max_doc_id;
+  cached_bmi_score_ = skip_list_[lo].block_max_score;
+  cached_bmi_valid_ = true;
+
+  return {cached_bmi_score_, cached_bmi_last_doc_};
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/posting/bitpacked_posting_list.h b/src/db/index/column/fts_column/posting/bitpacked_posting_list.h
new file mode 100644
index 000000000..aeeb7f12f
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_posting_list.h
@@ -0,0 +1,237 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include "bitpacked_simd_dispatch.h"
+#include "../bm25_scorer.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// BitPacked Posting List encoder
+// ============================================================
+
+class BitPackedPostingList {
+ public:
+  static constexpr uint32_t DOCS_PER_BLOCK = 128;
+  static constexpr uint32_t MAGIC = 0x42504B44;  // "BPKD"
+  static constexpr uint32_t VERSION = 1;
+
+  /// Skip-list entry stored after the file header.
+  struct BlockMeta {
+    uint32_t max_doc_id;    ///< Last (largest) doc_id in this block
+    uint32_t block_offset;  ///< Byte offset from data start to block header
+    float block_max_score;  ///< BM25 score upper bound for this block
+  };
+
+  /// File header (16 bytes).
+  struct Header {
+    uint32_t magic;
+    uint32_t version;
+    uint32_t num_docs;
+    uint32_t num_blocks;
+  };
+  static constexpr size_t HEADER_SIZE = sizeof(Header);
+
+  /// Block header (16 bytes, padded for SIMD alignment).
+  struct BlockHeader {
+    uint32_t min_doc_id;
+    uint8_t bitwidth_id;
+    uint8_t bitwidth_tf;
+    uint8_t bitwidth_dl;
+    uint8_t num_docs;       ///< Number of docs in this block (<=128)
+    float block_max_score;  ///< Redundant copy for fast in-block access
+    uint32_t padding_{
+        0};  ///< Padding to make BlockHeader 16 bytes (SIMD alignment)
+  };
+
+  /// Encode a posting list with inline payloads.
+  /// \param doc_ids   Sorted ascending doc_id array
+  /// \param tfs       Term frequency for each doc
+  /// \param doc_lens  Document length for each doc
+  /// \param count     Number of entries
+  /// \param df        Document frequency (used for IDF in block_max_score)
+  /// \param scorer    BM25 scorer with segment stats loaded
+  /// \return Serialized bitpacked posting list
+  static std::string encode(const uint32_t *doc_ids, const uint32_t *tfs,
+                            const uint32_t *doc_lens, size_t count, uint64_t df,
+                            const BM25Scorer &scorer);
+
+  /// Check if raw data starts with the BitPacked magic number.
+  static bool is_bitpacked_format(const char *data, size_t size) {
+    if (size < sizeof(uint32_t)) return false;
+    uint32_t magic = 0;
+    std::memcpy(&magic, data, sizeof(uint32_t));
+    return magic == MAGIC;
+  }
+
+  // ---- Low-level bitpacking primitives ----
+
+  /// Pack \p count uint32 values (each using \p bitwidth bits) into \p out.
+  /// \p out must have at least ceil(bitwidth * count / 8) bytes.
+  /// \p count must be <= DOCS_PER_BLOCK (128).
+  static void pack_uint32(const uint32_t *in, uint8_t bitwidth, uint32_t count,
+                          uint8_t *out);
+
+  /// Unpack \p count uint32 values (each using \p bitwidth bits) from \p in.
+  /// \p out must have room for \p count uint32_t values.
+  static void unpack_uint32(const uint8_t *in, uint8_t bitwidth, uint32_t count,
+                            uint32_t *out);
+
+  /// Compute the minimum number of bits needed to represent \p max_value.
+  /// Returns 0 if max_value == 0.
+  static uint8_t bits_needed(uint32_t max_value);
+
+  /// Compute packed byte size for \p count values at \p bitwidth bits each
+  /// (scalar format, used for tail blocks with count < DOCS_PER_BLOCK).
+  static size_t packed_byte_size(uint8_t bitwidth, uint32_t count) {
+    return (static_cast<size_t>(bitwidth) * count + 7) / 8;
+  }
+
+  /// Compute packed byte size for a full SIMD block (128 values).
+  /// SIMD format stores bitwidth __m128i values = bitwidth * 16 bytes.
+  static size_t simd_packed_byte_size(uint8_t bitwidth) {
+    return static_cast<size_t>(bitwidth) * 16;
+  }
+};
+
+// ============================================================
+// BitPacked Posting Iterator (zero-copy, block-at-a-time)
+// ============================================================
+
+/// Zero-copy iterator over a serialized BitPacked posting list.
+/// Decodes one block at a time into stack-allocated arrays.
+class BitPackedPostingIterator {
+ public:
+  static constexpr uint32_t NO_MORE_DOCS = UINT32_MAX;
+
+  BitPackedPostingIterator() = default;
+
+  /// Open from serialized data (zero-copy, does not own the data).
+  /// \param data  Pointer to serialized bitpacked posting list
+  /// \param size  Size of the serialized data in bytes
+  /// \return 0 on success, -1 on error (bad magic, truncated data, etc.)
+  int open(const char *data, size_t size);
+
+  /// Advance to the next document.
+  /// \return doc_id of the next document, or NO_MORE_DOCS if exhausted.
+  uint32_t next_doc();
+
+  /// Advance to the first document with doc_id >= target.
+  /// Uses the skip list for O(log N_blocks) block-level seeking.
+  /// \return doc_id >= target, or NO_MORE_DOCS if exhausted.
+  uint32_t advance(uint32_t target);
+
+  /// Current document ID (valid after next_doc/advance).
+  uint32_t doc_id() const {
+    return current_doc_id_;
+  }
+
+  /// Term frequency of the current document (valid after next_doc/advance).
+  /// NOTE: non-const because lazy decode may be triggered on first access.
+  uint32_t term_freq();
+
+  /// Document length of the current document (valid after next_doc/advance).
+  /// NOTE: non-const because lazy decode may be triggered on first access.
+  uint32_t doc_len();
+
+  /// Return both block_max_score and max_doc_id for the block containing
+  /// \p target in a single binary search on the skip list.
+  /// Does NOT move the iterator position.
+  struct BlockMaxInfo {
+    float block_max_score{0.0f};
+    uint32_t block_last_doc{NO_MORE_DOCS};
+  };
+  BlockMaxInfo block_max_info_for(uint32_t target) const;
+
+  /// Total number of documents in this posting list.
+  uint64_t cost() const {
+    return num_docs_;
+  }
+
+  /// Maximum block_max_score across all blocks (global upper bound).
+  float max_score() const {
+    return global_max_score_;
+  }
+
+ private:
+  /// Decode block at index \p block_idx into the stack arrays.
+  void decode_block(size_t block_idx);
+
+  /// Lazy decode: ensure tf values are decoded before access.
+  void ensure_tf_decoded();
+
+  /// Lazy decode: ensure doc_len values are decoded before access.
+  void ensure_dl_decoded();
+
+  /// SIMD search: find first index i in block_doc_ids_[start..size)
+  /// where doc_id >= target. Uses SSE4.1 for 4-wide comparison.
+  size_t simd_find_first_ge(uint32_t target, size_t start) const;
+
+  // File header fields
+  uint32_t num_docs_{0};
+  uint32_t num_blocks_{0};
+
+  // Skip list (pointer into data_, not owned)
+  const BitPackedPostingList::BlockMeta *skip_list_{nullptr};
+
+  // Raw data pointer (not owned)
+  const char *data_{nullptr};
+  size_t data_size_{0};
+
+  // Current block state (decoded into stack arrays)
+  alignas(16) uint32_t block_doc_ids_[BitPackedPostingList::DOCS_PER_BLOCK];
+  alignas(16) uint32_t block_tfs_[BitPackedPostingList::DOCS_PER_BLOCK];
+  alignas(16) uint32_t block_doc_lens_[BitPackedPostingList::DOCS_PER_BLOCK];
+  size_t current_block_idx_{0};
+  uint32_t current_block_size_{0};
+  size_t in_block_pos_{0};     ///< Position within current decoded block
+  bool block_decoded_{false};  ///< Whether current block is decoded
+
+  // Lazy decode state: tf and doc_len are decoded on first access
+  bool tf_decoded_{false};
+  bool dl_decoded_{false};
+
+  // Store packed data pointers for lazy decode
+  const uint8_t *packed_tf_ptr_{nullptr};
+  const uint8_t *packed_dl_ptr_{nullptr};
+  uint8_t current_bitwidth_tf_{0};
+  uint8_t current_bitwidth_dl_{0};
+  uint32_t current_block_num_docs_{0};  ///< num_docs for lazy decode dispatch
+  bool current_block_is_full_{false};   ///< Whether current block is full (128)
+
+  uint32_t current_doc_id_{NO_MORE_DOCS};
+  float global_max_score_{0.0f};
+
+  // Cached SIMD dispatch function pointers (initialized in open()).
+  // Avoids repeated PLT/indirect calls through get_dispatch() on every
+  // decode_block / simd_find_first_ge invocation.
+  simd::PrefixSumFunc prefix_sum_fn_{nullptr};
+  simd::FindFirstGeFunc find_first_ge_fn_{nullptr};
+  simd::UnpackFunc unpack_fn_{nullptr};
+
+  // Cache for block_max_info_for to avoid repeated binary searches.
+  // If target falls within [cached_bmi_block_min_doc_+1, cached_bmi_last_doc_],
+  // we can return the cached result directly.
+  mutable uint32_t cached_bmi_last_doc_{0};
+  mutable float cached_bmi_score_{0.0f};
+  mutable size_t cached_bmi_block_idx_{0};
+  mutable bool cached_bmi_valid_{false};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_avx2.cc b/src/db/index/column/fts_column/posting/bitpacked_simd_avx2.cc
new file mode 100644
index 000000000..91f5ed002
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_avx2.cc
@@ -0,0 +1,216 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bitpacked_simd_avx2.h"
+
+#if defined(__AVX2__) || \
+    (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
+
+#include <immintrin.h>
+#include <cstring>
+#include "bitpacked_simd_sse41.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+static inline int ctz_u32(unsigned int v) {
+  unsigned long index;
+  _BitScanForward(&index, v);
+  return static_cast<int>(index);
+}
+#else
+static inline int ctz_u32(unsigned int v) {
+  return __builtin_ctz(v);
+}
+#endif
+
+namespace zvec::fts::simd {
+
+// ------------------------------------------------------------
+// avx2_max_128
+// ------------------------------------------------------------
+
+void avx2_max_128(const uint32_t *deltas, const uint32_t *tfs,
+                  const uint32_t *doc_lens, size_t start, uint32_t count,
+                  uint32_t &max_delta, uint32_t &max_tf, uint32_t &max_dl) {
+  __m256i vmax_delta = _mm256_setzero_si256();
+  __m256i vmax_tf = _mm256_setzero_si256();
+  __m256i vmax_dl = _mm256_setzero_si256();
+
+  for (uint32_t i = 0; i < count; i += 8) {
+    vmax_delta = _mm256_max_epu32(
+        vmax_delta, _mm256_loadu_si256(
+                        reinterpret_cast<const __m256i *>(&deltas[start + i])));
+    vmax_tf = _mm256_max_epu32(
+        vmax_tf,
+        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(&tfs[start + i])));
+    vmax_dl = _mm256_max_epu32(
+        vmax_dl, _mm256_loadu_si256(
+                     reinterpret_cast<const __m256i *>(&doc_lens[start + i])));
+  }
+
+  // Horizontal max: reduce 8 lanes to scalar
+  auto hmax = [](__m256i v) -> uint32_t {
+    // Reduce 256-bit to 128-bit by taking max of high and low halves
+    __m128i lo = _mm256_castsi256_si128(v);
+    __m128i hi = _mm256_extracti128_si256(v, 1);
+    __m128i m = _mm_max_epu32(lo, hi);
+    // Reduce 128-bit to scalar
+    m = _mm_max_epu32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(2, 3, 0, 1)));
+    m = _mm_max_epu32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(1, 0, 3, 2)));
+    return static_cast<uint32_t>(_mm_extract_epi32(m, 0));
+  };
+
+  max_delta = hmax(vmax_delta);
+  max_tf = hmax(vmax_tf);
+  max_dl = hmax(vmax_dl);
+}
+
+// ------------------------------------------------------------
+// avx2_pack_uint32_128 — fallback to SSE4.1
+// ------------------------------------------------------------
+
+void avx2_pack_uint32_128(const uint32_t *in, uint8_t bitwidth, uint8_t *out) {
+  // FastPForLib does not provide AVX2 bitpacking; delegate to SSE4.1.
+  sse41_pack_uint32_128(in, bitwidth, out);
+}
+
+// ------------------------------------------------------------
+// avx2_unpack_uint32_128 — fallback to SSE4.1
+// ------------------------------------------------------------
+
+void avx2_unpack_uint32_128(const uint8_t *in, uint8_t bitwidth,
+                            uint32_t *out) {
+  // FastPForLib does not provide AVX2 bitpacking; delegate to SSE4.1.
+  sse41_unpack_uint32_128(in, bitwidth, out);
+}
+
+// ------------------------------------------------------------
+// avx2_prefix_sum_128
+// ------------------------------------------------------------
+
+void avx2_prefix_sum_128(const uint32_t *deltas, uint32_t min_doc_id,
+                         uint32_t /*count*/, uint32_t *out) {
+  // Process 8 elements per iteration (16 groups of 8 = 128 elements).
+  // Within each 256-bit register we compute a prefix-sum, then propagate
+  // the carry (last element) to the next group.
+  __m256i carry = _mm256_set1_epi32(static_cast<int>(min_doc_id) -
+                                    static_cast<int>(deltas[0]));
+
+  for (uint32_t g = 0; g < 16; ++g) {
+    __m256i v =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(&deltas[g * 8]));
+
+    // In-register prefix-sum for 8 elements (two 128-bit lanes independently,
+    // then cross-lane fixup).
+
+    // Step 1: shift by 1 element (4 bytes) within each 128-bit lane
+    __m256i shifted1 = _mm256_bslli_epi128(v, 4);
+    v = _mm256_add_epi32(v, shifted1);
+
+    // Step 2: shift by 2 elements (8 bytes) within each 128-bit lane
+    __m256i shifted2 = _mm256_bslli_epi128(v, 8);
+    v = _mm256_add_epi32(v, shifted2);
+
+    // Step 3: cross-lane fixup — high lane needs the sum of the low lane's
+    // last element (index 3) added to all its elements.
+    // Broadcast low lane's element[3] to all positions of high lane.
+    __m128i lo = _mm256_castsi256_si128(v);
+    __m128i lo_last = _mm_shuffle_epi32(lo, _MM_SHUFFLE(3, 3, 3, 3));
+    __m256i cross = _mm256_set_m128i(lo_last, _mm_setzero_si128());
+    v = _mm256_add_epi32(v, cross);
+
+    // Add carry from previous group
+    v = _mm256_add_epi32(v, carry);
+
+    _mm256_storeu_si256(reinterpret_cast<__m256i *>(&out[g * 8]), v);
+
+    // Broadcast the last element (index 7) as carry for next group.
+    // Element 7 is in the high lane at position 3.
+    __m128i hi = _mm256_extracti128_si256(v, 1);
+    __m128i hi_last = _mm_shuffle_epi32(hi, _MM_SHUFFLE(3, 3, 3, 3));
+    carry = _mm256_set_m128i(hi_last, hi_last);
+  }
+}
+
+// ------------------------------------------------------------
+// avx2_find_first_ge
+// ------------------------------------------------------------
+
+size_t avx2_find_first_ge(const uint32_t *arr, uint32_t size, uint32_t target,
+                          size_t start) {
+  const __m256i vtarget = _mm256_set1_epi32(static_cast<int>(target));
+  const __m256i sign_bit = _mm256_set1_epi32(static_cast<int>(0x80000000u));
+  const __m256i starget = _mm256_xor_si256(vtarget, sign_bit);
+
+  size_t i = start;
+  // Scalar until aligned to 4-element boundary (minimum for unaligned AVX2)
+  for (; i < size && (i & 3); ++i) {
+    if (arr[i] >= target) {
+      return i;
+    }
+  }
+  // SIMD scan: 8 elements at a time
+  for (; i + 8 <= size; i += 8) {
+    __m256i v = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(&arr[i]));
+    __m256i sv = _mm256_xor_si256(v, sign_bit);
+    // cmpgt: sv < starget means arr[i] < target
+    __m256i cmp = _mm256_cmpgt_epi32(starget, sv);
+    int mask = _mm256_movemask_ps(_mm256_castsi256_ps(cmp));
+    if (mask != 0xFF) {
+      // At least one element >= target
+      int first = ctz_u32(static_cast<unsigned int>(~mask & 0xFF));
+      return i + first;
+    }
+  }
+  // Scalar tail
+  for (; i < size; ++i) {
+    if (arr[i] >= target) {
+      return i;
+    }
+  }
+  return size;
+}
+
+}  // namespace zvec::fts::simd
+
+#else  // !defined(__AVX2__) && !(defined(_MSC_VER) && (defined(_M_X64) ||
+       // defined(_M_IX86)))
+
+// Stub implementations when AVX2 is not available at compile time.
+// The runtime dispatch layer (bitpacked_simd_dispatch.cc) will never call
+// these on non-AVX2 machines, but the linker still needs the symbols.
+
+namespace zvec::fts::simd {
+
+void avx2_max_128(const uint32_t *, const uint32_t *, const uint32_t *, size_t,
+                  uint32_t, uint32_t &max_delta, uint32_t &max_tf,
+                  uint32_t &max_dl) {
+  max_delta = 0;
+  max_tf = 0;
+  max_dl = 0;
+}
+
+void avx2_pack_uint32_128(const uint32_t *, uint8_t, uint8_t *) {}
+
+void avx2_unpack_uint32_128(const uint8_t *, uint8_t, uint32_t *) {}
+
+void avx2_prefix_sum_128(const uint32_t *, uint32_t, uint32_t, uint32_t *) {}
+
+size_t avx2_find_first_ge(const uint32_t *, uint32_t size, uint32_t, size_t) {
+  return size;
+}
+
+}  // namespace zvec::fts::simd
+
+#endif  // defined(__AVX2__)
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_avx2.h b/src/db/index/column/fts_column/posting/bitpacked_simd_avx2.h
new file mode 100644
index 000000000..d86796016
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_avx2.h
@@ -0,0 +1,49 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace zvec::fts::simd {
+
+/// Compute element-wise max of 128 uint32 values across three arrays using
+/// AVX2 _mm256_max_epu32.  \p deltas must be 32-byte aligned; \p tfs and
+/// \p doc_lens may be unaligned.
+void avx2_max_128(const uint32_t *deltas, const uint32_t *tfs,
+                  const uint32_t *doc_lens, size_t start, uint32_t count,
+                  uint32_t &max_delta, uint32_t &max_tf, uint32_t &max_dl);
+
+/// Pack 128 uint32 values at \p bitwidth bits each into \p out.
+/// Falls back to SSE4.1 implementation (FastPForLib lacks AVX2 bitpacking).
+void avx2_pack_uint32_128(const uint32_t *in, uint8_t bitwidth, uint8_t *out);
+
+/// Unpack 128 uint32 values at \p bitwidth bits each from \p in.
+/// Falls back to SSE4.1 implementation (FastPForLib lacks AVX2 bitpacking).
+void avx2_unpack_uint32_128(const uint8_t *in, uint8_t bitwidth, uint32_t *out);
+
+/// Compute prefix-sum over \p count (must be 128) delta values, producing
+/// absolute doc_ids.  Uses AVX2 SIMD prefix-sum with carry propagation.
+/// \p deltas must be 32-byte aligned; \p out must be 32-byte aligned.
+void avx2_prefix_sum_128(const uint32_t *deltas, uint32_t min_doc_id,
+                         uint32_t count, uint32_t *out);
+
+/// Find the first index i in arr[start..size) where arr[i] >= target.
+/// Uses AVX2 8-wide comparison with unsigned-to-signed trick.
+/// \p arr must be 32-byte aligned.
+size_t avx2_find_first_ge(const uint32_t *arr, uint32_t size, uint32_t target,
+                          size_t start);
+
+}  // namespace zvec::fts::simd
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_dispatch.cc b/src/db/index/column/fts_column/posting/bitpacked_simd_dispatch.cc
new file mode 100644
index 000000000..c850703cd
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_dispatch.cc
@@ -0,0 +1,60 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bitpacked_simd_dispatch.h"
+#include <ailego/internal/cpu_features.h>
+#include "bitpacked_simd_scalar.h"
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
+    defined(_M_IX86)
+#include "bitpacked_simd_avx2.h"
+#include "bitpacked_simd_sse41.h"
+#endif
+
+namespace zvec::fts::simd {
+
+static DispatchTable init_dispatch() {
+  DispatchTable t{};
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
+    defined(_M_IX86)
+  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
+    t.max_128 = avx2_max_128;
+    t.pack_uint32_128 = avx2_pack_uint32_128;
+    t.unpack_uint32_128 = avx2_unpack_uint32_128;
+    t.prefix_sum_128 = avx2_prefix_sum_128;
+    t.find_first_ge = avx2_find_first_ge;
+    return t;
+  }
+  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
+    t.max_128 = sse41_max_128;
+    t.pack_uint32_128 = sse41_pack_uint32_128;
+    t.unpack_uint32_128 = sse41_unpack_uint32_128;
+    t.prefix_sum_128 = sse41_prefix_sum_128;
+    t.find_first_ge = sse41_find_first_ge;
+    return t;
+  }
+#endif
+  t.max_128 = scalar_max_128;
+  t.pack_uint32_128 = scalar_pack_uint32_128;
+  t.unpack_uint32_128 = scalar_unpack_uint32_128;
+  t.prefix_sum_128 = scalar_prefix_sum_128;
+  t.find_first_ge = scalar_find_first_ge;
+  return t;
+}
+
+const DispatchTable &get_dispatch() {
+  static const DispatchTable table = init_dispatch();
+  return table;
+}
+
+}  // namespace zvec::fts::simd
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_dispatch.h b/src/db/index/column/fts_column/posting/bitpacked_simd_dispatch.h
new file mode 100644
index 000000000..64c498e06
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_dispatch.h
@@ -0,0 +1,44 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace zvec::fts::simd {
+
+// Function pointer types for SIMD-dispatched operations.
+using MaxFunc = void (*)(const uint32_t *, const uint32_t *, const uint32_t *,
+                         size_t, uint32_t, uint32_t &, uint32_t &, uint32_t &);
+using PackFunc = void (*)(const uint32_t *, uint8_t, uint8_t *);
+using UnpackFunc = void (*)(const uint8_t *, uint8_t, uint32_t *);
+using PrefixSumFunc = void (*)(const uint32_t *, uint32_t, uint32_t,
+                               uint32_t *);
+using FindFirstGeFunc = size_t (*)(const uint32_t *, uint32_t, uint32_t,
+                                   size_t);
+
+/// Dispatch table populated once at startup via CPU feature detection.
+struct DispatchTable {
+  MaxFunc max_128;
+  PackFunc pack_uint32_128;
+  UnpackFunc unpack_uint32_128;
+  PrefixSumFunc prefix_sum_128;
+  FindFirstGeFunc find_first_ge;
+};
+
+/// Get the global dispatch table (initialized on first call).
+const DispatchTable &get_dispatch();
+
+}  // namespace zvec::fts::simd
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_scalar.cc b/src/db/index/column/fts_column/posting/bitpacked_simd_scalar.cc
new file mode 100644
index 000000000..4877751ba
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_scalar.cc
@@ -0,0 +1,97 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bitpacked_simd_scalar.h"
+#include <bitpackinghelpers.h>
+#include <algorithm>
+#include <cstring>
+#include "bitpacked_posting_list.h"
+
+namespace zvec::fts::simd {
+
+// ------------------------------------------------------------
+// scalar_max_128
+// ------------------------------------------------------------
+
+void scalar_max_128(const uint32_t *deltas, const uint32_t *tfs,
+                    const uint32_t *doc_lens, size_t start, uint32_t count,
+                    uint32_t &max_delta, uint32_t &max_tf, uint32_t &max_dl) {
+  uint32_t md = 0, mt = 0, ml = 0;
+  for (uint32_t i = 0; i < count; ++i) {
+    md = std::max(md, deltas[start + i]);
+    mt = std::max(mt, tfs[start + i]);
+    ml = std::max(ml, doc_lens[start + i]);
+  }
+  max_delta = md;
+  max_tf = mt;
+  max_dl = ml;
+}
+
+// ------------------------------------------------------------
+// scalar_pack_uint32_128
+// ------------------------------------------------------------
+
+void scalar_pack_uint32_128(const uint32_t *in, uint8_t bitwidth,
+                            uint8_t *out) {
+  // Scalar fastpack processes 32 values at a time; loop 4 times for 128.
+  const size_t total_bytes =
+      BitPackedPostingList::simd_packed_byte_size(bitwidth);
+  std::memset(out, 0, total_bytes);
+
+  uint32_t *out32 = reinterpret_cast<uint32_t *>(out);
+  for (uint32_t g = 0; g < 4; ++g) {
+    FastPForLib::fastpackwithoutmask(in + g * 32, out32, bitwidth);
+    out32 += bitwidth;
+  }
+}
+
+// ------------------------------------------------------------
+// scalar_unpack_uint32_128
+// ------------------------------------------------------------
+
+void scalar_unpack_uint32_128(const uint8_t *in, uint8_t bitwidth,
+                              uint32_t *out) {
+  const uint32_t *in32 = reinterpret_cast<const uint32_t *>(in);
+  for (uint32_t g = 0; g < 4; ++g) {
+    FastPForLib::fastunpack(in32, out + g * 32, bitwidth);
+    in32 += bitwidth;
+  }
+}
+
+// ------------------------------------------------------------
+// scalar_prefix_sum_128
+// ------------------------------------------------------------
+
+void scalar_prefix_sum_128(const uint32_t *deltas, uint32_t min_doc_id,
+                           uint32_t count, uint32_t *out) {
+  // First element: min_doc_id corresponds to deltas[0]
+  out[0] = min_doc_id;
+  for (uint32_t i = 1; i < count; ++i) {
+    out[i] = out[i - 1] + deltas[i];
+  }
+}
+
+// ------------------------------------------------------------
+// scalar_find_first_ge
+// ------------------------------------------------------------
+
+size_t scalar_find_first_ge(const uint32_t *arr, uint32_t size, uint32_t target,
+                            size_t start) {
+  for (size_t i = start; i < size; ++i) {
+    if (arr[i] >= target) return i;
+  }
+  return size;
+}
+
+}  // namespace zvec::fts::simd
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_scalar.h b/src/db/index/column/fts_column/posting/bitpacked_simd_scalar.h
new file mode 100644
index 000000000..ce0cbf9f7
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_scalar.h
@@ -0,0 +1,47 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace zvec::fts::simd {
+
+/// Scalar fallback: compute element-wise max of up to 128 uint32 values across
+/// three arrays using a simple loop.
+void scalar_max_128(const uint32_t *deltas, const uint32_t *tfs,
+                    const uint32_t *doc_lens, size_t start, uint32_t count,
+                    uint32_t &max_delta, uint32_t &max_tf, uint32_t &max_dl);
+
+/// Scalar fallback: pack 128 uint32 values at \p bitwidth bits each into \p out
+/// using FastPForLib::fastpackwithoutmask (32 values at a time, 4 iterations).
+void scalar_pack_uint32_128(const uint32_t *in, uint8_t bitwidth, uint8_t *out);
+
+/// Scalar fallback: unpack 128 uint32 values at \p bitwidth bits each from
+/// \p in using FastPForLib::fastunpack (32 values at a time, 4 iterations).
+void scalar_unpack_uint32_128(const uint8_t *in, uint8_t bitwidth,
+                              uint32_t *out);
+
+/// Scalar fallback: compute prefix-sum over \p count delta values, producing
+/// absolute doc_ids.
+void scalar_prefix_sum_128(const uint32_t *deltas, uint32_t min_doc_id,
+                           uint32_t count, uint32_t *out);
+
+/// Scalar fallback: find the first index i in arr[start..size) where
+/// arr[i] >= target using a linear scan.
+size_t scalar_find_first_ge(const uint32_t *arr, uint32_t size, uint32_t target,
+                            size_t start);
+
+}  // namespace zvec::fts::simd
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_sse41.cc b/src/db/index/column/fts_column/posting/bitpacked_simd_sse41.cc
new file mode 100644
index 000000000..1a7ccd20f
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_sse41.cc
@@ -0,0 +1,202 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bitpacked_simd_sse41.h"
+
+#if defined(__SSE4_1__) || \
+    (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
+
+#include <bitpackinghelpers.h>
+#include <emmintrin.h>  // SSE2
+#include <simdbitpacking.h>
+#include <smmintrin.h>  // SSE4.1
+#include <cstring>
+#include "bitpacked_posting_list.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+static inline int ctz_u32(unsigned int v) {
+  unsigned long index;
+  _BitScanForward(&index, v);
+  return static_cast<int>(index);
+}
+#else
+static inline int ctz_u32(unsigned int v) {
+  return __builtin_ctz(v);
+}
+#endif
+
+namespace zvec::fts::simd {
+
+// ------------------------------------------------------------
+// sse41_max_128
+// ------------------------------------------------------------
+
+void sse41_max_128(const uint32_t *deltas, const uint32_t *tfs,
+                   const uint32_t *doc_lens, size_t start, uint32_t count,
+                   uint32_t &max_delta, uint32_t &max_tf, uint32_t &max_dl) {
+  __m128i vmax_delta = _mm_setzero_si128();
+  __m128i vmax_tf = _mm_setzero_si128();
+  __m128i vmax_dl = _mm_setzero_si128();
+  for (uint32_t i = 0; i < count; i += 4) {
+    vmax_delta = _mm_max_epu32(
+        vmax_delta,
+        _mm_load_si128(reinterpret_cast<const __m128i *>(&deltas[start + i])));
+    vmax_tf = _mm_max_epu32(
+        vmax_tf,
+        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&tfs[start + i])));
+    vmax_dl = _mm_max_epu32(
+        vmax_dl, _mm_loadu_si128(
+                     reinterpret_cast<const __m128i *>(&doc_lens[start + i])));
+  }
+  // Horizontal max: reduce 4 lanes to scalar
+  auto hmax = [](__m128i v) -> uint32_t {
+    v = _mm_max_epu32(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(2, 3, 0, 1)));
+    v = _mm_max_epu32(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(1, 0, 3, 2)));
+    return static_cast<uint32_t>(_mm_extract_epi32(v, 0));
+  };
+  max_delta = hmax(vmax_delta);
+  max_tf = hmax(vmax_tf);
+  max_dl = hmax(vmax_dl);
+}
+
+// ------------------------------------------------------------
+// sse41_pack_uint32_128
+// ------------------------------------------------------------
+
+void sse41_pack_uint32_128(const uint32_t *in, uint8_t bitwidth, uint8_t *out) {
+  const size_t total_bytes =
+      BitPackedPostingList::simd_packed_byte_size(bitwidth);
+  if ((reinterpret_cast<uintptr_t>(out) & 15) == 0) {
+    FastPForLib::SIMD_fastpackwithoutmask_32(
+        in, reinterpret_cast<__m128i *>(out), bitwidth);
+  } else {
+    alignas(16) __m128i simd_out[32];
+    FastPForLib::SIMD_fastpackwithoutmask_32(in, simd_out, bitwidth);
+    std::memcpy(out, simd_out, total_bytes);
+  }
+}
+
+// ------------------------------------------------------------
+// sse41_unpack_uint32_128
+// ------------------------------------------------------------
+
+void sse41_unpack_uint32_128(const uint8_t *in, uint8_t bitwidth,
+                             uint32_t *out) {
+  if ((reinterpret_cast<uintptr_t>(in) & 15) == 0) {
+    FastPForLib::SIMD_fastunpack_32(reinterpret_cast<const __m128i *>(in), out,
+                                    bitwidth);
+  } else {
+    const size_t packed_bytes =
+        BitPackedPostingList::simd_packed_byte_size(bitwidth);
+    alignas(16) __m128i simd_in[32];
+    std::memcpy(simd_in, in, packed_bytes);
+    FastPForLib::SIMD_fastunpack_32(simd_in, out, bitwidth);
+  }
+}
+
+// ------------------------------------------------------------
+// sse41_prefix_sum_128
+// ------------------------------------------------------------
+
+void sse41_prefix_sum_128(const uint32_t *deltas, uint32_t min_doc_id,
+                          uint32_t /*count*/, uint32_t *out) {
+  __m128i carry = _mm_set1_epi32(static_cast<int>(min_doc_id) -
+                                 static_cast<int>(deltas[0]));
+
+  for (uint32_t g = 0; g < 32; ++g) {
+    __m128i v =
+        _mm_load_si128(reinterpret_cast<const __m128i *>(&deltas[g * 4]));
+
+    // In-register prefix-sum for 4 elements
+    __m128i shifted1 = _mm_slli_si128(v, 4);
+    v = _mm_add_epi32(v, shifted1);
+    __m128i shifted2 = _mm_slli_si128(v, 8);
+    v = _mm_add_epi32(v, shifted2);
+
+    // Add carry from previous group
+    v = _mm_add_epi32(v, carry);
+
+    _mm_store_si128(reinterpret_cast<__m128i *>(&out[g * 4]), v);
+
+    // Broadcast the last element as carry for next group
+    carry = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 3, 3));
+  }
+}
+
+// ------------------------------------------------------------
+// sse41_find_first_ge
+// ------------------------------------------------------------
+
+size_t sse41_find_first_ge(const uint32_t *arr, uint32_t size, uint32_t target,
+                           size_t start) {
+  const __m128i vtarget = _mm_set1_epi32(static_cast<int>(target));
+  const __m128i sign_bit = _mm_set1_epi32(static_cast<int>(0x80000000u));
+  const __m128i starget = _mm_xor_si128(vtarget, sign_bit);
+
+  size_t i = start;
+  // Scalar until aligned to 4-element boundary
+  for (; i < size && (i & 3); ++i) {
+    if (arr[i] >= target) return i;
+  }
+  // SIMD scan: 4 elements at a time
+  for (; i + 4 <= size; i += 4) {
+    __m128i v = _mm_load_si128(reinterpret_cast<const __m128i *>(&arr[i]));
+    __m128i sv = _mm_xor_si128(v, sign_bit);
+    __m128i cmp = _mm_cmplt_epi32(sv, starget);
+    int mask = _mm_movemask_ps(_mm_castsi128_ps(cmp));
+    if (mask != 0xF) {
+      int first = ctz_u32(static_cast<unsigned int>(~mask));
+      return i + first;
+    }
+  }
+  // Scalar tail
+  for (; i < size; ++i) {
+    if (arr[i] >= target) return i;
+  }
+  return size;
+}
+
+}  // namespace zvec::fts::simd
+
+#else  // !defined(__SSE4_1__) && !(defined(_MSC_VER) && (defined(_M_X64) ||
+       // defined(_M_IX86)))
+
+// Stub implementations when SSE4.1 is not available at compile time.
+// The runtime dispatch layer (bitpacked_simd_dispatch.cc) will never call
+// these on non-SSE4.1 machines, but the linker still needs the symbols.
+
+namespace zvec::fts::simd {
+
+void sse41_max_128(const uint32_t *, const uint32_t *, const uint32_t *, size_t,
+                   uint32_t, uint32_t &max_delta, uint32_t &max_tf,
+                   uint32_t &max_dl) {
+  max_delta = 0;
+  max_tf = 0;
+  max_dl = 0;
+}
+
+void sse41_pack_uint32_128(const uint32_t *, uint8_t, uint8_t *) {}
+
+void sse41_unpack_uint32_128(const uint8_t *, uint8_t, uint32_t *) {}
+
+void sse41_prefix_sum_128(const uint32_t *, uint32_t, uint32_t, uint32_t *) {}
+
+size_t sse41_find_first_ge(const uint32_t *, uint32_t size, uint32_t, size_t) {
+  return size;
+}
+
+}  // namespace zvec::fts::simd
+
+#endif  // defined(__SSE4_1__)
diff --git a/src/db/index/column/fts_column/posting/bitpacked_simd_sse41.h b/src/db/index/column/fts_column/posting/bitpacked_simd_sse41.h
new file mode 100644
index 000000000..ca82514c4
--- /dev/null
+++ b/src/db/index/column/fts_column/posting/bitpacked_simd_sse41.h
@@ -0,0 +1,50 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace zvec::fts::simd {
+
+/// Compute element-wise max of 128 uint32 values across three arrays using
+/// SSE4.1 _mm_max_epu32.  \p deltas must be 16-byte aligned; \p tfs and
+/// \p doc_lens may be unaligned.
+void sse41_max_128(const uint32_t *deltas, const uint32_t *tfs,
+                   const uint32_t *doc_lens, size_t start, uint32_t count,
+                   uint32_t &max_delta, uint32_t &max_tf, uint32_t &max_dl);
+
+/// Pack 128 uint32 values at \p bitwidth bits each into \p out using SSE SIMD
+/// interleaved layout (SIMD_fastpackwithoutmask_32).
+void sse41_pack_uint32_128(const uint32_t *in, uint8_t bitwidth, uint8_t *out);
+
+/// Unpack 128 uint32 values at \p bitwidth bits each from \p in using SSE SIMD
+/// interleaved layout (SIMD_fastunpack_32).
+void sse41_unpack_uint32_128(const uint8_t *in, uint8_t bitwidth,
+                             uint32_t *out);
+
+/// Compute prefix-sum over \p count (must be 128) delta values, producing
+/// absolute doc_ids.  Uses SSE2 SIMD prefix-sum with carry propagation.
+/// \p deltas must be 16-byte aligned; \p out must be 16-byte aligned.
+void sse41_prefix_sum_128(const uint32_t *deltas, uint32_t min_doc_id,
+                          uint32_t count, uint32_t *out);
+
+/// Find the first index i in arr[start..size) where arr[i] >= target.
+/// Uses SSE2 SIMD 4-wide comparison with unsigned-to-signed trick.
+/// \p arr must be 16-byte aligned.
+size_t sse41_find_first_ge(const uint32_t *arr, uint32_t size, uint32_t target,
+                           size_t start);
+
+}  // namespace zvec::fts::simd
diff --git a/src/db/index/column/fts_column/tokenizer/jieba_tokenizer.cc b/src/db/index/column/fts_column/tokenizer/jieba_tokenizer.cc
new file mode 100644
index 000000000..ceabbeced
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/jieba_tokenizer.cc
@@ -0,0 +1,128 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "jieba_tokenizer.h"
+#include <zvec/ailego/logger/logger.h>
+#include "cppjieba/Jieba.hpp"
+
+namespace zvec::fts {
+
+static std::string get_string_or_default(const ailego::JsonObject &config,
+                                         const char *key,
+                                         const std::string &default_value) {
+  auto val = config[key];
+  if (val.is_string()) {
+    std::string result = val.as_string().c_str();
+    if (!result.empty()) {
+      return result;
+    }
+  }
+  return default_value;
+}
+
+bool JiebaTokenizer::init(const ailego::JsonObject &config) {
+  std::string dict_path = get_string_or_default(config, "dict_path", "");
+  if (dict_path.empty()) {
+    LOG_ERROR("JiebaTokenizer: 'dict_path' is required but not provided");
+    return false;
+  }
+  std::string model_path = get_string_or_default(config, "model_path", "");
+  if (model_path.empty()) {
+    LOG_ERROR("JiebaTokenizer: 'model_path' is required but not provided");
+    return false;
+  }
+  std::string user_dict_path =
+      get_string_or_default(config, "user_dict_path", "");
+  std::string idf_path = get_string_or_default(config, "idf_path", "");
+  std::string stop_word_path =
+      get_string_or_default(config, "stop_word_path", "");
+
+  // Parse cut mode
+  std::string mode_str = get_string_or_default(config, "cut_mode", "search");
+  if (mode_str == "search") {
+    cut_mode_ = CutMode::kSearch;
+  } else if (mode_str == "mix") {
+    cut_mode_ = CutMode::kMix;
+  } else if (mode_str == "full") {
+    cut_mode_ = CutMode::kFull;
+  } else if (mode_str == "hmm") {
+    cut_mode_ = CutMode::kHmm;
+  } else {
+    LOG_ERROR("JiebaTokenizer: unknown cut_mode '%s'", mode_str.c_str());
+    return false;
+  }
+
+  // Release any previously initialised handle
+  jieba_.reset();
+
+  try {
+    jieba_ = std::make_unique<cppjieba::Jieba>(
+        dict_path, model_path, user_dict_path, idf_path, stop_word_path);
+  } catch (const std::exception &e) {
+    LOG_ERROR("JiebaTokenizer init failed: %s", e.what());
+    jieba_.reset();
+    return false;
+  }
+
+  LOG_INFO(
+      "JiebaTokenizer init success. dict_path[%s] model_path[%s] "
+      "cut_mode[%s]",
+      dict_path.c_str(), model_path.c_str(), mode_str.c_str());
+  return true;
+}
+
+JiebaTokenizer::~JiebaTokenizer() = default;
+
+std::vector<Token> JiebaTokenizer::tokenize(const std::string &text) const {
+  std::vector<Token> tokens;
+  if (!jieba_ || text.empty()) {
+    return tokens;
+  }
+
+  std::vector<cppjieba::Word> words;
+  switch (cut_mode_) {
+    case CutMode::kSearch:
+      jieba_->CutForSearch(text, words, true);
+      break;
+    case CutMode::kMix:
+      jieba_->Cut(text, words, true);
+      break;
+    case CutMode::kFull:
+      jieba_->CutAll(text, words);
+      break;
+    case CutMode::kHmm:
+      jieba_->CutHMM(text, words);
+      break;
+    default:
+      LOG_ERROR("JiebaTokenizer: unexpected cut_mode %d",
+                static_cast<int>(cut_mode_));
+      return tokens;
+  }
+
+  tokens.reserve(words.size());
+  for (const auto &word : words) {
+    if (word.word.empty()) {
+      continue;
+    }
+    Token token;
+    token.text = word.word;
+    token.offset = word.offset;
+    token.position = word.unicode_offset;
+    tokens.push_back(std::move(token));
+  }
+
+  return tokens;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/jieba_tokenizer.h b/src/db/index/column/fts_column/tokenizer/jieba_tokenizer.h
new file mode 100644
index 000000000..88665d1a5
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/jieba_tokenizer.h
@@ -0,0 +1,77 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include "tokenizer.h"
+
+namespace cppjieba {
+class Jieba;
+}  // namespace cppjieba
+
+namespace zvec::fts {
+
+/*! Jieba tokenizer
+ *
+ *  Wraps cppjieba to provide Chinese (and mixed Chinese/English) word
+ *  segmentation.  Uses CutForSearch mode by default which produces finer
+ *  granularity suitable for search/indexing scenarios.
+ *
+ *  The cppjieba::Jieba instance is thread-safe for concurrent Cut* calls
+ *  after construction, so tokenize() can be called from multiple threads.
+ */
+class JiebaTokenizer : public Tokenizer {
+ public:
+  JiebaTokenizer() = default;
+  ~JiebaTokenizer() override;
+
+  // Non-copyable
+  JiebaTokenizer(const JiebaTokenizer &) = delete;
+  JiebaTokenizer &operator=(const JiebaTokenizer &) = delete;
+
+  /*! Initialise from JSON config.
+   *  Supported keys:
+   *    "dict_path"      – path to jieba.dict.utf8 (required)
+   *    "model_path"     – path to hmm_model.utf8 (required)
+   *    "user_dict_path" – path to user.dict.utf8 (optional)
+   *    "idf_path"       – path to idf.utf8 (optional)
+   *    "stop_word_path" – path to stop_words.utf8 (optional)
+   *    "cut_mode"       – "search" (default) | "mix" | "full" | "hmm"
+   */
+  bool init(const ailego::JsonObject &config) override;
+
+  std::vector<Token> tokenize(const std::string &text) const override;
+
+  const char *name() const override {
+    return "jieba";
+  }
+
+  bool is_valid() const {
+    return jieba_ != nullptr;
+  }
+
+  // Move-only (unique_ptr member)
+  JiebaTokenizer(JiebaTokenizer &&) = default;
+  JiebaTokenizer &operator=(JiebaTokenizer &&) = default;
+
+ private:
+  enum class CutMode { kSearch, kMix, kFull, kHmm };
+
+  std::unique_ptr<cppjieba::Jieba> jieba_;
+  CutMode cut_mode_{CutMode::kSearch};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/standard_tokenizer.cc b/src/db/index/column/fts_column/tokenizer/standard_tokenizer.cc
new file mode 100644
index 000000000..122d9878b
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/standard_tokenizer.cc
@@ -0,0 +1,76 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "standard_tokenizer.h"
+#include <cctype>
+
+namespace zvec::fts {
+
+bool StandardTokenizer::init(const ailego::JsonObject &config) {
+  // Read optional max_token_length; keep default (255) if not present or
+  // if the provided value is zero.
+  auto length_val = config["max_token_length"];
+  if (length_val.is_integer()) {
+    uint32_t configured_length = static_cast<uint32_t>(length_val.as_integer());
+    if (configured_length > 0) {
+      max_token_length_ = configured_length;
+    }
+  }
+  return true;
+}
+
+std::vector<Token> StandardTokenizer::tokenize(const std::string &text) const {
+  std::vector<Token> tokens;
+  uint32_t position = 0;
+  size_t index = 0;
+  const size_t text_length = text.size();
+
+  while (index < text_length) {
+    // Skip non-alphanumeric characters (delimiters / punctuation).
+    while (index < text_length &&
+           !std::isalnum(static_cast<unsigned char>(text[index]))) {
+      ++index;
+    }
+    if (index >= text_length) {
+      break;
+    }
+
+    // Mark the start of an alphanumeric run.
+    const uint32_t token_start = static_cast<uint32_t>(index);
+
+    // Advance to the end of the alphanumeric run.
+    while (index < text_length &&
+           std::isalnum(static_cast<unsigned char>(text[index]))) {
+      ++index;
+    }
+
+    const uint32_t token_length = static_cast<uint32_t>(index) - token_start;
+
+    // Discard tokens that exceed the configured length limit.
+    if (token_length > max_token_length_) {
+      ++position;
+      continue;
+    }
+
+    Token token;
+    token.text = text.substr(token_start, token_length);
+    token.offset = token_start;
+    token.position = position++;
+    tokens.push_back(std::move(token));
+  }
+
+  return tokens;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/standard_tokenizer.h b/src/db/index/column/fts_column/tokenizer/standard_tokenizer.h
new file mode 100644
index 000000000..48a3c25e7
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/standard_tokenizer.h
@@ -0,0 +1,48 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include "tokenizer.h"
+
+namespace zvec::fts {
+
+/*! Standard tokenizer
+ *  Splits text on non-alphanumeric characters (punctuation, whitespace, etc.)
+ *  and discards the delimiters. Produces lowercase-ready tokens composed of
+ *  letters and digits only.
+ */
+class StandardTokenizer : public Tokenizer {
+ public:
+  /*! Initialise from JSON config.
+   *  Supported keys:
+   *    "max_token_length" (uint32, default 255): tokens longer than this limit
+   *      are silently discarded.
+   *  Always returns true.
+   */
+  bool init(const ailego::JsonObject &config) override;
+
+  std::vector<Token> tokenize(const std::string &text) const override;
+
+  const char *name() const override {
+    return "standard";
+  }
+
+ private:
+  // Tokens whose byte length exceeds this value are discarded.
+  uint32_t max_token_length_{255};
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/token_filter.cc b/src/db/index/column/fts_column/tokenizer/token_filter.cc
new file mode 100644
index 000000000..ffcb9b961
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/token_filter.cc
@@ -0,0 +1,32 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "token_filter.h"
+#include <algorithm>
+#include <cctype>
+
+namespace zvec::fts {
+
+std::vector<Token> LowercaseTokenFilter::filter(
+    std::vector<Token> tokens) const {
+  for (auto &token : tokens) {
+    std::transform(token.text.begin(), token.text.end(), token.text.begin(),
+                   [](unsigned char character) {
+                     return static_cast<char>(std::tolower(character));
+                   });
+  }
+  return tokens;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/token_filter.h b/src/db/index/column/fts_column/tokenizer/token_filter.h
new file mode 100644
index 000000000..ce11fbe14
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/token_filter.h
@@ -0,0 +1,57 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "tokenizer.h"
+
+namespace zvec::fts {
+
+/*! Token Filter abstract interface
+ *  Post-process tokenization results, such as case conversion, stopword
+ * filtering, etc.
+ */
+class TokenFilter {
+ public:
+  virtual ~TokenFilter() = default;
+
+  /*! Filter/transform a list of tokens.
+   *  \param tokens  input token list (may be modified in place)
+   *  \return        processed token list
+   */
+  virtual std::vector<Token> filter(std::vector<Token> tokens) const = 0;
+
+  /*! Return filter name
+   */
+  virtual const char *name() const = 0;
+};
+
+using TokenFilterPtr = std::shared_ptr<TokenFilter>;
+
+/*! Lowercase Token Filter
+ *  Convert all token text to lowercase (only handles ASCII characters)
+ */
+class LowercaseTokenFilter : public TokenFilter {
+ public:
+  std::vector<Token> filter(std::vector<Token> tokens) const override;
+
+  const char *name() const override {
+    return "lowercase";
+  }
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/tokenizer.h b/src/db/index/column/fts_column/tokenizer/tokenizer.h
new file mode 100644
index 000000000..efc7906fa
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/tokenizer.h
@@ -0,0 +1,64 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+#include <zvec/ailego/encoding/json/mod_json_plus.h>
+
+namespace zvec::fts {
+
+/*! A single token in the tokenization result
+ */
+struct Token {
+  // token text content
+  std::string text;
+  // start byte offset of token in original text
+  uint32_t offset{0};
+  // token position in document (which word, starting from 0)
+  uint32_t position{0};
+};
+
+/*! Abstract tokenizer interface
+ *  All tokenizer implementations must inherit from this interface
+ */
+class Tokenizer {
+ public:
+  virtual ~Tokenizer() = default;
+
+  /*! Initialise the tokenizer from a JSON configuration object.
+   *  Must be called once before tokenize().
+   *  \param config  JSON object containing tokenizer-specific parameters.
+   *  \return        true on success, false on failure.
+   */
+  virtual bool init(const ailego::JsonObject &config) = 0;
+
+  /*! Tokenize input text
+   *  \param text  UTF-8 encoded input text
+   *  \return      Tokenization result list, sorted by position in ascending
+   * order
+   */
+  virtual std::vector<Token> tokenize(const std::string &text) const = 0;
+
+  /*! Return tokenizer name
+   */
+  virtual const char *name() const = 0;
+};
+
+using TokenizerPtr = std::shared_ptr<Tokenizer>;
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc b/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc
new file mode 100644
index 000000000..d9dbf564c
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/tokenizer_factory.cc
@@ -0,0 +1,105 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tokenizer_factory.h"
+#include <zvec/ailego/encoding/json/mod_json_plus.h>
+#include <zvec/ailego/logger/logger.h>
+#include "cppjieba/Jieba.hpp"
+#include "jieba_tokenizer.h"
+#include "standard_tokenizer.h"
+#include "whitespace_tokenizer.h"
+
+namespace zvec::fts {
+
+TokenizerPipelinePtr TokenizerFactory::create(const FtsIndexParams &params) {
+  // Parse extra_params JSON string into a JsonObject.
+  // Empty string is treated as an empty object; malformed JSON fails.
+  ailego::JsonObject extra_json;
+  if (!params.extra_params.empty()) {
+    ailego::JsonValue parsed;
+    if (!parsed.parse(params.extra_params.c_str())) {
+      LOG_ERROR("[TokenizerFactory] failed to parse extra_params JSON: %s",
+                params.extra_params.c_str());
+      return nullptr;
+    }
+    if (!parsed.is_object()) {
+      LOG_ERROR("[TokenizerFactory] extra_params is not a JSON object: %s",
+                params.extra_params.c_str());
+      return nullptr;
+    }
+    extra_json = parsed.as_object();
+  }
+
+  TokenizerPtr tokenizer = create_tokenizer(params.tokenizer_name, extra_json);
+  if (!tokenizer) {
+    LOG_ERROR("[TokenizerFactory] failed to create tokenizer: %s",
+              params.tokenizer_name.c_str());
+    return nullptr;
+  }
+
+  std::vector<TokenFilterPtr> filters;
+  for (const auto &filter_name : params.filters) {
+    TokenFilterPtr filter = create_filter(filter_name);
+    if (!filter) {
+      LOG_ERROR("[TokenizerFactory] failed to create filter: %s",
+                filter_name.c_str());
+      return nullptr;
+    }
+    filters.push_back(std::move(filter));
+  }
+
+  return std::make_shared<TokenizerPipeline>(std::move(tokenizer),
+                                             std::move(filters));
+}
+
+std::vector<Token> TokenizerPipeline::process(const std::string &text) const {
+  std::vector<Token> tokens = tokenizer_->tokenize(text);
+  for (const auto &filter : filters_) {
+    tokens = filter->filter(std::move(tokens));
+  }
+  return tokens;
+}
+
+TokenizerPtr TokenizerFactory::create_tokenizer(
+    const std::string &tokenizer_name, const ailego::JsonObject &extra_json) {
+  TokenizerPtr tokenizer;
+  if (tokenizer_name.empty() || tokenizer_name == "standard") {
+    tokenizer = std::make_shared<StandardTokenizer>();
+  } else if (tokenizer_name == "jieba") {
+    tokenizer = std::make_shared<JiebaTokenizer>();
+  } else if (tokenizer_name == "whitespace") {
+    tokenizer = std::make_shared<WhitespaceTokenizer>();
+  } else {
+    LOG_ERROR("[TokenizerFactory] unknown tokenizer name: %s",
+              tokenizer_name.c_str());
+    return nullptr;
+  }
+
+  if (!tokenizer->init(extra_json)) {
+    LOG_ERROR("[TokenizerFactory] failed to init tokenizer: %s",
+              tokenizer_name.c_str());
+    return nullptr;
+  }
+  return tokenizer;
+}
+
+TokenFilterPtr TokenizerFactory::create_filter(const std::string &filter_name) {
+  if (filter_name == "lowercase") {
+    return std::make_shared<LowercaseTokenFilter>();
+  }
+  LOG_ERROR("[TokenizerFactory] unknown filter name: %s", filter_name.c_str());
+  return nullptr;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/tokenizer_factory.h b/src/db/index/column/fts_column/tokenizer/tokenizer_factory.h
new file mode 100644
index 000000000..f118f8e1a
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/tokenizer_factory.h
@@ -0,0 +1,64 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "token_filter.h"
+#include "tokenizer.h"
+#include "../fts_types.h"
+
+namespace zvec::fts {
+
+/*! Tokenizer pipeline: contains one tokenizer and a set of token filters
+ *  Execution order: tokenizer → filter[0] → filter[1] → ...
+ */
+class TokenizerPipeline {
+ public:
+  TokenizerPipeline(TokenizerPtr tokenizer, std::vector<TokenFilterPtr> filters)
+      : tokenizer_(std::move(tokenizer)), filters_(std::move(filters)) {}
+
+  /*! Tokenize text and apply all filters
+   */
+  std::vector<Token> process(const std::string &text) const;
+
+ private:
+  TokenizerPtr tokenizer_;
+  std::vector<TokenFilterPtr> filters_;
+};
+
+using TokenizerPipelinePtr = std::shared_ptr<TokenizerPipeline>;
+
+/*! Tokenizer factory
+ *  Create TokenizerPipeline based on FtsIndexParams configuration.
+ */
+class TokenizerFactory {
+ public:
+  /*! Create tokenizer pipeline from FtsIndexParams.
+   *  \param params  FTS index parameters containing tokenizer_name, filters,
+   *                 and extra_params (JSON string for tokenizer-specific
+   *                 configuration).
+   *  \return        Tokenizer pipeline, returns nullptr on failure
+   */
+  static TokenizerPipelinePtr create(const FtsIndexParams &params);
+
+ private:
+  static TokenizerPtr create_tokenizer(const std::string &tokenizer_name,
+                                       const ailego::JsonObject &extra_json);
+  static TokenFilterPtr create_filter(const std::string &filter_name);
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.cc b/src/db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.cc
new file mode 100644
index 000000000..b3261319d
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.cc
@@ -0,0 +1,124 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tokenizer_pipeline_manager.h"
+#include <mutex>
+#include <shared_mutex>
+#include <zvec/ailego/logger/logger.h>
+
+namespace zvec::fts {
+
+// ============================================================
+// Key generation
+// ============================================================
+
+std::string TokenizerPipelineManager::make_key(const FtsIndexParams &params) {
+  // Build a stable cache key from the three FtsIndexParams fields.
+  // Format: "tokenizer_name|filter0,filter1,...|extra_params_json"
+  std::string key;
+  key += params.tokenizer_name;
+  key += "|";
+  for (size_t i = 0; i < params.filters.size(); ++i) {
+    if (i > 0) {
+      key += ",";
+    }
+    key += params.filters[i];
+  }
+  key += "|";
+  key += params.extra_params;
+  return key;
+}
+
+// ============================================================
+// acquire
+// ============================================================
+
+TokenizerPipelinePtr TokenizerPipelineManager::acquire(
+    const FtsIndexParams &params) {
+  const std::string key = make_key(params);
+
+  // Fast path: pipeline already exists.
+  {
+    std::unique_lock<std::shared_mutex> lock(mutex_);
+    auto it = pipelines_.find(key);
+    if (it != pipelines_.end()) {
+      it->second.ref_count++;
+      LOG_DEBUG(
+          "TokenizerPipelineManager: reuse pipeline key[%s] ref_count[%d]",
+          key.c_str(), it->second.ref_count);
+      return it->second.pipeline;
+    }
+  }
+
+  // Create the pipeline outside of the lock to avoid blocking other
+  // acquire/release calls during the (potentially expensive) construction.
+  TokenizerPipelinePtr pipeline = TokenizerFactory::create(params);
+  if (!pipeline) {
+    LOG_ERROR(
+        "TokenizerPipelineManager: failed to create pipeline for "
+        "tokenizer[%s] key[%s]",
+        params.tokenizer_name.c_str(), key.c_str());
+    return nullptr;
+  }
+
+  // Re-acquire the lock and check whether another thread has already
+  // created a pipeline with the same key while we were constructing ours.
+  std::unique_lock<std::shared_mutex> lock(mutex_);
+  auto it = pipelines_.find(key);
+  if (it != pipelines_.end()) {
+    it->second.ref_count++;
+    LOG_DEBUG(
+        "TokenizerPipelineManager: another thread created pipeline first, "
+        "discard newly created one. key[%s] ref_count[%d]",
+        key.c_str(), it->second.ref_count);
+    return it->second.pipeline;
+  }
+
+  Entry entry;
+  entry.pipeline = pipeline;
+  entry.ref_count = 1;
+  pipelines_.emplace(key, std::move(entry));
+
+  LOG_DEBUG("TokenizerPipelineManager: created pipeline key[%s]", key.c_str());
+  return pipeline;
+}
+
+// ============================================================
+// release
+// ============================================================
+
+void TokenizerPipelineManager::release(const FtsIndexParams &params) {
+  const std::string key = make_key(params);
+
+  std::unique_lock<std::shared_mutex> lock(mutex_);
+
+  auto it = pipelines_.find(key);
+  if (it == pipelines_.end()) {
+    LOG_WARN("TokenizerPipelineManager: release called for unknown key[%s]",
+             key.c_str());
+    return;
+  }
+
+  it->second.ref_count--;
+  LOG_DEBUG("TokenizerPipelineManager: release key[%s] ref_count[%d]",
+            key.c_str(), it->second.ref_count);
+
+  if (it->second.ref_count <= 0) {
+    pipelines_.erase(it);
+    LOG_DEBUG("TokenizerPipelineManager: destroyed pipeline key[%s]",
+              key.c_str());
+  }
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.h b/src/db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.h
new file mode 100644
index 000000000..9c975a062
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.h
@@ -0,0 +1,88 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <shared_mutex>
+#include <string>
+#include <unordered_map>
+#include <zvec/ailego/pattern/singleton.h>
+#include "tokenizer_factory.h"
+
+namespace zvec::fts {
+
+/*!
+ * TokenizerPipelineManager
+ *
+ * Global singleton that creates, caches and reference-counts
+ * TokenizerPipeline instances.  Two callers that request a pipeline with
+ * the same FtsIndexParams will receive the same shared_ptr, and the
+ * underlying pipeline is destroyed only when the last caller releases it.
+ *
+ * The cache key is built from tokenizer_name, filters and extra_params
+ * fields of FtsIndexParams, producing a deterministic string.
+ *
+ * Thread-safety: all public methods are protected by a std::shared_mutex.
+ * acquire() and release() take an exclusive (write) lock; the map itself is
+ * never read concurrently with a write.
+ */
+class TokenizerPipelineManager
+    : public ailego::Singleton<TokenizerPipelineManager> {
+ public:
+  /*!
+   * Build a canonical cache key from the given FtsIndexParams.
+   * The key is deterministic: tokenizer_name + sorted filters + extra_params.
+   *
+   * \param params  FTS index parameters
+   * \return        Canonical string key
+   */
+  static std::string make_key(const FtsIndexParams &params);
+
+  /*!
+   * Acquire a shared pipeline for the given configuration.
+   * If a pipeline with the same key already exists its reference count is
+   * incremented and the existing instance is returned.  Otherwise a new
+   * pipeline is created via TokenizerFactory::create().
+   *
+   * \param params  FTS index parameters
+   * \return        Shared pipeline pointer, or nullptr on failure
+   */
+  TokenizerPipelinePtr acquire(const FtsIndexParams &params);
+
+  /*!
+   * Release a previously acquired pipeline identified by its FtsIndexParams.
+   * Decrements the reference count; when it reaches zero the entry is
+   * removed from the map and the pipeline is destroyed.
+   *
+   * \param params  Same FtsIndexParams used during acquire()
+   */
+  void release(const FtsIndexParams &params);
+
+ protected:
+  //! Constructor (protected, accessed via Singleton<T>::Instance())
+  TokenizerPipelineManager() = default;
+  friend class ailego::Singleton<TokenizerPipelineManager>;
+
+ private:
+  //! Internal map entry
+  struct Entry {
+    TokenizerPipelinePtr pipeline;
+    int ref_count{0};
+  };
+
+  std::shared_mutex mutex_;
+  std::unordered_map<std::string, Entry> pipelines_;
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/whitespace_tokenizer.cc b/src/db/index/column/fts_column/tokenizer/whitespace_tokenizer.cc
new file mode 100644
index 000000000..aad42fc7d
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/whitespace_tokenizer.cc
@@ -0,0 +1,56 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "whitespace_tokenizer.h"
+#include <cctype>
+
+namespace zvec::fts {
+
+std::vector<Token> WhitespaceTokenizer::tokenize(
+    const std::string &text) const {
+  std::vector<Token> tokens;
+  uint32_t position = 0;
+  size_t index = 0;
+  const size_t text_length = text.size();
+
+  while (index < text_length) {
+    // skip whitespace characters
+    while (index < text_length &&
+           std::isspace(static_cast<unsigned char>(text[index]))) {
+      ++index;
+    }
+    if (index >= text_length) {
+      break;
+    }
+
+    // find token start position
+    const uint32_t token_start = static_cast<uint32_t>(index);
+
+    // find token end position
+    while (index < text_length &&
+           !std::isspace(static_cast<unsigned char>(text[index]))) {
+      ++index;
+    }
+
+    Token token;
+    token.text = text.substr(token_start, index - token_start);
+    token.offset = token_start;
+    token.position = position++;
+    tokens.push_back(std::move(token));
+  }
+
+  return tokens;
+}
+
+}  // namespace zvec::fts
diff --git a/src/db/index/column/fts_column/tokenizer/whitespace_tokenizer.h b/src/db/index/column/fts_column/tokenizer/whitespace_tokenizer.h
new file mode 100644
index 000000000..e2668c671
--- /dev/null
+++ b/src/db/index/column/fts_column/tokenizer/whitespace_tokenizer.h
@@ -0,0 +1,39 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "tokenizer.h"
+
+namespace zvec::fts {
+
+/*! Whitespace tokenizer
+ *  Split text by whitespace characters (space, tab, newline, etc.), used as
+ * default tokenizer
+ */
+class WhitespaceTokenizer : public Tokenizer {
+ public:
+  // WhitespaceTokenizer requires no configuration; always succeeds.
+  bool init(const ailego::JsonObject & /*config*/) override {
+    return true;
+  }
+
+  std::vector<Token> tokenize(const std::string &text) const override;
+
+  const char *name() const override {
+    return "whitespace";
+  }
+};
+
+}  // namespace zvec::fts
diff --git a/src/db/index/common/doc.cc b/src/db/index/common/doc.cc
index 0405eac1d..a29737d8b 100644
--- a/src/db/index/common/doc.cc
+++ b/src/db/index/common/doc.cc
@@ -1281,21 +1281,45 @@ Status VectorQuery::validate_and_sanitize(const FieldSchema *schema) {
         kMaxOutputFieldSize);
   }
 
+  // Mutual exclusion: fts_query_ and vector fields cannot be set together.
+  if (fts_query_.has_value()) {
+    if (!query_vector_.empty() || !query_sparse_indices_.empty()) {
+      return Status::InvalidArgument(
+          "Invalid query: fts_query and vector query fields "
+          "(query_vector/query_sparse_indices) are mutually exclusive");
+    }
+  }
+
   if (schema == nullptr) {
+    if (fts_query_.has_value()) {
+      // FTS query requires a valid field_name_ that resolves to an FTS field.
+      return Status::InvalidArgument(
+          "Invalid query: fts_query requires a valid FTS field, but field[",
+          field_name_, "] does not exist in the collection");
+    }
     if (query_vector_.empty() && query_sparse_indices_.empty()) {
-      // Scalar-only filter query
+      // Scalar-only filter query (no field_name_ needed)
       return Status::OK();
-    } else {
-      // If a query vector was provided, the field must exist as a vector field
-      // since we are performing a vector similarity search.
+    }
+    // If a query vector was provided, the field must exist as a vector field.
+    return Status::InvalidArgument(
+        "Invalid query: query vector is provided, but query field[",
+        field_name_,
+        "] does not exist or is not a vector field in the collection");
+  }
+
+  // FTS query: field must be an FTS-indexed field.
+  if (fts_query_.has_value()) {
+    if (schema->index_type() != IndexType::FTS) {
       return Status::InvalidArgument(
-          "Invalid query: query vector is provided, but query field[",
-          field_name_,
-          "] does not exist or is not a vector field in the collection");
+          "Invalid query: fts_query requires an FTS-indexed field, but field[",
+          field_name_, "] has index type ",
+          IndexTypeCodeBook::AsString(schema->index_type()));
     }
+    return Status::OK();
   }
 
-  // Vector query
+  // Vector query: field must be a vector field.
   if (schema->is_dense_vector()) {
     // Validate dimension
     auto dim = schema->dimension();
diff --git a/src/db/index/common/index_params.cc b/src/db/index/common/index_params.cc
index cb06f0779..0d7315d15 100644
--- a/src/db/index/common/index_params.cc
+++ b/src/db/index/common/index_params.cc
@@ -12,8 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <mutex>
+#include <new>
 #include <sstream>
+#include <zvec/ailego/logger/logger.h>
 #include <zvec/db/index_params.h>
+#include "db/index/column/fts_column/fts_types.h"
+#include "db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.h"
 #include "type_helper.h"
 
 namespace zvec {
@@ -38,4 +43,70 @@ std::string VectorIndexParams::vector_index_params_to_string(
   return oss.str();
 }
 
+// ============================================================
+// FtsIndexParams — helpers
+// ============================================================
+
+static fts::FtsIndexParams to_internal(const FtsIndexParams &params) {
+  fts::FtsIndexParams p;
+  p.tokenizer_name = params.tokenizer_name();
+  p.filters = params.filters();
+  p.extra_params = params.extra_params();
+  return p;
+}
+
+// ============================================================
+// FtsIndexParams — destructor
+// ============================================================
+
+FtsIndexParams::~FtsIndexParams() {
+  if (pipeline_created_) {
+    auto internal = to_internal(*this);
+    fts::TokenizerPipelineManager::Instance().release(internal);
+  }
+}
+
+// ============================================================
+// FtsIndexParams — move semantics
+// ============================================================
+
+FtsIndexParams::FtsIndexParams(FtsIndexParams &&other) noexcept
+    : IndexParams(IndexType::FTS),
+      tokenizer_name_(std::move(other.tokenizer_name_)),
+      filters_(std::move(other.filters_)),
+      extra_params_(std::move(other.extra_params_)),
+      pipeline_(std::move(other.pipeline_)),
+      pipeline_created_(other.pipeline_created_) {
+  other.pipeline_created_ = false;
+  other.pipeline_.reset();
+  // std::once_flag is not movable; default-initialise ours (already done by
+  // the member initialiser) and leave other's in a valid but used state.
+  // If the source had already called create_pipeline(), we inherit the
+  // cached result.  If not, our fresh once_flag will allow a future call.
+  if (pipeline_created_) {
+    // Mark our once_flag as "already called" by running a no-op through it.
+    std::call_once(pipeline_once_, [] {});
+  }
+}
+
+
+// ============================================================
+// FtsIndexParams — create_pipeline
+// ============================================================
+
+Result<FtsIndexParams::PipelinePtr> FtsIndexParams::create_pipeline() {
+  std::call_once(pipeline_once_, [this]() {
+    auto internal = to_internal(*this);
+    pipeline_ = fts::TokenizerPipelineManager::Instance().acquire(internal);
+    if (pipeline_) {
+      pipeline_created_ = true;
+    }
+  });
+  if (!pipeline_) {
+    return tl::make_unexpected(
+        Status::InternalError("Failed to create tokenizer pipeline"));
+  }
+  return pipeline_;
+}
+
 }  // namespace zvec
\ No newline at end of file
diff --git a/src/db/index/common/proto_converter.cc b/src/db/index/common/proto_converter.cc
index d58dc1897..109a09fe0 100644
--- a/src/db/index/common/proto_converter.cc
+++ b/src/db/index/common/proto_converter.cc
@@ -144,6 +144,28 @@ proto::InvertIndexParams ProtoConverter::ToPb(const InvertIndexParams *params) {
   return params_pb;
 }
 
+// FtsIndexParams
+FtsIndexParams::Ptr ProtoConverter::FromPb(
+    const proto::FtsIndexParams &params_pb) {
+  std::vector<std::string> filters;
+  filters.reserve(params_pb.filters_size());
+  for (const auto &filter : params_pb.filters()) {
+    filters.push_back(filter);
+  }
+  return std::make_shared<FtsIndexParams>(
+      params_pb.tokenizer_name(), std::move(filters), params_pb.extra_params());
+}
+
+proto::FtsIndexParams ProtoConverter::ToPb(const FtsIndexParams *params) {
+  proto::FtsIndexParams params_pb;
+  params_pb.set_tokenizer_name(params->tokenizer_name());
+  for (const auto &filter : params->filters()) {
+    params_pb.add_filters(filter);
+  }
+  params_pb.set_extra_params(params->extra_params());
+  return params_pb;
+}
+
 // FieldSchema
 FieldSchema::Ptr ProtoConverter::FromPb(const proto::FieldSchema &schema_pb) {
   auto schema = std::make_shared<FieldSchema>();
@@ -215,6 +237,8 @@ IndexParams::Ptr ProtoConverter::FromPb(const proto::IndexParams &params_pb) {
     return ProtoConverter::FromPb(params_pb.hnsw_rabitq());
   } else if (params_pb.has_vamana()) {
     return ProtoConverter::FromPb(params_pb.vamana());
+  } else if (params_pb.has_fts()) {
+    return ProtoConverter::FromPb(params_pb.fts());
   }
 
   return nullptr;
@@ -286,6 +310,13 @@ proto::IndexParams ProtoConverter::ToPb(const IndexParams *params) {
       }
       break;
     }
+    case IndexType::FTS: {
+      auto fts_params = dynamic_cast<const FtsIndexParams *>(params);
+      if (fts_params) {
+        params_pb.mutable_fts()->CopyFrom(ProtoConverter::ToPb(fts_params));
+      }
+      break;
+    }
     default:
       break;
   }
diff --git a/src/db/index/common/proto_converter.h b/src/db/index/common/proto_converter.h
index 362f95047..4850bac9c 100644
--- a/src/db/index/common/proto_converter.h
+++ b/src/db/index/common/proto_converter.h
@@ -48,6 +48,10 @@ struct ProtoConverter {
       const proto::InvertIndexParams &params_pb);
   static proto::InvertIndexParams ToPb(const InvertIndexParams *params);
 
+  // FtsIndexParams
+  static FtsIndexParams::Ptr FromPb(const proto::FtsIndexParams &params_pb);
+  static proto::FtsIndexParams ToPb(const FtsIndexParams *params);
+
   // IndexParams
   static IndexParams::Ptr FromPb(const proto::IndexParams &params_pb);
   static proto::IndexParams ToPb(const IndexParams *params);
diff --git a/src/db/index/common/schema.cc b/src/db/index/common/schema.cc
index 1236f5fc2..d0716eb78 100644
--- a/src/db/index/common/schema.cc
+++ b/src/db/index/common/schema.cc
@@ -549,6 +549,25 @@ FieldSchemaPtrList CollectionSchema::vector_fields() const {
   return vector_fields;
 }
 
+bool CollectionSchema::has_fts_field() const {
+  for (const auto &field : fields_) {
+    if (field->index_type() == IndexType::FTS) {
+      return true;
+    }
+  }
+  return false;
+}
+
+FieldSchemaPtrList CollectionSchema::fts_fields() const {
+  FieldSchemaPtrList fts;
+  for (const auto &field : fields_) {
+    if (field->index_type() == IndexType::FTS) {
+      fts.push_back(field);
+    }
+  }
+  return fts;
+}
+
 uint64_t CollectionSchema::max_doc_count_per_segment() const {
   return max_doc_count_per_segment_;
 }
diff --git a/src/db/index/common/type_helper.h b/src/db/index/common/type_helper.h
index 02b7c0bad..0fe42d0c1 100644
--- a/src/db/index/common/type_helper.h
+++ b/src/db/index/common/type_helper.h
@@ -37,6 +37,8 @@ struct IndexTypeCodeBook {
         return IndexType::VAMANA;
       case proto::IT_INVERT:
         return IndexType::INVERT;
+      case proto::IT_FTS:
+        return IndexType::FTS;
       default:
         break;
     }
@@ -58,6 +60,8 @@ struct IndexTypeCodeBook {
         return proto::IT_VAMANA;
       case IndexType::INVERT:
         return proto::IT_INVERT;
+      case IndexType::FTS:
+        return proto::IT_FTS;
       default:
         break;
     }
@@ -79,6 +83,8 @@ struct IndexTypeCodeBook {
         return "VAMANA";
       case IndexType::INVERT:
         return "INVERT";
+      case IndexType::FTS:
+        return "FTS";
       default:
         break;
     }
diff --git a/src/db/index/segment/segment.cc b/src/db/index/segment/segment.cc
index 7d3b2a56b..2e0d9cdaf 100644
--- a/src/db/index/segment/segment.cc
+++ b/src/db/index/segment/segment.cc
@@ -44,6 +44,9 @@
 #include "db/common/file_helper.h"
 #include "db/common/global_resource.h"
 #include "db/common/typedef.h"
+#include "db/index/column/fts_column/fts_column_indexer.h"
+#include "db/index/column/fts_column/fts_rocksdb_merge.h"
+#include "db/index/column/fts_column/fts_types.h"
 #include "db/index/column/inverted_column/inverted_indexer.h"
 #include "db/index/column/vector_column/engine_helper.hpp"
 #include "db/index/column/vector_column/vector_column_indexer.h"
@@ -67,6 +70,7 @@
 
 namespace zvec {
 
+
 void global_init() {
   static std::once_flag once;
   // run once
@@ -156,6 +160,13 @@ class SegmentImpl : public Segment,
   InvertedColumnIndexer::Ptr get_scalar_indexer(
       const std::string &field_name) const override;
 
+  fts::FtsColumnIndexerPtr get_fts_indexer(
+      const std::string &field_name) const override;
+
+  Result<std::vector<fts::FtsResult>> fts_search(
+      const std::string &field_name, const fts::FtsAstNode &ast,
+      const fts::FtsQueryParams &params) override;
+
   const IndexFilter::Ptr get_filter() override;
 
   Status create_all_vector_index(
@@ -275,6 +286,7 @@ class SegmentImpl : public Segment,
       const vector_column_params::VectorDataBuffer &buf, Doc *doc);
 
   Status insert_scalar_indexer(Doc &doc);
+  Status insert_fts_indexer(Doc &doc);
   Status insert_vector_indexer(Doc &doc);
   Status internal_insert(Doc &doc);
   Status internal_update(Doc &doc);
@@ -294,6 +306,12 @@ class SegmentImpl : public Segment,
 
   Status reopen_invert_indexer(bool read_only = false);
 
+  // FTS helpers
+  Status open_fts_indexers(bool create);
+  Status close_fts_indexers();
+  Status flush_fts_indexers();
+  Status dump_fts_indexers();
+
   Status insert_array_to_invert_indexer(
       const FieldSchema::Ptr &schema,
       const std::shared_ptr<arrow::ChunkedArray> &data,
@@ -318,6 +336,11 @@ class SegmentImpl : public Segment,
   // scalar index (uses segment-local doc ID)
   InvertedIndexer::Ptr invert_indexers_;
 
+  // FTS index (uses segment-local doc ID)
+  std::shared_ptr<RocksdbContext> fts_ctx_;
+  std::unordered_map<std::string, fts::FtsColumnIndexerPtr> fts_indexers_;
+  bool has_fts_{false};
+
   // vector index (uses block-local doc ID, each indexer starts from 0)
   std::unordered_map<std::string, VectorColumnIndexer::Ptr>
       memory_vector_indexers_;
@@ -443,6 +466,10 @@ Status SegmentImpl::Open(const SegmentOptions &options) {
   s = load_scalar_index_blocks();
   CHECK_RETURN_STATUS(s);
 
+  // load FTS indexes
+  s = open_fts_indexers(false);
+  CHECK_RETURN_STATUS(s);
+
   // load vector indexes
   s = load_vector_index_blocks();
   CHECK_RETURN_STATUS(s);
@@ -506,6 +533,9 @@ Status SegmentImpl::Create(const SegmentOptions &options, uint64_t min_doc_id) {
   auto s = load_scalar_index_blocks(true);
   CHECK_RETURN_STATUS(s);
 
+  s = open_fts_indexers(true);
+  CHECK_RETURN_STATUS(s);
+
   doc_id_allocator_.store(min_doc_id);
 
   return Status::OK();
@@ -516,6 +546,7 @@ Status SegmentImpl::close() {
   if (invert_indexers_) {
     invert_indexers_.reset();
   }
+  close_fts_indexers();
   for (const auto &[name, indexers] : vector_indexers_) {
     for (auto indexer : indexers) {
       indexer->Close();
@@ -814,6 +845,9 @@ Status SegmentImpl::internal_insert(Doc &doc) {
   if (!s.ok() && s.code() != StatusCode::ALREADY_EXISTS) {
     return s;
   }
+  // write FTS index
+  s = insert_fts_indexer(doc);
+  CHECK_RETURN_STATUS(s);
   // write vector index
   s = insert_vector_indexer(doc);
   if (!s.ok() && s != Status::AlreadyExists()) {
@@ -2191,6 +2225,9 @@ Status SegmentImpl::dump() {
     CHECK_RETURN_STATUS(s);
   }
 
+  s = dump_fts_indexers();
+  CHECK_RETURN_STATUS(s);
+
   sealed_ = true;
 
   return Status::OK();
@@ -2223,6 +2260,12 @@ Status SegmentImpl::flush() {
     CHECK_RETURN_STATUS(s);
   }
 
+  // flush FTS indexers
+  if (has_fts_) {
+    s = flush_fts_indexers();
+    CHECK_RETURN_STATUS(s);
+  }
+
   // flush vector indexer
   for (const auto &indexer : memory_vector_indexers_) {
     if (indexer.second) {
@@ -4462,4 +4505,240 @@ Result<Segment::Ptr> Segment::Open(const std::string &path,
   return segment;
 }
 
+////////////////////////////////////////////////////////////////////////////////////
+// FTS integration
+////////////////////////////////////////////////////////////////////////////////////
+
+Status SegmentImpl::open_fts_indexers(bool create) {
+  if (!collection_schema_->has_fts_field()) {
+    return Status::OK();
+  }
+
+  auto fts_fields = collection_schema_->fts_fields();
+  has_fts_ = true;
+
+  auto fts_path = FileHelper::MakeFtsIndexPath(seg_path_);
+
+  // Collect CF names and per-CF merge operators
+  std::vector<std::string> cf_names;
+  std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+      per_cf_merge_ops;
+
+  for (const auto &field : fts_fields) {
+    const auto &name = field->name();
+    cf_names.push_back(name);                        // postings
+    cf_names.push_back(name + kFtsPositionsSuffix);  // positions
+
+    per_cf_merge_ops[name] = std::make_shared<fts::FtsPostingsMerge>();
+
+    // Side CFs (_tf / _max_tf / _doc_len) are present in mutable segments
+    // that have not yet been dumped.  After dump,
+    // convert_postings_to_bitpacked() inlines their payloads into BitPacked
+    // postings and the CFs are dropped.
+    //
+    // When opening an existing segment (create=false), we always include the
+    // side CF names so that segments closed without dump (e.g. graceful
+    // shutdown with only flush) can still perform accurate BM25 scoring via
+    // the Roaring posting path.  If the CFs were already dropped (post-dump
+    // immutable segment), the open will fail and we retry without them.
+    if (create) {
+      cf_names.push_back(name + kFtsTfSuffix);
+      cf_names.push_back(name + kFtsMaxTfSuffix);
+      cf_names.push_back(name + kFtsDocLenSuffix);
+      per_cf_merge_ops[name + kFtsMaxTfSuffix] =
+          std::make_shared<fts::FtsMaxTfMerge>();
+    }
+  }
+  cf_names.push_back(kFtsStatCfName);
+
+  fts_ctx_ = std::make_shared<RocksdbContext>();
+  Status s;
+
+  // Whether side CFs are available after open
+  bool has_side_cfs = create;
+
+  bool enable_hash_skiplist = true;
+  if (create) {
+    s = fts_ctx_->create(RocksdbContext::Args{
+        fts_path, cf_names, nullptr, per_cf_merge_ops, enable_hash_skiplist});
+  } else {
+    // Try opening with side CFs first (un-dumped mutable segment).
+    // If they don't exist (post-dump), retry without them.
+    std::vector<std::string> cf_names_with_side = cf_names;
+    auto per_cf_merge_ops_with_side = per_cf_merge_ops;
+    for (const auto &field : fts_fields) {
+      const auto &name = field->name();
+      cf_names_with_side.push_back(name + kFtsTfSuffix);
+      cf_names_with_side.push_back(name + kFtsMaxTfSuffix);
+      cf_names_with_side.push_back(name + kFtsDocLenSuffix);
+      per_cf_merge_ops_with_side[name + kFtsMaxTfSuffix] =
+          std::make_shared<fts::FtsMaxTfMerge>();
+    }
+    s = fts_ctx_->open(
+        RocksdbContext::Args{fts_path, cf_names_with_side, nullptr,
+                             per_cf_merge_ops_with_side, enable_hash_skiplist},
+        options_.read_only_);
+    if (s.ok()) {
+      has_side_cfs = true;
+    } else {
+      // Side CFs not found (immutable segment after dump) — retry without.
+      fts_ctx_ = std::make_shared<RocksdbContext>();
+      s = fts_ctx_->open(
+          RocksdbContext::Args{fts_path, cf_names, nullptr, per_cf_merge_ops},
+          options_.read_only_);
+    }
+  }
+  if (!s.ok()) {
+    LOG_ERROR("open_fts_indexers: failed to %s FTS RocksDB at [%s]: %s",
+              create ? "create" : "open", fts_path.c_str(),
+              s.message().c_str());
+    return s;
+  }
+
+  auto *stat_cf = fts_ctx_->get_cf(kFtsStatCfName);
+
+  for (const auto &field : fts_fields) {
+    const auto &name = field->name();
+    auto *postings_cf = fts_ctx_->get_cf(name);
+    auto *positions_cf = fts_ctx_->get_cf(name + kFtsPositionsSuffix);
+    // Side CF handles are available when the segment has not been dumped
+    // (side CFs still exist).  For dumped immutable segments the handles
+    // are nullptr and FtsColumnIndexer falls back to BitPacked inline
+    // payloads or tf=1/doc_len=1 defaults.
+    auto *term_freq_cf =
+        has_side_cfs ? fts_ctx_->get_cf(name + kFtsTfSuffix) : nullptr;
+    auto *max_tf_cf =
+        has_side_cfs ? fts_ctx_->get_cf(name + kFtsMaxTfSuffix) : nullptr;
+    auto *doc_len_cf =
+        has_side_cfs ? fts_ctx_->get_cf(name + kFtsDocLenSuffix) : nullptr;
+
+    auto indexer = std::make_shared<fts::FtsColumnIndexer>();
+
+    auto ret = indexer->open(field, fts_ctx_.get(), postings_cf, positions_cf,
+                             term_freq_cf, max_tf_cf, doc_len_cf, stat_cf);
+    if (!ret.has_value()) {
+      LOG_ERROR(
+          "open_fts_indexers: FtsColumnIndexer::open failed for field[%s] "
+          "err[%s] postings_cf[%p] positions_cf[%p] stat_cf[%p]",
+          name.c_str(), ret.error().message().c_str(), (void *)postings_cf,
+          (void *)positions_cf, (void *)stat_cf);
+      return Status::InternalError("Failed to open FTS indexer: ", name, " ",
+                                   ret.error().message());
+    }
+
+    fts_indexers_[name] = indexer;
+  }
+
+  return Status::OK();
+}
+
+Status SegmentImpl::flush_fts_indexers() {
+  for (const auto &[name, indexer] : fts_indexers_) {
+    auto ret = indexer->flush();
+    if (!ret.has_value()) {
+      return Status::InternalError("FTS flush failed: ", name, " ",
+                                   ret.error().message());
+    }
+  }
+  auto s = fts_ctx_->flush();
+  CHECK_RETURN_STATUS(s);
+  return Status::OK();
+}
+
+Status SegmentImpl::close_fts_indexers() {
+  fts_indexers_.clear();
+  if (fts_ctx_) {
+    auto s = fts_ctx_->close();
+    fts_ctx_.reset();
+    return s;
+  }
+  return Status::OK();
+}
+
+Status SegmentImpl::insert_fts_indexer(Doc &doc) {
+  if (!has_fts_) {
+    return Status::OK();
+  }
+  for (const auto &field : collection_schema_->fts_fields()) {
+    auto it = fts_indexers_.find(field->name());
+    if (it == fts_indexers_.end()) {
+      return Status::InternalError("FTS indexer not found: ", field->name());
+    }
+    auto value = doc.get<std::string>(field->name());
+    if (value.has_value()) {
+      auto segment_doc_id = doc_ids_.size();
+      auto ret = it->second->insert(segment_doc_id, value.value());
+      if (!ret.has_value()) {
+        return Status::InternalError("FTS insert failed: ", field->name(), " ",
+                                     ret.error().message());
+      }
+    }
+  }
+  return Status::OK();
+}
+
+Status SegmentImpl::dump_fts_indexers() {
+  if (!has_fts_) {
+    return Status::OK();
+  }
+
+  // flush all indexers
+  for (const auto &[name, indexer] : fts_indexers_) {
+    auto ret = indexer->flush();
+    if (!ret.has_value()) {
+      return Status::InternalError("FTS flush failed during dump: ", name, " ",
+                                   ret.error().message());
+    }
+  }
+
+  // convert postings to bitpacked format
+  for (const auto &[name, indexer] : fts_indexers_) {
+    auto ret = indexer->convert_postings_to_bitpacked();
+    if (!ret.has_value()) {
+      return Status::InternalError("FTS convert_postings_to_bitpacked failed: ",
+                                   name, " ", ret.error().message());
+    }
+  }
+
+  // reset side CFs and drop $TF/$MAX_TF/$DOC_LEN CFs
+  for (const auto &[name, indexer] : fts_indexers_) {
+    indexer->reset_side_cfs();
+  }
+  for (const auto &field : collection_schema_->fts_fields()) {
+    const auto &name = field->name();
+    fts_ctx_->drop_cf(name + kFtsTfSuffix);
+    fts_ctx_->drop_cf(name + kFtsMaxTfSuffix);
+    fts_ctx_->drop_cf(name + kFtsDocLenSuffix);
+  }
+
+  return Status::OK();
+}
+
+fts::FtsColumnIndexerPtr SegmentImpl::get_fts_indexer(
+    const std::string &field_name) const {
+  auto it = fts_indexers_.find(field_name);
+  if (it != fts_indexers_.end()) {
+    return it->second;
+  }
+  return nullptr;
+}
+
+Result<std::vector<fts::FtsResult>> SegmentImpl::fts_search(
+    const std::string &field_name, const fts::FtsAstNode &ast,
+    const fts::FtsQueryParams &params) {
+  auto indexer = get_fts_indexer(field_name);
+  if (!indexer) {
+    return tl::make_unexpected(
+        Status::NotFound("FTS indexer not found: ", field_name));
+  }
+
+  auto ret = indexer->search(ast, params);
+  if (!ret.has_value()) {
+    return tl::make_unexpected(Status::InternalError(
+        "FTS search failed: ", field_name, " ", ret.error().message()));
+  }
+
+  return std::move(ret.value());
+}
+
 }  // namespace zvec
\ No newline at end of file
diff --git a/src/db/index/segment/segment.h b/src/db/index/segment/segment.h
index 263463ea0..4f6ad316d 100644
--- a/src/db/index/segment/segment.h
+++ b/src/db/index/segment/segment.h
@@ -25,6 +25,7 @@
 #include <zvec/db/options.h>
 #include <zvec/db/schema.h>
 #include <zvec/db/status.h>
+#include "db/index/column/fts_column/fts_column_indexer.h"
 #include "db/index/column/inverted_column/inverted_column_indexer.h"
 #include "db/index/column/inverted_column/inverted_indexer.h"
 #include "db/index/column/vector_column/combined_vector_column_indexer.h"
@@ -169,6 +170,14 @@ class Segment {
   virtual InvertedColumnIndexer::Ptr get_scalar_indexer(
       const std::string &field_name) const = 0;
 
+  // caller hold segment shared_ptr for segment handle the indexer's lifetime
+  virtual fts::FtsColumnIndexerPtr get_fts_indexer(
+      const std::string &field_name) const = 0;
+
+  virtual Result<std::vector<fts::FtsResult>> fts_search(
+      const std::string &field_name, const fts::FtsAstNode &ast,
+      const fts::FtsQueryParams &params) = 0;
+
   virtual const IndexFilter::Ptr get_filter() = 0;
 
   // for others
diff --git a/src/db/proto/zvec.proto b/src/db/proto/zvec.proto
index 197914c76..e94d1d399 100644
--- a/src/db/proto/zvec.proto
+++ b/src/db/proto/zvec.proto
@@ -62,6 +62,8 @@ enum IndexType {
   IT_VAMANA = 5;
   // Invert Index
   IT_INVERT = 10;
+  // Full-Text Search Index
+  IT_FTS = 11;
 };
 
 enum QuantizeType {
@@ -131,6 +133,12 @@ message VamanaIndexParams {
   bool use_id_map = 7;
 }
 
+message FtsIndexParams {
+  string tokenizer_name = 1;
+  repeated string filters = 2;
+  string extra_params = 3;
+};
+
 message IndexParams {
   oneof params {
     InvertIndexParams invert = 1;
@@ -139,6 +147,7 @@ message IndexParams {
     IVFIndexParams ivf = 4;
     HnswRabitqIndexParams hnsw_rabitq = 5;
     VamanaIndexParams vamana = 6;
+    FtsIndexParams fts = 7;
   };
 };
 
diff --git a/src/db/sqlengine/analyzer/query_analyzer.cc b/src/db/sqlengine/analyzer/query_analyzer.cc
index 4d981370a..c4af8f366 100644
--- a/src/db/sqlengine/analyzer/query_analyzer.cc
+++ b/src/db/sqlengine/analyzer/query_analyzer.cc
@@ -400,6 +400,11 @@ Result<QueryInfo::Ptr> QueryAnalyzer::create_queryinfo_from_sqlinfo(
   // set group by
   query_info->set_group_by(select_info->group_by());
 
+  // set fts query
+  if (select_info->has_fts_query()) {
+    query_info->set_fts_cond_info(select_info->fts_cond_info());
+  }
+
   return query_info;
 }
 
diff --git a/src/db/sqlengine/analyzer/query_info.cc b/src/db/sqlengine/analyzer/query_info.cc
index f6f066312..3a506272c 100644
--- a/src/db/sqlengine/analyzer/query_info.cc
+++ b/src/db/sqlengine/analyzer/query_info.cc
@@ -85,6 +85,12 @@ std::string QueryInfo::to_string() const {
         ")\n");
   }
 
+  str += "fts_cond:\n";
+  if (fts_cond_info_ != nullptr) {
+    str += fts_cond_info_->to_string();
+    str += "\n";
+  }
+
   str += "filter_cond:\n";
   if (filter_cond_ != nullptr) {
     str += filter_cond_->text();
diff --git a/src/db/sqlengine/analyzer/query_info.h b/src/db/sqlengine/analyzer/query_info.h
index 653231a74..ad9b381fc 100644
--- a/src/db/sqlengine/analyzer/query_info.h
+++ b/src/db/sqlengine/analyzer/query_info.h
@@ -22,6 +22,7 @@
 #include <zvec/core/framework/index_meta.h>
 #include <zvec/db/schema.h>
 #include "db/common/constants.h"
+#include "db/sqlengine/common/fts_cond_info.h"
 #include "db/sqlengine/common/group_by.h"
 #include "query_field_info.h"
 #include "query_node.h"
@@ -125,6 +126,7 @@ class QueryInfo {
     bool reverse_sort_{false};
   };
 
+
  public:
   QueryInfo() = default;
   ~QueryInfo() = default;
@@ -161,6 +163,14 @@ class QueryInfo {
     return vector_cond_info_;
   }
 
+  void set_fts_cond_info(FtsCondInfo::Ptr value) {
+    fts_cond_info_ = std::move(value);
+  }
+
+  const FtsCondInfo::Ptr &fts_cond_info() const {
+    return fts_cond_info_;
+  }
+
   void set_query_topn(uint32_t value) {
     query_topn_ = value;
   }
@@ -340,6 +350,7 @@ class QueryInfo {
   QueryNode::Ptr filter_cond_{nullptr};
 
   QueryVectorCondInfo::Ptr vector_cond_info_{nullptr};
+  FtsCondInfo::Ptr fts_cond_info_{nullptr};
 
   // these two are for post filtering only
   QueryNode::Ptr post_invert_cond_{nullptr};
diff --git a/src/db/sqlengine/common/fts_cond_info.h b/src/db/sqlengine/common/fts_cond_info.h
new file mode 100644
index 000000000..17de4ad75
--- /dev/null
+++ b/src/db/sqlengine/common/fts_cond_info.h
@@ -0,0 +1,43 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include "db/index/column/fts_column/fts_query_ast.h"
+
+namespace zvec::sqlengine {
+
+struct FtsCondInfo {
+  using Ptr = std::shared_ptr<FtsCondInfo>;
+
+  FtsCondInfo() = default;
+
+  FtsCondInfo(std::string field_name, fts::FtsAstNodePtr ast)
+      : field_name(std::move(field_name)), fts_ast(std::move(ast)) {}
+
+  std::string to_string() const {
+    std::string str = field_name + " MATCH ";
+    if (fts_ast) {
+      str += fts_ast->text();
+    }
+    return str;
+  }
+
+  std::string field_name;
+  fts::FtsAstNodePtr fts_ast;
+};
+
+}  // namespace zvec::sqlengine
diff --git a/src/db/sqlengine/parser/select_info.cc b/src/db/sqlengine/parser/select_info.cc
index 87ac39975..c4bed19df 100644
--- a/src/db/sqlengine/parser/select_info.cc
+++ b/src/db/sqlengine/parser/select_info.cc
@@ -196,6 +196,11 @@ std::string SelectInfo::to_string() {
     str += "\n";
   }
 
+  if (fts_cond_info_ != nullptr) {
+    str += "fts_cond: " + fts_cond_info_->to_string();
+    str += "\n";
+  }
+
   return str;
 }
 
diff --git a/src/db/sqlengine/parser/select_info.h b/src/db/sqlengine/parser/select_info.h
index e1a312013..c393ef756 100644
--- a/src/db/sqlengine/parser/select_info.h
+++ b/src/db/sqlengine/parser/select_info.h
@@ -17,6 +17,7 @@
 #include <map>
 #include <memory>
 #include <vector>
+#include "db/sqlengine/common/fts_cond_info.h"
 #include "db/sqlengine/common/group_by.h"
 #include "base_info.h"
 #include "node.h"
@@ -69,6 +70,18 @@ class SelectInfo : public BaseInfo {
     return group_by_;
   }
 
+  void set_fts_cond_info(FtsCondInfo::Ptr value) {
+    fts_cond_info_ = std::move(value);
+  }
+
+  const FtsCondInfo::Ptr &fts_cond_info() const {
+    return fts_cond_info_;
+  }
+
+  bool has_fts_query() const {
+    return fts_cond_info_ != nullptr;
+  }
+
   std::string to_string();
 
  private:
@@ -82,6 +95,7 @@ class SelectInfo : public BaseInfo {
   int limit_{-1};
   bool include_vector_{false};
   bool include_doc_id_{false};
+  FtsCondInfo::Ptr fts_cond_info_{nullptr};
 };
 
 }  // namespace zvec::sqlengine
diff --git a/src/db/sqlengine/planner/doc_filter.cc b/src/db/sqlengine/planner/doc_filter.cc
index 756a1b972..0f44e6e97 100644
--- a/src/db/sqlengine/planner/doc_filter.cc
+++ b/src/db/sqlengine/planner/doc_filter.cc
@@ -17,7 +17,6 @@
 #include <arrow/acero/exec_plan.h>
 #include <arrow/table.h>
 #include <zvec/ailego/logger/logger.h>
-#include <zvec/db/config.h>
 #include "db/sqlengine/planner/invert_search.h"
 
 namespace zvec::sqlengine {
@@ -107,7 +106,8 @@ std::optional<bool> DocFilter::get_forward_bit(uint64_t id) const {
   return std::nullopt;
 }
 
-std::optional<std::vector<uint64_t>> DocFilter::get_bf_by_keys_and_update() {
+std::optional<std::vector<uint64_t>> DocFilter::get_bf_by_keys_and_update(
+    float ratio) {
   auto meta = segment_->meta();
   if (!meta) {
     return std::nullopt;
@@ -117,9 +117,7 @@ std::optional<std::vector<uint64_t>> DocFilter::get_bf_by_keys_and_update() {
     return std::nullopt;
   }
   size_t doc_count = meta->doc_count();
-  float brute_force_by_keys_ratio =
-      GlobalConfig::Instance().brute_force_by_keys_ratio();
-  uint64_t bf_by_keys_threshold = meta->doc_count() * brute_force_by_keys_ratio;
+  uint64_t bf_by_keys_threshold = static_cast<uint64_t>(doc_count * ratio);
 
   // decide to use brute force by keys or not
   if (size_t match_count = invert_result_->count();
@@ -128,13 +126,16 @@ std::optional<std::vector<uint64_t>> DocFilter::get_bf_by_keys_and_update() {
     invert_result_->extract_ids(&ids);
     invert_filter_.reset();
     invert_result_.reset();
-    LOG_INFO("Use brute force by keys, doc_count[%zu] invert_result_count[%zu]",
-             doc_count, match_count);
+    LOG_INFO(
+        "Use brute force by keys, doc_count[%zu] invert_result_count[%zu] "
+        "ratio[%.4f]",
+        doc_count, match_count, ratio);
     return std::vector<uint64_t>(ids.begin(), ids.end());
   } else {
     LOG_DEBUG(
-        "Not use brute force by keys, doc_count[%zu] invert_result_count[%zu]",
-        doc_count, match_count);
+        "Not use brute force by keys, doc_count[%zu] invert_result_count[%zu] "
+        "ratio[%.4f]",
+        doc_count, match_count, ratio);
   }
   return std::nullopt;
 }
diff --git a/src/db/sqlengine/planner/doc_filter.h b/src/db/sqlengine/planner/doc_filter.h
index b662a7425..7f4dffbd1 100644
--- a/src/db/sqlengine/planner/doc_filter.h
+++ b/src/db/sqlengine/planner/doc_filter.h
@@ -44,8 +44,11 @@ class DocFilter : public IndexFilter {
 
   bool is_filtered(uint64_t id) const override;
 
-  //! get brute force by keys and clear `invert_filter_` if suitable
-  std::optional<std::vector<uint64_t>> get_bf_by_keys_and_update();
+  //! When invert cardinality <= \p ratio * doc_count, extract the ids and
+  //! clear invert_filter_ so the caller drives evaluation by ids instead of
+  //! bitmap-checking. Ratio is per-caller (vector vs FTS use different
+  //! GlobalConfig knobs) because per-candidate cost differs.
+  std::optional<std::vector<uint64_t>> get_bf_by_keys_and_update(float ratio);
 
   bool empty() const;
 
diff --git a/src/db/sqlengine/planner/fts_recall_node.cc b/src/db/sqlengine/planner/fts_recall_node.cc
new file mode 100644
index 000000000..45313d9e0
--- /dev/null
+++ b/src/db/sqlengine/planner/fts_recall_node.cc
@@ -0,0 +1,143 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "db/sqlengine/planner/fts_recall_node.h"
+#include <arrow/api.h>
+#include <zvec/ailego/logger/logger.h>
+#include <zvec/db/config.h>
+#include "db/sqlengine/common/util.h"
+
+namespace cp = arrow::compute;
+
+namespace zvec::sqlengine {
+
+FtsRecallNode::FtsRecallNode(Segment::Ptr segment, QueryInfo::Ptr query_info,
+                             DocFilter::Ptr doc_filter, int batch_size)
+    : segment_(std::move(segment)),
+      query_info_(std::move(query_info)),
+      doc_filter_(std::move(doc_filter)),
+      fetched_columns_(query_info_->get_all_fetched_scalar_field_names()),
+      batch_size_(batch_size) {
+  auto table = segment_->fetch(fetched_columns_, std::vector<int>{});
+  // Append BM25 score column so downstream fill_doc_score() surfaces it to
+  // the Python Doc.score, matching the vector-recall path.
+  schema_ = Util::append_field(*table->schema(), kFieldScore, arrow::float32());
+}
+
+arrow::AsyncGenerator<std::optional<cp::ExecBatch>> FtsRecallNode::gen() {
+  auto state_ptr = std::make_shared<State>();
+  return [self = shared_from_this(), state_ptr = std::move(state_ptr)]()
+             -> arrow::Future<std::optional<cp::ExecBatch>> {
+    auto &state = *state_ptr;
+
+    if (!state.iter_) {
+      auto fts_ret = self->prepare();
+      if (!fts_ret) {
+        return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+            arrow::Status::ExecutionError("prepare fts failed:",
+                                          fts_ret.error().c_str()));
+      }
+      state.fts_result_ = fts_ret.value();
+      state.iter_ = state.fts_result_->create_iterator();
+    }
+
+    if (!state.iter_->valid()) {
+      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+          std::nullopt);
+    }
+
+    std::vector<int> indices;
+    indices.reserve(self->batch_size_);
+    arrow::FloatBuilder score_builder;
+    for (int i = 0; state.iter_->valid() && i < self->batch_size_;
+         i++, state.iter_->next()) {
+      indices.push_back(state.iter_->doc_id());
+      auto s = score_builder.Append(state.iter_->score());
+      if (!s.ok()) {
+        return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+            arrow::Status::ExecutionError("score builder append failed:",
+                                          s.ToString()));
+      }
+    }
+    if (indices.empty()) {
+      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+          std::nullopt);
+    }
+
+    auto table = self->segment_->fetch(self->fetched_columns_, indices);
+    if (!table) {
+      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+          arrow::Status::UnknownError("fetch table failed"));
+    }
+    auto batch = table->CombineChunksToBatch();
+    if (!batch.ok()) {
+      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+          arrow::Status::ExecutionError("combine chunks to batch failed:",
+                                        batch.status().ToString()));
+    }
+    auto score_array = score_builder.Finish();
+    if (!score_array.ok()) {
+      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+          arrow::Status::ExecutionError("finish score builder failed:",
+                                        score_array.status().ToString()));
+    }
+    auto record_batch = std::move(batch.ValueUnsafe());
+    auto with_score =
+        record_batch->AddColumn(record_batch->num_columns(), kFieldScore,
+                                score_array.MoveValueUnsafe());
+    if (!with_score.ok()) {
+      return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+          arrow::Status::ExecutionError("add score column failed:",
+                                        with_score.status().ToString()));
+    }
+    cp::ExecBatch exec_batch(*with_score.ValueUnsafe());
+    return arrow::Future<std::optional<cp::ExecBatch>>::MakeFinished(
+        std::move(exec_batch));
+  };
+}
+
+Result<FtsIndexResults::Ptr> FtsRecallNode::prepare() {
+  auto filter_status = doc_filter_->compute_filter();
+  if (!filter_status.ok()) {
+    return tl::make_unexpected(filter_status);
+  }
+
+  const auto &fts_cond = query_info_->fts_cond_info();
+  if (!fts_cond) {
+    return tl::make_unexpected(
+        Status::InvalidArgument("FtsRecallNode: no fts_cond_info in query"));
+  }
+
+  fts::FtsQueryParams params;
+  params.topk = query_info_->query_topn();
+  // Brute-force path: get_bf_by_keys_and_update also clears invert_filter_
+  // when it returns ids, so the filter set below won't double-check them.
+  if (auto bf_keys = doc_filter_->get_bf_by_keys_and_update(
+          GlobalConfig::Instance().fts_brute_force_by_keys_ratio())) {
+    params.candidate_ids = std::move(bf_keys.value());
+  }
+  // Push down remaining filters (delete / forward) so filtered docs are
+  // skipped during scoring and we still return up to topk results.
+  params.filter = doc_filter_->empty() ? nullptr : doc_filter_;
+
+  auto results =
+      segment_->fts_search(fts_cond->field_name, *fts_cond->fts_ast, params);
+  if (!results) {
+    return tl::make_unexpected(results.error());
+  }
+
+  return std::make_shared<FtsIndexResults>(std::move(results.value()));
+}
+
+}  // namespace zvec::sqlengine
diff --git a/src/db/sqlengine/planner/fts_recall_node.h b/src/db/sqlengine/planner/fts_recall_node.h
new file mode 100644
index 000000000..ec1079fc3
--- /dev/null
+++ b/src/db/sqlengine/planner/fts_recall_node.h
@@ -0,0 +1,59 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <arrow/acero/api.h>
+#include <arrow/api.h>
+#include "db/index/column/common/index_results.h"
+#include "db/index/column/fts_column/fts_index_results.h"
+#include "db/index/segment/segment.h"
+#include "db/sqlengine/analyzer/query_info.h"
+#include "db/sqlengine/planner/doc_filter.h"
+
+namespace cp = arrow::compute;
+
+namespace zvec::sqlengine {
+
+class FtsRecallNode : public std::enable_shared_from_this<FtsRecallNode> {
+ public:
+  FtsRecallNode(Segment::Ptr segment, QueryInfo::Ptr query_info,
+                DocFilter::Ptr doc_filter, int batch_size);
+
+  //! get schema
+  std::shared_ptr<arrow::Schema> schema() const {
+    return schema_;
+  }
+
+  arrow::AsyncGenerator<std::optional<cp::ExecBatch>> gen();
+
+ private:
+  Result<FtsIndexResults::Ptr> prepare();
+
+ private:
+  struct State {
+    FtsIndexResults::Ptr fts_result_;
+    IndexResults::IteratorUPtr iter_;
+  };
+
+  Segment::Ptr segment_;
+  QueryInfo::Ptr query_info_;
+  DocFilter::Ptr doc_filter_;
+  const std::vector<std::string> &fetched_columns_;
+  int batch_size_;
+  std::shared_ptr<arrow::Schema> schema_;
+};
+
+}  // namespace zvec::sqlengine
diff --git a/src/db/sqlengine/planner/query_planner.cc b/src/db/sqlengine/planner/query_planner.cc
index c0c588a30..4fe9ec812 100644
--- a/src/db/sqlengine/planner/query_planner.cc
+++ b/src/db/sqlengine/planner/query_planner.cc
@@ -28,6 +28,7 @@
 #include "db/sqlengine/analyzer/query_info.h"
 #include "db/sqlengine/analyzer/query_node.h"
 #include "db/sqlengine/common/util.h"
+#include "db/sqlengine/planner/fts_recall_node.h"
 #include "db/sqlengine/planner/invert_recall_node.h"
 #include "db/sqlengine/planner/ops/check_not_filtered_op.h"
 #include "db/sqlengine/planner/ops/contain_op.h"
@@ -406,6 +407,9 @@ Result<PlanInfo::Ptr> QueryPlanner::make_physical_plan(
     if (query_info->vector_cond_info()) {
       seg_plan = vector_scan(segment, std::move(segment_query_info),
                              std::move(forward_filter), single_stage_search);
+    } else if (query_info->fts_cond_info()) {
+      seg_plan = fts_scan(segment, std::move(segment_query_info),
+                          std::move(forward_filter), single_stage_search);
     } else if (query_info->invert_cond()) {
       seg_plan = invert_scan(segment, std::move(segment_query_info),
                              std::move(forward_filter));
@@ -515,14 +519,14 @@ Result<PlanInfo::Ptr> QueryPlanner::forward_scan(
   return std::make_shared<PlanInfo>(std::move(node), std::move(schema));
 }
 
-Result<PlanInfo::Ptr> QueryPlanner::vector_scan(
-    Segment::Ptr seg, QueryInfo::Ptr query_info,
-    std::unique_ptr<arrow::compute::Expression> forward_filter,
+DocFilter::Ptr QueryPlanner::build_doc_filter(
+    const Segment::Ptr &seg, const QueryInfo::Ptr &query_info,
+    std::unique_ptr<arrow::compute::Expression> &forward_filter,
     bool single_stage_search) {
   std::unique_ptr<ac::Declaration> forward_filter_plan;
   // if single stage search is not enabled, first run acero plan to get
-  // forward bitmap, then filter during vector search. otherwise, filter
-  // forward during forward search.
+  // forward bitmap, then filter during search. otherwise, filter forward
+  // during search.
   if (forward_filter && !single_stage_search) {
     ac::RecordBatchReaderSourceNodeOptions source_options{
         seg->scan(query_info->get_forward_filter_field_names())};
@@ -536,9 +540,17 @@ Result<PlanInfo::Ptr> QueryPlanner::vector_scan(
     })});
     forward_filter.reset();
   }
-  auto doc_filter = std::make_shared<DocFilter>(seg, query_info,
-                                                std::move(forward_filter_plan),
-                                                std::move(forward_filter));
+  return std::make_shared<DocFilter>(seg, query_info,
+                                     std::move(forward_filter_plan),
+                                     std::move(forward_filter));
+}
+
+Result<PlanInfo::Ptr> QueryPlanner::vector_scan(
+    Segment::Ptr seg, QueryInfo::Ptr query_info,
+    std::unique_ptr<arrow::compute::Expression> forward_filter,
+    bool single_stage_search) {
+  auto doc_filter =
+      build_doc_filter(seg, query_info, forward_filter, single_stage_search);
 
   int topn = query_info->query_topn();
   int batch_size = get_batch_size(*query_info, false);
@@ -616,6 +628,28 @@ Result<PlanInfo::Ptr> QueryPlanner::invert_scan(
   return std::make_shared<PlanInfo>(std::move(node), std::move(schema));
 }
 
+Result<PlanInfo::Ptr> QueryPlanner::fts_scan(
+    Segment::Ptr seg, QueryInfo::Ptr query_info,
+    std::unique_ptr<arrow::compute::Expression> forward_filter,
+    bool single_stage_search) {
+  auto doc_filter =
+      build_doc_filter(seg, query_info, forward_filter, single_stage_search);
+
+  auto topn = query_info->query_topn();
+  int batch_size = get_batch_size(*query_info, false);
+  auto recall_node = std::make_shared<FtsRecallNode>(
+      std::move(seg), std::move(query_info), std::move(doc_filter), batch_size);
+
+  auto source_node_options =
+      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),
+                                      arrow::compute::Ordering::Implicit()};
+  ac::Declaration node{"source", source_node_options};
+
+  node = ac::Declaration{
+      "fetch", {std::move(node)}, ac::FetchNodeOptions{0, topn}};
+  return std::make_shared<PlanInfo>(std::move(node), recall_node->schema());
+}
+
 int QueryPlanner::get_batch_size(const QueryInfo &info, bool has_later_filter) {
   // ref https://arrow.apache.org/docs/developers/cpp/acero.html#batch-size
   if (!info.query_orderbys().empty() || has_later_filter) {
diff --git a/src/db/sqlengine/planner/query_planner.h b/src/db/sqlengine/planner/query_planner.h
index b93fa34e9..c0cc61993 100644
--- a/src/db/sqlengine/planner/query_planner.h
+++ b/src/db/sqlengine/planner/query_planner.h
@@ -22,6 +22,7 @@
 #include <zvec/db/status.h>
 #include "db/index/segment/segment.h"
 #include "db/sqlengine/analyzer/query_info.h"
+#include "db/sqlengine/planner/doc_filter.h"
 #include "plan_info.h"
 
 namespace zvec::sqlengine {
@@ -59,6 +60,15 @@ class QueryPlanner {
   Result<PlanInfo::Ptr> forward_scan(
       Segment::Ptr seg, QueryInfo::Ptr query_info,
       std::unique_ptr<arrow::compute::Expression> forward_filter);
+  Result<PlanInfo::Ptr> fts_scan(
+      Segment::Ptr seg, QueryInfo::Ptr query_info,
+      std::unique_ptr<arrow::compute::Expression> forward_filter,
+      bool single_stage_search);
+
+  static DocFilter::Ptr build_doc_filter(
+      const Segment::Ptr &seg, const QueryInfo::Ptr &query_info,
+      std::unique_ptr<arrow::compute::Expression> &forward_filter,
+      bool single_stage_search);
 
   static int get_batch_size(const QueryInfo &info, bool has_later_filter);
 
diff --git a/src/db/sqlengine/planner/vector_recall_node.cc b/src/db/sqlengine/planner/vector_recall_node.cc
index f56bb44e8..f58d02c1b 100644
--- a/src/db/sqlengine/planner/vector_recall_node.cc
+++ b/src/db/sqlengine/planner/vector_recall_node.cc
@@ -21,6 +21,7 @@
 #include <zvec/ailego/logger/logger.h>
 #include <zvec/ailego/pattern/expected.hpp>
 #include <zvec/core/framework/index_meta.h>
+#include <zvec/db/config.h>
 #include <zvec/db/index_params.h>
 #include <zvec/db/schema.h>
 #include <zvec/db/type.h>
@@ -159,7 +160,8 @@ Result<IndexResults::Ptr> VectorRecallNode::prepare() {
   query_params.data_type = vector_cond_->vector_schema()->data_type();
   query_params.dimension = vector_cond_->dimension();
   query_params.query_params = vector_cond_->query_params();
-  auto brute_force_keys = doc_filter_->get_bf_by_keys_and_update();
+  auto brute_force_keys = doc_filter_->get_bf_by_keys_and_update(
+      GlobalConfig::Instance().brute_force_by_keys_ratio());
   if (brute_force_keys) {
     query_params.bf_pks.emplace_back(std::move(brute_force_keys.value()));
   }
diff --git a/src/db/sqlengine/sqlengine_impl.cc b/src/db/sqlengine/sqlengine_impl.cc
index 1f5bd5141..84fe30d2d 100644
--- a/src/db/sqlengine/sqlengine_impl.cc
+++ b/src/db/sqlengine/sqlengine_impl.cc
@@ -16,9 +16,12 @@
 #include <unordered_map>
 #include <zvec/ailego/logger/logger.h>
 #include <zvec/db/doc.h>
+#include <zvec/db/index_params.h>
 #include <zvec/db/type.h>
 #include "db/common/constants.h"
+#include "db/index/column/fts_column/fts_query_ast.h"
 #include "db/sqlengine/analyzer/query_analyzer.h"
+#include "db/sqlengine/parser/select_info.h"
 #include "db/sqlengine/parser/sql_info_helper.h"
 #include "db/sqlengine/parser/zvec_parser.h"
 #include "db/sqlengine/planner/op_register.h"
@@ -120,6 +123,89 @@ Result<GroupResults> SQLEngineImpl::execute_group_by(
   return fill_group_by_result(*query_info.value(), reader.value().get());
 }
 
+Result<FtsCondInfo::Ptr> SQLEngineImpl::parse_fts_query(
+    CollectionSchema::Ptr collection, const std::string &field_name,
+    const FtsQuery &fts_query, const QueryParams::Ptr &query_params) {
+  // Exactly one of query_string_ or match_string_ must be provided.
+  bool has_query = !fts_query.query_string_.empty();
+  bool has_match_string = !fts_query.match_string_.empty();
+  if (has_query == has_match_string) {
+    return tl::make_unexpected(Status::InvalidArgument(
+        "Exactly one of query_string or match_string must be provided"));
+  }
+
+  auto *fts_query_param = dynamic_cast<FtsQueryParams *>(query_params.get());
+
+  // Determine default operator once, shared by both query_string and
+  // match_string paths.
+  fts::FtsDefaultOperator default_op = fts::FtsDefaultOperator::OR;
+  if (fts_query_param) {
+    auto &op_str = fts_query_param->default_operator();
+    if (op_str == "AND" || op_str == "and") {
+      default_op = fts::FtsDefaultOperator::AND;
+    }
+  }
+
+  fts::FtsAstNodePtr ast;
+  if (has_query) {
+    // Structured query expression: parse via ANTLR grammar.
+    fts::FtsQueryParser fts_parser;
+    ast = fts_parser.parse(fts_query.query_string_, default_op);
+    if (!ast) {
+      LOG_ERROR("FTS query parse failed: %s", fts_parser.err_msg().c_str());
+      return tl::make_unexpected(Status::InvalidArgument(
+          "FTS query parse failed: ", fts_parser.err_msg()));
+    }
+  } else {
+    // Natural language match_string: tokenize using the field's configured
+    // tokenizer pipeline, then combine tokens with default_operator.
+    auto *field_schema = collection->get_field(field_name);
+    if (!field_schema) {
+      return tl::make_unexpected(
+          Status::InvalidArgument("FTS field not found: ", field_name));
+    }
+    auto fts_idx_param =
+        std::dynamic_pointer_cast<FtsIndexParams>(field_schema->index_params());
+    if (!fts_idx_param) {
+      return tl::make_unexpected(Status::InvalidArgument(
+          "FTS field has no FtsIndexParams: ", field_name));
+    }
+    auto pipeline_result = fts_idx_param->create_pipeline();
+    if (!pipeline_result.has_value()) {
+      return tl::make_unexpected(Status::InternalError(
+          "Failed to create tokenizer pipeline for field: ", field_name, " ",
+          pipeline_result.error().message()));
+    }
+    auto &pipeline = pipeline_result.value();
+    auto tokens = pipeline->process(fts_query.match_string_);
+    if (tokens.empty()) {
+      return tl::make_unexpected(
+          Status::InvalidArgument("match_string produced no tokens"));
+    }
+    if (tokens.size() == 1) {
+      ast = std::make_unique<fts::TermNode>(std::move(tokens[0].text));
+    } else {
+      if (default_op == fts::FtsDefaultOperator::AND) {
+        auto and_node = std::make_unique<fts::AndNode>();
+        for (auto &token : tokens) {
+          and_node->children.push_back(
+              std::make_unique<fts::TermNode>(std::move(token.text)));
+        }
+        ast = std::move(and_node);
+      } else {
+        auto or_node = std::make_unique<fts::OrNode>();
+        for (auto &token : tokens) {
+          or_node->children.push_back(
+              std::make_unique<fts::TermNode>(std::move(token.text)));
+        }
+        ast = std::move(or_node);
+      }
+    }
+  }
+
+  return std::make_shared<FtsCondInfo>(field_name, std::move(ast));
+}
+
 Result<QueryInfo::Ptr> SQLEngineImpl::parse_sql_info(
     const CollectionSchema &schema, const SQLInfo::Ptr &sql_info) {
   profiler_->open_stage("analyze stage");
@@ -172,6 +258,21 @@ Result<QueryInfo::Ptr> SQLEngineImpl::parse_request(
     return tl::make_unexpected(Status::InvalidArgument(
         "Convert message to SQL info failed: ", err_msg));
   }
+
+  // If the request carries an FTS query, parse it and attach to SelectInfo
+  // so that query_analyzer can propagate it to QueryInfo.
+  if (request.fts_query_.has_value()) {
+    auto fts_result =
+        parse_fts_query(collection, request.field_name_,
+                        request.fts_query_.value(), request.query_params_);
+    if (!fts_result) {
+      return tl::make_unexpected(fts_result.error());
+    }
+    auto select_info =
+        std::dynamic_pointer_cast<SelectInfo>(sql_info->base_info());
+    select_info->set_fts_cond_info(std::move(fts_result.value()));
+  }
+
   LOG_DEBUG("Sql info is %s", sql_info->to_string().c_str());
   return parse_sql_info(*collection, std::move(sql_info));
 }
diff --git a/src/db/sqlengine/sqlengine_impl.h b/src/db/sqlengine/sqlengine_impl.h
index 88c279283..e3d5270c0 100644
--- a/src/db/sqlengine/sqlengine_impl.h
+++ b/src/db/sqlengine/sqlengine_impl.h
@@ -22,6 +22,8 @@
 #include <zvec/db/schema.h>
 #include "analyzer/query_info.h"
 #include "common/group_by.h"
+#include "db/index/column/fts_column/fts_query_ast.h"
+#include "db/index/column/fts_column/parser/fts_query_parser.h"
 #include "db/sqlengine/common/util.h"
 #include "db/sqlengine/parser/sql_info.h"
 #include "db/sqlengine/sqlengine.h"
@@ -67,6 +69,11 @@ class SQLEngineImpl : public SQLEngine {
   Result<GroupResults> fill_group_by_result(const QueryInfo &query_info,
                                             arrow::RecordBatchReader *reader);
 
+  //! Parse FTS query into a FtsCondInfo (AST + field name).
+  Result<FtsCondInfo::Ptr> parse_fts_query(
+      CollectionSchema::Ptr collection, const std::string &field_name,
+      const FtsQuery &fts_query, const QueryParams::Ptr &query_params);
+
  private:
   zvec::Profiler::Ptr profiler_;
   std::string execution_time_info_{};
diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h
index c64190d50..7bc141572 100644
--- a/src/include/zvec/c_api.h
+++ b/src/include/zvec/c_api.h
@@ -679,6 +679,24 @@ zvec_config_data_set_brute_force_by_keys_ratio(zvec_config_data_t *config,
 ZVEC_EXPORT float ZVEC_CALL zvec_config_data_get_brute_force_by_keys_ratio(
     const zvec_config_data_t *config);
 
+/**
+ * @brief Set FTS brute force by keys ratio in configuration data
+ * @param config Configuration data pointer
+ * @param ratio FTS brute force by keys ratio
+ * @return zvec_error_code_t Error code
+ */
+ZVEC_EXPORT zvec_error_code_t ZVEC_CALL
+zvec_config_data_set_fts_brute_force_by_keys_ratio(zvec_config_data_t *config,
+                                                   float ratio);
+
+/**
+ * @brief Get FTS brute force by keys ratio from configuration data
+ * @param config Configuration data pointer
+ * @return float FTS brute force by keys ratio
+ */
+ZVEC_EXPORT float ZVEC_CALL zvec_config_data_get_fts_brute_force_by_keys_ratio(
+    const zvec_config_data_t *config);
+
 /**
  * @brief Set optimize thread count in configuration data
  * @param config Configuration data pointer
diff --git a/src/include/zvec/db/config.h b/src/include/zvec/db/config.h
index 29fe19674..35dd09a23 100644
--- a/src/include/zvec/db/config.h
+++ b/src/include/zvec/db/config.h
@@ -92,6 +92,9 @@ class GlobalConfig : public ailego::Singleton<GlobalConfig> {
     uint32_t query_thread_count;
     float invert_to_forward_scan_ratio;
     float brute_force_by_keys_ratio;
+    // Independent from brute_force_by_keys_ratio: per-candidate FTS cost
+    // (phrase phase-2 IO, BM25) is higher, so a tighter default fits.
+    float fts_brute_force_by_keys_ratio;
 
     // optimize
     uint32_t optimize_thread_count;
@@ -161,6 +164,12 @@ class GlobalConfig : public ailego::Singleton<GlobalConfig> {
     return config_.brute_force_by_keys_ratio;
   }
 
+  //! FTS brute force by keys ratio (independent from brute_force_by_keys_ratio
+  //! because FTS per-candidate cost is higher).
+  float fts_brute_force_by_keys_ratio() const noexcept {
+    return config_.fts_brute_force_by_keys_ratio;
+  }
+
   //! Optimize thread count
   uint32_t optimize_thread_count() const noexcept {
     return config_.optimize_thread_count;
diff --git a/src/include/zvec/db/doc.h b/src/include/zvec/db/doc.h
index f702a43c3..d85a778bb 100644
--- a/src/include/zvec/db/doc.h
+++ b/src/include/zvec/db/doc.h
@@ -364,6 +364,14 @@ using DocPtrMap = std::unordered_map<std::string, Doc::Ptr>;
 
 using WriteResults = std::vector<Status>;
 
+struct FtsQuery {
+  std::string query_string_;  // FTS query expression (e.g. "+vector -slow
+                              // \"exact phrase\"")
+  std::string match_string_;  // Natural language match string, tokenized and
+                              // combined using default_operator. Mutually
+                              // exclusive with query_string_.
+};
+
 struct VectorQuery {
   int topk_;
   std::string field_name_;
@@ -378,6 +386,8 @@ struct VectorQuery {
   std::optional<std::vector<std::string>> output_fields_;
   QueryParams::Ptr query_params_;
 
+  std::optional<FtsQuery> fts_query_;
+
   Status validate_and_sanitize(const FieldSchema *schema);
 };
 
diff --git a/src/include/zvec/db/index_params.h b/src/include/zvec/db/index_params.h
index 5f6faff4e..bae85f656 100644
--- a/src/include/zvec/db/index_params.h
+++ b/src/include/zvec/db/index_params.h
@@ -14,15 +14,22 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include <sstream>
 #include <string>
+#include <vector>
 #include <zvec/core/interface/constants.h>
+#include <zvec/db/status.h>
 #include <zvec/db/type.h>
 #include "zvec/core/framework/index_provider.h"
 #include "zvec/core/framework/index_reformer.h"
 
 namespace zvec {
 
+namespace fts {
+class TokenizerPipeline;
+}  // namespace fts
+
 /*
  * Column index params
  */
@@ -558,4 +565,98 @@ class VamanaIndexParams : public VectorIndexParams {
   bool use_id_map_;
 };
 
+/*
+ * FTS (Full-Text Search) index params
+ *
+ * Not copyable.  Use shared_ptr<FtsIndexParams> for shared ownership.
+ * Provides a thread-safe create_pipeline() that lazily creates and caches
+ * a TokenizerPipeline; the pipeline is automatically released on destruction.
+ */
+class FtsIndexParams : public IndexParams {
+ public:
+  using PipelinePtr = std::shared_ptr<fts::TokenizerPipeline>;
+
+  FtsIndexParams(std::string tokenizer_name = "standard",
+                 std::vector<std::string> filters = {"lowercase"},
+                 std::string extra_params = "")
+      : IndexParams(IndexType::FTS),
+        tokenizer_name_(std::move(tokenizer_name)),
+        filters_(std::move(filters)),
+        extra_params_(std::move(extra_params)) {}
+
+  // Not copyable.
+  FtsIndexParams(const FtsIndexParams &) = delete;
+  FtsIndexParams &operator=(const FtsIndexParams &) = delete;
+
+  // Movable (transfers pipeline ownership).
+  FtsIndexParams(FtsIndexParams &&other) noexcept;
+  FtsIndexParams &operator=(FtsIndexParams &&) = delete;
+
+  ~FtsIndexParams() override;
+
+  Ptr clone() const override {
+    // Clone produces an independent copy without pipeline cache.
+    return std::make_shared<FtsIndexParams>(tokenizer_name_, filters_,
+                                            extra_params_);
+  }
+
+  std::string to_string() const override {
+    std::ostringstream oss;
+    oss << "{FtsIndexParams,tokenizer_name:" << tokenizer_name_ << ",filters:[";
+    for (size_t i = 0; i < filters_.size(); ++i) {
+      if (i > 0) {
+        oss << ",";
+      }
+      oss << filters_[i];
+    }
+    oss << "],extra_params:" << extra_params_ << "}";
+    return oss.str();
+  }
+
+  bool operator==(const IndexParams &other) const override {
+    if (type() != other.type()) {
+      return false;
+    }
+    auto &other_fts = static_cast<const FtsIndexParams &>(other);
+    return tokenizer_name_ == other_fts.tokenizer_name_ &&
+           filters_ == other_fts.filters_ &&
+           extra_params_ == other_fts.extra_params_;
+  }
+
+  //! Thread-safe lazy creation of TokenizerPipeline.
+  //! Returns the cached pipeline on subsequent calls.
+  Result<PipelinePtr> create_pipeline();
+
+  const std::string &tokenizer_name() const {
+    return tokenizer_name_;
+  }
+  void set_tokenizer_name(std::string tokenizer_name) {
+    tokenizer_name_ = std::move(tokenizer_name);
+  }
+
+  const std::vector<std::string> &filters() const {
+    return filters_;
+  }
+  void set_filters(std::vector<std::string> filters) {
+    filters_ = std::move(filters);
+  }
+
+  const std::string &extra_params() const {
+    return extra_params_;
+  }
+  void set_extra_params(std::string extra_params) {
+    extra_params_ = std::move(extra_params);
+  }
+
+ private:
+  std::string tokenizer_name_;
+  std::vector<std::string> filters_;
+  std::string extra_params_;
+
+  // Pipeline cache (thread-safe via std::call_once).
+  mutable std::once_flag pipeline_once_;
+  PipelinePtr pipeline_;
+  bool pipeline_created_{false};
+};
+
 }  // namespace zvec
\ No newline at end of file
diff --git a/src/include/zvec/db/query_params.h b/src/include/zvec/db/query_params.h
index fc0667252..df148aed0 100644
--- a/src/include/zvec/db/query_params.h
+++ b/src/include/zvec/db/query_params.h
@@ -14,6 +14,7 @@
 #pragma once
 
 #include <memory>
+#include <string>
 #include <zvec/core/interface/constants.h>
 #include <zvec/db/type.h>
 
@@ -197,4 +198,25 @@ class VamanaQueryParams : public QueryParams {
   int ef_search_;
 };
 
+class FtsQueryParams : public QueryParams {
+ public:
+  using Ptr = std::shared_ptr<FtsQueryParams>;
+
+  FtsQueryParams() : QueryParams(IndexType::FTS) {}
+  ~FtsQueryParams() override = default;
+
+  const std::string &default_operator() const {
+    return default_operator_;
+  }
+
+  void set_default_operator(const std::string &default_operator) {
+    default_operator_ = default_operator;
+  }
+
+ private:
+  // Default boolean operator for adjacent bare terms.
+  // Supported values (case-insensitive): "OR" (default), "AND".
+  std::string default_operator_;
+};
+
 }  // namespace zvec
\ No newline at end of file
diff --git a/src/include/zvec/db/schema.h b/src/include/zvec/db/schema.h
index 80e6cabd4..291abc571 100644
--- a/src/include/zvec/db/schema.h
+++ b/src/include/zvec/db/schema.h
@@ -359,6 +359,10 @@ class CollectionSchema {
 
   FieldSchemaPtrList vector_fields() const;
 
+  bool has_fts_field() const;
+
+  FieldSchemaPtrList fts_fields() const;
+
   uint64_t max_doc_count_per_segment() const;
 
   void set_max_doc_count_per_segment(uint64_t max_doc_count_per_segment);
diff --git a/src/include/zvec/db/type.h b/src/include/zvec/db/type.h
index 31b8850f3..a48267994 100644
--- a/src/include/zvec/db/type.h
+++ b/src/include/zvec/db/type.h
@@ -28,6 +28,7 @@ enum class IndexType : uint32_t {
   HNSW_RABITQ = 4,
   VAMANA = 5,
   INVERT = 10,
+  FTS = 11,
 };
 
 /*
diff --git a/tests/db/common/config_test.cc b/tests/db/common/config_test.cc
index fe4f027f1..974074135 100644
--- a/tests/db/common/config_test.cc
+++ b/tests/db/common/config_test.cc
@@ -43,6 +43,7 @@ TEST_F(ConfigTest, InitializeWithDefaultConfig) {
   ASSERT_GT(GlobalConfig::Instance().query_thread_count(), 0);
   ASSERT_EQ(GlobalConfig::Instance().invert_to_forward_scan_ratio(), 0.9f);
   ASSERT_EQ(GlobalConfig::Instance().brute_force_by_keys_ratio(), 0.1f);
+  ASSERT_EQ(GlobalConfig::Instance().fts_brute_force_by_keys_ratio(), 0.05f);
   ASSERT_GT(GlobalConfig::Instance().optimize_thread_count(), 0);
 }
 
@@ -150,6 +151,16 @@ TEST_F(ConfigTest, ValidateConfigWithInvalidRatios) {
   ASSERT_NE(status.message().find(
                 "brute_force_by_keys_ratio must be between 0 and 1"),
             std::string::npos);
+
+  // Test invalid fts_brute_force_by_keys_ratio
+  config.brute_force_by_keys_ratio = 0.1f;       // Reset to valid value
+  config.fts_brute_force_by_keys_ratio = -0.5f;  // Invalid value
+  status = config_instance.Validate(config);
+  ASSERT_FALSE(status.ok());
+  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
+  ASSERT_NE(status.message().find(
+                "fts_brute_force_by_keys_ratio must be between 0 and 1"),
+            std::string::npos);
 }
 
 TEST_F(ConfigTest, ValidateConfigWithInvalidFileLogSettings) {
diff --git a/tests/db/fts_query_test.cc b/tests/db/fts_query_test.cc
new file mode 100644
index 000000000..ea8b6cabd
--- /dev/null
+++ b/tests/db/fts_query_test.cc
@@ -0,0 +1,230 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <gtest/gtest.h>
+#include <zvec/ailego/io/file.h>
+#include <zvec/ailego/utility/file_helper.h>
+#include "db/common/file_helper.h"
+#include "zvec/db/collection.h"
+#include "zvec/db/doc.h"
+#include "zvec/db/index_params.h"
+#include "zvec/db/options.h"
+#include "zvec/db/schema.h"
+#include "zvec/db/status.h"
+#include "zvec/db/type.h"
+
+using namespace zvec;
+
+static const std::string kTestPath = "./test_fts_query";
+
+class FtsQueryTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    FileHelper::RemoveDirectory(kTestPath);
+  }
+  void TearDown() override {
+    FileHelper::RemoveDirectory(kTestPath);
+  }
+
+  // Create a schema with one STRING field (for forward) and one FTS field.
+  static CollectionSchema::Ptr CreateFtsSchema() {
+    auto schema = std::make_shared<CollectionSchema>("fts_demo");
+    // A simple scalar field for forward store
+    schema->add_field(std::make_shared<FieldSchema>("title", DataType::STRING));
+    // FTS indexed field
+    schema->add_field(
+        std::make_shared<FieldSchema>("content", DataType::STRING, false,
+                                      std::make_shared<FtsIndexParams>()));
+    // A vector field is required for Collection to work (segment open expects
+    // at least one vector field in the normal schema path).
+    schema->add_field(std::make_shared<FieldSchema>(
+        "vec", DataType::VECTOR_FP32, 4, false,
+        std::make_shared<FlatIndexParams>(MetricType::IP)));
+    return schema;
+  }
+
+  static Doc MakeDoc(uint64_t id, const std::string &title,
+                     const std::string &content) {
+    Doc doc;
+    doc.set_pk("pk_" + std::to_string(id));
+    doc.set<std::string>("title", title);
+    doc.set<std::string>("content", content);
+    // dummy vector
+    doc.set<std::vector<float>>("vec", std::vector<float>(4, float(id + 0.1)));
+    return doc;
+  }
+};
+
+TEST_F(FtsQueryTest, BasicFtsQuery) {
+  auto schema = CreateFtsSchema();
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto result = Collection::CreateAndOpen(kTestPath, *schema, options);
+  ASSERT_TRUE(result.has_value()) << result.error().message();
+  auto col = result.value();
+
+  // Insert documents
+  std::vector<Doc> docs;
+  docs.push_back(MakeDoc(0, "intro", "hello world from zvec"));
+  docs.push_back(MakeDoc(1, "guide", "hello foo bar"));
+  docs.push_back(MakeDoc(2, "faq", "baz qux nothing here"));
+  docs.push_back(MakeDoc(3, "tips", "hello hello hello world"));
+
+  auto insert_res = col->Insert(docs);
+  ASSERT_TRUE(insert_res.has_value()) << insert_res.error().message();
+
+  // FTS query: search for "hello"
+  VectorQuery vq;
+  vq.field_name_ = "content";
+  vq.topk_ = 10;
+  FtsQuery fts_query;
+  fts_query.query_string_ = "hello";
+  vq.fts_query_ = fts_query;
+
+  auto query_res = col->Query(vq);
+  ASSERT_TRUE(query_res.has_value()) << query_res.error().message();
+
+  auto &results = query_res.value();
+  // Documents 0, 1, 3 contain "hello"; document 2 does not.
+  ASSERT_GE(results.size(), 2u);
+  ASSERT_LE(results.size(), 3u);
+}
+
+TEST_F(FtsQueryTest, FtsQueryEmptyField) {
+  auto schema = CreateFtsSchema();
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto result = Collection::CreateAndOpen(kTestPath, *schema, options);
+  ASSERT_TRUE(result.has_value());
+  auto col = result.value();
+
+  VectorQuery vq;
+  vq.field_name_ = "";  // empty
+  vq.topk_ = 10;
+  FtsQuery fts_query;
+  fts_query.query_string_ = "hello";
+  vq.fts_query_ = fts_query;
+
+  auto query_res = col->Query(vq);
+  ASSERT_FALSE(query_res.has_value());
+}
+
+TEST_F(FtsQueryTest, FtsQueryNoMatch) {
+  auto schema = CreateFtsSchema();
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto result = Collection::CreateAndOpen(kTestPath, *schema, options);
+  ASSERT_TRUE(result.has_value());
+  auto col = result.value();
+
+  std::vector<Doc> docs;
+  docs.push_back(MakeDoc(0, "intro", "hello world"));
+  auto insert_res = col->Insert(docs);
+  ASSERT_TRUE(insert_res.has_value());
+
+  VectorQuery vq;
+  vq.field_name_ = "content";
+  vq.topk_ = 10;
+  FtsQuery fts_query;
+  fts_query.query_string_ = "nonexistent_term_xyz";
+  vq.fts_query_ = fts_query;
+
+  auto query_res = col->Query(vq);
+  ASSERT_TRUE(query_res.has_value());
+  ASSERT_EQ(query_res.value().size(), 0u);
+}
+
+// Verify that FTS fields do NOT support add/alter/drop column operations.
+// The schema change validation only allows basic numeric types [INT32..DOUBLE].
+TEST_F(FtsQueryTest, FtsFieldUnsupportedAddColumn) {
+  auto schema = CreateFtsSchema();
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto result = Collection::CreateAndOpen(kTestPath, *schema, options);
+  ASSERT_TRUE(result.has_value());
+  auto col = result.value();
+
+  // Insert a document so the collection is non-empty
+  std::vector<Doc> docs;
+  docs.push_back(MakeDoc(0, "intro", "hello world"));
+  auto insert_res = col->Insert(docs);
+  ASSERT_TRUE(insert_res.has_value());
+  ASSERT_TRUE(col->Flush().ok());
+
+  // Attempt to add a new FTS column — should fail
+  auto fts_field = std::make_shared<FieldSchema>(
+      "new_fts", DataType::STRING, true, std::make_shared<FtsIndexParams>());
+  auto status = col->AddColumn(fts_field, "", AddColumnOptions());
+  ASSERT_FALSE(status.ok());
+  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
+}
+
+TEST_F(FtsQueryTest, FtsFieldUnsupportedDropColumn) {
+  auto schema = CreateFtsSchema();
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto result = Collection::CreateAndOpen(kTestPath, *schema, options);
+  ASSERT_TRUE(result.has_value());
+  auto col = result.value();
+
+  // Insert a document so the collection is non-empty
+  std::vector<Doc> docs;
+  docs.push_back(MakeDoc(0, "intro", "hello world"));
+  auto insert_res = col->Insert(docs);
+  ASSERT_TRUE(insert_res.has_value());
+  ASSERT_TRUE(col->Flush().ok());
+
+  // Attempt to drop an existing FTS column — should fail
+  auto status = col->DropColumn("content");
+  ASSERT_FALSE(status.ok());
+  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
+}
+
+TEST_F(FtsQueryTest, FtsFieldUnsupportedAlterColumn) {
+  auto schema = CreateFtsSchema();
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto result = Collection::CreateAndOpen(kTestPath, *schema, options);
+  ASSERT_TRUE(result.has_value());
+  auto col = result.value();
+
+  // Insert a document so the collection is non-empty
+  std::vector<Doc> docs;
+  docs.push_back(MakeDoc(0, "intro", "hello world"));
+  auto insert_res = col->Insert(docs);
+  ASSERT_TRUE(insert_res.has_value());
+  ASSERT_TRUE(col->Flush().ok());
+
+  // Attempt to alter (rename) the FTS column — should fail
+  auto status = col->AlterColumn("content", "content_renamed", nullptr,
+                                 AlterColumnOptions());
+  ASSERT_FALSE(status.ok());
+  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
+
+  // Attempt to alter the FTS column with a new schema — should also fail
+  auto new_fts_field = std::make_shared<FieldSchema>(
+      "content", DataType::STRING, true, std::make_shared<FtsIndexParams>());
+  status = col->AlterColumn("content", "", new_fts_field, AlterColumnOptions());
+  ASSERT_FALSE(status.ok());
+  ASSERT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
+}
diff --git a/tests/db/index/CMakeLists.txt b/tests/db/index/CMakeLists.txt
index d600dca6a..441f49009 100644
--- a/tests/db/index/CMakeLists.txt
+++ b/tests/db/index/CMakeLists.txt
@@ -54,3 +54,10 @@ foreach(CC_SRCS ${ALL_TEST_SRCS})
   )
   cc_test_suite(zvec_index ${CC_TARGET})
 endforeach()
+
+# Inject TEST_SOURCE_DIR for fts_column_indexer_test so it can locate testdata/
+if(TARGET fts_column_indexer_test)
+  target_compile_definitions(fts_column_indexer_test PRIVATE
+    TEST_SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}/column/fts_column"
+    JIEBA_DICT_DIR="${PROJECT_SOURCE_DIR}/thirdparty/cppjieba/cppjieba-5.6.7/dict")
+endif()
diff --git a/tests/db/index/column/fts_column/bitpacked_posting_list_test.cc b/tests/db/index/column/fts_column/bitpacked_posting_list_test.cc
new file mode 100644
index 000000000..76d28cd6e
--- /dev/null
+++ b/tests/db/index/column/fts_column/bitpacked_posting_list_test.cc
@@ -0,0 +1,681 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "db/index/column/fts_column/posting/bitpacked_posting_list.h"
+#include <algorithm>
+#include <cstdint>
+#include <numeric>
+#include <random>
+#include <vector>
+#include <gtest/gtest.h>
+#include "db/index/column/fts_column/bm25_scorer.h"
+
+using namespace zvec::fts;
+
+// ============================================================
+// Helper: create a BM25Scorer with reasonable defaults
+// ============================================================
+
+static BM25Scorer make_scorer(uint64_t total_docs = 1000,
+                              uint64_t total_tokens = 50000) {
+  BM25Scorer scorer;
+  scorer.update_stats(total_docs, total_tokens);
+  return scorer;
+}
+
+// ============================================================
+// bits_needed()
+// ============================================================
+
+TEST(BitPackedPostingListTest, BitsNeededZero) {
+  EXPECT_EQ(BitPackedPostingList::bits_needed(0), 0);
+}
+
+TEST(BitPackedPostingListTest, BitsNeededOne) {
+  EXPECT_EQ(BitPackedPostingList::bits_needed(1), 1);
+}
+
+TEST(BitPackedPostingListTest, BitsNeededPowerOfTwo) {
+  EXPECT_EQ(BitPackedPostingList::bits_needed(2), 2);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(4), 3);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(8), 4);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(256), 9);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(1024), 11);
+}
+
+TEST(BitPackedPostingListTest, BitsNeededNonPowerOfTwo) {
+  EXPECT_EQ(BitPackedPostingList::bits_needed(3), 2);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(5), 3);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(7), 3);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(255), 8);
+  EXPECT_EQ(BitPackedPostingList::bits_needed(1023), 10);
+}
+
+TEST(BitPackedPostingListTest, BitsNeededMaxUint32) {
+  EXPECT_EQ(BitPackedPostingList::bits_needed(0xFFFFFFFF), 32);
+}
+
+// ============================================================
+// pack_uint32 / unpack_uint32 round-trip
+// ============================================================
+
+class BitPackingTest : public ::testing::TestWithParam<uint8_t> {};
+
+TEST_P(BitPackingTest, PackUnpackRoundTrip128) {
+  const uint8_t bitwidth = GetParam();
+  if (bitwidth == 0) return;
+
+  const uint32_t count = 128;
+  const uint32_t mask =
+      (bitwidth == 32) ? 0xFFFFFFFFu : ((1u << bitwidth) - 1u);
+
+  // Generate test values
+  std::vector<uint32_t> original(count);
+  for (uint32_t i = 0; i < count; ++i) {
+    original[i] = (i * 17 + 3) & mask;  // deterministic pattern
+  }
+
+  // Pack
+  const size_t packed_size =
+      BitPackedPostingList::packed_byte_size(bitwidth, count);
+  std::vector<uint8_t> packed(packed_size, 0);
+  BitPackedPostingList::pack_uint32(original.data(), bitwidth, count,
+                                    packed.data());
+
+  // Unpack
+  std::vector<uint32_t> decoded(count, 0);
+  BitPackedPostingList::unpack_uint32(packed.data(), bitwidth, count,
+                                      decoded.data());
+
+  // Verify
+  for (uint32_t i = 0; i < count; ++i) {
+    EXPECT_EQ(decoded[i], original[i])
+        << "Mismatch at index " << i << " with bitwidth " << (int)bitwidth;
+  }
+}
+
+TEST_P(BitPackingTest, PackUnpackRoundTripSmall) {
+  const uint8_t bitwidth = GetParam();
+  if (bitwidth == 0) return;
+
+  // Test with a small count (not a full block)
+  const uint32_t count = 7;
+  const uint32_t mask =
+      (bitwidth == 32) ? 0xFFFFFFFFu : ((1u << bitwidth) - 1u);
+
+  std::vector<uint32_t> original(count);
+  for (uint32_t i = 0; i < count; ++i) {
+    original[i] = i & mask;
+  }
+
+  const size_t packed_size =
+      BitPackedPostingList::packed_byte_size(bitwidth, count);
+  std::vector<uint8_t> packed(packed_size, 0);
+  BitPackedPostingList::pack_uint32(original.data(), bitwidth, count,
+                                    packed.data());
+
+  std::vector<uint32_t> decoded(count, 0);
+  BitPackedPostingList::unpack_uint32(packed.data(), bitwidth, count,
+                                      decoded.data());
+
+  for (uint32_t i = 0; i < count; ++i) {
+    EXPECT_EQ(decoded[i], original[i])
+        << "Mismatch at index " << i << " with bitwidth " << (int)bitwidth;
+  }
+}
+
+// Test all bitwidths from 1 to 32
+INSTANTIATE_TEST_SUITE_P(AllBitwidths, BitPackingTest,
+                         ::testing::Range(static_cast<uint8_t>(1),
+                                          static_cast<uint8_t>(33)));
+
+TEST(BitPackingTest, PackUnpackZeroBitwidth) {
+  const uint32_t count = 128;
+  std::vector<uint32_t> original(count, 0);
+  std::vector<uint32_t> decoded(count, 99);
+
+  // bitwidth 0: all values must be 0
+  BitPackedPostingList::unpack_uint32(nullptr, 0, count, decoded.data());
+  for (uint32_t i = 0; i < count; ++i) {
+    EXPECT_EQ(decoded[i], 0u);
+  }
+}
+
+// ============================================================
+// Encode / Decode: empty posting list
+// ============================================================
+
+TEST(BitPackedPostingListTest, EncodeDecodeEmpty) {
+  BM25Scorer scorer = make_scorer();
+  std::string encoded =
+      BitPackedPostingList::encode(nullptr, nullptr, nullptr, 0, 0, scorer);
+
+  EXPECT_TRUE(BitPackedPostingList::is_bitpacked_format(encoded.data(),
+                                                        encoded.size()));
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+  EXPECT_EQ(iter.cost(), 0u);
+  EXPECT_EQ(iter.next_doc(), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// Encode / Decode: single element
+// ============================================================
+
+TEST(BitPackedPostingListTest, EncodeDecodeSingleElement) {
+  BM25Scorer scorer = make_scorer();
+  uint32_t doc_ids[] = {42};
+  uint32_t tfs[] = {3};
+  uint32_t doc_lens[] = {100};
+
+  std::string encoded =
+      BitPackedPostingList::encode(doc_ids, tfs, doc_lens, 1, 1, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+  EXPECT_EQ(iter.cost(), 1u);
+
+  EXPECT_EQ(iter.next_doc(), 42u);
+  EXPECT_EQ(iter.doc_id(), 42u);
+  EXPECT_EQ(iter.term_freq(), 3u);
+  EXPECT_EQ(iter.doc_len(), 100u);
+
+  EXPECT_EQ(iter.next_doc(), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// Encode / Decode: small list (< 128)
+// ============================================================
+
+TEST(BitPackedPostingListTest, EncodeDecodeSmallList) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 10;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 5);
+    tfs[i] = static_cast<uint32_t>(i + 1);
+    doc_lens[i] = static_cast<uint32_t>(50 + i * 10);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+  EXPECT_EQ(iter.cost(), count);
+
+  for (size_t i = 0; i < count; ++i) {
+    uint32_t doc = iter.next_doc();
+    EXPECT_EQ(doc, doc_ids[i]) << "Mismatch at index " << i;
+    EXPECT_EQ(iter.term_freq(), tfs[i]) << "TF mismatch at index " << i;
+    EXPECT_EQ(iter.doc_len(), doc_lens[i]) << "DocLen mismatch at index " << i;
+  }
+
+  EXPECT_EQ(iter.next_doc(), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// Encode / Decode: exactly 128 elements (one full block)
+// ============================================================
+
+TEST(BitPackedPostingListTest, EncodeDecodeExact128) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 128;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 3);
+    tfs[i] = static_cast<uint32_t>((i % 10) + 1);
+    doc_lens[i] = static_cast<uint32_t>(100 + i);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+  EXPECT_EQ(iter.cost(), count);
+
+  for (size_t i = 0; i < count; ++i) {
+    uint32_t doc = iter.next_doc();
+    EXPECT_EQ(doc, doc_ids[i]) << "Mismatch at index " << i;
+    EXPECT_EQ(iter.term_freq(), tfs[i]) << "TF mismatch at index " << i;
+    EXPECT_EQ(iter.doc_len(), doc_lens[i]) << "DocLen mismatch at index " << i;
+  }
+
+  EXPECT_EQ(iter.next_doc(), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// Encode / Decode: 129 elements (two blocks, last block has 1 element)
+// ============================================================
+
+TEST(BitPackedPostingListTest, EncodeDecodeCrossBlockBoundary) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 129;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 2);
+    tfs[i] = static_cast<uint32_t>((i % 5) + 1);
+    doc_lens[i] = static_cast<uint32_t>(200 + i);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+  EXPECT_EQ(iter.cost(), count);
+
+  for (size_t i = 0; i < count; ++i) {
+    uint32_t doc = iter.next_doc();
+    EXPECT_EQ(doc, doc_ids[i]) << "Mismatch at index " << i;
+    EXPECT_EQ(iter.term_freq(), tfs[i]) << "TF mismatch at index " << i;
+    EXPECT_EQ(iter.doc_len(), doc_lens[i]) << "DocLen mismatch at index " << i;
+  }
+
+  EXPECT_EQ(iter.next_doc(), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// Encode / Decode: large list (multiple blocks)
+// ============================================================
+
+TEST(BitPackedPostingListTest, EncodeDecodeLargeList) {
+  BM25Scorer scorer = make_scorer(10000, 500000);
+  const size_t count = 1000;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 10);
+    tfs[i] = static_cast<uint32_t>((i % 20) + 1);
+    doc_lens[i] = static_cast<uint32_t>(50 + (i % 200));
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+  EXPECT_EQ(iter.cost(), count);
+
+  for (size_t i = 0; i < count; ++i) {
+    uint32_t doc = iter.next_doc();
+    EXPECT_EQ(doc, doc_ids[i]) << "Mismatch at index " << i;
+    EXPECT_EQ(iter.term_freq(), tfs[i]) << "TF mismatch at index " << i;
+    EXPECT_EQ(iter.doc_len(), doc_lens[i]) << "DocLen mismatch at index " << i;
+  }
+
+  EXPECT_EQ(iter.next_doc(), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// advance(): basic skip-list functionality
+// ============================================================
+
+TEST(BitPackedPostingListTest, AdvanceToExactDocId) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 500;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count, 1);
+  std::vector<uint32_t> doc_lens(count, 100);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 3);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Advance to exact doc_id
+  EXPECT_EQ(iter.advance(300), 300u);
+  EXPECT_EQ(iter.doc_id(), 300u);
+
+  // Advance to a doc_id that doesn't exist (should return next >= target)
+  EXPECT_EQ(iter.advance(301), 303u);
+  EXPECT_EQ(iter.doc_id(), 303u);
+}
+
+TEST(BitPackedPostingListTest, AdvanceToFirstDoc) {
+  BM25Scorer scorer = make_scorer();
+  uint32_t doc_ids[] = {10, 20, 30, 40, 50};
+  uint32_t tfs[] = {1, 2, 3, 4, 5};
+  uint32_t doc_lens[] = {100, 200, 300, 400, 500};
+
+  std::string encoded =
+      BitPackedPostingList::encode(doc_ids, tfs, doc_lens, 5, 5, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Advance to 0 should return the first doc (10)
+  EXPECT_EQ(iter.advance(0), 10u);
+  EXPECT_EQ(iter.term_freq(), 1u);
+  EXPECT_EQ(iter.doc_len(), 100u);
+}
+
+TEST(BitPackedPostingListTest, AdvanceBeyondLastDoc) {
+  BM25Scorer scorer = make_scorer();
+  uint32_t doc_ids[] = {10, 20, 30};
+  uint32_t tfs[] = {1, 2, 3};
+  uint32_t doc_lens[] = {100, 200, 300};
+
+  std::string encoded =
+      BitPackedPostingList::encode(doc_ids, tfs, doc_lens, 3, 3, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  EXPECT_EQ(iter.advance(31), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+TEST(BitPackedPostingListTest, AdvanceAcrossBlocks) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 300;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count, 2);
+  std::vector<uint32_t> doc_lens(count, 50);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 5);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Advance from start to a doc in the 3rd block (block 2, index 256+)
+  // Block 0: doc_ids 0..635 (indices 0..127)
+  // Block 1: doc_ids 640..1275 (indices 128..255)
+  // Block 2: doc_ids 1280..1495 (indices 256..299)
+  EXPECT_EQ(iter.advance(1280), 1280u);
+  EXPECT_EQ(iter.doc_id(), 1280u);
+  EXPECT_EQ(iter.term_freq(), 2u);
+
+  // Continue with next_doc
+  EXPECT_EQ(iter.next_doc(), 1285u);
+}
+
+TEST(BitPackedPostingListTest, AdvanceSequentialCalls) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 200;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count, 1);
+  std::vector<uint32_t> doc_lens(count, 100);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 7);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Multiple sequential advance calls
+  EXPECT_EQ(iter.advance(100), 105u);    // 15*7=105
+  EXPECT_EQ(iter.advance(500), 504u);    // 72*7=504
+  EXPECT_EQ(iter.advance(1000), 1001u);  // 143*7=1001
+  EXPECT_EQ(iter.advance(1400), BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// advance() after next_doc()
+// ============================================================
+
+TEST(BitPackedPostingListTest, AdvanceAfterNextDoc) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 256;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count, 1);
+  std::vector<uint32_t> doc_lens(count, 50);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i * 4);
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Read a few docs
+  EXPECT_EQ(iter.next_doc(), 0u);
+  EXPECT_EQ(iter.next_doc(), 4u);
+  EXPECT_EQ(iter.next_doc(), 8u);
+
+  // Now advance past the current block
+  EXPECT_EQ(iter.advance(600), 600u);  // 150*4=600
+  EXPECT_EQ(iter.term_freq(), 1u);
+
+  // Continue with next_doc
+  EXPECT_EQ(iter.next_doc(), 604u);
+}
+
+// ============================================================
+// block_max_score correctness
+// ============================================================
+
+TEST(BitPackedPostingListTest, BlockMaxScoreCorrectness) {
+  BM25Scorer scorer = make_scorer(100, 5000);
+  const size_t count = 256;  // 2 blocks
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i);
+    tfs[i] = static_cast<uint32_t>((i % 10) + 1);
+    doc_lens[i] = static_cast<uint32_t>(50 + (i % 50));
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Verify block_max_score for block 0 via block_max_info_for()
+  auto info0 = iter.block_max_info_for(0);
+
+  // Manually compute max score for block 0
+  float expected_max = 0.0f;
+  for (size_t i = 0; i < 128; ++i) {
+    float s = scorer.score(count, tfs[i], doc_lens[i]);
+    expected_max = std::max(expected_max, s);
+  }
+  EXPECT_FLOAT_EQ(info0.block_max_score, expected_max);
+  EXPECT_EQ(info0.block_last_doc, 127u);
+
+  // Verify block_max_score for block 1 via block_max_info_for()
+  auto info1 = iter.block_max_info_for(128);
+
+  expected_max = 0.0f;
+  for (size_t i = 128; i < 256; ++i) {
+    float s = scorer.score(count, tfs[i], doc_lens[i]);
+    expected_max = std::max(expected_max, s);
+  }
+  EXPECT_FLOAT_EQ(info1.block_max_score, expected_max);
+  EXPECT_EQ(info1.block_last_doc, 255u);
+}
+
+// ============================================================
+// max_score() (global)
+// ============================================================
+
+TEST(BitPackedPostingListTest, GlobalMaxScore) {
+  BM25Scorer scorer = make_scorer(100, 5000);
+  const size_t count = 256;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    doc_ids[i] = static_cast<uint32_t>(i);
+    tfs[i] = static_cast<uint32_t>((i % 10) + 1);
+    doc_lens[i] = static_cast<uint32_t>(50 + (i % 50));
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(encoded.data(), encoded.size()), 0);
+
+  // Global max_score should be the maximum of all block_max_scores
+  float global_max = 0.0f;
+  for (size_t i = 0; i < count; ++i) {
+    float s = scorer.score(count, tfs[i], doc_lens[i]);
+    global_max = std::max(global_max, s);
+  }
+  EXPECT_FLOAT_EQ(iter.max_score(), global_max);
+}
+
+// ============================================================
+// is_bitpacked_format()
+// ============================================================
+
+TEST(BitPackedPostingListTest, IsBitpackedFormatTrue) {
+  BM25Scorer scorer = make_scorer();
+  uint32_t doc_ids[] = {1};
+  uint32_t tfs[] = {1};
+  uint32_t doc_lens[] = {10};
+
+  std::string encoded =
+      BitPackedPostingList::encode(doc_ids, tfs, doc_lens, 1, 1, scorer);
+  EXPECT_TRUE(BitPackedPostingList::is_bitpacked_format(encoded.data(),
+                                                        encoded.size()));
+}
+
+TEST(BitPackedPostingListTest, IsBitpackedFormatFalse) {
+  // Random data that doesn't start with the magic number
+  std::string random_data = "hello world";
+  EXPECT_FALSE(BitPackedPostingList::is_bitpacked_format(random_data.data(),
+                                                         random_data.size()));
+}
+
+TEST(BitPackedPostingListTest, IsBitpackedFormatTooShort) {
+  std::string short_data = "ab";
+  EXPECT_FALSE(BitPackedPostingList::is_bitpacked_format(short_data.data(),
+                                                         short_data.size()));
+}
+
+// ============================================================
+// Error handling: open() with invalid data
+// ============================================================
+
+TEST(BitPackedPostingListTest, OpenWithNullData) {
+  BitPackedPostingIterator iter;
+  EXPECT_NE(iter.open(nullptr, 0), 0);
+}
+
+TEST(BitPackedPostingListTest, OpenWithTruncatedHeader) {
+  BitPackedPostingIterator iter;
+  char data[4] = {0};
+  EXPECT_NE(iter.open(data, 4), 0);
+}
+
+TEST(BitPackedPostingListTest, OpenWithBadMagic) {
+  BitPackedPostingIterator iter;
+  char data[16] = {0};
+  EXPECT_NE(iter.open(data, 16), 0);
+}
+
+// ============================================================
+// Consistency: advance() vs sequential next_doc()
+// ============================================================
+
+TEST(BitPackedPostingListTest, AdvanceConsistentWithNextDoc) {
+  BM25Scorer scorer = make_scorer();
+  const size_t count = 500;
+  std::vector<uint32_t> doc_ids(count);
+  std::vector<uint32_t> tfs(count);
+  std::vector<uint32_t> doc_lens(count);
+
+  std::mt19937 rng(42);
+  uint32_t current = 0;
+  for (size_t i = 0; i < count; ++i) {
+    current += (rng() % 10) + 1;
+    doc_ids[i] = current;
+    tfs[i] = (rng() % 10) + 1;
+    doc_lens[i] = (rng() % 200) + 10;
+  }
+
+  std::string encoded = BitPackedPostingList::encode(
+      doc_ids.data(), tfs.data(), doc_lens.data(), count, count, scorer);
+
+  // Collect all docs via next_doc
+  BitPackedPostingIterator iter1;
+  EXPECT_EQ(iter1.open(encoded.data(), encoded.size()), 0);
+  std::vector<uint32_t> all_docs;
+  std::vector<uint32_t> all_tfs;
+  std::vector<uint32_t> all_doc_lens;
+  uint32_t doc = iter1.next_doc();
+  while (doc != BitPackedPostingIterator::NO_MORE_DOCS) {
+    all_docs.push_back(doc);
+    all_tfs.push_back(iter1.term_freq());
+    all_doc_lens.push_back(iter1.doc_len());
+    doc = iter1.next_doc();
+  }
+
+  ASSERT_EQ(all_docs.size(), count);
+
+  // Verify advance to various targets matches sequential scan
+  BitPackedPostingIterator iter2;
+  EXPECT_EQ(iter2.open(encoded.data(), encoded.size()), 0);
+
+  std::vector<uint32_t> targets = {0,
+                                   1,
+                                   doc_ids[50],
+                                   doc_ids[127],
+                                   doc_ids[128],
+                                   doc_ids[200],
+                                   doc_ids[count - 1]};
+
+  for (uint32_t target : targets) {
+    BitPackedPostingIterator iter_adv;
+    EXPECT_EQ(iter_adv.open(encoded.data(), encoded.size()), 0);
+    uint32_t adv_doc = iter_adv.advance(target);
+
+    // Find expected result via linear scan
+    auto it = std::lower_bound(all_docs.begin(), all_docs.end(), target);
+    if (it == all_docs.end()) {
+      EXPECT_EQ(adv_doc, BitPackedPostingIterator::NO_MORE_DOCS)
+          << "target=" << target;
+    } else {
+      size_t idx = it - all_docs.begin();
+      EXPECT_EQ(adv_doc, all_docs[idx]) << "target=" << target;
+      EXPECT_EQ(iter_adv.term_freq(), all_tfs[idx]) << "target=" << target;
+      EXPECT_EQ(iter_adv.doc_len(), all_doc_lens[idx]) << "target=" << target;
+    }
+  }
+}
diff --git a/tests/db/index/column/fts_column/fts_candidate_iterator_test.cc b/tests/db/index/column/fts_column/fts_candidate_iterator_test.cc
new file mode 100644
index 000000000..bd4728525
--- /dev/null
+++ b/tests/db/index/column/fts_column/fts_candidate_iterator_test.cc
@@ -0,0 +1,98 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "db/index/column/fts_column/iterator/fts_candidate_iterator.h"
+#include <cstdint>
+#include <vector>
+#include <gtest/gtest.h>
+#include "db/index/column/fts_column/iterator/fts_doc_iterator.h"
+
+using zvec::fts::CandidateDocIterator;
+using zvec::fts::DocIterator;
+
+namespace {
+
+constexpr uint32_t kNoMore = DocIterator::NO_MORE_DOCS;
+
+}  // namespace
+
+TEST(CandidateDocIteratorTest, EmptyVectorYieldsNothing) {
+  CandidateDocIterator it({});
+  EXPECT_EQ(it.cost(), 0u);
+  EXPECT_EQ(it.next_doc(), kNoMore);
+  EXPECT_EQ(it.next_doc(), kNoMore);
+}
+
+TEST(CandidateDocIteratorTest, NextDocStreamsAscending) {
+  CandidateDocIterator it({0, 5, 10, 100});
+  EXPECT_EQ(it.cost(), 4u);
+  EXPECT_FLOAT_EQ(it.max_score(), 0.0f);
+  EXPECT_FLOAT_EQ(it.score(), 0.0f);
+  EXPECT_TRUE(it.matches());
+
+  EXPECT_EQ(it.next_doc(), 0u);
+  EXPECT_EQ(it.doc_id(), 0u);
+  EXPECT_EQ(it.next_doc(), 5u);
+  EXPECT_EQ(it.next_doc(), 10u);
+  EXPECT_EQ(it.next_doc(), 100u);
+  EXPECT_EQ(it.next_doc(), kNoMore);
+  EXPECT_EQ(it.next_doc(), kNoMore);
+}
+
+TEST(CandidateDocIteratorTest, AdvanceLandsOnExactMatch) {
+  CandidateDocIterator it({10, 20, 30, 40, 50});
+  EXPECT_EQ(it.advance(20), 20u);
+  EXPECT_EQ(it.doc_id(), 20u);
+  // Subsequent next_doc continues past the advanced position.
+  EXPECT_EQ(it.next_doc(), 30u);
+}
+
+TEST(CandidateDocIteratorTest, AdvanceSeeksToNextHigher) {
+  CandidateDocIterator it({10, 20, 30, 40, 50});
+  EXPECT_EQ(it.advance(25), 30u);
+  EXPECT_EQ(it.next_doc(), 40u);
+}
+
+TEST(CandidateDocIteratorTest, AdvancePastLastYieldsNoMore) {
+  CandidateDocIterator it({10, 20, 30});
+  EXPECT_EQ(it.advance(50), kNoMore);
+  EXPECT_EQ(it.next_doc(), kNoMore);
+}
+
+TEST(CandidateDocIteratorTest, AdvanceBeforeAnyConsumeWorks) {
+  CandidateDocIterator it({10, 20, 30});
+  EXPECT_EQ(it.advance(0), 10u);
+  EXPECT_EQ(it.next_doc(), 20u);
+}
+
+TEST(CandidateDocIteratorTest, AdvanceInterleavedWithNext) {
+  CandidateDocIterator it({5, 10, 15, 20, 25, 30});
+  EXPECT_EQ(it.next_doc(), 5u);
+  EXPECT_EQ(it.advance(15), 15u);
+  EXPECT_EQ(it.next_doc(), 20u);
+  EXPECT_EQ(it.advance(99), kNoMore);
+}
+
+TEST(CandidateDocIteratorTest, SingleElement) {
+  CandidateDocIterator it({42});
+  EXPECT_EQ(it.cost(), 1u);
+  EXPECT_EQ(it.advance(42), 42u);
+  EXPECT_EQ(it.next_doc(), kNoMore);
+}
+
+TEST(CandidateDocIteratorTest, AdvanceCachesDocId) {
+  CandidateDocIterator it({1, 2, 3});
+  EXPECT_EQ(it.advance(2), 2u);
+  EXPECT_EQ(it.doc_id(), 2u);
+}
diff --git a/tests/db/index/column/fts_column/fts_column_indexer_test.cc b/tests/db/index/column/fts_column/fts_column_indexer_test.cc
new file mode 100644
index 000000000..b2e0af340
--- /dev/null
+++ b/tests/db/index/column/fts_column/fts_column_indexer_test.cc
@@ -0,0 +1,1465 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "db/index/column/fts_column/fts_column_indexer.h"
+#include <fstream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <gtest/gtest.h>
+#include <zvec/db/index_params.h>
+#include "db/common/file_helper.h"
+#include "db/index/common/index_filter.h"
+// FtsQueryParams defined below
+#include "db/index/column/fts_column/fts_rocksdb_merge.h"
+#include "db/index/column/fts_column/parser/fts_query_parser.h"
+#include "db/index/column/fts_column/tokenizer/tokenizer_factory.h"
+// meta.h not needed in zvec
+#include "db/common/constants.h"
+#include "db/common/rocksdb_context.h"
+
+using namespace zvec;
+using namespace zvec::fts;
+
+namespace {
+
+// Build a transient FieldSchema for FTS unit tests.
+// When fts_params is provided, it is attached as the field's index_params
+// so that FtsColumnIndexer::open() can retrieve the tokenizer configuration.
+FieldSchema::Ptr make_test_field_meta(
+    const std::string &field_name,
+    std::shared_ptr<zvec::FtsIndexParams> fts_params = nullptr) {
+  if (fts_params) {
+    return std::make_shared<FieldSchema>(field_name, DataType::STRING, false,
+                                         fts_params);
+  }
+  return std::make_shared<FieldSchema>(field_name, DataType::STRING);
+}
+
+}  // namespace
+
+// Helper: parse a query string and call search() on a reader/indexer.
+// Terminates the test with ASSERT if parsing fails.
+template <typename Reader>
+static bool search_ok(Reader &reader, const std::string &query_str,
+                      uint32_t topk, std::vector<FtsResult> *results) {
+  FtsQueryParser parser;
+  auto ast = parser.parse(query_str);
+  if (!ast) {
+    ADD_FAILURE() << "FtsQueryParser failed to parse: " << query_str
+                  << " err: " << parser.err_msg();
+    return false;
+  }
+  zvec::fts::FtsQueryParams qp;
+  qp.topk = topk;
+  auto ret = reader.search(*ast, qp);
+  if (!ret.has_value()) {
+    return false;
+  }
+  *results = std::move(ret.value());
+  return true;
+}
+
+// Helper: parse a query string with a filter and call search().
+template <typename Reader>
+static bool search_ok_with_filter(Reader &reader, const std::string &query_str,
+                                  uint32_t topk, zvec::IndexFilter::Ptr filter,
+                                  std::vector<FtsResult> *results) {
+  FtsQueryParser parser;
+  auto ast = parser.parse(query_str);
+  if (!ast) {
+    ADD_FAILURE() << "FtsQueryParser failed to parse: " << query_str
+                  << " err: " << parser.err_msg();
+    return false;
+  }
+  zvec::fts::FtsQueryParams qp;
+  qp.topk = topk;
+  qp.filter = std::move(filter);
+  auto ret = reader.search(*ast, qp);
+  if (!ret.has_value()) {
+    return false;
+  }
+  *results = std::move(ret.value());
+  return true;
+}
+
+// ============================================================
+// Test fixture
+// ============================================================
+
+static const std::string kDbPath{"./test_fts_db"};
+
+static const std::string kPostingsCf{"fts"};
+static const std::string kMaxTfCf{kPostingsCf + zvec::kFtsMaxTfSuffix};
+static const std::string kPositionsCf{kPostingsCf + zvec::kFtsPositionsSuffix};
+static const std::string kTermFreqCf{kPostingsCf + zvec::kFtsTfSuffix};
+static const std::string kDocLenCf{kPostingsCf + zvec::kFtsDocLenSuffix};
+static const std::string kStatCf{zvec::kFtsStatCfName};
+
+class FtsColumnIndexerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    zvec::FileHelper::RemoveDirectory(kDbPath);
+
+    // Single RocksDB instance with per-CF merge operators.
+    std::vector<std::string> cf_names = {kPostingsCf, kMaxTfCf,  kPositionsCf,
+                                         kTermFreqCf, kDocLenCf, kStatCf};
+    std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+        per_cf_ops = {
+            {kPostingsCf, std::make_shared<FtsPostingsMerge>()},
+            {kMaxTfCf, std::make_shared<FtsMaxTfMerge>()},
+        };
+    ASSERT_TRUE(
+        db_.create(RocksdbContext::Args{kDbPath, cf_names, nullptr, per_cf_ops})
+            .ok());
+
+    postings_cf_ = db_.get_cf(kPostingsCf);
+    max_tf_cf_ = db_.get_cf(kMaxTfCf);
+    positions_cf_ = db_.get_cf(kPositionsCf);
+    term_freq_cf_ = db_.get_cf(kTermFreqCf);
+    doc_len_cf_ = db_.get_cf(kDocLenCf);
+    stat_cf_ = db_.get_cf(kStatCf);
+
+    ASSERT_NE(postings_cf_, nullptr);
+    ASSERT_NE(max_tf_cf_, nullptr);
+    ASSERT_NE(positions_cf_, nullptr);
+    ASSERT_NE(term_freq_cf_, nullptr);
+    ASSERT_NE(doc_len_cf_, nullptr);
+    ASSERT_NE(stat_cf_, nullptr);
+  }
+
+  void TearDown() override {
+    db_.close();
+    zvec::FileHelper::RemoveDirectory(kDbPath);
+  }
+
+  // Create and open a fresh indexer with whitespace tokenizer.
+  // Returns unique_ptr because FtsColumnIndexer is not copyable (atomic
+  // members).
+  std::unique_ptr<FtsColumnIndexer> make_indexer(
+      const std::string &field_name = "content") {
+    auto fts_params = std::make_shared<zvec::FtsIndexParams>("whitespace");
+    auto field_meta = make_test_field_meta(field_name, fts_params);
+    auto indexer = std::make_unique<FtsColumnIndexer>();
+    auto ret = indexer->open(field_meta, &db_, postings_cf_, positions_cf_,
+                             term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+    EXPECT_TRUE(ret.has_value());
+    return indexer;
+  }
+
+  RocksdbContext db_;
+
+  rocksdb::ColumnFamilyHandle *postings_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *max_tf_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *positions_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *term_freq_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *doc_len_cf_{nullptr};
+  rocksdb::ColumnFamilyHandle *stat_cf_{nullptr};
+};
+// ============================================================
+// open()
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, OpenWithValidTokenizer) {
+  auto fts_params = std::make_shared<zvec::FtsIndexParams>("whitespace");
+  auto field_meta = make_test_field_meta("content", fts_params);
+  FtsColumnIndexer indexer;
+  auto ret = indexer.open(field_meta, &db_, postings_cf_, positions_cf_,
+                          term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+  EXPECT_TRUE(ret.has_value());
+  EXPECT_EQ(indexer.total_docs(), 0u);
+  EXPECT_EQ(indexer.total_tokens(), 0u);
+}
+
+TEST_F(FtsColumnIndexerTest, OpenWithNullFieldMetaFails) {
+  FtsColumnIndexer indexer;
+  auto ret =
+      indexer.open(FieldSchema::Ptr{nullptr}, &db_, postings_cf_, positions_cf_,
+                   term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+  EXPECT_FALSE(ret.has_value());
+}
+
+TEST_F(FtsColumnIndexerTest, OpenWithNullStoreFails) {
+  auto fts_params = std::make_shared<zvec::FtsIndexParams>("whitespace");
+  auto field_meta = make_test_field_meta("content", fts_params);
+  FtsColumnIndexer indexer;
+  auto ret =
+      indexer.open(field_meta, /*store=*/nullptr, postings_cf_, positions_cf_,
+                   term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+  EXPECT_FALSE(ret.has_value());
+}
+
+// ============================================================
+// insert() - statistics update
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, InsertUpdatesTotalDocs) {
+  auto indexer = make_indexer();
+
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_EQ(indexer->total_docs(), 1u);
+
+  EXPECT_TRUE(indexer->insert(1, "foo bar baz").has_value());
+  EXPECT_EQ(indexer->total_docs(), 2u);
+}
+
+TEST_F(FtsColumnIndexerTest, InsertUpdatesTotalTokens) {
+  auto indexer = make_indexer();
+
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_EQ(indexer->total_tokens(), 2u);  // "hello", "world"
+
+  EXPECT_TRUE(indexer->insert(1, "foo bar baz").has_value());
+  EXPECT_EQ(indexer->total_tokens(), 5u);  // 2 + 3
+}
+
+TEST_F(FtsColumnIndexerTest, InsertEmptyTextCountsAsZeroTokens) {
+  auto indexer = make_indexer();
+
+  EXPECT_TRUE(indexer->insert(0, "").has_value());
+  EXPECT_EQ(indexer->total_docs(), 1u);
+  EXPECT_EQ(indexer->total_tokens(), 0u);
+}
+
+// ============================================================
+// flush() - persist stats to RocksDB
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, FlushPersistsStats) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "foo bar").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Verify stats were written to stat_cf by opening a standalone reader.
+  // Pass doc_len_cf as nullptr so the reader loads stats from stat_cf.
+  FtsColumnIndexer reader;
+  auto ret = reader.open_reader("content", &db_, postings_cf_, positions_cf_,
+                                term_freq_cf_, max_tf_cf_,
+                                /*doc_len_cf=*/nullptr, stat_cf_);
+  EXPECT_TRUE(ret.has_value());
+  // Reader loads stats from stat_cf on open; search should succeed
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(reader, "hello", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+}
+
+// ============================================================
+// search() - term query
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, SearchTermFound) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());
+  EXPECT_TRUE(indexer->insert(2, "bar baz").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+
+  bool found_doc0 = false;
+  bool found_doc1 = false;
+  for (const auto &result : results) {
+    if (result.doc_id == 0) found_doc0 = true;
+    if (result.doc_id == 1) found_doc1 = true;
+  }
+  EXPECT_TRUE(found_doc0);
+  EXPECT_TRUE(found_doc1);
+}
+
+TEST_F(FtsColumnIndexerTest, SearchTermNotFound) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "missing", 10, &results));
+  EXPECT_TRUE(results.empty());
+}
+
+TEST_F(FtsColumnIndexerTest, SearchResultsSortedByScoreDescending) {
+  auto indexer = make_indexer();
+  // Doc 0: "hello" appears once
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  // Doc 1: "hello" appears twice (higher TF -> higher BM25 score)
+  EXPECT_TRUE(indexer->insert(1, "hello hello").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello", 10, &results));
+  ASSERT_EQ(results.size(), 2u);
+
+  // Results must be in descending score order
+  EXPECT_GE(results[0].score, results[1].score);
+  // Doc 1 (higher TF) should rank first
+  EXPECT_EQ(results[0].doc_id, 1ull);
+}
+
+TEST_F(FtsColumnIndexerTest, SearchTopkLimitsResults) {
+  auto indexer = make_indexer();
+  for (uint64_t doc_id = 0; doc_id < 10; ++doc_id) {
+    EXPECT_TRUE(indexer->insert(doc_id, "hello world").has_value());
+  }
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello", 3, &results));
+  EXPECT_LE(results.size(), 3u);
+}
+
+// ============================================================
+// search() - phrase query
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, SearchPhraseFound) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "machine learning model").has_value());
+  EXPECT_TRUE(indexer->insert(1, "learning machine translation").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "\"machine learning\"", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 0ull);
+}
+
+TEST_F(FtsColumnIndexerTest, SearchPhraseNotFound) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world foo").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "\"hello foo\"", 10, &results));
+  EXPECT_TRUE(results.empty());
+}
+
+// ============================================================
+// search() - boolean query (AND / OR)
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, SearchExplicitAnd) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());  // matches both
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());    // only hello
+  EXPECT_TRUE(indexer->insert(2, "world bar").has_value());    // only world
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello AND world", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 0ull);
+}
+
+TEST_F(FtsColumnIndexerTest, SearchExplicitOr) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "foo bar").has_value());
+  EXPECT_TRUE(indexer->insert(2, "baz qux").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello OR foo", 10, &results));
+  ASSERT_EQ(results.size(), 2u);
+}
+
+TEST_F(FtsColumnIndexerTest, SearchImplicitAdjacency) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "foo bar").has_value());
+
+  // Adjacent terms without operator -> OR semantics (default operator)
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello foo", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+}
+
+// ============================================================
+// search() - must_not modifier
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, SearchMustNotExcludesDoc) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());
+
+  // "hello" matches both; "- world" (with space) excludes doc 0
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello - world", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 1ull);
+}
+
+// `a NOT b` is the new binary AND-NOT operator (`a AND NOT b`).
+TEST_F(FtsColumnIndexerTest, SearchBinaryNotExcludesDoc) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello NOT world", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 1ull);
+}
+
+// `a NOT (b OR c)` — must_not on a parenthesised OR sub-expression must
+// exclude every doc matching either `b` or `c`.
+TEST_F(FtsColumnIndexerTest, SearchMustNotOnGroupedOrExcludesDocs) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(
+      indexer->insert(0, "hello world").has_value());  // excluded (has world)
+  EXPECT_TRUE(
+      indexer->insert(1, "hello foo").has_value());  // excluded (has foo)
+  EXPECT_TRUE(indexer->insert(2, "hello bar").has_value());  // kept
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello NOT (world OR foo)", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 2ull);
+}
+
+// Top-level `-(...)` produces a must_not root and must be rejected by
+// search() (see fts_column_indexer.cc::search early-out).
+TEST_F(FtsColumnIndexerTest, SearchTopLevelMustNotIsRejected) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+
+  // -(hello AND world) => AndNode with must_not=true at the root
+  FtsQueryParser parser;
+  auto ast = parser.parse("-(hello AND world)");
+  ASSERT_NE(ast, nullptr);
+  EXPECT_TRUE(ast->must_not);
+
+  std::vector<FtsResult> results;
+  FtsQueryParams query_params;
+  query_params.topk = 10;
+  EXPECT_FALSE(indexer->search(*ast, query_params).has_value());
+}
+
+// ============================================================
+// BM25 stats are updated in real-time after insert
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, BM25StatsUpdatedAfterInsert) {
+  auto indexer = make_indexer();
+  EXPECT_EQ(indexer->total_docs(), 0u);
+  EXPECT_EQ(indexer->total_tokens(), 0u);
+
+  EXPECT_TRUE(indexer->insert(0, "hello world foo").has_value());
+  EXPECT_EQ(indexer->total_docs(), 1u);
+  EXPECT_EQ(indexer->total_tokens(), 3u);
+
+  EXPECT_TRUE(indexer->insert(1, "bar baz").has_value());
+  EXPECT_EQ(indexer->total_docs(), 2u);
+  EXPECT_EQ(indexer->total_tokens(), 5u);
+}
+
+TEST_F(FtsColumnIndexerTest, SearchScorePositiveAfterInsert) {
+  auto indexer = make_indexer();
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "hello", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_GT(results[0].score, 0.0f);
+}
+
+// ============================================================
+// End-to-end: multiple inserts and searches
+// ============================================================
+
+TEST_F(FtsColumnIndexerTest, MultipleInsertsAndSearches) {
+  auto indexer = make_indexer("content");
+
+  const std::vector<std::string> docs = {
+      "the quick brown fox",
+      "the lazy dog",
+      "quick brown dog",
+      "fox and dog",
+  };
+
+  for (uint64_t doc_id = 0; doc_id < docs.size(); ++doc_id) {
+    EXPECT_TRUE(indexer->insert(doc_id, docs[doc_id]).has_value());
+  }
+
+  EXPECT_EQ(indexer->total_docs(), docs.size());
+
+  // "quick" appears in doc 0 and doc 2
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "quick", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+
+  // "the" appears in doc 0 and doc 1
+  results.clear();
+  EXPECT_TRUE(search_ok(*indexer, "the", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+
+  // "quick AND dog" -> only doc 2
+  results.clear();
+  EXPECT_TRUE(search_ok(*indexer, "quick AND dog", 10, &results));
+  ASSERT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 2ull);
+}
+
+// ============================================================
+// Jieba Chinese tokenizer tests
+// ============================================================
+
+// JIEBA_DICT_DIR points to thirdparty/cppjieba/.../dict/ (injected by CMake).
+#ifndef JIEBA_DICT_DIR
+#define JIEBA_DICT_DIR "."
+#endif
+
+static const std::string kJiebaDictDir{JIEBA_DICT_DIR};
+
+static bool jieba_dict_available() {
+  std::string path = kJiebaDictDir + "/jieba.dict.utf8";
+  std::ifstream ifs(path);
+  return ifs.good();
+}
+
+static std::string make_jieba_extra_params() {
+  return std::string(R"({"dict_path":")") + kJiebaDictDir +
+         R"(/jieba.dict.utf8","model_path":")" + kJiebaDictDir +
+         R"(/hmm_model.utf8"})";
+}
+
+class FtsColumnIndexerJiebaTest : public FtsColumnIndexerTest {
+ protected:
+  void SetUp() override {
+    if (!jieba_dict_available()) {
+      GTEST_SKIP() << "Jieba dict not available at: " << kJiebaDictDir;
+    }
+    FtsColumnIndexerTest::SetUp();
+  }
+  // Create and open a fresh indexer with jieba tokenizer.
+  std::unique_ptr<FtsColumnIndexer> make_jieba_indexer(
+      const std::string &field_name = "content") {
+    auto fts_params = std::make_shared<zvec::FtsIndexParams>(
+        "jieba", std::vector<std::string>{"lowercase"},
+        make_jieba_extra_params());
+    auto field_meta = make_test_field_meta(field_name, fts_params);
+    auto indexer = std::make_unique<FtsColumnIndexer>();
+    auto ret = indexer->open(field_meta, &db_, postings_cf_, positions_cf_,
+                             term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+    EXPECT_TRUE(ret.has_value());
+    return indexer;
+  }
+};
+
+// Verify that jieba tokenizer opens successfully with valid dict paths.
+TEST_F(FtsColumnIndexerJiebaTest, OpenWithJiebaTokenizerSucceeds) {
+  auto fts_params = std::make_shared<zvec::FtsIndexParams>(
+      "jieba", std::vector<std::string>{"lowercase"},
+      make_jieba_extra_params());
+  auto field_meta = make_test_field_meta("content", fts_params);
+  FtsColumnIndexer indexer;
+  auto ret = indexer.open(field_meta, &db_, postings_cf_, positions_cf_,
+                          term_freq_cf_, max_tf_cf_, doc_len_cf_, stat_cf_);
+  EXPECT_TRUE(ret.has_value());
+}
+
+// Verify that jieba tokenizer fails to open when required model_path is
+// missing.  (Note: cppjieba FATAL-aborts on non-existent dict files, so we
+// test the init-time validation in JiebaTokenizer instead.)
+TEST_F(FtsColumnIndexerJiebaTest, OpenWithJiebaTokenizerFailsWithoutModelPath) {
+  fts::FtsIndexParams bad_params;
+  bad_params.tokenizer_name = "jieba";
+  // Provide dict_path but omit model_path — JiebaTokenizer::init should fail.
+  bad_params.extra_params = std::string(R"({"dict_path":")") + kJiebaDictDir +
+                            R"(/jieba.dict.utf8"})";
+  auto pipeline = TokenizerFactory::create(bad_params);
+  EXPECT_EQ(pipeline, nullptr);
+}
+
+// Insert a Chinese sentence and verify that total_docs and total_tokens are
+// updated correctly (jieba should produce at least one token).
+TEST_F(FtsColumnIndexerJiebaTest, InsertChineseTextUpdatesStats) {
+  auto indexer = make_jieba_indexer();
+
+  // "中文分词测试" should be segmented into multiple tokens by jieba.
+  EXPECT_TRUE(indexer->insert(0, "中文分词测试").has_value());
+  EXPECT_EQ(indexer->total_docs(), 1u);
+  EXPECT_GT(indexer->total_tokens(), 0u);
+}
+
+// Insert multiple Chinese documents and verify that a segmented term can be
+// found via search(). The dedicated FtsLexer supports UNICODE_TERM so Chinese
+// words can be used as bare terms without quoting.
+TEST_F(FtsColumnIndexerJiebaTest, SearchChineseTermFound) {
+  auto indexer = make_jieba_indexer();
+
+  // doc 0: contains "中文" and "分词"
+  EXPECT_TRUE(indexer->insert(0, "中文分词技术").has_value());
+  // doc 1: contains "搜索" and "引擎"
+  EXPECT_TRUE(indexer->insert(1, "搜索引擎优化").has_value());
+  // doc 2: contains "中文" again
+  EXPECT_TRUE(indexer->insert(2, "中文搜索").has_value());
+
+  // jieba CutForSearch segments "中文分词技术" → [中文, 分词, 技术, ...] and
+  //                             "中文搜索"     → [中文, 搜索], so doc 0 and
+  //                             doc 2 should match "中文".
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "中文", 10, &results));
+  EXPECT_GE(results.size(), 1u);
+
+  bool found_doc0 = false;
+  bool found_doc2 = false;
+  for (const auto &result : results) {
+    if (result.doc_id == 0) found_doc0 = true;
+    if (result.doc_id == 2) found_doc2 = true;
+  }
+  EXPECT_TRUE(found_doc0);
+  EXPECT_TRUE(found_doc2);
+}
+
+// Verify that a term not present in any document returns empty results.
+TEST_F(FtsColumnIndexerJiebaTest, SearchChineseTermNotFound) {
+  auto indexer = make_jieba_indexer();
+
+  EXPECT_TRUE(indexer->insert(0, "中文分词技术").has_value());
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "日语", 10, &results));
+  EXPECT_EQ(results.size(), 0u);
+}
+
+// Verify BM25 scores are positive after inserting Chinese documents.
+TEST_F(FtsColumnIndexerJiebaTest, SearchChineseTermHasPositiveScore) {
+  auto indexer = make_jieba_indexer();
+
+  EXPECT_TRUE(indexer->insert(0, "自然语言处理技术").has_value());
+  EXPECT_TRUE(indexer->insert(1, "机器学习算法").has_value());
+
+  // Search for a token that jieba should produce from "自然语言处理技术".
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "自然语言", 10, &results));
+  if (!results.empty()) {
+    EXPECT_GT(results[0].score, 0.0f);
+  }
+}
+
+// Verify that topk limits the number of results for Chinese queries.
+TEST_F(FtsColumnIndexerJiebaTest, SearchChineseTermTopkLimitsResults) {
+  auto indexer = make_jieba_indexer();
+
+  // Insert 5 documents all containing "技术"
+  for (uint64_t doc_id = 0; doc_id < 5; ++doc_id) {
+    EXPECT_TRUE(indexer->insert(doc_id, "人工智能技术发展").has_value());
+  }
+
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "技术", /*topk=*/3, &results));
+  EXPECT_LE(results.size(), 3u);
+}
+
+// End-to-end: flush and reload with jieba tokenizer.
+TEST_F(FtsColumnIndexerJiebaTest, FlushAndReloadWithJiebaTokenizer) {
+  auto indexer = make_jieba_indexer("content");
+
+  EXPECT_TRUE(indexer->insert(0, "深度学习模型").has_value());
+  EXPECT_TRUE(indexer->insert(1, "神经网络结构").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Reload via a standalone reader (no tokenizer needed for reading).
+  // Pass doc_len_cf as nullptr so the reader loads stats from stat_cf.
+  FtsColumnIndexer reader;
+  auto ret = reader.open_reader("content", &db_, postings_cf_, positions_cf_,
+                                term_freq_cf_, max_tf_cf_,
+                                /*doc_len_cf=*/nullptr, stat_cf_);
+  EXPECT_TRUE(ret.has_value());
+
+  // Search with a term that jieba produces from "深度学习模型":
+  // jieba CutForSearch segments it into [深度, 学习, 深度学习, 模型].
+  TermNode term_node("模型");
+  FtsQueryParams query_params;
+  query_params.topk = 10;
+  auto search_ret = reader.search(term_node, query_params);
+  EXPECT_TRUE(search_ret.has_value());
+  EXPECT_GE(search_ret.value().size(), 1u);
+}
+
+// ============================================================
+// convert_postings_to_bitpacked()
+// ============================================================
+//
+// These tests exercise the BitPacked conversion path that is invoked from
+// MutableSegment::dump_fts_column_indexers() right before the SST dump.
+// They use the BitPackedPostingList::is_bitpacked_format magic-number probe
+// to verify that postings have been re-encoded, and iterate $TF / $DOC_LEN
+// CFs to verify the DeleteRange tombstones effectively removed all entries.
+
+#include "db/index/column/fts_column/posting/bitpacked_posting_list.h"  // NOLINT: in-test include
+
+namespace {
+
+// Count entries in a CF by iterating from the first key.  Used to verify that
+// $TF / $DOC_LEN have been DeleteRange-cleared.
+size_t count_cf_entries(RocksdbContext &db, rocksdb::ColumnFamilyHandle *cf) {
+  size_t count = 0;
+  std::unique_ptr<rocksdb::Iterator> iter(
+      db.db_->NewIterator(db.read_opts_, cf));
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    ++count;
+  }
+  return count;
+}
+
+// Verify every value in postings_cf_ is in BitPacked format.
+size_t count_postings_entries_and_check_bitpacked(
+    RocksdbContext &db, rocksdb::ColumnFamilyHandle *cf) {
+  size_t count = 0;
+  std::unique_ptr<rocksdb::Iterator> iter(
+      db.db_->NewIterator(db.read_opts_, cf));
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    const std::string value = iter->value().ToString();
+    EXPECT_TRUE(
+        BitPackedPostingList::is_bitpacked_format(value.data(), value.size()))
+        << "Posting for term[" << iter->key().ToString()
+        << "] is not BitPacked";
+    ++count;
+  }
+  return count;
+}
+
+}  // namespace
+
+// Insert N docs, run the conversion, and verify:
+//   - postings_cf_ values all carry the BitPacked magic
+//   - decoded posting iterators yield the original (doc_id, tf, doc_len)
+//   - $TF / $DOC_LEN CFs are empty
+TEST_F(FtsColumnIndexerTest, ConvertPostingsToBitpackedBasic) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo bar").has_value());
+  EXPECT_TRUE(indexer->insert(2, "hello hello world").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  EXPECT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+
+  // All postings must now be BitPacked.
+  size_t postings_count =
+      count_postings_entries_and_check_bitpacked(db_, postings_cf_);
+  EXPECT_GT(postings_count, 0u);
+
+  // Spot-check: decode the "hello" posting and confirm doc_ids/tfs/doc_lens
+  // match what we wrote.  Doc 0 -> tf=1, dl=2; Doc 1 -> tf=1, dl=3; Doc 2 ->
+  // tf=2, dl=3.
+  std::string raw;
+  ASSERT_TRUE(db_.db_->Get(db_.read_opts_, postings_cf_, "hello", &raw).ok());
+  ASSERT_FALSE(raw.empty());
+  ASSERT_TRUE(
+      BitPackedPostingList::is_bitpacked_format(raw.data(), raw.size()));
+
+  BitPackedPostingIterator iter;
+  ASSERT_EQ(iter.open(raw.data(), raw.size()), 0);
+
+  std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> decoded;
+  while (true) {
+    uint32_t did = iter.next_doc();
+    if (did == BitPackedPostingIterator::NO_MORE_DOCS) break;
+    decoded.emplace_back(did, iter.term_freq(), iter.doc_len());
+  }
+  ASSERT_EQ(decoded.size(), 3u);
+  EXPECT_EQ(std::get<0>(decoded[0]), 0u);
+  EXPECT_EQ(std::get<1>(decoded[0]), 1u);
+  EXPECT_EQ(std::get<2>(decoded[0]), 2u);
+  EXPECT_EQ(std::get<0>(decoded[1]), 1u);
+  EXPECT_EQ(std::get<1>(decoded[1]), 1u);
+  EXPECT_EQ(std::get<2>(decoded[1]), 3u);
+  EXPECT_EQ(std::get<0>(decoded[2]), 2u);
+  EXPECT_EQ(std::get<1>(decoded[2]), 2u);
+  EXPECT_EQ(std::get<2>(decoded[2]), 3u);
+}
+
+// After conversion the $TF / $DOC_LEN / $MAX_TF side CFs must be EMPTY: the
+// indexer DeleteRange's them once their content has been inlined into the
+// BitPacked posting list.  MutableSegment then drops the CFs entirely.
+TEST_F(FtsColumnIndexerTest, ConvertPostingsToBitpackedClearsSideCfs) {
+  auto indexer = make_indexer("content");
+  for (uint64_t doc_id = 0; doc_id < 5; ++doc_id) {
+    EXPECT_TRUE(indexer->insert(doc_id, "alpha beta gamma").has_value());
+  }
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Sanity: side CFs are populated before conversion.
+  EXPECT_GT(count_cf_entries(db_, term_freq_cf_), 0u);
+  EXPECT_GT(count_cf_entries(db_, doc_len_cf_), 0u);
+  EXPECT_GT(count_cf_entries(db_, max_tf_cf_), 0u);
+
+  EXPECT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+
+  // Side CFs must be empty after conversion (DeleteRange'd by the indexer).
+  EXPECT_EQ(count_cf_entries(db_, term_freq_cf_), 0u);
+  EXPECT_EQ(count_cf_entries(db_, doc_len_cf_), 0u);
+  EXPECT_EQ(count_cf_entries(db_, max_tf_cf_), 0u);
+
+  // After reset_side_cfs, search should still work (BitPacked path).
+  indexer->reset_side_cfs();
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(*indexer, "alpha", 10, &results));
+  EXPECT_EQ(results.size(), 5u);
+}
+
+// Conversion must be idempotent: calling it twice should not corrupt postings,
+// nor should it re-encode terms that are already BitPacked.
+TEST_F(FtsColumnIndexerTest, ConvertPostingsToBitpackedIsIdempotent) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  EXPECT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+
+  // Snapshot the BitPacked posting for "hello" after the first conversion.
+  std::string snapshot;
+  ASSERT_TRUE(
+      db_.db_->Get(db_.read_opts_, postings_cf_, "hello", &snapshot).ok());
+  ASSERT_FALSE(snapshot.empty());
+
+  // Second invocation must succeed and leave the posting byte-for-byte
+  // identical (the idempotency guard skips re-encoding).
+  EXPECT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+
+  std::string after;
+  ASSERT_TRUE(db_.db_->Get(db_.read_opts_, postings_cf_, "hello", &after).ok());
+  EXPECT_EQ(snapshot, after);
+}
+
+// An indexer with no inserted documents must still allow the conversion to
+// succeed (no-op path) — this matches MutableSegment dump-flow expectations
+// for FTS fields that received zero writes.
+TEST_F(FtsColumnIndexerTest, ConvertPostingsToBitpackedEmptyIndexer) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->flush().has_value());
+  EXPECT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+  EXPECT_EQ(count_postings_entries_and_check_bitpacked(db_, postings_cf_), 0u);
+  // Side CFs were never populated (empty indexer); no special expectation
+  // about them here beyond "the conversion did not crash".
+}
+
+// After conversion the search() path must keep working — readers fall through
+// to the BitPacked branch via is_bitpacked_format(), and no longer require the
+// $TF / $DOC_LEN CFs.
+TEST_F(FtsColumnIndexerTest, SearchAfterConvertPostingsToBitpacked) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "the quick brown fox").has_value());
+  EXPECT_TRUE(indexer->insert(1, "the lazy dog").has_value());
+  EXPECT_TRUE(indexer->insert(2, "quick brown dog").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Pre-conversion baseline: "quick" hits doc 0 and doc 2.
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "quick", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 2u);
+
+  EXPECT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+
+  // Post-conversion via a standalone reader (mirrors immutable segment use).
+  // Side CFs are passed as nullptr — immutable segments no longer register
+  // them.
+  FtsColumnIndexer reader;
+  ASSERT_TRUE(reader
+                  .open_reader("content", &db_, postings_cf_, positions_cf_,
+                               /*term_freq_cf=*/nullptr, /*max_tf_cf=*/nullptr,
+                               /*doc_len_cf=*/nullptr, stat_cf_)
+                  .has_value());
+  std::vector<FtsResult> results;
+  EXPECT_TRUE(search_ok(reader, "quick", 10, &results));
+  ASSERT_EQ(results.size(), 2u);
+
+  // Same set of doc_ids as the baseline; scores may differ slightly because
+  // the reader loaded stats fresh from stat_cf, but both must be positive.
+  std::vector<uint64_t> ids;
+  for (const auto &r : results) {
+    ids.push_back(r.doc_id);
+    EXPECT_GT(r.score, 0.0f);
+  }
+  std::sort(ids.begin(), ids.end());
+  EXPECT_EQ(ids[0], 0ull);
+  EXPECT_EQ(ids[1], 2ull);
+}
+
+// ============================================================
+// Multi-column shared RocksDB tests
+//
+// Mirrors the CF-naming scheme used by SegmentImpl::open_fts_indexers():
+//   field_name           -> postings CF
+//   field_name_positions -> positions CF
+//   field_name_tf        -> term-freq CF
+//   field_name_max_tf    -> max-tf CF
+//   field_name_doc_len   -> doc-len CF
+//   fts_stat             -> shared stat CF
+// ============================================================
+
+static const std::string kMultiDbPath{"./test_fts_multi_db"};
+
+class FtsMultiColumnSharedDbTest : public ::testing::Test {
+ protected:
+  // Two FTS fields sharing the same RocksDB instance.
+  static constexpr const char *kFields[] = {"title", "body"};
+  static constexpr size_t kNumFields = 2;
+
+  void SetUp() override {
+    zvec::FileHelper::RemoveDirectory(kMultiDbPath);
+
+    // Build CF names and per-CF merge operators following the segment pattern.
+    std::vector<std::string> cf_names;
+    std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+        per_cf_ops;
+
+    for (size_t i = 0; i < kNumFields; ++i) {
+      std::string f{kFields[i]};
+      cf_names.push_back(f);                        // postings
+      cf_names.push_back(f + kFtsPositionsSuffix);  // positions
+      cf_names.push_back(f + kFtsTfSuffix);         // term freq
+      cf_names.push_back(f + kFtsMaxTfSuffix);      // max tf
+      cf_names.push_back(f + kFtsDocLenSuffix);     // doc len
+
+      per_cf_ops[f] = std::make_shared<FtsPostingsMerge>();
+      per_cf_ops[f + kFtsMaxTfSuffix] = std::make_shared<FtsMaxTfMerge>();
+    }
+    cf_names.push_back(zvec::kFtsStatCfName);
+
+    ASSERT_TRUE(db_.create(RocksdbContext::Args{kMultiDbPath, cf_names, nullptr,
+                                                per_cf_ops})
+                    .ok());
+
+    // Resolve CF handles per field.
+    for (size_t i = 0; i < kNumFields; ++i) {
+      std::string f{kFields[i]};
+      postings_cf_[i] = db_.get_cf(f);
+      positions_cf_[i] = db_.get_cf(f + kFtsPositionsSuffix);
+      term_freq_cf_[i] = db_.get_cf(f + kFtsTfSuffix);
+      max_tf_cf_[i] = db_.get_cf(f + kFtsMaxTfSuffix);
+      doc_len_cf_[i] = db_.get_cf(f + kFtsDocLenSuffix);
+      ASSERT_NE(postings_cf_[i], nullptr) << "field=" << f;
+      ASSERT_NE(positions_cf_[i], nullptr) << "field=" << f;
+      ASSERT_NE(term_freq_cf_[i], nullptr) << "field=" << f;
+      ASSERT_NE(max_tf_cf_[i], nullptr) << "field=" << f;
+      ASSERT_NE(doc_len_cf_[i], nullptr) << "field=" << f;
+    }
+    stat_cf_ = db_.get_cf(zvec::kFtsStatCfName);
+    ASSERT_NE(stat_cf_, nullptr);
+  }
+
+  void TearDown() override {
+    db_.close();
+    zvec::FileHelper::RemoveDirectory(kMultiDbPath);
+  }
+
+  // Return the array index for a field name (0 = title, 1 = body).
+  size_t field_index(const std::string &field_name) const {
+    for (size_t i = 0; i < kNumFields; ++i) {
+      if (field_name == kFields[i]) return i;
+    }
+    ADD_FAILURE() << "Unknown field: " << field_name;
+    return 0;
+  }
+
+  // Create and open a FtsColumnIndexer bound to the CFs of the given field.
+  std::unique_ptr<FtsColumnIndexer> make_indexer(
+      const std::string &field_name) {
+    size_t idx = field_index(field_name);
+    auto fts_params = std::make_shared<zvec::FtsIndexParams>("whitespace");
+    auto field_meta = make_test_field_meta(field_name, fts_params);
+    auto indexer = std::make_unique<FtsColumnIndexer>();
+    auto ret = indexer->open(field_meta, &db_, postings_cf_[idx],
+                             positions_cf_[idx], term_freq_cf_[idx],
+                             max_tf_cf_[idx], doc_len_cf_[idx], stat_cf_);
+    EXPECT_TRUE(ret.has_value());
+    return indexer;
+  }
+
+  RocksdbContext db_;
+  rocksdb::ColumnFamilyHandle *postings_cf_[kNumFields]{};
+  rocksdb::ColumnFamilyHandle *positions_cf_[kNumFields]{};
+  rocksdb::ColumnFamilyHandle *term_freq_cf_[kNumFields]{};
+  rocksdb::ColumnFamilyHandle *max_tf_cf_[kNumFields]{};
+  rocksdb::ColumnFamilyHandle *doc_len_cf_[kNumFields]{};
+  rocksdb::ColumnFamilyHandle *stat_cf_{nullptr};
+};
+
+// Two FTS columns write different documents; search on each column only
+// returns hits from that column's data.
+TEST_F(FtsMultiColumnSharedDbTest, MultiColumnInsertAndSearchIsolation) {
+  auto title_indexer = make_indexer("title");
+  auto body_indexer = make_indexer("body");
+
+  // title column: documents about animals
+  EXPECT_TRUE(title_indexer->insert(0, "quick brown fox").has_value());
+  EXPECT_TRUE(title_indexer->insert(1, "lazy dog").has_value());
+
+  // body column: documents about programming
+  EXPECT_TRUE(body_indexer->insert(0, "hello world program").has_value());
+  EXPECT_TRUE(body_indexer->insert(1, "quick sort algorithm").has_value());
+
+  // Search "quick" in title -> only doc 0
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(*title_indexer, "quick", 10, &results));
+    ASSERT_EQ(results.size(), 1u);
+    EXPECT_EQ(results[0].doc_id, 0ull);
+  }
+
+  // Search "quick" in body -> only doc 1
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(*body_indexer, "quick", 10, &results));
+    ASSERT_EQ(results.size(), 1u);
+    EXPECT_EQ(results[0].doc_id, 1ull);
+  }
+
+  // Search "hello" in title -> no results
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(*title_indexer, "hello", 10, &results));
+    EXPECT_TRUE(results.empty());
+  }
+
+  // Search "fox" in body -> no results
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(*body_indexer, "fox", 10, &results));
+    EXPECT_TRUE(results.empty());
+  }
+}
+
+// Flush both columns, then open read-only readers and verify each column's
+// search results survive the reload.
+TEST_F(FtsMultiColumnSharedDbTest, MultiColumnFlushAndReload) {
+  auto title_indexer = make_indexer("title");
+  auto body_indexer = make_indexer("body");
+
+  EXPECT_TRUE(title_indexer->insert(0, "alpha beta gamma").has_value());
+  EXPECT_TRUE(body_indexer->insert(0, "delta epsilon").has_value());
+  EXPECT_TRUE(body_indexer->insert(1, "alpha zeta").has_value());
+
+  EXPECT_TRUE(title_indexer->flush().has_value());
+  EXPECT_TRUE(body_indexer->flush().has_value());
+
+  // Open standalone readers (pass doc_len_cf as nullptr to exercise the
+  // stat-CF reload path, matching immutable segment behaviour).
+  size_t ti = field_index("title");
+  size_t bi = field_index("body");
+
+  FtsColumnIndexer title_reader;
+  ASSERT_TRUE(title_reader
+                  .open_reader("title", &db_, postings_cf_[ti],
+                               positions_cf_[ti], term_freq_cf_[ti],
+                               max_tf_cf_[ti], /*doc_len_cf=*/nullptr, stat_cf_)
+                  .has_value());
+
+  FtsColumnIndexer body_reader;
+  ASSERT_TRUE(body_reader
+                  .open_reader("body", &db_, postings_cf_[bi],
+                               positions_cf_[bi], term_freq_cf_[bi],
+                               max_tf_cf_[bi], /*doc_len_cf=*/nullptr, stat_cf_)
+                  .has_value());
+
+  // title reader: "alpha" -> doc 0 only
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(title_reader, "alpha", 10, &results));
+    ASSERT_EQ(results.size(), 1u);
+    EXPECT_EQ(results[0].doc_id, 0ull);
+  }
+
+  // body reader: "alpha" -> doc 1 only
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(body_reader, "alpha", 10, &results));
+    ASSERT_EQ(results.size(), 1u);
+    EXPECT_EQ(results[0].doc_id, 1ull);
+  }
+
+  // body reader: "delta" -> doc 0 only
+  {
+    std::vector<FtsResult> results;
+    EXPECT_TRUE(search_ok(body_reader, "delta", 10, &results));
+    ASSERT_EQ(results.size(), 1u);
+    EXPECT_EQ(results[0].doc_id, 0ull);
+  }
+}
+
+// Each column maintains independent total_docs and total_tokens counters.
+TEST_F(FtsMultiColumnSharedDbTest, MultiColumnStatsIndependent) {
+  auto title_indexer = make_indexer("title");
+  auto body_indexer = make_indexer("body");
+
+  // title: 2 docs, 4 tokens
+  EXPECT_TRUE(title_indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(title_indexer->insert(1, "foo bar").has_value());
+  EXPECT_EQ(title_indexer->total_docs(), 2u);
+  EXPECT_EQ(title_indexer->total_tokens(), 4u);
+
+  // body: 1 doc, 3 tokens
+  EXPECT_TRUE(body_indexer->insert(0, "alpha beta gamma").has_value());
+  EXPECT_EQ(body_indexer->total_docs(), 1u);
+  EXPECT_EQ(body_indexer->total_tokens(), 3u);
+
+  // Inserting into body must not affect title's counters.
+  EXPECT_EQ(title_indexer->total_docs(), 2u);
+  EXPECT_EQ(title_indexer->total_tokens(), 4u);
+}
+
+// ============================================================
+// Filter pushdown into FTS iterators (single-term / OR / Phrase)
+// ============================================================
+
+namespace {
+
+// Build an IndexFilter that excludes any doc_id present in `blocked`.
+zvec::IndexFilter::Ptr make_blocked_filter(
+    std::initializer_list<uint64_t> blocked) {
+  std::unordered_set<uint64_t> set(blocked);
+  return zvec::EasyIndexFilter::Create(
+      [set = std::move(set)](uint64_t id) { return set.count(id) > 0; });
+}
+
+}  // namespace
+
+// Single-term query path: TermDocIterator inherits the base-class default
+// next_doc(filter), which loops over next_doc() and skips filtered docs.
+TEST_F(FtsColumnIndexerTest, FilterPushdownExcludesFilteredDocs) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());
+  EXPECT_TRUE(indexer->insert(2, "hello world bar").has_value());
+  EXPECT_TRUE(indexer->insert(3, "hello baz").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Baseline: no filter — all 4 docs match "hello".
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "hello", 10, &baseline));
+  EXPECT_EQ(baseline.size(), 4u);
+
+  // Block docs 1 and 3.
+  std::vector<FtsResult> filtered;
+  EXPECT_TRUE(search_ok_with_filter(*indexer, "hello", 10,
+                                    make_blocked_filter({1, 3}), &filtered));
+  ASSERT_EQ(filtered.size(), 2u);
+
+  std::vector<uint64_t> ids;
+  for (const auto &r : filtered) {
+    ids.push_back(r.doc_id);
+    EXPECT_GT(r.score, 0.0f);
+  }
+  std::sort(ids.begin(), ids.end());
+  EXPECT_EQ(ids[0], 0ull);
+  EXPECT_EQ(ids[1], 2ull);
+}
+
+// OR query exercises DisjunctionIterator::next_doc(filter) override —
+// pivot_doc is filter-checked before block-max accumulation and resort.
+TEST_F(FtsColumnIndexerTest, FilterPushdownWithDisjunction) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "alpha beta").has_value());
+  EXPECT_TRUE(indexer->insert(1, "alpha gamma").has_value());
+  EXPECT_TRUE(indexer->insert(2, "beta gamma").has_value());
+  EXPECT_TRUE(indexer->insert(3, "alpha beta gamma").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Baseline: "alpha OR beta" matches all 4 docs.
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "alpha beta", 10, &baseline));
+  EXPECT_EQ(baseline.size(), 4u);
+
+  std::vector<FtsResult> filtered;
+  EXPECT_TRUE(search_ok_with_filter(*indexer, "alpha beta", 10,
+                                    make_blocked_filter({0, 2}), &filtered));
+  ASSERT_EQ(filtered.size(), 2u);
+
+  std::vector<uint64_t> ids;
+  for (const auto &r : filtered) {
+    ids.push_back(r.doc_id);
+    EXPECT_GT(r.score, 0.0f);
+  }
+  std::sort(ids.begin(), ids.end());
+  EXPECT_EQ(ids[0], 1ull);
+  EXPECT_EQ(ids[1], 3ull);
+}
+
+// Phrase query exercises PhraseDocIterator::next_doc(filter) -> inner
+// ConjunctionIterator::next_doc(filter), ensuring verify_phrase_positions()
+// is never executed for filtered docs.
+TEST_F(FtsColumnIndexerTest, FilterPushdownWithPhrase) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "machine learning model").has_value());
+  EXPECT_TRUE(indexer->insert(1, "machine learning notes").has_value());
+  EXPECT_TRUE(indexer->insert(2, "learning machine translation").has_value());
+  EXPECT_TRUE(indexer->insert(3, "machine learning systems").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Baseline: phrase "machine learning" matches docs 0, 1, 3 (not 2, where
+  // the order is reversed).
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "\"machine learning\"", 10, &baseline));
+  EXPECT_EQ(baseline.size(), 3u);
+
+  // Block docs 1 and 3 — only doc 0 should remain.
+  std::vector<FtsResult> filtered;
+  EXPECT_TRUE(search_ok_with_filter(*indexer, "\"machine learning\"", 10,
+                                    make_blocked_filter({1, 3}), &filtered));
+  ASSERT_EQ(filtered.size(), 1u);
+  EXPECT_EQ(filtered[0].doc_id, 0ull);
+  EXPECT_GT(filtered[0].score, 0.0f);
+}
+
+// ============================================================
+// Brute-force (candidate-driven) mode via FtsQueryParams.candidate_ids
+// ============================================================
+
+namespace {
+
+// Helper: run a query with an explicit candidate id list.
+template <typename Reader>
+static bool search_ok_with_candidates(Reader &reader,
+                                      const std::string &query_str,
+                                      uint32_t topk,
+                                      std::vector<uint64_t> candidates,
+                                      std::vector<FtsResult> *results) {
+  FtsQueryParser parser;
+  auto ast = parser.parse(query_str);
+  if (!ast) {
+    ADD_FAILURE() << "FtsQueryParser failed to parse: " << query_str
+                  << " err: " << parser.err_msg();
+    return false;
+  }
+  zvec::fts::FtsQueryParams qp;
+  qp.topk = topk;
+  qp.candidate_ids = std::move(candidates);
+  auto ret = reader.search(*ast, qp);
+  if (!ret.has_value()) {
+    return false;
+  }
+  *results = std::move(ret.value());
+  return true;
+}
+
+// Compare two result vectors as (doc_id, score) sets — order independent on
+// doc_id, scores compared with FLOAT_EQ. Brute-force and posting-driven
+// paths reuse the same TermDocIterator / BM25Scorer so scores must agree.
+static void ExpectSameResults(std::vector<FtsResult> a,
+                              std::vector<FtsResult> b) {
+  ASSERT_EQ(a.size(), b.size());
+  auto by_id = [](const FtsResult &x, const FtsResult &y) {
+    return x.doc_id < y.doc_id;
+  };
+  std::sort(a.begin(), a.end(), by_id);
+  std::sort(b.begin(), b.end(), by_id);
+  for (size_t i = 0; i < a.size(); ++i) {
+    EXPECT_EQ(a[i].doc_id, b[i].doc_id) << "i=" << i;
+    EXPECT_FLOAT_EQ(a[i].score, b[i].score) << "i=" << i;
+  }
+}
+
+}  // namespace
+
+// Single-term query: candidate-driven path returns the intersection of the
+// term posting and the candidate set, with the same BM25 scores as the
+// posting-driven baseline.
+TEST_F(FtsColumnIndexerTest, BruteForceTermMatchesPostingDriven) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "hello world").has_value());
+  EXPECT_TRUE(indexer->insert(1, "hello foo").has_value());
+  EXPECT_TRUE(indexer->insert(2, "hello world bar").has_value());
+  EXPECT_TRUE(indexer->insert(3, "hello baz").has_value());
+  EXPECT_TRUE(indexer->insert(4, "world only").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // Baseline: "hello" matches docs 0,1,2,3.
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "hello", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 4u);
+
+  // Candidate-driven with {1, 2, 4} -> expect {1, 2} (4 is not in posting).
+  std::vector<FtsResult> bf;
+  EXPECT_TRUE(search_ok_with_candidates(*indexer, "hello", 10,
+                                        /*candidates=*/{1, 2, 4}, &bf));
+
+  std::vector<FtsResult> expected;
+  for (const auto &r : baseline) {
+    if (r.doc_id == 1 || r.doc_id == 2) expected.push_back(r);
+  }
+  ExpectSameResults(std::move(expected), std::move(bf));
+}
+
+// Disjunction (OR) — same BM25 score, only intersected docs returned.
+TEST_F(FtsColumnIndexerTest, BruteForceDisjunctionMatchesPostingDriven) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "alpha beta").has_value());
+  EXPECT_TRUE(indexer->insert(1, "alpha gamma").has_value());
+  EXPECT_TRUE(indexer->insert(2, "beta gamma").has_value());
+  EXPECT_TRUE(indexer->insert(3, "alpha beta gamma").has_value());
+  EXPECT_TRUE(indexer->insert(4, "delta").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "alpha beta", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 4u);  // 0,1,2,3 all match OR
+
+  std::vector<FtsResult> bf;
+  EXPECT_TRUE(search_ok_with_candidates(*indexer, "alpha beta", 10,
+                                        /*candidates=*/{0, 3, 4}, &bf));
+
+  std::vector<FtsResult> expected;
+  for (const auto &r : baseline) {
+    if (r.doc_id == 0 || r.doc_id == 3) expected.push_back(r);
+  }
+  ExpectSameResults(std::move(expected), std::move(bf));
+}
+
+// Conjunction (AND) — wrapped AND-of-AND is semantically transparent.
+TEST_F(FtsColumnIndexerTest, BruteForceConjunctionMatchesPostingDriven) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "alpha beta gamma").has_value());
+  EXPECT_TRUE(indexer->insert(1, "alpha gamma").has_value());  // missing beta
+  EXPECT_TRUE(indexer->insert(2, "alpha beta").has_value());   // missing gamma
+  EXPECT_TRUE(indexer->insert(3, "alpha beta gamma").has_value());
+  EXPECT_TRUE(indexer->insert(4, "alpha beta gamma").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "alpha AND beta AND gamma", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 3u);  // 0,3,4
+
+  std::vector<FtsResult> bf;
+  EXPECT_TRUE(search_ok_with_candidates(*indexer, "alpha AND beta AND gamma",
+                                        10, /*candidates=*/{0, 1, 4}, &bf));
+
+  std::vector<FtsResult> expected;
+  for (const auto &r : baseline) {
+    if (r.doc_id == 0 || r.doc_id == 4) expected.push_back(r);
+  }
+  ExpectSameResults(std::move(expected), std::move(bf));
+}
+
+// Phrase query — phase-2 position check is preserved in candidate-driven mode.
+TEST_F(FtsColumnIndexerTest, BruteForcePhraseMatchesPostingDriven) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "machine learning model").has_value());
+  EXPECT_TRUE(indexer->insert(1, "machine notes learning").has_value());
+  EXPECT_TRUE(indexer->insert(2, "the machine learning jumps").has_value());
+  EXPECT_TRUE(indexer->insert(3, "learning machine").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "\"machine learning\"", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 2u);  // 0,2
+
+  // Candidate set = {1, 2, 3}: only 2 is a real phrase match.
+  std::vector<FtsResult> bf;
+  EXPECT_TRUE(search_ok_with_candidates(*indexer, "\"machine learning\"", 10,
+                                        /*candidates=*/{1, 2, 3}, &bf));
+
+  std::vector<FtsResult> expected;
+  for (const auto &r : baseline) {
+    if (r.doc_id == 2) expected.push_back(r);
+  }
+  ExpectSameResults(std::move(expected), std::move(bf));
+}
+
+// Nested (AND of OR) — root iterator type does not matter; wrap is
+// transparent.
+TEST_F(FtsColumnIndexerTest, BruteForceNestedMatchesPostingDriven) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "alpha").has_value());
+  EXPECT_TRUE(indexer->insert(1, "beta").has_value());
+  EXPECT_TRUE(indexer->insert(2, "alpha gamma").has_value());  // matches
+  EXPECT_TRUE(indexer->insert(3, "beta gamma").has_value());   // matches
+  EXPECT_TRUE(indexer->insert(4, "gamma only").has_value());   // no alpha/beta
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  // (alpha OR beta) AND gamma -> docs 2, 3
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "(alpha OR beta) AND gamma", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 2u);
+
+  std::vector<FtsResult> bf;
+  EXPECT_TRUE(search_ok_with_candidates(*indexer, "(alpha OR beta) AND gamma",
+                                        10, /*candidates=*/{2, 4}, &bf));
+
+  std::vector<FtsResult> expected;
+  for (const auto &r : baseline) {
+    if (r.doc_id == 2) expected.push_back(r);
+  }
+  ExpectSameResults(std::move(expected), std::move(bf));
+}
+
+// Candidate-driven coexists with the existing filter pushdown:
+// candidate_ids narrows the doc set; filter further drops some.
+TEST_F(FtsColumnIndexerTest, BruteForceCoexistsWithFilterPushdown) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "alpha").has_value());
+  EXPECT_TRUE(indexer->insert(1, "alpha").has_value());
+  EXPECT_TRUE(indexer->insert(2, "alpha").has_value());
+  EXPECT_TRUE(indexer->insert(3, "alpha").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  FtsQueryParser parser;
+  auto ast = parser.parse("alpha");
+  ASSERT_NE(ast, nullptr);
+
+  zvec::fts::FtsQueryParams qp;
+  qp.topk = 10;
+  qp.candidate_ids = {0, 1, 2};          // candidates restrict to {0,1,2}
+  qp.filter = make_blocked_filter({1});  // further drop doc 1
+  auto ret = indexer->search(*ast, qp);
+  ASSERT_TRUE(ret.has_value());
+  auto results = std::move(ret.value());
+  ASSERT_EQ(results.size(), 2u);
+
+  std::vector<uint64_t> ids;
+  for (const auto &r : results) ids.push_back(r.doc_id);
+  std::sort(ids.begin(), ids.end());
+  EXPECT_EQ(ids[0], 0ull);
+  EXPECT_EQ(ids[1], 2ull);
+}
+
+// Empty candidate_ids takes the regular posting-driven path (the wrap guard
+// requires non-empty), so search still finds all matching docs.
+TEST_F(FtsColumnIndexerTest, BruteForceEmptyCandidatesFallsBack) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "alpha beta").has_value());
+  EXPECT_TRUE(indexer->insert(1, "alpha gamma").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  std::vector<FtsResult> r;
+  EXPECT_TRUE(search_ok_with_candidates(*indexer, "alpha", 10, {}, &r));
+  EXPECT_EQ(r.size(), 2u);
+}
+
+// Regression guard: a null filter yields the same doc_ids and scores as the
+// baseline path (which still uses the no-filter next_doc() overload).
+TEST_F(FtsColumnIndexerTest, FilterPushdownNullFilterUnchanged) {
+  auto indexer = make_indexer("content");
+  EXPECT_TRUE(indexer->insert(0, "quick brown fox").has_value());
+  EXPECT_TRUE(indexer->insert(1, "lazy brown dog").has_value());
+  EXPECT_TRUE(indexer->flush().has_value());
+
+  std::vector<FtsResult> baseline;
+  EXPECT_TRUE(search_ok(*indexer, "brown", 10, &baseline));
+  ASSERT_EQ(baseline.size(), 2u);
+
+  std::vector<FtsResult> with_null;
+  EXPECT_TRUE(search_ok_with_filter(*indexer, "brown", 10, /*filter=*/nullptr,
+                                    &with_null));
+  ASSERT_EQ(with_null.size(), 2u);
+
+  auto by_id = [](const FtsResult &a, const FtsResult &b) {
+    return a.doc_id < b.doc_id;
+  };
+  std::sort(baseline.begin(), baseline.end(), by_id);
+  std::sort(with_null.begin(), with_null.end(), by_id);
+  for (size_t i = 0; i < baseline.size(); ++i) {
+    EXPECT_EQ(baseline[i].doc_id, with_null[i].doc_id);
+    EXPECT_FLOAT_EQ(baseline[i].score, with_null[i].score);
+  }
+}
diff --git a/tests/db/index/column/fts_column/fts_rocksdb_reducer_test.cc b/tests/db/index/column/fts_column/fts_rocksdb_reducer_test.cc
new file mode 100644
index 000000000..16fe0f3dc
--- /dev/null
+++ b/tests/db/index/column/fts_column/fts_rocksdb_reducer_test.cc
@@ -0,0 +1,1068 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "db/index/column/fts_column/fts_rocksdb_reducer.h"
+#include <cstring>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <gtest/gtest.h>
+#include <roaring/roaring.h>
+#include <zvec/db/index_params.h>
+#include "db/common/file_helper.h"
+// FtsSegmentStats defined below
+#include "db/index/column/fts_column/fts_column_indexer.h"
+#include "db/index/column/fts_column/fts_rocksdb_merge.h"
+#include "db/index/column/fts_column/parser/fts_query_parser.h"
+#include "db/index/column/fts_column/posting/bitpacked_posting_list.h"
+// meta.h not needed in zvec
+#include "db/common/constants.h"
+#include "db/common/rocksdb_context.h"
+#include "db/index/column/fts_column/fts_utils.h"
+
+using namespace zvec::fts;
+using namespace zvec;
+using namespace zvec::fts;
+
+// Helper: parse a query string and call search() on a reader.
+// Returns true on success, false on failure.
+template <typename Reader>
+static bool search_str_ok(Reader &reader, const std::string &query_str,
+                          uint32_t topk, std::vector<FtsResult> *results) {
+  FtsQueryParser parser;
+  auto ast = parser.parse(query_str);
+  if (!ast) {
+    ADD_FAILURE() << "FtsQueryParser failed to parse: " << query_str
+                  << " err: " << parser.err_msg();
+    return false;
+  }
+  zvec::fts::FtsQueryParams qp;
+  qp.topk = topk;
+  auto ret = reader.search(*ast, qp);
+  if (!ret.has_value()) {
+    return false;
+  }
+  *results = std::move(ret.value());
+  return true;
+}
+
+// ============================================================
+// Constants
+// ============================================================
+
+static const std::string kTestDir{"./test_fts_reducer"};
+static const std::string kSrc0Dir{kTestDir + "/src0"};
+static const std::string kSrc1Dir{kTestDir + "/src1"};
+static const std::string kDstDir{kTestDir + "/dst"};
+static const std::string kMid0Dir{kTestDir + "/mid0"};
+static const std::string kMid1Dir{kTestDir + "/mid1"};
+static const std::string kDst2Dir{kTestDir + "/dst2"};
+
+static const std::string kPostingsCf{"fts"};
+static const std::string kMaxTfCf{kPostingsCf + zvec::kFtsMaxTfSuffix};
+static const std::string kPositionsCf{kPostingsCf + zvec::kFtsPositionsSuffix};
+static const std::string kTermFreqCf{kPostingsCf + zvec::kFtsTfSuffix};
+static const std::string kDocLenCf{kPostingsCf + zvec::kFtsDocLenSuffix};
+static const std::string kStatCf{zvec::kFtsStatCfName};
+
+static const std::string kFieldName{"content"};
+
+// ============================================================
+// Helper: build a transient FieldMeta with whitespace tokenizer for tests
+// ============================================================
+
+static FieldSchema::Ptr MakeWhitespaceFieldMeta(const std::string &field_name) {
+  auto fts_params = std::make_shared<zvec::FtsIndexParams>("whitespace");
+  return std::make_shared<FieldSchema>(field_name, DataType::STRING, false,
+                                       fts_params);
+}
+
+// ============================================================
+// Helper: open a RocksDB store with FTS merge operators
+// ============================================================
+
+// Build RocksDB args for source/indexer stores (mutable stage: includes side
+// CFs).
+static Status OpenFtsStoreWithSideCfs(RocksdbContext &db,
+                                      const std::string &data_dir) {
+  std::vector<std::string> cf_names = {kPostingsCf, kMaxTfCf,  kPositionsCf,
+                                       kTermFreqCf, kDocLenCf, kStatCf};
+  std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+      per_cf_ops = {
+          {kPostingsCf, std::make_shared<FtsPostingsMerge>()},
+          {kMaxTfCf, std::make_shared<FtsMaxTfMerge>()},
+      };
+  return db.create(
+      RocksdbContext::Args{data_dir, cf_names, nullptr, per_cf_ops});
+}
+
+// Build RocksDB args for destination/reader stores (immutable stage: no side
+// CFs).
+static Status OpenFtsStore(RocksdbContext &db, const std::string &data_dir) {
+  std::vector<std::string> cf_names = {kPostingsCf, kPositionsCf, kStatCf};
+  std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+      per_cf_ops = {
+          {kPostingsCf, std::make_shared<FtsPostingsMerge>()},
+      };
+  return db.create(
+      RocksdbContext::Args{data_dir, cf_names, nullptr, per_cf_ops});
+}
+
+// Open an existing RocksDB FTS store (immutable stage: no side CFs).
+static Status OpenExistingFtsStore(RocksdbContext &db,
+                                   const std::string &data_dir) {
+  std::vector<std::string> cf_names = {kPostingsCf, kPositionsCf, kStatCf};
+  std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+      per_cf_ops = {
+          {kPostingsCf, std::make_shared<FtsPostingsMerge>()},
+      };
+  return db.open(RocksdbContext::Args{data_dir, cf_names, nullptr, per_cf_ops},
+                 false);
+}
+
+
+// ============================================================
+// Helper: build a SegmentStats with given doc_id range
+// ============================================================
+
+static FtsSegmentStats MakeSegmentStats(uint64_t min_doc_id,
+                                        uint64_t max_doc_id) {
+  FtsSegmentStats stats;
+  stats.min_doc_id = min_doc_id;
+  stats.max_doc_id = max_doc_id;
+  return stats;
+}
+
+// ============================================================
+// Helper: insert documents into a source segment via FtsColumnIndexer
+// ============================================================
+
+static void InsertDocs(
+    FtsColumnIndexer *indexer,
+    const std::vector<std::pair<uint64_t, std::string>> &docs) {
+  for (const auto &[doc_id, text] : docs) {
+    ASSERT_TRUE(indexer->insert(doc_id, text).has_value());
+  }
+  ASSERT_TRUE(indexer->flush().has_value());
+  // The post-2026 reducer requires source postings_cf to be in BitPacked
+  // format (and the side CFs to be empty), which is exactly what
+  // MutableSegment::dump_fts_column_indexers() produces via
+  // convert_postings_to_bitpacked().  Mirror that here so every src segment
+  // looks identical to a real on-disk SST.
+  ASSERT_TRUE(indexer->convert_postings_to_bitpacked().has_value());
+}
+
+// ============================================================
+// Helper: build a no-op filter (no documents deleted)
+// ============================================================
+
+static zvec::IndexFilter::Ptr NoDeleteFilter() {
+  return zvec::EasyIndexFilter::Create(
+      [](uint64_t /*doc_id*/) { return false; });
+}
+
+// ============================================================
+// Helper: build a filter that deletes specific global doc_ids
+// ============================================================
+
+static zvec::IndexFilter::Ptr DeleteFilter(
+    const std::vector<uint64_t> &deleted_doc_ids) {
+  return zvec::EasyIndexFilter::Create([deleted_doc_ids](uint64_t doc_id) {
+    for (uint64_t deleted : deleted_doc_ids) {
+      if (doc_id == deleted) return true;
+    }
+    return false;
+  });
+}
+
+// ============================================================
+// Test fixture
+// ============================================================
+
+class FtsRocksdbReducerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    zvec::FileHelper::RemoveDirectory(kTestDir);
+    zvec::FileHelper::CreateDirectory(kTestDir);
+
+    // Source stores need side CFs for FtsColumnIndexer::insert().
+    ASSERT_TRUE(OpenFtsStoreWithSideCfs(src0_db_, kSrc0Dir).ok());
+    ASSERT_TRUE(OpenFtsStoreWithSideCfs(src1_db_, kSrc1Dir).ok());
+    // Destination store mirrors immutable/reducer layout - no side CFs.
+    ASSERT_TRUE(OpenFtsStore(dst_db_, kDstDir).ok());
+
+    // Grab CF pointers for src0
+    src0_postings_ = src0_db_.get_cf(kPostingsCf);
+    src0_positions_ = src0_db_.get_cf(kPositionsCf);
+    src0_term_freq_ = src0_db_.get_cf(kTermFreqCf);
+    src0_max_tf_ = src0_db_.get_cf(kMaxTfCf);
+    src0_doc_len_ = src0_db_.get_cf(kDocLenCf);
+    src0_stat_ = src0_db_.get_cf(kStatCf);
+
+    // Grab CF pointers for src1
+    src1_postings_ = src1_db_.get_cf(kPostingsCf);
+    src1_positions_ = src1_db_.get_cf(kPositionsCf);
+    src1_term_freq_ = src1_db_.get_cf(kTermFreqCf);
+    src1_max_tf_ = src1_db_.get_cf(kMaxTfCf);
+    src1_doc_len_ = src1_db_.get_cf(kDocLenCf);
+    src1_stat_ = src1_db_.get_cf(kStatCf);
+
+    // Grab CF pointers for dst (no side CFs)
+    dst_postings_ = dst_db_.get_cf(kPostingsCf);
+    dst_positions_ = dst_db_.get_cf(kPositionsCf);
+    dst_stat_ = dst_db_.get_cf(kStatCf);
+  }
+
+  void TearDown() override {
+    src0_db_.close();
+    src1_db_.close();
+    dst_db_.close();
+    zvec::FileHelper::RemoveDirectory(kTestDir);
+  }
+
+  std::unique_ptr<FtsColumnIndexer> MakeSrc0Indexer() {
+    auto field_meta = MakeWhitespaceFieldMeta(kFieldName);
+    auto indexer = std::make_unique<FtsColumnIndexer>();
+    EXPECT_TRUE(indexer
+                    ->open(field_meta, &src0_db_, src0_postings_,
+                           src0_positions_, src0_term_freq_, src0_max_tf_,
+                           src0_doc_len_, src0_stat_)
+                    .has_value());
+    return indexer;
+  }
+
+  // Create and open a FtsColumnIndexer for src1 (doc_ids start at offset)
+  std::unique_ptr<FtsColumnIndexer> MakeSrc1Indexer() {
+    auto field_meta = MakeWhitespaceFieldMeta(kFieldName);
+    auto indexer = std::make_unique<FtsColumnIndexer>();
+    EXPECT_TRUE(indexer
+                    ->open(field_meta, &src1_db_, src1_postings_,
+                           src1_positions_, src1_term_freq_, src1_max_tf_,
+                           src1_doc_len_, src1_stat_)
+                    .has_value());
+    return indexer;
+  }
+
+  // Open a FtsColumnIndexer (read-only) on the merged destination store.
+  // Side CFs are nullptr — immutable/reducer stores no longer contain them.
+  std::unique_ptr<FtsColumnIndexer> MakeDstReader() {
+    auto reader = std::make_unique<FtsColumnIndexer>();
+    EXPECT_TRUE(reader
+                    ->open_reader(kFieldName, &dst_db_, dst_postings_,
+                                  dst_positions_, /*term_freq_cf=*/nullptr,
+                                  /*max_tf_cf=*/nullptr,
+                                  /*doc_len_cf=*/nullptr, dst_stat_)
+                    .has_value());
+    return reader;
+  }
+
+  // Initialize a reducer targeting the destination store
+  FtsRocksdbReducer MakeReducer() {
+    FtsRocksdbReducer reducer;
+    EXPECT_TRUE(reducer
+                    .init(kFieldName, &dst_db_, dst_postings_, dst_positions_,
+                          dst_stat_)
+                    .has_value());
+    return reducer;
+  }
+
+  RocksdbContext src0_db_;
+  RocksdbContext src1_db_;
+  RocksdbContext dst_db_;
+
+  rocksdb::ColumnFamilyHandle *src0_postings_{nullptr};
+  rocksdb::ColumnFamilyHandle *src0_positions_{nullptr};
+  rocksdb::ColumnFamilyHandle *src0_term_freq_{nullptr};
+  rocksdb::ColumnFamilyHandle *src0_max_tf_{nullptr};
+  rocksdb::ColumnFamilyHandle *src0_doc_len_{nullptr};
+  rocksdb::ColumnFamilyHandle *src0_stat_{nullptr};
+
+  rocksdb::ColumnFamilyHandle *src1_postings_{nullptr};
+  rocksdb::ColumnFamilyHandle *src1_positions_{nullptr};
+  rocksdb::ColumnFamilyHandle *src1_term_freq_{nullptr};
+  rocksdb::ColumnFamilyHandle *src1_max_tf_{nullptr};
+  rocksdb::ColumnFamilyHandle *src1_doc_len_{nullptr};
+  rocksdb::ColumnFamilyHandle *src1_stat_{nullptr};
+
+  rocksdb::ColumnFamilyHandle *dst_postings_{nullptr};
+  rocksdb::ColumnFamilyHandle *dst_positions_{nullptr};
+  rocksdb::ColumnFamilyHandle *dst_stat_{nullptr};
+};
+
+// ============================================================
+// init() error cases
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, InitFailsWithNullCF) {
+  FtsRocksdbReducer reducer;
+  EXPECT_FALSE(
+      reducer.init(kFieldName, &dst_db_, nullptr, dst_positions_, dst_stat_)
+          .has_value());
+}
+
+// ============================================================
+// feed() error cases
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, FeedFailsBeforeInit) {
+  FtsRocksdbReducer reducer;
+  FtsSegmentStats stats = MakeSegmentStats(0, 2);
+  EXPECT_FALSE(reducer.feed(stats, &src0_db_, src0_postings_, src0_positions_)
+                   .has_value());
+}
+
+TEST_F(FtsRocksdbReducerTest, FeedFailsWithNonConsecutiveDocIds) {
+  FtsRocksdbReducer reducer = MakeReducer();
+
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 2);
+  EXPECT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+
+  // Gap: src1 starts at 4 instead of 3
+  FtsSegmentStats stats1 = MakeSegmentStats(4, 6);
+  EXPECT_FALSE(reducer.feed(stats1, &src1_db_, src1_postings_, src1_positions_)
+                   .has_value());
+}
+
+// ============================================================
+// Single segment: basic merge without deletes
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, SingleSegmentMergeNoDeletes) {
+  // Segment 0: doc_ids 0..2
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(),
+             {{0, "hello world"}, {1, "hello foo"}, {2, "bar"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 2);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // Verify: search "hello" should return doc_ids 0 and 1
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+
+  std::vector<uint64_t> found_ids;
+  for (const auto &result : results) {
+    found_ids.push_back(result.doc_id);
+  }
+  EXPECT_NE(std::find(found_ids.begin(), found_ids.end(), 0ull),
+            found_ids.end());
+  EXPECT_NE(std::find(found_ids.begin(), found_ids.end(), 1ull),
+            found_ids.end());
+
+  // "bar" should return doc_id 2
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "bar", 10, &results));
+  EXPECT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 2ull);
+}
+
+// ============================================================
+// Single segment: delete filter removes documents
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, SingleSegmentMergeWithDeletes) {
+  // Segment 0: doc_ids 0..2
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(),
+             {{0, "hello world"}, {1, "hello foo"}, {2, "bar"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 2);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+
+  // Delete doc_id 0 (global)
+  ASSERT_TRUE(reducer.reduce(*DeleteFilter({0})).has_value());
+
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+
+  // "hello" should only return doc_id 1 (doc_id 0 was deleted)
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 1ull);
+
+  // "world" should return nothing (its only document was deleted)
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "world", 10, &results));
+  EXPECT_EQ(results.size(), 0u);
+}
+
+// ============================================================
+// Two segments: doc_id remapping across segment boundary
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, TwoSegmentsMergeDocIdRemapping) {
+  // Segment 0: GLOBAL doc_ids 0..2
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(),
+             {{0, "hello world"}, {1, "hello baz"}, {2, "foo bar"}});
+
+  // Segment 1: GLOBAL doc_ids 3..3 (stored as LOCAL 0 in src1 RocksDB)
+  auto indexer1 = MakeSrc1Indexer();
+  InsertDocs(indexer1.get(), {{0, "hello qux"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 2);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+
+  FtsSegmentStats stats1 = MakeSegmentStats(3, 3);
+  ASSERT_TRUE(reducer.feed(stats1, &src1_db_, src1_postings_, src1_positions_)
+                  .has_value());
+
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // Dst segment starts at GLOBAL doc_id 0 (covers 0..3); reader returns
+  // GLOBAL doc_ids by adding start_doc_id back to local doc_ids stored in
+  // the merged dst RocksDB.
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+
+  // "hello" appears in global doc_ids 0, 1 (seg0) and 3 (seg1)
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 3u);
+
+  std::vector<uint64_t> found_ids;
+  for (const auto &result : results) {
+    found_ids.push_back(result.doc_id);
+  }
+  EXPECT_NE(std::find(found_ids.begin(), found_ids.end(), 0ull),
+            found_ids.end());
+  EXPECT_NE(std::find(found_ids.begin(), found_ids.end(), 1ull),
+            found_ids.end());
+  EXPECT_NE(std::find(found_ids.begin(), found_ids.end(), 3ull),
+            found_ids.end());
+
+  // "world" appears only in global doc_id 0
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "world", 10, &results));
+  EXPECT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 0ull);
+
+  // "qux" appears only in global doc_id 3
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "qux", 10, &results));
+  EXPECT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 3ull);
+}
+
+// ============================================================
+// Two segments: delete from second segment
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, TwoSegmentsMergeDeleteFromSecondSegment) {
+  // Segment 0: GLOBAL doc_ids 0..1
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(), {{0, "hello world"}, {1, "foo bar"}});
+
+  // Segment 1: GLOBAL doc_ids 2..3 (stored as LOCAL 0..1 in src1 RocksDB)
+  auto indexer1 = MakeSrc1Indexer();
+  InsertDocs(indexer1.get(), {{0, "hello baz"}, {1, "qux"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 1);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+
+  FtsSegmentStats stats1 = MakeSegmentStats(2, 3);
+  ASSERT_TRUE(reducer.feed(stats1, &src1_db_, src1_postings_, src1_positions_)
+                  .has_value());
+
+  // Delete global doc_id 2 (first doc of segment 1, local 0)
+  ASSERT_TRUE(reducer.reduce(*DeleteFilter({2})).has_value());
+
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+
+  // "hello" should only return global doc_id 0 (doc_id 2 was deleted)
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 0ull);
+
+  // "qux" (global doc_id 3) should still be present
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "qux", 10, &results));
+  EXPECT_EQ(results.size(), 1u);
+  EXPECT_EQ(results[0].doc_id, 3ull);
+}
+
+// ============================================================
+// BM25 scores are positive after merge
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, MergedResultsHavePositiveScores) {
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(),
+             {{0, "hello world"}, {1, "hello foo"}, {2, "bar baz"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 2);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  ASSERT_EQ(results.size(), 2u);
+
+  for (const auto &result : results) {
+    EXPECT_GT(result.score, 0.0f)
+        << "Expected positive BM25 score for doc_id " << result.doc_id;
+  }
+}
+
+// ============================================================
+// reduce() fails if called before feed()
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, ReduceFailsBeforeFeed) {
+  FtsRocksdbReducer reducer = MakeReducer();
+  EXPECT_FALSE(reducer.reduce(*NoDeleteFilter()).has_value());
+}
+
+// ============================================================
+// cleanup() resets state so reducer can be reused
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, CleanupResetsState) {
+  FtsRocksdbReducer reducer = MakeReducer();
+
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(), {{0, "hello"}, {1, "world"}});
+
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 1);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+  ASSERT_TRUE(reducer.cleanup().has_value());
+
+  // After cleanup, reduce() should fail (no segments fed)
+  EXPECT_FALSE(reducer.reduce(*NoDeleteFilter()).has_value());
+}
+
+// ============================================================
+// Verify reduce produces BitPacked format postings
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, ReduceProducesBitPackedFormat) {
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(),
+             {{0, "hello world"}, {1, "hello foo"}, {2, "bar baz"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 2);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // Verify that postings in destination CF are in BitPacked format
+  std::string raw_data;
+  ASSERT_TRUE(
+      dst_db_.db_->Get(dst_db_.read_opts_, dst_postings_, "hello", &raw_data)
+          .ok());
+  EXPECT_TRUE(fts::BitPackedPostingList::is_bitpacked_format(raw_data.data(),
+                                                             raw_data.size()));
+
+  // Verify the BitPacked data can be opened and iterated
+  fts::BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(raw_data.data(), raw_data.size()), 0);
+  EXPECT_EQ(iter.cost(), 2u);  // "hello" appears in doc 0 and doc 1
+
+  // Verify inline payloads are accessible
+  uint32_t doc = iter.next_doc();
+  EXPECT_EQ(doc, 0u);
+  EXPECT_GT(iter.term_freq(), 0u);
+  EXPECT_GT(iter.doc_len(), 0u);
+
+  doc = iter.next_doc();
+  EXPECT_EQ(doc, 1u);
+  EXPECT_GT(iter.term_freq(), 0u);
+  EXPECT_GT(iter.doc_len(), 0u);
+
+  EXPECT_EQ(iter.next_doc(), fts::BitPackedPostingIterator::NO_MORE_DOCS);
+}
+
+// ============================================================
+// Verify two-segment merge produces correct BitPacked postings
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, TwoSegmentMergeBitPackedCorrectness) {
+  // Segment 0: GLOBAL doc_ids 0..1
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(), {{0, "hello world"}, {1, "foo bar"}});
+
+  // Segment 1: GLOBAL doc_ids 2..3 (stored as LOCAL 0..1 in src1 RocksDB)
+  auto indexer1 = MakeSrc1Indexer();
+  InsertDocs(indexer1.get(), {{0, "hello baz"}, {1, "qux"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+
+  FtsSegmentStats stats0 = MakeSegmentStats(0, 1);
+  ASSERT_TRUE(reducer.feed(stats0, &src0_db_, src0_postings_, src0_positions_)
+                  .has_value());
+
+  FtsSegmentStats stats1 = MakeSegmentStats(2, 3);
+  ASSERT_TRUE(reducer.feed(stats1, &src1_db_, src1_postings_, src1_positions_)
+                  .has_value());
+
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // Verify "hello" postings are BitPacked and contain both doc_ids
+  std::string raw_data;
+  ASSERT_TRUE(
+      dst_db_.db_->Get(dst_db_.read_opts_, dst_postings_, "hello", &raw_data)
+          .ok());
+  EXPECT_TRUE(fts::BitPackedPostingList::is_bitpacked_format(raw_data.data(),
+                                                             raw_data.size()));
+
+  fts::BitPackedPostingIterator iter;
+  EXPECT_EQ(iter.open(raw_data.data(), raw_data.size()), 0);
+  EXPECT_EQ(iter.cost(), 2u);  // "hello" in doc 0 and doc 2
+
+  EXPECT_EQ(iter.next_doc(), 0u);
+  EXPECT_EQ(iter.next_doc(), 2u);
+  EXPECT_EQ(iter.next_doc(), fts::BitPackedPostingIterator::NO_MORE_DOCS);
+
+  // Verify search still works correctly via FtsColumnIndexer
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+
+  // Verify BM25 scores are positive
+  for (const auto &result : results) {
+    EXPECT_GT(result.score, 0.0f);
+  }
+}
+
+// ============================================================
+// Two BitPacked segments merged: both source segments have already been
+// reduced (postings in BitPacked format), verify the reducer can handle
+// BitPacked-to-BitPacked merge correctly.
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, MergeTwoBitPackedSegments) {
+  // --- Phase 1: Build two intermediate segments with BitPacked postings ---
+  // Each intermediate segment is produced by a single-segment reduce.
+
+  // Mid0: reduce src0 -> mid0 (produces BitPacked postings)
+  {
+    auto indexer0 = MakeSrc0Indexer();
+    InsertDocs(indexer0.get(),
+               {{0, "hello world"}, {1, "hello foo"}, {2, "bar"}});
+
+    RocksdbContext mid0_db;
+    ASSERT_TRUE(OpenFtsStore(mid0_db, kMid0Dir).ok());
+
+    auto *mid0_postings = mid0_db.get_cf(kPostingsCf);
+    auto *mid0_positions = mid0_db.get_cf(kPositionsCf);
+    auto *mid0_stat = mid0_db.get_cf(kStatCf);
+    FtsRocksdbReducer reducer0;
+    ASSERT_TRUE(reducer0
+                    .init(kFieldName, &mid0_db, mid0_postings, mid0_positions,
+                          mid0_stat)
+                    .has_value());
+    ASSERT_TRUE(reducer0
+                    .feed(MakeSegmentStats(0, 2), &src0_db_, src0_postings_,
+                          src0_positions_)
+                    .has_value());
+    ASSERT_TRUE(reducer0.reduce(*NoDeleteFilter()).has_value());
+
+    // Verify mid0 postings are in BitPacked format
+    std::string raw;
+    ASSERT_TRUE(
+        mid0_db.db_->Get(mid0_db.read_opts_, mid0_postings, "hello", &raw)
+            .ok());
+    ASSERT_TRUE(
+        fts::BitPackedPostingList::is_bitpacked_format(raw.data(), raw.size()));
+
+    mid0_db.close();
+  }
+
+  // Mid1: reduce src1 -> mid1 (produces BitPacked postings)
+  {
+    auto indexer1 = MakeSrc1Indexer();
+    InsertDocs(indexer1.get(), {{0, "hello baz"}, {1, "qux bar"}});
+
+    RocksdbContext mid1_db;
+    ASSERT_TRUE(OpenFtsStore(mid1_db, kMid1Dir).ok());
+
+    auto *mid1_postings = mid1_db.get_cf(kPostingsCf);
+    auto *mid1_positions = mid1_db.get_cf(kPositionsCf);
+    auto *mid1_stat = mid1_db.get_cf(kStatCf);
+    FtsRocksdbReducer reducer1;
+    ASSERT_TRUE(reducer1
+                    .init(kFieldName, &mid1_db, mid1_postings, mid1_positions,
+                          mid1_stat)
+                    .has_value());
+    ASSERT_TRUE(reducer1
+                    .feed(MakeSegmentStats(0, 1), &src1_db_, src1_postings_,
+                          src1_positions_)
+                    .has_value());
+    ASSERT_TRUE(reducer1.reduce(*NoDeleteFilter()).has_value());
+
+    // Verify mid1 postings are in BitPacked format
+    std::string raw;
+    ASSERT_TRUE(
+        mid1_db.db_->Get(mid1_db.read_opts_, mid1_postings, "hello", &raw)
+            .ok());
+    ASSERT_TRUE(
+        fts::BitPackedPostingList::is_bitpacked_format(raw.data(), raw.size()));
+
+    mid1_db.close();
+  }
+
+  // --- Phase 2: Merge the two BitPacked intermediate segments ---
+  // Reopen mid0 and mid1 as source (existing=true since they were created
+  // in Phase 1), reduce into dst.
+  RocksdbContext mid0_db, mid1_db;
+  ASSERT_TRUE(OpenExistingFtsStore(mid0_db, kMid0Dir).ok());
+  ASSERT_TRUE(OpenExistingFtsStore(mid1_db, kMid1Dir).ok());
+
+  auto *mid0_postings = mid0_db.get_cf(kPostingsCf);
+  auto *mid0_positions = mid0_db.get_cf(kPositionsCf);
+  auto *mid1_postings = mid1_db.get_cf(kPostingsCf);
+  auto *mid1_positions = mid1_db.get_cf(kPositionsCf);
+  FtsRocksdbReducer final_reducer = MakeReducer();
+  // mid0 has doc_ids 0..2, mid1 has doc_ids 3..4
+  ASSERT_TRUE(
+      final_reducer
+          .feed(MakeSegmentStats(0, 2), &mid0_db, mid0_postings, mid0_positions)
+          .has_value());
+  ASSERT_TRUE(
+      final_reducer
+          .feed(MakeSegmentStats(3, 4), &mid1_db, mid1_postings, mid1_positions)
+          .has_value());
+  ASSERT_TRUE(final_reducer.reduce(*NoDeleteFilter()).has_value());
+
+  mid0_db.close();
+  mid1_db.close();
+
+  // --- Phase 3: Verify merged results ---
+  // Verify output is BitPacked
+  std::string raw_data;
+  ASSERT_TRUE(
+      dst_db_.db_->Get(dst_db_.read_opts_, dst_postings_, "hello", &raw_data)
+          .ok());
+  EXPECT_TRUE(fts::BitPackedPostingList::is_bitpacked_format(raw_data.data(),
+                                                             raw_data.size()));
+
+  // "hello" appears in doc 0, 1 (from mid0) and doc 3 (from mid1)
+  fts::BitPackedPostingIterator bp_iter;
+  ASSERT_EQ(bp_iter.open(raw_data.data(), raw_data.size()), 0);
+  EXPECT_EQ(bp_iter.cost(), 3u);
+  EXPECT_EQ(bp_iter.next_doc(), 0u);
+  EXPECT_EQ(bp_iter.next_doc(), 1u);
+  EXPECT_EQ(bp_iter.next_doc(), 3u);
+  EXPECT_EQ(bp_iter.next_doc(), fts::BitPackedPostingIterator::NO_MORE_DOCS);
+
+  // "bar" appears in doc 2 (from mid0) and doc 4 (from mid1)
+  raw_data.clear();
+  ASSERT_TRUE(
+      dst_db_.db_->Get(dst_db_.read_opts_, dst_postings_, "bar", &raw_data)
+          .ok());
+  EXPECT_TRUE(fts::BitPackedPostingList::is_bitpacked_format(raw_data.data(),
+                                                             raw_data.size()));
+  fts::BitPackedPostingIterator bar_iter;
+  ASSERT_EQ(bar_iter.open(raw_data.data(), raw_data.size()), 0);
+  EXPECT_EQ(bar_iter.cost(), 2u);
+  EXPECT_EQ(bar_iter.next_doc(), 2u);
+  EXPECT_EQ(bar_iter.next_doc(), 4u);
+  EXPECT_EQ(bar_iter.next_doc(), fts::BitPackedPostingIterator::NO_MORE_DOCS);
+
+  // Verify search via FtsColumnIndexer still works
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 3u);
+  for (const auto &result : results) {
+    EXPECT_GT(result.score, 0.0f);
+  }
+
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "bar", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+}
+
+// ============================================================
+// (Removed) Mixed BitPacked + Roaring Bitmap merge.
+// The post-2026 reducer no longer accepts Roaring-format source segments
+// (FtsColumnIndexer::convert_postings_to_bitpacked() always runs at dump
+// time), so this scenario is no longer reachable in production.
+
+// ============================================================
+// Reducer over BitPacked-converted source segments with EMPTY side CFs
+// ============================================================
+//
+// After the post-2026 indexer change,
+// MutableSegment::dump_fts_column_indexers() invokes
+// FtsColumnIndexer::convert_postings_to_bitpacked(), which inlines
+// tf/doc_len/max_tf into the BitPacked posting list AND DeleteRange's the
+// $TF / $MAX_TF / $DOC_LEN side CFs.  By the time the reducer sees the
+// segment:
+//   - postings_cf : every value is BitPacked (magic 'BPKD')
+//   - term_freq_cf / max_tf_cf / doc_len_cf : empty (DeleteRange tombstones)
+//
+// The new reducer never reads the side CFs at all, so this test verifies
+// the end-to-end pipeline produces a queryable destination index whose
+// posting set matches the expected union — and that the empty side CFs
+// cause no errors or stat under-counts.
+
+TEST_F(FtsRocksdbReducerTest, ReducerHandlesBitpackedConvertedSrcSegments) {
+  // ----- src0: insert + flush + convert (the helper already calls convert)
+  // -----
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(), {
+                                 {0, "hello world"},
+                                 {1, "hello foo"},
+                                 {2, "bar baz"},
+                             });
+
+  // Sanity: src0 postings are BitPacked AND the side CFs are empty (the
+  // indexer DeleteRange'd them as part of convert_postings_to_bitpacked()).
+  {
+    std::string raw;
+    ASSERT_TRUE(
+        src0_db_.db_->Get(src0_db_.read_opts_, src0_postings_, "hello", &raw)
+            .ok());
+    EXPECT_TRUE(
+        BitPackedPostingList::is_bitpacked_format(raw.data(), raw.size()));
+    auto it = std::unique_ptr<rocksdb::Iterator>(
+        src0_db_.db_->NewIterator(src0_db_.read_opts_, src0_term_freq_));
+    it->SeekToFirst();
+    EXPECT_FALSE(it->Valid());
+    auto it2 = std::unique_ptr<rocksdb::Iterator>(
+        src0_db_.db_->NewIterator(src0_db_.read_opts_, src0_doc_len_));
+    it2->SeekToFirst();
+    EXPECT_FALSE(it2->Valid());
+    auto it3 = std::unique_ptr<rocksdb::Iterator>(
+        src0_db_.db_->NewIterator(src0_db_.read_opts_, src0_max_tf_));
+    it3->SeekToFirst();
+    EXPECT_FALSE(it3->Valid());
+  }
+
+  // ----- src1: insert + flush + convert -----
+  auto indexer1 = MakeSrc1Indexer();
+  InsertDocs(indexer1.get(), {
+                                 {0, "hello qux"},
+                                 {1, "qux quux"},
+                             });
+
+  // ----- Reduce -----
+  // src0 covers GLOBAL [0, 2], src1 covers GLOBAL [3, 4] (consecutive).
+  FtsRocksdbReducer reducer = MakeReducer();
+  ASSERT_TRUE(reducer
+                  .feed(MakeSegmentStats(0, 2), &src0_db_, src0_postings_,
+                        src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer
+                  .feed(MakeSegmentStats(3, 4), &src1_db_, src1_postings_,
+                        src1_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // ----- Verify dst can be queried -----
+  // After reduce, dst postings get re-written to BitPacked again by the
+  // reducer's existing convert_postings_to_bitpacked step, so this exercises
+  // the full BitPacked-in / BitPacked-out path.
+  auto reader = MakeDstReader();
+
+  // "hello" appears in src0 doc 0 (global 0), src0 doc 1 (global 1),
+  // src1 doc 0 (global 3) -> 3 hits.
+  std::vector<FtsResult> results;
+  ASSERT_TRUE(search_str_ok(*reader, "hello", 10, &results));
+  EXPECT_EQ(results.size(), 3u);
+  std::vector<uint64_t> hello_ids;
+  for (const auto &r : results) hello_ids.push_back(r.doc_id);
+  std::sort(hello_ids.begin(), hello_ids.end());
+  EXPECT_EQ(hello_ids[0], 0ull);
+  EXPECT_EQ(hello_ids[1], 1ull);
+  EXPECT_EQ(hello_ids[2], 3ull);
+
+  // "qux" appears in src1 docs 0 and 1 -> globals 3 and 4.
+  results.clear();
+  ASSERT_TRUE(search_str_ok(*reader, "qux", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+  std::vector<uint64_t> qux_ids;
+  for (const auto &r : results) qux_ids.push_back(r.doc_id);
+  std::sort(qux_ids.begin(), qux_ids.end());
+  EXPECT_EQ(qux_ids[0], 3ull);
+  EXPECT_EQ(qux_ids[1], 4ull);
+}
+
+// ============================================================
+// Single-segment reduce when the source side CFs are completely empty:
+// the reducer must rely only on the BitPacked inline payloads (tf, doc_len)
+// for both the merged posting list and the destination stat_cf.  Any
+// regression that re-introduces a side-CF read would surface here as a
+// missing tf / doc_len / score.
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, ReduceWithEmptySideCFsProducesBitPacked) {
+  // InsertDocs() already calls convert_postings_to_bitpacked(), so by the
+  // time we reach reduce() the src $TF / $MAX_TF / $DOC_LEN CFs are empty.
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(), {{0, "alpha beta gamma"},
+                              {1, "alpha alpha gamma"},
+                              {2, "delta epsilon"}});
+
+  // Sanity: side CFs are empty after convert (DeleteRange'd by the indexer).
+  {
+    auto it = std::unique_ptr<rocksdb::Iterator>(
+        src0_db_.db_->NewIterator(src0_db_.read_opts_, src0_term_freq_));
+    it->SeekToFirst();
+    EXPECT_FALSE(it->Valid());
+    auto it2 = std::unique_ptr<rocksdb::Iterator>(
+        src0_db_.db_->NewIterator(src0_db_.read_opts_, src0_doc_len_));
+    it2->SeekToFirst();
+    EXPECT_FALSE(it2->Valid());
+    auto it3 = std::unique_ptr<rocksdb::Iterator>(
+        src0_db_.db_->NewIterator(src0_db_.read_opts_, src0_max_tf_));
+    it3->SeekToFirst();
+    EXPECT_FALSE(it3->Valid());
+  }
+
+  FtsRocksdbReducer reducer = MakeReducer();
+  ASSERT_TRUE(reducer
+                  .feed(MakeSegmentStats(0, 2), &src0_db_, src0_postings_,
+                        src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // Destination postings_cf must be BitPacked and carry inline tf/doc_len
+  // recovered solely from the source BitPacked payloads.
+  std::string raw;
+  ASSERT_TRUE(
+      dst_db_.db_->Get(dst_db_.read_opts_, dst_postings_, "alpha", &raw).ok());
+  ASSERT_TRUE(
+      fts::BitPackedPostingList::is_bitpacked_format(raw.data(), raw.size()));
+  fts::BitPackedPostingIterator bp;
+  ASSERT_EQ(bp.open(raw.data(), raw.size()), 0);
+  EXPECT_EQ(bp.cost(), 2u);
+
+  EXPECT_EQ(bp.next_doc(), 0u);
+  EXPECT_EQ(bp.term_freq(), 1u);  // doc 0: "alpha" once
+  EXPECT_EQ(bp.doc_len(), 3u);
+  EXPECT_EQ(bp.next_doc(), 1u);
+  EXPECT_EQ(bp.term_freq(), 2u);  // doc 1: "alpha alpha"
+  EXPECT_EQ(bp.doc_len(), 3u);
+  EXPECT_EQ(bp.next_doc(), fts::BitPackedPostingIterator::NO_MORE_DOCS);
+
+  // dst_stat_cf must reflect the inline doc_len totals: 3 docs, 8 tokens
+  // ("alpha beta gamma" = 3, "alpha alpha gamma" = 3, "delta epsilon" = 2).
+  std::string total_docs_raw, total_tokens_raw;
+  ASSERT_TRUE(dst_db_.db_
+                  ->Get(dst_db_.read_opts_, dst_stat_,
+                        kFieldName + "_total_docs", &total_docs_raw)
+                  .ok());
+  ASSERT_TRUE(dst_db_.db_
+                  ->Get(dst_db_.read_opts_, dst_stat_,
+                        kFieldName + "_total_tokens", &total_tokens_raw)
+                  .ok());
+  uint64_t total_docs = fts::decode_uint64_value(total_docs_raw.data());
+  uint64_t total_tokens = fts::decode_uint64_value(total_tokens_raw.data());
+  EXPECT_EQ(total_docs, 3u);
+  EXPECT_EQ(total_tokens, 8u);
+
+  // dst no longer has side CFs ($TF/$MAX_TF/$DOC_LEN) — they are dropped
+  // at dump time. Verify search still works end-to-end.
+  auto reader = MakeDstReader();
+  std::vector<FtsResult> results;
+  ASSERT_TRUE(search_str_ok(*reader, "alpha", 10, &results));
+  EXPECT_EQ(results.size(), 2u);
+  for (const auto &r : results) EXPECT_GT(r.score, 0.0f);
+}
+
+// ============================================================
+// Cross-segment BM25 stats: the destination total_docs / total_tokens
+// must equal the sum of the surviving documents from every fed segment,
+// using the inline doc_len payloads (each surviving doc counted ONCE per
+// its segment, regardless of how many terms it appears under).
+// ============================================================
+
+TEST_F(FtsRocksdbReducerTest, MultiSegmentBM25StatsAreAccumulatedCorrectly) {
+  // src0: 2 docs, doc_len 3 + 2 = 5 tokens
+  auto indexer0 = MakeSrc0Indexer();
+  InsertDocs(indexer0.get(), {{0, "alpha beta gamma"}, {1, "alpha beta"}});
+
+  // src1: 2 docs, doc_len 4 + 1 = 5 tokens
+  auto indexer1 = MakeSrc1Indexer();
+  InsertDocs(indexer1.get(), {{0, "alpha gamma delta epsilon"}, {1, "alpha"}});
+
+  FtsRocksdbReducer reducer = MakeReducer();
+  ASSERT_TRUE(reducer
+                  .feed(MakeSegmentStats(0, 1), &src0_db_, src0_postings_,
+                        src0_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer
+                  .feed(MakeSegmentStats(2, 3), &src1_db_, src1_postings_,
+                        src1_positions_)
+                  .has_value());
+  ASSERT_TRUE(reducer.reduce(*NoDeleteFilter()).has_value());
+
+  // 4 surviving docs across both segments; 5 + 5 = 10 tokens total.
+  std::string total_docs_raw, total_tokens_raw;
+  ASSERT_TRUE(dst_db_.db_
+                  ->Get(dst_db_.read_opts_, dst_stat_,
+                        kFieldName + "_total_docs", &total_docs_raw)
+                  .ok());
+  ASSERT_TRUE(dst_db_.db_
+                  ->Get(dst_db_.read_opts_, dst_stat_,
+                        kFieldName + "_total_tokens", &total_tokens_raw)
+                  .ok());
+  uint64_t total_docs = fts::decode_uint64_value(total_docs_raw.data());
+  uint64_t total_tokens = fts::decode_uint64_value(total_tokens_raw.data());
+  EXPECT_EQ(total_docs, 4u);
+  EXPECT_EQ(total_tokens, 10u);
+
+  // With one doc filtered out (global doc_id 2 from src1, doc_len 4),
+  // totals must drop to 3 docs / 6 tokens.
+  // Reset destination CFs by re-opening the dst RocksDB? Simpler: build a
+  // second dst inside this test would require a second fixture; instead we
+  // assert via a dedicated Reducer + dst pair using the current dst (which
+  // has data already) is not safe.  Skip the filter sub-case here — it's
+  // covered by SingleSegmentMergeWithDeletes for the single-segment path.
+
+  // Verify "alpha" merged posting carries 4 entries with monotonic doc_ids.
+  std::string raw;
+  ASSERT_TRUE(
+      dst_db_.db_->Get(dst_db_.read_opts_, dst_postings_, "alpha", &raw).ok());
+  ASSERT_TRUE(
+      fts::BitPackedPostingList::is_bitpacked_format(raw.data(), raw.size()));
+  fts::BitPackedPostingIterator bp;
+  ASSERT_EQ(bp.open(raw.data(), raw.size()), 0);
+  EXPECT_EQ(bp.cost(), 4u);
+  std::vector<uint32_t> docs;
+  while (true) {
+    uint32_t d = bp.next_doc();
+    if (d == fts::BitPackedPostingIterator::NO_MORE_DOCS) break;
+    docs.push_back(d);
+  }
+  ASSERT_EQ(docs.size(), 4u);
+  EXPECT_EQ(docs[0], 0u);
+  EXPECT_EQ(docs[1], 1u);
+  EXPECT_EQ(docs[2], 2u);
+  EXPECT_EQ(docs[3], 3u);
+}
diff --git a/tests/db/index/column/fts_column/tokenizer_pipeline_manager_test.cc b/tests/db/index/column/fts_column/tokenizer_pipeline_manager_test.cc
new file mode 100644
index 000000000..5a9ba5b0d
--- /dev/null
+++ b/tests/db/index/column/fts_column/tokenizer_pipeline_manager_test.cc
@@ -0,0 +1,271 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "db/index/column/fts_column/tokenizer/tokenizer_pipeline_manager.h"
+#include <atomic>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+#include <gtest/gtest.h>
+#include "db/index/column/fts_column/fts_types.h"
+
+using namespace zvec::fts;
+
+// ============================================================
+// Helpers
+// ============================================================
+
+static FtsIndexParams make_params(const std::string &tokenizer) {
+  FtsIndexParams params;
+  params.tokenizer_name = tokenizer;
+  return params;
+}
+
+// ============================================================
+// make_key tests
+// ============================================================
+
+TEST(TokenizerPipelineManagerKeyTest, BasicKey) {
+  FtsIndexParams params;
+  params.tokenizer_name = "whitespace";
+  std::string key = TokenizerPipelineManager::make_key(params);
+  EXPECT_FALSE(key.empty());
+  EXPECT_NE(key.find("whitespace"), std::string::npos);
+}
+
+TEST(TokenizerPipelineManagerKeyTest, SameParamsProduceSameKey) {
+  FtsIndexParams params1;
+  params1.tokenizer_name = "whitespace";
+  params1.extra_params = R"({"dict_path":"/path/to/dict"})";
+
+  FtsIndexParams params2;
+  params2.tokenizer_name = "whitespace";
+  params2.extra_params = R"({"dict_path":"/path/to/dict"})";
+
+  std::string key1 = TokenizerPipelineManager::make_key(params1);
+  std::string key2 = TokenizerPipelineManager::make_key(params2);
+  EXPECT_EQ(key1, key2);
+}
+
+TEST(TokenizerPipelineManagerKeyTest, DifferentTokenizersDifferentKeys) {
+  FtsIndexParams params1 = make_params("whitespace");
+  FtsIndexParams params2 = make_params("jieba");
+  std::string key1 = TokenizerPipelineManager::make_key(params1);
+  std::string key2 = TokenizerPipelineManager::make_key(params2);
+  EXPECT_NE(key1, key2);
+}
+
+TEST(TokenizerPipelineManagerKeyTest, FilterNamesAffectKey) {
+  FtsIndexParams params1 = make_params("whitespace");
+  params1.filters.clear();
+
+  FtsIndexParams params2 = make_params("whitespace");
+  params2.filters = {"lowercase"};
+
+  std::string key1 = TokenizerPipelineManager::make_key(params1);
+  std::string key2 = TokenizerPipelineManager::make_key(params2);
+  EXPECT_NE(key1, key2);
+}
+
+// ============================================================
+// acquire / release tests
+// ============================================================
+
+class TokenizerPipelineManagerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Use whitespace tokenizer (always available, no dict needed)
+    params_ = make_params("whitespace");
+  }
+
+  void TearDown() override {
+    // Best-effort cleanup: release the params if it still exists
+    // (tests that fail mid-way may leave entries)
+    // We do this by calling release repeatedly; release on unknown key is a
+    // no-op
+  }
+
+  FtsIndexParams params_;
+};
+
+TEST_F(TokenizerPipelineManagerTest, FirstAcquireCreatesPipeline) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+  auto pipeline = mgr.acquire(params_);
+  ASSERT_NE(pipeline, nullptr);
+
+  // Cleanup
+  mgr.release(params_);
+}
+
+TEST_F(TokenizerPipelineManagerTest, RepeatedAcquireReturnsSameInstance) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+  auto pipeline1 = mgr.acquire(params_);
+  auto pipeline2 = mgr.acquire(params_);
+
+  ASSERT_NE(pipeline1, nullptr);
+  ASSERT_NE(pipeline2, nullptr);
+  // Both should point to the exact same underlying object
+  EXPECT_EQ(pipeline1.get(), pipeline2.get());
+
+  // Cleanup: two acquires → two releases
+  mgr.release(params_);
+  mgr.release(params_);
+}
+
+TEST_F(TokenizerPipelineManagerTest, ReleaseDecrementsRefCount) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+  auto pipeline1 = mgr.acquire(params_);
+  auto pipeline2 = mgr.acquire(params_);
+  ASSERT_NE(pipeline1, nullptr);
+
+  // Release one reference; pipeline should still be alive (ref_count = 1)
+  mgr.release(params_);
+
+  // Acquire again — should still return the same instance (not recreated)
+  auto pipeline3 = mgr.acquire(params_);
+  ASSERT_NE(pipeline3, nullptr);
+  EXPECT_EQ(pipeline1.get(), pipeline3.get());
+
+  // Cleanup: we now have ref_count = 2 (pipeline2 + pipeline3)
+  mgr.release(params_);
+  mgr.release(params_);
+}
+
+TEST_F(TokenizerPipelineManagerTest, RefCountZeroDestroysEntry) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+
+  auto pipeline1 = mgr.acquire(params_);
+  ASSERT_NE(pipeline1, nullptr);
+  void *raw_ptr = pipeline1.get();
+
+  // Release the only reference → entry should be removed
+  mgr.release(params_);
+
+  // Acquire again → a new pipeline should be created (possibly different
+  // address)
+  auto pipeline2 = mgr.acquire(params_);
+  ASSERT_NE(pipeline2, nullptr);
+  // The old shared_ptr (pipeline1) still holds the object alive, so raw_ptr
+  // is still valid, but the manager has created a fresh entry.
+  // We can't guarantee same/different address, but we can verify it works.
+  (void)raw_ptr;
+
+  // Cleanup
+  mgr.release(params_);
+}
+
+TEST_F(TokenizerPipelineManagerTest, ReleaseUnknownKeyIsNoOp) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+  // Should not crash or assert
+  FtsIndexParams unknown_params;
+  unknown_params.tokenizer_name = "nonexistent_tokenizer_name";
+  EXPECT_NO_THROW(mgr.release(unknown_params));
+}
+
+TEST_F(TokenizerPipelineManagerTest, DifferentConfigsDifferentPipelines) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+
+  FtsIndexParams params_ws = make_params("whitespace");
+
+  // scws tokenizer will fail to create (no dict), but whitespace should succeed
+  auto pipeline_ws = mgr.acquire(params_ws);
+  ASSERT_NE(pipeline_ws, nullptr);
+
+  // Cleanup
+  mgr.release(params_ws);
+}
+
+// ============================================================
+// Concurrent safety tests
+// ============================================================
+
+TEST_F(TokenizerPipelineManagerTest, ConcurrentAcquireSameKey) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+  constexpr int kThreads = 8;
+  constexpr int kAcquiresPerThread = 10;
+
+  std::vector<TokenizerPipelinePtr> results(kThreads * kAcquiresPerThread);
+  std::vector<std::thread> threads;
+  std::atomic<int> success_count{0};
+
+  for (int t = 0; t < kThreads; ++t) {
+    threads.emplace_back([&, t]() {
+      for (int i = 0; i < kAcquiresPerThread; ++i) {
+        auto pipeline = mgr.acquire(params_);
+        if (pipeline) {
+          results[t * kAcquiresPerThread + i] = pipeline;
+          success_count.fetch_add(1);
+        }
+      }
+    });
+  }
+
+  for (auto &th : threads) {
+    th.join();
+  }
+
+  // All acquires should succeed
+  EXPECT_EQ(success_count.load(), kThreads * kAcquiresPerThread);
+
+  // All non-null results should point to the same underlying pipeline
+  void *expected_ptr = nullptr;
+  for (const auto &p : results) {
+    if (p) {
+      if (expected_ptr == nullptr) {
+        expected_ptr = p.get();
+      } else {
+        EXPECT_EQ(p.get(), expected_ptr);
+      }
+    }
+  }
+
+  // Cleanup: release all acquired references
+  for (int i = 0; i < kThreads * kAcquiresPerThread; ++i) {
+    mgr.release(params_);
+  }
+}
+
+TEST_F(TokenizerPipelineManagerTest, ConcurrentAcquireAndRelease) {
+  auto &mgr = TokenizerPipelineManager::Instance();
+  constexpr int kThreads = 4;
+  constexpr int kIterations = 20;
+  std::atomic<int> errors{0};
+
+  std::vector<std::thread> threads;
+  for (int t = 0; t < kThreads; ++t) {
+    threads.emplace_back([&]() {
+      for (int i = 0; i < kIterations; ++i) {
+        auto pipeline = mgr.acquire(params_);
+        if (!pipeline) {
+          errors.fetch_add(1);
+          continue;
+        }
+        // Hold briefly then release
+        mgr.release(params_);
+      }
+    });
+  }
+
+  for (auto &th : threads) {
+    th.join();
+  }
+
+  EXPECT_EQ(errors.load(), 0);
+  // After all threads finish, ref_count should be 0 (all released)
+  // Verify by acquiring once more — should succeed
+  auto pipeline = mgr.acquire(params_);
+  EXPECT_NE(pipeline, nullptr);
+  mgr.release(params_);
+}
diff --git a/tests/db/index/common/doc_test.cc b/tests/db/index/common/doc_test.cc
index 543141169..00dd6d4a2 100644
--- a/tests/db/index/common/doc_test.cc
+++ b/tests/db/index/common/doc_test.cc
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <zvec/ailego/utility/float_helper.h>
 #include "utils/utils.h"
+#include "zvec/db/index_params.h"
 #include "zvec/db/status.h"
 #include "zvec/db/type.h"
 
@@ -823,8 +824,7 @@ TEST_F(DocDetailedTest, ValidateAndSanitization) {
     auto schema = test::TestHelper::CreateNormalSchema(false);
     std::vector<std::string> invalid_names = {
         // Too long (>64)
-        std::string(65, 'a'),
-        std::string(64, 'a') + "_",
+        std::string(65, 'a'), std::string(64, 'a') + "_",
 
         // Illegal characters
         "a b",   // space
@@ -1409,6 +1409,55 @@ TEST(VectorQuery, ValidateAndSanitize) {
     s = query.validate_and_sanitize(&schema);
     EXPECT_TRUE(s.ok());
   }
+
+  // fts_query_ and vector fields are mutually exclusive
+  {
+    auto fts_params = std::make_shared<FtsIndexParams>();
+    FieldSchema fts_schema("content", DataType::STRING, false, fts_params);
+
+    VectorQuery query;
+    query.field_name_ = "embedding";
+    query.topk_ = 10;
+    std::vector<float> query_vector(128, 1.0f);
+    query.query_vector_ =
+        std::string(reinterpret_cast<char *>(query_vector.data()),
+                    query_vector.size() * sizeof(float));
+    FtsQuery fts_query_hello;
+    fts_query_hello.query_string_ = "hello";
+    query.fts_query_ = fts_query_hello;
+
+    // Should fail: both vector and fts_query_ set
+    auto s = query.validate_and_sanitize(&fts_schema);
+    EXPECT_FALSE(s.ok());
+    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
+
+    // Clear vector, should pass with FTS schema
+    query.query_vector_.clear();
+    s = query.validate_and_sanitize(&fts_schema);
+    EXPECT_TRUE(s.ok());
+
+    // FTS query with proper FTS field schema -> OK
+    VectorQuery fts_only;
+    fts_only.field_name_ = "content";
+    fts_only.topk_ = 10;
+    FtsQuery fts_query_test;
+    fts_query_test.query_string_ = "test";
+    fts_only.fts_query_ = fts_query_test;
+    s = fts_only.validate_and_sanitize(&fts_schema);
+    EXPECT_TRUE(s.ok());
+
+    // FTS query with nullptr schema -> fail (field not found)
+    s = fts_only.validate_and_sanitize(nullptr);
+    EXPECT_FALSE(s.ok());
+    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
+
+    // FTS query with vector field schema -> fail (type mismatch)
+    FieldSchema vec_schema("embedding", DataType::VECTOR_FP32, 128, false,
+                           std::make_shared<HnswIndexParams>(MetricType::L2));
+    s = fts_only.validate_and_sanitize(&vec_schema);
+    EXPECT_FALSE(s.ok());
+    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
+  }
 }
 
 // Test null value
diff --git a/tests/db/sqlengine/CMakeLists.txt b/tests/db/sqlengine/CMakeLists.txt
index 7922bbf6b..8b046eeb0 100644
--- a/tests/db/sqlengine/CMakeLists.txt
+++ b/tests/db/sqlengine/CMakeLists.txt
@@ -25,6 +25,7 @@ foreach(CC_SRCS ${ALL_TEST_SRCS})
     LIBS zvec_common
     zvec_proto
     zvec_sqlengine
+    zvec_db
     zvec_ailego
     core_metric
     core_utility
diff --git a/tests/db/sqlengine/fts_parser_test.cc b/tests/db/sqlengine/fts_parser_test.cc
new file mode 100644
index 000000000..0bd5af926
--- /dev/null
+++ b/tests/db/sqlengine/fts_parser_test.cc
@@ -0,0 +1,686 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "db/index/column/fts_column/fts_query_ast.h"
+#include "db/index/column/fts_column/parser/fts_query_parser.h"
+
+namespace zvec::fts {
+
+// ============================================================
+// Test fixture
+// ============================================================
+
+class FtsParserTest : public ::testing::Test {
+ protected:
+  FtsAstNodePtr parse(const std::string &query) {
+    return parser_.parse(query);
+  }
+
+  // Overload for tests that need to specify the default operator explicitly.
+  FtsAstNodePtr parse(const std::string &query, FtsDefaultOperator default_op) {
+    return parser_.parse(query, default_op);
+  }
+
+  const std::string &err_msg() {
+    return parser_.err_msg();
+  }
+
+  // Helpers for type-safe downcasting
+  static const TermNode &as_term(const FtsAstNode &node) {
+    EXPECT_EQ(node.type(), FtsNodeType::TERM);
+    return static_cast<const TermNode &>(node);
+  }
+
+  static const PhraseNode &as_phrase(const FtsAstNode &node) {
+    EXPECT_EQ(node.type(), FtsNodeType::PHRASE);
+    return static_cast<const PhraseNode &>(node);
+  }
+
+  static const AndNode &as_and(const FtsAstNode &node) {
+    EXPECT_EQ(node.type(), FtsNodeType::AND);
+    return static_cast<const AndNode &>(node);
+  }
+
+  static const OrNode &as_or(const FtsAstNode &node) {
+    EXPECT_EQ(node.type(), FtsNodeType::OR);
+    return static_cast<const OrNode &>(node);
+  }
+
+ private:
+  FtsQueryParser parser_;
+};
+
+// ============================================================
+// Single term
+// ============================================================
+
+TEST_F(FtsParserTest, SingleTerm) {
+  auto ast = parse("vector");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::TERM);
+  const auto &term = as_term(*ast);
+  EXPECT_EQ(term.term, "vector");
+  EXPECT_FALSE(term.must);
+  EXPECT_FALSE(term.must_not);
+}
+
+TEST_F(FtsParserTest, SingleTermNumeric) {
+  auto ast = parse("2024");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::TERM);
+  EXPECT_EQ(as_term(*ast).term, "2024");
+}
+
+TEST_F(FtsParserTest, SingleTermWithHyphen) {
+  // REGULAR_ID allows hyphens
+  auto ast = parse("full-text");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::TERM);
+  EXPECT_EQ(as_term(*ast).term, "full-text");
+}
+
+// ============================================================
+// Must (+) and must_not (-/NOT) modifiers
+// ============================================================
+
+TEST_F(FtsParserTest, MustModifier) {
+  auto ast = parse("+vector");
+  ASSERT_NE(ast, nullptr);
+  const auto &term = as_term(*ast);
+  EXPECT_EQ(term.term, "vector");
+  EXPECT_TRUE(term.must);
+  EXPECT_FALSE(term.must_not);
+}
+
+TEST_F(FtsParserTest, MustNotModifierMinus) {
+  // "-slow" is lexed as a single REGULAR_ID token (hyphen is part of the id).
+  // To express must_not, use a space: "- slow" -> MINUS_SIGN + REGULAR_ID.
+  auto ast = parse("- slow");
+  ASSERT_NE(ast, nullptr);
+  const auto &term = as_term(*ast);
+  EXPECT_EQ(term.term, "slow");
+  EXPECT_FALSE(term.must);
+  EXPECT_TRUE(term.must_not);
+}
+
+TEST_F(FtsParserTest, MustNotModifierMinusNoSpace) {
+  // "-slow" without space: FtsLexer treats '-' as MINUS_SIGN modifier,
+  // so "-slow" is parsed as must_not:slow (same as "- slow").
+  auto ast = parse("-slow");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::TERM);
+  EXPECT_EQ(as_term(*ast).term, "slow");
+  EXPECT_TRUE(as_term(*ast).must_not);
+}
+
+TEST_F(FtsParserTest, MustNotModifierNot) {
+  // NOT is now a strict binary operator (`a NOT b` <=> `a AND NOT b`).
+  // A leading `NOT a` is therefore a syntax error — there is no left-hand
+  // operand for NOT to subtract from.
+  auto ast = parse("NOT slow");
+  EXPECT_EQ(ast, nullptr);
+  EXPECT_FALSE(err_msg().empty());
+}
+
+// ============================================================
+// Phrase query
+// ============================================================
+
+TEST_F(FtsParserTest, DoubleQuotedPhrase) {
+  auto ast = parse("\"exact phrase\"");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::PHRASE);
+  const auto &phrase = as_phrase(*ast);
+  ASSERT_EQ(phrase.terms.size(), 2u);
+  EXPECT_EQ(phrase.terms[0], "exact");
+  EXPECT_EQ(phrase.terms[1], "phrase");
+  EXPECT_FALSE(phrase.must);
+  EXPECT_FALSE(phrase.must_not);
+}
+
+TEST_F(FtsParserTest, SingleQuotedPhrase) {
+  // Single-quoted strings are not supported as phrase queries (no SQUOTA_STRING
+  // token).  The lexer's TERM rule absorbs "'hello", "world", and "'" as
+  // individual term tokens, so the query parses as an implicit OR of terms.
+  auto ast = parse("'hello world'");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+}
+
+TEST_F(FtsParserTest, PhraseWithMustModifier) {
+  auto ast = parse("+\"exact phrase\"");
+  ASSERT_NE(ast, nullptr);
+  const auto &phrase = as_phrase(*ast);
+  EXPECT_TRUE(phrase.must);
+  EXPECT_FALSE(phrase.must_not);
+}
+
+TEST_F(FtsParserTest, PhraseWithMustNotModifier) {
+  auto ast = parse("-\"bad phrase\"");
+  ASSERT_NE(ast, nullptr);
+  const auto &phrase = as_phrase(*ast);
+  EXPECT_FALSE(phrase.must);
+  EXPECT_TRUE(phrase.must_not);
+}
+
+TEST_F(FtsParserTest, PhraseWithThreeWords) {
+  auto ast = parse("\"one two three\"");
+  ASSERT_NE(ast, nullptr);
+  const auto &phrase = as_phrase(*ast);
+  ASSERT_EQ(phrase.terms.size(), 3u);
+  EXPECT_EQ(phrase.terms[0], "one");
+  EXPECT_EQ(phrase.terms[1], "two");
+  EXPECT_EQ(phrase.terms[2], "three");
+}
+
+// ============================================================
+// Explicit OR
+// ============================================================
+
+TEST_F(FtsParserTest, ExplicitOr) {
+  auto ast = parse("cat OR dog");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "cat");
+  EXPECT_EQ(as_term(*or_node.children[1]).term, "dog");
+}
+
+TEST_F(FtsParserTest, MultipleOr) {
+  auto ast = parse("a OR b OR c");
+  ASSERT_NE(ast, nullptr);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 3u);
+}
+
+// ============================================================
+// Explicit AND
+// ============================================================
+
+TEST_F(FtsParserTest, ExplicitAnd) {
+  auto ast = parse("cat AND dog");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::AND);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "cat");
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "dog");
+}
+
+TEST_F(FtsParserTest, MultipleAnd) {
+  auto ast = parse("a AND b AND c");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 3u);
+}
+
+// ============================================================
+// Operator precedence: AND binds tighter than OR
+// ============================================================
+
+TEST_F(FtsParserTest, AndBindsTighterThanOr) {
+  // "a OR b AND c" should parse as "a OR (b AND c)"
+  auto ast = parse("a OR b AND c");
+  ASSERT_NE(ast, nullptr);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+
+  // Left child: term "a"
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "a");
+
+  // Right child: AND(b, c)
+  const auto &and_node = as_and(*or_node.children[1]);
+  ASSERT_EQ(and_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "b");
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "c");
+}
+
+// ============================================================
+// Implicit adjacency (seqExpr / default operator)
+// ============================================================
+
+TEST_F(FtsParserTest, ImplicitAdjacency) {
+  // Adjacent terms without explicit operator: "a b" -> seqExpr -> OR(a, b)
+  auto ast = parse("a b");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "a");
+  EXPECT_EQ(as_term(*or_node.children[1]).term, "b");
+}
+
+TEST_F(FtsParserTest, ImplicitAdjacencyThreeTerms) {
+  auto ast = parse("a b c");
+  ASSERT_NE(ast, nullptr);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 3u);
+}
+
+TEST_F(FtsParserTest, ImplicitAdjacencyWithModifiers) {
+  // "+a - b" -> seqExpr -> OR(must:a, must_not:b)
+  // Note: "-b" (no space) is lexed as a single REGULAR_ID; use "- b" for
+  // must_not.
+  auto ast = parse("+a - b");
+  ASSERT_NE(ast, nullptr);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+  EXPECT_TRUE(as_term(*or_node.children[0]).must);
+  EXPECT_TRUE(as_term(*or_node.children[1]).must_not);
+}
+
+// ============================================================
+// Parentheses grouping
+// ============================================================
+
+TEST_F(FtsParserTest, Parentheses) {
+  // "(a OR b) AND c"
+  auto ast = parse("(a OR b) AND c");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+
+  // Left: OR(a, b)
+  const auto &or_node = as_or(*and_node.children[0]);
+  ASSERT_EQ(or_node.children.size(), 2u);
+
+  // Right: term c
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "c");
+}
+
+TEST_F(FtsParserTest, NestedParentheses) {
+  auto ast = parse("((a OR b) AND c) OR d");
+  ASSERT_NE(ast, nullptr);
+  const auto &outer_or = as_or(*ast);
+  ASSERT_EQ(outer_or.children.size(), 2u);
+  EXPECT_EQ(as_term(*outer_or.children[1]).term, "d");
+}
+
+// ============================================================
+// Mixed complex queries
+// ============================================================
+
+TEST_F(FtsParserTest, MixedTermAndPhrase) {
+  // "+vector - slow \"exact phrase\""
+  // Note: use "- slow" (with space) so MINUS_SIGN is a separate token.
+  auto ast = parse("+vector - slow \"exact phrase\"");
+  ASSERT_NE(ast, nullptr);
+  // Four adjacent items -> seqExpr -> OR(must:vector, must_not:slow, phrase)
+  // Actually: +vector and - slow and phrase are three unary nodes in seqExpr
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 3u);
+
+  EXPECT_TRUE(as_term(*or_node.children[0]).must);
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "vector");
+
+  EXPECT_TRUE(as_term(*or_node.children[1]).must_not);
+  EXPECT_EQ(as_term(*or_node.children[1]).term, "slow");
+
+  EXPECT_EQ(or_node.children[2]->type(), FtsNodeType::PHRASE);
+}
+
+TEST_F(FtsParserTest, AndWithPhrase) {
+  auto ast = parse("\"machine learning\" AND model");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+  EXPECT_EQ(and_node.children[0]->type(), FtsNodeType::PHRASE);
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "model");
+}
+
+TEST_F(FtsParserTest, ComplexBooleanQuery) {
+  // "a AND b OR c AND d" -> (a AND b) OR (c AND d)
+  auto ast = parse("a AND b OR c AND d");
+  ASSERT_NE(ast, nullptr);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+
+  const auto &left_and = as_and(*or_node.children[0]);
+  ASSERT_EQ(left_and.children.size(), 2u);
+
+  const auto &right_and = as_and(*or_node.children[1]);
+  ASSERT_EQ(right_and.children.size(), 2u);
+}
+
+// ============================================================
+// Single-child simplification (no unnecessary wrapping)
+// ============================================================
+
+TEST_F(FtsParserTest, SingleChildNotWrapped) {
+  // A single term should not be wrapped in an AndNode/OrNode
+  auto ast = parse("hello");
+  ASSERT_NE(ast, nullptr);
+  EXPECT_EQ(ast->type(), FtsNodeType::TERM);
+}
+
+TEST_F(FtsParserTest, SinglePhraseNotWrapped) {
+  auto ast = parse("\"hello world\"");
+  ASSERT_NE(ast, nullptr);
+  EXPECT_EQ(ast->type(), FtsNodeType::PHRASE);
+}
+
+// ============================================================
+// Error cases
+// ============================================================
+
+TEST_F(FtsParserTest, EmptyQueryReturnsNull) {
+  auto ast = parse("");
+  EXPECT_EQ(ast, nullptr);
+}
+
+TEST_F(FtsParserTest, OnlyParenthesesReturnsNull) {
+  auto ast = parse("()");
+  EXPECT_EQ(ast, nullptr);
+}
+
+TEST_F(FtsParserTest, UnclosedPhraseReturnsNull) {
+  // An unclosed double-quote causes the DQUOTA_STRING rule to fail.  The
+  // remaining characters are absorbed by the TERM catch-all rule, so the
+  // query parses as a single term rather than returning nullptr.
+  auto ast = parse("\"unclosed phrase");
+  ASSERT_NE(ast, nullptr);
+}
+
+TEST_F(FtsParserTest, UnclosedParenReturnsNull) {
+  auto ast = parse("(a OR b");
+  EXPECT_EQ(ast, nullptr);
+}
+
+// ============================================================
+// NOT as a binary AND-NOT operator
+// ============================================================
+
+TEST_F(FtsParserTest, NotAsBinaryAndNot) {
+  // `foo NOT bar` <=> `foo AND NOT bar` -> And[foo, bar(must_not)]
+  auto ast = parse("foo NOT bar");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "foo");
+  EXPECT_FALSE(and_node.children[0]->must_not);
+
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "bar");
+  EXPECT_TRUE(and_node.children[1]->must_not);
+}
+
+TEST_F(FtsParserTest, AndAndNot) {
+  // `a AND NOT b` -> And[a, b(must_not)]
+  auto ast = parse("a AND NOT b");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "a");
+  EXPECT_FALSE(and_node.children[0]->must_not);
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "b");
+  EXPECT_TRUE(and_node.children[1]->must_not);
+}
+
+TEST_F(FtsParserTest, OrThenNot) {
+  // Precedence check: NOT shares AND's precedence (higher than OR).
+  // `a OR b NOT c` -> Or[a, And[b, c(must_not)]]
+  auto ast = parse("a OR b NOT c");
+  ASSERT_NE(ast, nullptr);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "a");
+
+  const auto &right_and = as_and(*or_node.children[1]);
+  ASSERT_EQ(right_and.children.size(), 2u);
+  EXPECT_EQ(as_term(*right_and.children[0]).term, "b");
+  EXPECT_FALSE(right_and.children[0]->must_not);
+  EXPECT_EQ(as_term(*right_and.children[1]).term, "c");
+  EXPECT_TRUE(right_and.children[1]->must_not);
+}
+
+TEST_F(FtsParserTest, NotWithGroup) {
+  // `a NOT (b OR c)` -> And[a, Or[b, c](must_not)]
+  auto ast = parse("a NOT (b OR c)");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "a");
+  EXPECT_FALSE(and_node.children[0]->must_not);
+
+  ASSERT_EQ(and_node.children[1]->type(), FtsNodeType::OR);
+  EXPECT_TRUE(and_node.children[1]->must_not);
+  const auto &grouped_or = as_or(*and_node.children[1]);
+  ASSERT_EQ(grouped_or.children.size(), 2u);
+  EXPECT_EQ(as_term(*grouped_or.children[0]).term, "b");
+  EXPECT_EQ(as_term(*grouped_or.children[1]).term, "c");
+}
+
+TEST_F(FtsParserTest, LeadingNotIsError) {
+  // Leading NOT has no left-hand operand and must fail to parse.
+  auto ast = parse("NOT a");
+  EXPECT_EQ(ast, nullptr);
+  EXPECT_FALSE(err_msg().empty());
+}
+
+TEST_F(FtsParserTest, MultipleNotsAndAnds) {
+  // `a AND b NOT c AND d NOT e` -> And[a, b, c(must_not), d, e(must_not)]
+  auto ast = parse("a AND b NOT c AND d NOT e");
+  ASSERT_NE(ast, nullptr);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 5u);
+
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "a");
+  EXPECT_FALSE(and_node.children[0]->must_not);
+
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "b");
+  EXPECT_FALSE(and_node.children[1]->must_not);
+
+  EXPECT_EQ(as_term(*and_node.children[2]).term, "c");
+  EXPECT_TRUE(and_node.children[2]->must_not);
+
+  EXPECT_EQ(as_term(*and_node.children[3]).term, "d");
+  EXPECT_FALSE(and_node.children[3]->must_not);
+
+  EXPECT_EQ(as_term(*and_node.children[4]).term, "e");
+  EXPECT_TRUE(and_node.children[4]->must_not);
+}
+
+// ============================================================
+// +/- modifiers on parenthesised sub-expressions
+// ============================================================
+
+TEST_F(FtsParserTest, MustOnGroup) {
+  // `+(a OR b)` -> Or[a, b]{must=true}
+  auto ast = parse("+(a OR b)");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  EXPECT_TRUE(ast->must);
+  EXPECT_FALSE(ast->must_not);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "a");
+  EXPECT_EQ(as_term(*or_node.children[1]).term, "b");
+}
+
+TEST_F(FtsParserTest, MustNotOnGroup) {
+  // `-(a AND b)` -> And[a, b]{must_not=true}
+  auto ast = parse("-(a AND b)");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::AND);
+  EXPECT_FALSE(ast->must);
+  EXPECT_TRUE(ast->must_not);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "a");
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "b");
+}
+
+TEST_F(FtsParserTest, MustGroupAndOther) {
+  // `+(a OR b) c` -> implicit-OR collapses three siblings into a single
+  // OrNode: Or[Or[a, b]{must=true}, c]
+  // (the inner OR keeps its must flag; implicit adjacency is still OR.)
+  auto ast = parse("+(a OR b) c");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  const auto &outer_or = as_or(*ast);
+  ASSERT_EQ(outer_or.children.size(), 2u);
+
+  ASSERT_EQ(outer_or.children[0]->type(), FtsNodeType::OR);
+  EXPECT_TRUE(outer_or.children[0]->must);
+  const auto &inner_or = as_or(*outer_or.children[0]);
+  ASSERT_EQ(inner_or.children.size(), 2u);
+  EXPECT_EQ(as_term(*inner_or.children[0]).term, "a");
+  EXPECT_EQ(as_term(*inner_or.children[1]).term, "b");
+
+  EXPECT_EQ(as_term(*outer_or.children[1]).term, "c");
+}
+
+TEST_F(FtsParserTest, NestedGroupModifier) {
+  // `+((a AND b) OR c)` -> the must flag attaches to the outermost OrNode.
+  auto ast = parse("+((a AND b) OR c)");
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  EXPECT_TRUE(ast->must);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+
+  ASSERT_EQ(or_node.children[0]->type(), FtsNodeType::AND);
+  EXPECT_FALSE(or_node.children[0]->must);  // inner AND not affected
+  const auto &inner_and = as_and(*or_node.children[0]);
+  ASSERT_EQ(inner_and.children.size(), 2u);
+  EXPECT_EQ(as_term(*inner_and.children[0]).term, "a");
+  EXPECT_EQ(as_term(*inner_and.children[1]).term, "b");
+
+  EXPECT_EQ(as_term(*or_node.children[1]).term, "c");
+}
+
+// ============================================================
+// Default operator (FtsDefaultOperator::OR / AND)
+// Only adjacent bare terms (no explicit operator) are affected; explicit
+// AND / OR / + / - usages keep their original semantics.
+// ============================================================
+
+TEST_F(FtsParserTest, DefaultOperatorOr_AdjacentBareTerms) {
+  // Backward-compat: omitting default_op or passing OR yields the original
+  // implicit-OR behaviour for adjacent bare terms.
+  auto ast = parse("vector database", FtsDefaultOperator::OR);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "vector");
+  EXPECT_EQ(as_term(*or_node.children[1]).term, "database");
+}
+
+TEST_F(FtsParserTest, DefaultOperatorAnd_AdjacentBareTerms) {
+  // With AND default, two adjacent bare terms collapse into an AndNode.
+  auto ast = parse("vector database", FtsDefaultOperator::AND);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::AND);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "vector");
+  EXPECT_EQ(as_term(*and_node.children[1]).term, "database");
+}
+
+TEST_F(FtsParserTest, DefaultOperatorAnd_SingleTermUnchanged) {
+  // A single term should not be wrapped in an AndNode.
+  auto ast = parse("vector", FtsDefaultOperator::AND);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::TERM);
+  EXPECT_EQ(as_term(*ast).term, "vector");
+}
+
+TEST_F(FtsParserTest, DefaultOperatorAnd_PropagatesIntoParens) {
+  // Parenthesised sub-expressions inherit the same default operator.
+  // `(a b) c` with AND default -> And[And[a, b], c].
+  auto ast = parse("(a b) c", FtsDefaultOperator::AND);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::AND);
+  const auto &outer_and = as_and(*ast);
+  ASSERT_EQ(outer_and.children.size(), 2u);
+
+  ASSERT_EQ(outer_and.children[0]->type(), FtsNodeType::AND);
+  const auto &inner_and = as_and(*outer_and.children[0]);
+  ASSERT_EQ(inner_and.children.size(), 2u);
+  EXPECT_EQ(as_term(*inner_and.children[0]).term, "a");
+  EXPECT_EQ(as_term(*inner_and.children[1]).term, "b");
+
+  EXPECT_EQ(as_term(*outer_and.children[1]).term, "c");
+}
+
+TEST_F(FtsParserTest, DefaultOperatorAnd_DoesNotOverrideExplicitOr) {
+  // Explicit OR has higher-level structure; default_op only changes the
+  // implicit adjacency inside each seqExpr.
+  // `a OR b c` with AND default -> Or[a, And[b, c]].
+  auto ast = parse("a OR b c", FtsDefaultOperator::AND);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::OR);
+  const auto &or_node = as_or(*ast);
+  ASSERT_EQ(or_node.children.size(), 2u);
+
+  EXPECT_EQ(as_term(*or_node.children[0]).term, "a");
+
+  ASSERT_EQ(or_node.children[1]->type(), FtsNodeType::AND);
+  const auto &inner_and = as_and(*or_node.children[1]);
+  ASSERT_EQ(inner_and.children.size(), 2u);
+  EXPECT_EQ(as_term(*inner_and.children[0]).term, "b");
+  EXPECT_EQ(as_term(*inner_and.children[1]).term, "c");
+}
+
+TEST_F(FtsParserTest, DefaultOperatorOr_DoesNotOverrideExplicitAnd) {
+  // Grammar: andExpr = seqExpr ((AND|NOT) seqExpr)*
+  // `a AND b c` parses as seqExpr("a") AND seqExpr("b c").
+  // With OR default, seqExpr("b c") -> Or[b, c].
+  // Result: And[a, Or[b, c]].
+  auto ast = parse("a AND b c", FtsDefaultOperator::OR);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::AND);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 2u);
+
+  EXPECT_EQ(as_term(*and_node.children[0]).term, "a");
+
+  ASSERT_EQ(and_node.children[1]->type(), FtsNodeType::OR);
+  const auto &inner_or = as_or(*and_node.children[1]);
+  ASSERT_EQ(inner_or.children.size(), 2u);
+  EXPECT_EQ(as_term(*inner_or.children[0]).term, "b");
+  EXPECT_EQ(as_term(*inner_or.children[1]).term, "c");
+}
+
+TEST_F(FtsParserTest, DefaultOperatorAnd_PreservesPlusMinusModifiers) {
+  // `+a b -c` with AND default -> And[a{must}, b, c{must_not}].
+  // Modifiers on individual terms are independent of default_op.
+  auto ast = parse("+a b -c", FtsDefaultOperator::AND);
+  ASSERT_NE(ast, nullptr);
+  ASSERT_EQ(ast->type(), FtsNodeType::AND);
+  const auto &and_node = as_and(*ast);
+  ASSERT_EQ(and_node.children.size(), 3u);
+
+  const auto &t0 = as_term(*and_node.children[0]);
+  EXPECT_EQ(t0.term, "a");
+  EXPECT_TRUE(t0.must);
+  EXPECT_FALSE(t0.must_not);
+
+  const auto &t1 = as_term(*and_node.children[1]);
+  EXPECT_EQ(t1.term, "b");
+  EXPECT_FALSE(t1.must);
+  EXPECT_FALSE(t1.must_not);
+
+  const auto &t2 = as_term(*and_node.children[2]);
+  EXPECT_EQ(t2.term, "c");
+  EXPECT_FALSE(t2.must);
+  EXPECT_TRUE(t2.must_not);
+}
+
+}  // namespace zvec::fts
diff --git a/tests/db/sqlengine/fts_recall_test.cc b/tests/db/sqlengine/fts_recall_test.cc
new file mode 100644
index 000000000..392d8f4e2
--- /dev/null
+++ b/tests/db/sqlengine/fts_recall_test.cc
@@ -0,0 +1,527 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License
+
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+#include <gtest/gtest.h>
+#include "db/common/file_helper.h"
+#include "db/index/common/version_manager.h"
+#include "db/index/segment/segment.h"
+#include "db/sqlengine/sqlengine.h"
+#include "zvec/db/doc.h"
+#include "zvec/db/index_params.h"
+#include "zvec/db/query_params.h"
+#include "zvec/db/schema.h"
+#include "zvec/db/type.h"
+
+namespace zvec::sqlengine {
+
+// ============================================================
+// FTS Recall Test fixture (real Segment + SQLEngine::execute via VectorQuery)
+// ============================================================
+
+class FtsRecallTest : public ::testing::Test {
+ protected:
+  static void SetUpTestSuite() {
+    FileHelper::RemoveDirectory(seg_path_);
+    FileHelper::CreateDirectory(seg_path_);
+
+    build_schema();
+    auto segment = create_segment();
+    ASSERT_NE(segment, nullptr);
+    insert_docs(segment);
+    segments_.push_back(segment);
+
+    engine_ = SQLEngine::create(std::make_shared<Profiler>());
+  }
+
+  static void TearDownTestSuite() {
+    segments_.clear();
+    engine_.reset();
+    schema_.reset();
+    FileHelper::RemoveDirectory(seg_path_);
+  }
+
+  // Helper: execute FTS query_string search via VectorQuery
+  Result<DocPtrList> fts_search(const std::string &query_string,
+                                int topk = 10) {
+    VectorQuery vq;
+    vq.topk_ = topk;
+    vq.field_name_ = "content";
+    FtsQuery fts_query;
+    fts_query.query_string_ = query_string;
+    vq.fts_query_ = fts_query;
+    return engine_->execute(schema_, vq, segments_);
+  }
+
+  // Helper: execute FTS match_string search via VectorQuery
+  Result<DocPtrList> fts_match(const std::string &match_string,
+                               const std::string &default_op = "",
+                               int topk = 10) {
+    VectorQuery vq;
+    vq.topk_ = topk;
+    vq.field_name_ = "content";
+    FtsQuery fts_query;
+    fts_query.match_string_ = match_string;
+    vq.fts_query_ = fts_query;
+    if (!default_op.empty()) {
+      auto fts_qp = std::make_shared<zvec::FtsQueryParams>();
+      fts_qp->set_default_operator(default_op);
+      vq.query_params_ = fts_qp;
+    }
+    return engine_->execute(schema_, vq, segments_);
+  }
+
+  // Helper: execute FTS query_string with default_operator via VectorQuery
+  Result<DocPtrList> fts_query_with_op(const std::string &query_string,
+                                       const std::string &default_op,
+                                       int topk = 10) {
+    VectorQuery vq;
+    vq.topk_ = topk;
+    vq.field_name_ = "content";
+    FtsQuery fts_query;
+    fts_query.query_string_ = query_string;
+    vq.fts_query_ = fts_query;
+    auto fts_qp = std::make_shared<zvec::FtsQueryParams>();
+    fts_qp->set_default_operator(default_op);
+    vq.query_params_ = fts_qp;
+    return engine_->execute(schema_, vq, segments_);
+  }
+
+  // Helper: execute FTS query_string with WHERE filter via VectorQuery
+  Result<DocPtrList> fts_search_with_filter(const std::string &query_string,
+                                            const std::string &filter,
+                                            int topk = 10) {
+    VectorQuery vq;
+    vq.topk_ = topk;
+    vq.field_name_ = "content";
+    vq.filter_ = filter;
+    FtsQuery fts_query;
+    fts_query.query_string_ = query_string;
+    vq.fts_query_ = fts_query;
+    return engine_->execute(schema_, vq, segments_);
+  }
+
+ private:
+  static void build_schema() {
+    auto fts_params = std::make_shared<FtsIndexParams>(
+        "whitespace", std::vector<std::string>{"lowercase"}, "");
+    auto invert_params = std::make_shared<InvertIndexParams>(true);
+    schema_ = std::make_shared<CollectionSchema>(
+        "fts_recall_test",
+        std::vector<FieldSchema::Ptr>{
+            std::make_shared<FieldSchema>("content", DataType::STRING, false,
+                                          fts_params),
+            std::make_shared<FieldSchema>("tag", DataType::INT32, false,
+                                          invert_params),
+            // Dummy vector field required for filter parsing path in
+            // execute
+            std::make_shared<FieldSchema>(
+                "vec", DataType::VECTOR_FP32, 4, false,
+                std::make_shared<FlatIndexParams>(MetricType::L2)),
+        });
+  }
+
+  static Segment::Ptr create_segment() {
+    auto segment_meta = std::make_shared<SegmentMeta>();
+    segment_meta->set_id(0);
+
+    auto id_map = IDMap::CreateAndOpen("fts_recall_test", seg_path_ + "/id_map",
+                                       true, false);
+    auto delete_store = std::make_shared<DeleteStore>("fts_recall_test");
+
+    Version v1;
+    v1.set_schema(*schema_);
+    std::string v_path = seg_path_ + "/manifest";
+    FileHelper::CreateDirectory(v_path);
+    auto vm = VersionManager::Create(v_path, v1);
+    if (!vm.has_value()) {
+      return nullptr;
+    }
+
+    BlockMeta mem_block;
+    mem_block.id_ = 0;
+    mem_block.type_ = BlockType::SCALAR;
+    mem_block.min_doc_id_ = 0;
+    mem_block.max_doc_id_ = 0;
+    mem_block.doc_count_ = 0;
+    segment_meta->set_writing_forward_block(mem_block);
+
+    SegmentOptions options;
+    options.read_only_ = false;
+    options.enable_mmap_ = true;
+    options.max_buffer_size_ = 256 * 1024;
+
+    auto result = Segment::CreateAndOpen(seg_path_, *schema_, 0, 0, id_map,
+                                         delete_store, vm.value(), options);
+    if (!result) {
+      return nullptr;
+    }
+    return result.value();
+  }
+
+  static void insert_docs(const Segment::Ptr &segment) {
+    // doc_id 0: "apple banana cherry"        tag=1
+    // doc_id 1: "banana date elderberry"     tag=2
+    // doc_id 2: "cherry fig grape"           tag=1
+    // doc_id 3: "apple fig honeydew"         tag=2
+    // doc_id 4: "date grape kiwi"            tag=1
+    // doc_id 5: "apple apple apple"          tag=2
+    // doc_id 6: "mango papaya starfruit"     tag=1
+    // doc_id 7: "banana banana grape"        tag=2
+    struct Entry {
+      std::string content;
+      int32_t tag;
+    };
+    std::vector<Entry> entries = {
+        {"apple banana cherry", 1},    {"banana date elderberry", 2},
+        {"cherry fig grape", 1},       {"apple fig honeydew", 2},
+        {"date grape kiwi", 1},        {"apple apple apple", 2},
+        {"mango papaya starfruit", 1}, {"banana banana grape", 2},
+    };
+
+    for (size_t i = 0; i < entries.size(); ++i) {
+      Doc doc;
+      doc.set_pk("pk_" + std::to_string(i));
+      doc.set_doc_id(i);
+      doc.set<std::string>("content", entries[i].content);
+      doc.set<int32_t>("tag", entries[i].tag);
+      auto status = segment->Insert(doc);
+      ASSERT_TRUE(status.ok())
+          << "Insert doc " << i << " failed: " << status.c_str();
+    }
+  }
+
+ protected:
+  static inline std::string seg_path_ = "./fts_recall_test_collection";
+  static inline CollectionSchema::Ptr schema_;
+  static inline std::vector<Segment::Ptr> segments_;
+  static inline SQLEngine::Ptr engine_;
+};
+
+// ============================================================
+// Basic FTS search tests
+// ============================================================
+
+// "apple" matches docs 0, 3, 5
+TEST_F(FtsRecallTest, BasicSingleTerm) {
+  auto result = fts_search("apple");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 3u);
+}
+
+// BM25 ordering: doc 5 ("apple apple apple") should have highest score
+TEST_F(FtsRecallTest, BM25ScoreOrdering) {
+  auto result = fts_search("apple");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  ASSERT_GE(result->size(), 2u);
+
+  // Results should be sorted by score descending
+  for (size_t i = 0; i + 1 < result->size(); ++i) {
+    EXPECT_GE((*result)[i]->score(), (*result)[i + 1]->score())
+        << "Results not sorted descending at index " << i;
+  }
+  // Doc 5 has highest TF for "apple"
+  EXPECT_EQ((*result)[0]->pk(), "pk_5");
+}
+
+// "kiwi" only in doc 4
+TEST_F(FtsRecallTest, SingleMatch) {
+  auto result = fts_search("kiwi");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  ASSERT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_4");
+}
+
+// Nonexistent term
+TEST_F(FtsRecallTest, NoMatch) {
+  auto result = fts_search("zzznomatch");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 0u);
+}
+
+// Topk limit: "banana" in docs 0, 1, 7 (3 matches), topk=2
+TEST_F(FtsRecallTest, TopkLimit) {
+  auto result = fts_search("banana", /*topk=*/2);
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_LE(result->size(), 2u);
+}
+
+// Multi-term implicit OR: "apple banana" matches union of {0,3,5} and {0,1,7}
+TEST_F(FtsRecallTest, MultiTermImplicitOr) {
+  auto result = fts_search("apple banana");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  // Union: {0,1,3,5,7} = 5 docs
+  EXPECT_EQ(result->size(), 5u);
+}
+
+// "starfruit" only in doc 6
+TEST_F(FtsRecallTest, RareTerm) {
+  auto result = fts_search("starfruit");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  ASSERT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_6");
+}
+
+// "grape" in docs 2, 4, 7
+TEST_F(FtsRecallTest, CommonTerm) {
+  auto result = fts_search("grape");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 3u);
+}
+
+// ============================================================
+// Explicit AND
+// ============================================================
+
+// "apple AND banana" -> intersection of {0,3,5} and {0,1,7} = {0}
+TEST_F(FtsRecallTest, ExplicitAnd) {
+  auto result = fts_search("apple AND banana");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_0");
+}
+
+// "cherry AND fig" -> {0,2} AND {2,3} = {2}
+TEST_F(FtsRecallTest, ExplicitAnd2) {
+  auto result = fts_search("cherry AND fig");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_2");
+}
+
+// ============================================================
+// Binary NOT (AND-NOT)
+// ============================================================
+
+// "apple NOT banana" -> {0,3,5} minus {0,1,7} = {3,5}
+TEST_F(FtsRecallTest, BinaryNot) {
+  auto result = fts_search("apple NOT banana");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 2u);
+  std::set<std::string> pks;
+  for (auto &doc : *result) {
+    pks.insert(doc->pk());
+  }
+  EXPECT_TRUE(pks.count("pk_3"));
+  EXPECT_TRUE(pks.count("pk_5"));
+}
+
+// "banana NOT grape" -> {0,1,7} minus {2,4,7} = {0,1}
+TEST_F(FtsRecallTest, BinaryNot2) {
+  auto result = fts_search("banana NOT grape");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 2u);
+  std::set<std::string> pks;
+  for (auto &doc : *result) {
+    pks.insert(doc->pk());
+  }
+  EXPECT_TRUE(pks.count("pk_0"));
+  EXPECT_TRUE(pks.count("pk_1"));
+}
+
+// ============================================================
+// Error cases
+// ============================================================
+
+// Leading NOT should fail parse
+TEST_F(FtsRecallTest, LeadingNotIsRejected) {
+  auto result = fts_search("NOT apple");
+  EXPECT_FALSE(result.has_value());
+}
+
+// Both query_string_ and match_string_ empty
+TEST_F(FtsRecallTest, BothEmptyReturnsError) {
+  VectorQuery vq;
+  vq.topk_ = 10;
+  vq.field_name_ = "content";
+  vq.fts_query_ = FtsQuery{};  // both fields empty
+  auto result = engine_->execute(schema_, vq, segments_);
+  EXPECT_FALSE(result.has_value());
+}
+
+// Both query_string_ and match_string_ set
+TEST_F(FtsRecallTest, BothSetReturnsError) {
+  VectorQuery vq;
+  vq.topk_ = 10;
+  vq.field_name_ = "content";
+  FtsQuery fts_query;
+  fts_query.query_string_ = "apple";
+  fts_query.match_string_ = "banana";
+  vq.fts_query_ = fts_query;
+  auto result = engine_->execute(schema_, vq, segments_);
+  EXPECT_FALSE(result.has_value());
+}
+
+// ============================================================
+// match_string tests
+// ============================================================
+
+// match_string "starfruit" -> doc 6
+TEST_F(FtsRecallTest, MatchStringRareTerm) {
+  auto result = fts_match("starfruit");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  ASSERT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_6");
+}
+
+// match_string "grape" -> docs 2, 4, 7
+TEST_F(FtsRecallTest, MatchStringCommonTerm) {
+  auto result = fts_match("grape");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 3u);
+}
+
+// match_string "apple banana" -> OR -> union {0,1,3,5,7}
+TEST_F(FtsRecallTest, MatchStringMultipleTokens) {
+  auto result = fts_match("apple banana");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 5u);
+}
+
+// ============================================================
+// default_operator tests
+// ============================================================
+
+// AND default for match_string: "apple banana" -> intersection = {0}
+TEST_F(FtsRecallTest, DefaultOperatorAnd_MatchString) {
+  auto result = fts_match("apple banana", "AND");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_0");
+}
+
+// OR default for match_string (backward compat)
+TEST_F(FtsRecallTest, DefaultOperatorOr_MatchString) {
+  auto result = fts_match("apple banana", "OR");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 5u);
+}
+
+// AND default for query_string: "apple banana" -> AND
+TEST_F(FtsRecallTest, DefaultOperatorAnd_QueryString) {
+  auto result = fts_query_with_op("apple banana", "AND");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 1u);
+  EXPECT_EQ((*result)[0]->pk(), "pk_0");
+}
+
+// Explicit OR in query not overridden by default_operator=AND
+// "apple OR grape" with AND default -> OR still applies
+TEST_F(FtsRecallTest, DefaultOperatorAnd_DoesNotOverrideExplicitOr) {
+  auto result = fts_query_with_op("apple OR grape", "AND");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  // apple: {0,3,5}, grape: {2,4,7} -> union = 6
+  EXPECT_EQ(result->size(), 6u);
+}
+
+// Empty default_operator keeps historical OR for match_string
+TEST_F(FtsRecallTest, DefaultOperatorEmpty_BackwardCompatibleOr) {
+  auto result = fts_match("apple banana");  // no default_op arg
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  // OR semantics: union of apple{0,3,5} and banana{0,1,7} = 5
+  EXPECT_EQ(result->size(), 5u);
+}
+
+// Lowercase "and" must be accepted
+TEST_F(FtsRecallTest, DefaultOperatorAndLowercase_Accepted) {
+  auto result = fts_match("apple banana", "and");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_EQ(result->size(), 1u);
+}
+
+// Mixed-case "And" / "oR": current implementation only recognises exact
+// "AND"/"and" and "OR"/"or".  Unknown values fall through to the default (OR).
+TEST_F(FtsRecallTest, DefaultOperatorMixedCase_Accepted) {
+  {
+    // "And" is not recognised as AND -> falls back to OR
+    auto result = fts_match("apple banana", "And");
+    ASSERT_TRUE(result.has_value()) << result.error().c_str();
+    EXPECT_EQ(result->size(), 5u);
+  }
+  {
+    // "oR" is not recognised as OR explicitly -> also falls back to OR
+    auto result = fts_match("apple banana", "oR");
+    ASSERT_TRUE(result.has_value()) << result.error().c_str();
+    EXPECT_EQ(result->size(), 5u);
+  }
+}
+
+// Invalid default_operator value should be rejected
+TEST_F(FtsRecallTest, DefaultOperatorInvalid_Rejected) {
+  auto result = fts_match("apple banana", "xor");
+  // Current implementation treats unknown values as OR (no rejection),
+  // so this test documents the actual behaviour.
+  // If the implementation is changed to reject, flip to EXPECT_FALSE.
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+}
+
+// ============================================================
+// Error cases (additional)
+// ============================================================
+
+// Empty field_name should fail
+TEST_F(FtsRecallTest, EmptyFieldNameReturnsError) {
+  VectorQuery vq;
+  vq.topk_ = 10;
+  vq.field_name_ = "";
+  FtsQuery fts_query;
+  fts_query.query_string_ = "apple";
+  vq.fts_query_ = fts_query;
+  auto result = engine_->execute(schema_, vq, segments_);
+  EXPECT_FALSE(result.has_value());
+}
+
+// Empty query_string (with field_name set) should fail
+TEST_F(FtsRecallTest, EmptyQueryStringReturnsError) {
+  VectorQuery vq;
+  vq.topk_ = 10;
+  vq.field_name_ = "content";
+  // Both query_string_ and match_string_ empty -> error
+  vq.fts_query_ = FtsQuery{};
+  auto result = engine_->execute(schema_, vq, segments_);
+  EXPECT_FALSE(result.has_value());
+}
+
+// ============================================================
+// FTS search with WHERE filter
+// ============================================================
+
+// "apple" (docs 0,3,5) + tag = 1 (docs 0,2,4,6) -> intersection = {0}
+TEST_F(FtsRecallTest, FtsSearchWithFilter_ScoreTag) {
+  auto result = fts_search_with_filter("apple", "tag = 1");
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  // Filter should reduce results to doc 0 only
+  EXPECT_LE(result->size(), 3u);
+  // Verify that at least doc 0 (which satisfies both FTS and filter) is present
+  bool found_pk0 = false;
+  for (auto &doc : *result) {
+    if (doc->pk() == "pk_0") {
+      found_pk0 = true;
+    }
+  }
+  EXPECT_TRUE(found_pk0);
+}
+
+// "banana" (docs 0,1,7) + tag = 2 (docs 1,3,5,7) + topk=1
+TEST_F(FtsRecallTest, FtsSearchWithFilter_TopkRespected) {
+  auto result = fts_search_with_filter("banana", "tag = 2", /*topk=*/1);
+  ASSERT_TRUE(result.has_value()) << result.error().c_str();
+  EXPECT_LE(result->size(), 1u);
+}
+
+}  // namespace zvec::sqlengine
diff --git a/tests/db/sqlengine/mock_segment.h b/tests/db/sqlengine/mock_segment.h
index 6b46e2385..458cb24fd 100644
--- a/tests/db/sqlengine/mock_segment.h
+++ b/tests/db/sqlengine/mock_segment.h
@@ -496,6 +496,17 @@ class MockSegment : public Segment {
     return {};
   }
 
+  fts::FtsColumnIndexerPtr get_fts_indexer(
+      const std::string &field_name) const override {
+    return nullptr;
+  }
+
+  Result<std::vector<fts::FtsResult>> fts_search(
+      const std::string &field_name, const fts::FtsAstNode &ast,
+      const fts::FtsQueryParams &params) override {
+    return std::vector<fts::FtsResult>{};
+  }
+
   Status flush() override {
     return Status::OK();
   }
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index 22f06ceae..01561e5c7 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -26,4 +26,7 @@ add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL)
 add_subdirectory(arrow arrow EXCLUDE_FROM_ALL)
 add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL)
 add_subdirectory(RaBitQ-Library RaBitQ-Library EXCLUDE_FROM_ALL)
+add_subdirectory(FastPFOR FastPFOR EXCLUDE_FROM_ALL)
+add_subdirectory(limonp limonp EXCLUDE_FROM_ALL)
+add_subdirectory(cppjieba cppjieba EXCLUDE_FROM_ALL)
 
diff --git a/thirdparty/FastPFOR/CMakeLists.txt b/thirdparty/FastPFOR/CMakeLists.txt
new file mode 100644
index 000000000..77a8dfba9
--- /dev/null
+++ b/thirdparty/FastPFOR/CMakeLists.txt
@@ -0,0 +1,46 @@
+include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake)
+
+# On ARM platforms, FastPFOR uses SIMDe to emulate SSE intrinsics.
+# Detection covers native ARM builds AND cross-compilation (e.g. iOS/Android).
+set(_FASTPFOR_IS_ARM FALSE)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|arm64")
+    set(_FASTPFOR_IS_ARM TRUE)
+elseif(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+    set(_FASTPFOR_IS_ARM TRUE)
+elseif(CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    set(_FASTPFOR_IS_ARM TRUE)
+endif()
+
+if(_FASTPFOR_IS_ARM)
+    include(FetchContent)
+    FetchContent_Declare(
+        simde
+        GIT_REPOSITORY https://github.com/simd-everywhere/simde.git
+        GIT_TAG v0.8.2
+    )
+    FetchContent_MakeAvailable(simde)
+    set(FASTPFOR_EXTRA_INCS ${simde_SOURCE_DIR})
+    set(FASTPFOR_EXTRA_CXXFLAGS "")
+    set(FASTPFOR_EXTRA_DEFS SIMDE_ENABLE_NATIVE_ALIASES)
+elseif(MSVC)
+    set(FASTPFOR_EXTRA_INCS "")
+    set(FASTPFOR_EXTRA_CXXFLAGS "")
+    set(FASTPFOR_EXTRA_DEFS "")
+else()
+    set(FASTPFOR_EXTRA_INCS "")
+    set(FASTPFOR_EXTRA_CXXFLAGS -msse4.1)
+    set(FASTPFOR_EXTRA_DEFS "")
+endif()
+
+cc_library(
+    NAME FastPFOR STATIC
+    SRCS FastPFOR-0.4.0/src/simdbitpacking.cpp
+         FastPFOR-0.4.0/src/bitpacking.cpp
+         FastPFOR-0.4.0/src/bitpackingaligned.cpp
+         FastPFOR-0.4.0/src/bitpackingunaligned.cpp
+         FastPFOR-0.4.0/src/simdunalignedbitpacking.cpp
+    INCS FastPFOR-0.4.0/headers ${FASTPFOR_EXTRA_INCS}
+    PUBINCS FastPFOR-0.4.0/headers ${FASTPFOR_EXTRA_INCS}
+    DEFS ${FASTPFOR_EXTRA_DEFS}
+    CXXFLAGS ${FASTPFOR_EXTRA_CXXFLAGS}
+)
diff --git a/thirdparty/FastPFOR/FastPFOR-0.4.0 b/thirdparty/FastPFOR/FastPFOR-0.4.0
new file mode 160000
index 000000000..2be1f9769
--- /dev/null
+++ b/thirdparty/FastPFOR/FastPFOR-0.4.0
@@ -0,0 +1 @@
+Subproject commit 2be1f976935b8ff9296b029f574d7f964be9d35d
diff --git a/thirdparty/cppjieba/CMakeLists.txt b/thirdparty/cppjieba/CMakeLists.txt
new file mode 100644
index 000000000..4c80932cc
--- /dev/null
+++ b/thirdparty/cppjieba/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(cppjieba_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cppjieba-5.6.7")
+
+if(NOT TARGET cppjieba)
+  add_library(cppjieba INTERFACE)
+  target_include_directories(cppjieba SYSTEM INTERFACE
+    ${cppjieba_SOURCE_DIR}/include
+  )
+  target_link_libraries(cppjieba INTERFACE limonp)
+endif()
+
+set(cppjieba_FOUND TRUE PARENT_SCOPE)
+set(cppjieba_INCLUDE_DIR ${cppjieba_SOURCE_DIR}/include PARENT_SCOPE)
+set(cppjieba_INCLUDE_DIRS
+    ${cppjieba_SOURCE_DIR}/include
+    ${limonp_INCLUDE_DIR}
+    PARENT_SCOPE)
+set(cppjieba_DICT_DIR ${cppjieba_SOURCE_DIR}/dict PARENT_SCOPE)
diff --git a/thirdparty/cppjieba/cppjieba-5.6.7 b/thirdparty/cppjieba/cppjieba-5.6.7
new file mode 160000
index 000000000..b3602bef7
--- /dev/null
+++ b/thirdparty/cppjieba/cppjieba-5.6.7
@@ -0,0 +1 @@
+Subproject commit b3602bef7d1f67521a61788a74fb5801a0e62cd3
diff --git a/thirdparty/limonp/CMakeLists.txt b/thirdparty/limonp/CMakeLists.txt
new file mode 100644
index 000000000..610327676
--- /dev/null
+++ b/thirdparty/limonp/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(limonp_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/limonp-v1.0.2")
+
+if(NOT TARGET limonp)
+  add_library(limonp INTERFACE)
+  target_include_directories(limonp SYSTEM INTERFACE
+    ${limonp_SOURCE_DIR}/include
+  )
+endif()
+
+set(limonp_FOUND TRUE PARENT_SCOPE)
+set(limonp_INCLUDE_DIR ${limonp_SOURCE_DIR}/include PARENT_SCOPE)
+set(limonp_INCLUDE_DIRS ${limonp_SOURCE_DIR}/include PARENT_SCOPE)
diff --git a/thirdparty/limonp/limonp-v1.0.2 b/thirdparty/limonp/limonp-v1.0.2
new file mode 160000
index 000000000..9d74077df
--- /dev/null
+++ b/thirdparty/limonp/limonp-v1.0.2
@@ -0,0 +1 @@
+Subproject commit 9d74077dfcdf8073536c97a00bb79d7a3c3fdaba
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 4e17f1ec3..d01b22e1c 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -5,4 +5,5 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
 git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})
 
 # Add repository
-cc_directory(core)
\ No newline at end of file
+cc_directory(core)
+cc_directory(db)
\ No newline at end of file
diff --git a/tools/db/CMakeLists.txt b/tools/db/CMakeLists.txt
new file mode 100644
index 000000000..fc224e3f8
--- /dev/null
+++ b/tools/db/CMakeLists.txt
@@ -0,0 +1,13 @@
+include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
+
+cc_binary(
+    NAME fts_bench PACKED
+    SRCS fts_bench_main.cc
+    LIBS
+         zvec_shared
+         gflags
+         roaring
+         rocksdb
+    INCS . ${PROJECT_SOURCE_DIR}/src
+    LDFLAGS ${APPLE_FRAMEWORK_LIBS}
+)
diff --git a/tools/db/fts_bench_main.cc b/tools/db/fts_bench_main.cc
new file mode 100644
index 000000000..773cdd729
--- /dev/null
+++ b/tools/db/fts_bench_main.cc
@@ -0,0 +1,1872 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <gflags/gflags.h>
+#include <roaring/roaring.h>
+#include <zvec/ailego/encoding/json.h>
+#include <zvec/ailego/utility/time_helper.h>
+#include <zvec/db/collection.h>
+#include <zvec/db/doc.h>
+#include <zvec/db/index_params.h>
+#include <zvec/db/options.h>
+#include <zvec/db/schema.h>
+#include "db/common/constants.h"
+#include "db/common/file_helper.h"
+#include "db/common/rocksdb_context.h"
+#include "db/index/column/fts_column/fts_column_indexer.h"
+#include "db/index/column/fts_column/fts_query_ast.h"
+#include "db/index/column/fts_column/fts_rocksdb_merge.h"
+#include "db/index/column/fts_column/fts_rocksdb_reducer.h"
+#include "db/index/column/fts_column/fts_types.h"
+#include "db/index/column/fts_column/fts_utils.h"
+#include "db/index/column/fts_column/posting/bitpacked_posting_list.h"
+#include "db/index/common/index_filter.h"
+
+namespace {
+
+// Helper: build a public FtsIndexParams from FLAGS_extra_params JSON string.
+// The JSON may contain a "tokenizer" key that specifies the tokenizer name;
+// the remaining JSON is passed through as extra_params verbatim.
+static std::shared_ptr<zvec::FtsIndexParams> build_fts_index_params(
+    const std::string &extra_params_json) {
+  std::string tokenizer_name = "standard";
+  zvec::ailego::JsonValue jv;
+  if (jv.parse(extra_params_json) && jv.is_object()) {
+    const auto &obj = jv.as_object();
+    zvec::ailego::JsonValue tok_val = obj["tokenizer"];
+    if (tok_val.is_string()) {
+      tokenizer_name = tok_val.as_string().as_stl_string();
+    }
+  }
+  return std::make_shared<zvec::FtsIndexParams>(
+      std::move(tokenizer_name), std::vector<std::string>{"lowercase"},
+      extra_params_json);
+}
+
+// Helper: build a transient FieldSchema for FTS field with index params.
+static zvec::FieldSchema::Ptr make_fts_field_schema(
+    const std::string &field_name,
+    std::shared_ptr<zvec::FtsIndexParams> fts_params = nullptr) {
+  if (!fts_params) {
+    fts_params = std::make_shared<zvec::FtsIndexParams>();
+  }
+  return std::make_shared<zvec::FieldSchema>(field_name, zvec::DataType::STRING,
+                                             false, fts_params);
+}
+
+}  // namespace
+
+// ---------------------------------------------------------------------------
+// gflags
+// ---------------------------------------------------------------------------
+DEFINE_string(cmd, "",
+              "Command to execute: build, search, stats. "
+              "If empty, auto-detect from -corpus / -query flags.");
+DEFINE_string(index, "", "Path to FTS index directory");
+DEFINE_string(corpus, "", "Path to BEIR corpus.jsonl (build mode)");
+DEFINE_string(query, "", "Path to BEIR queries.jsonl (search mode)");
+DEFINE_string(qrels, "", "Path to BEIR qrels directory (search mode)");
+DEFINE_int32(topk, 10, "Top-K results to retrieve per query");
+DEFINE_string(extra_params, R"({"tokenizer":"standard"})",
+              "Extra params JSON for tokenizer pipeline");
+DEFINE_string(field, "text", "FTS field name");
+DEFINE_int32(threads, 16, "Number of threads for multi-threaded search");
+DEFINE_int32(max_queries, 0,
+             "Maximum number of queries to run in search mode. "
+             "0 means all queries (default).");
+DEFINE_bool(reduce, false,
+            "After build, run FtsRocksdbReducer to convert postings to "
+            "BitPacked format. Reduced index is written to <index>-reduce.");
+DEFINE_string(default_operator, "or",
+              "Default operator used to combine query tokens when searching "
+              "match_string-style queries. Valid values: 'or' (union, default) "
+              "or 'and' (intersection).");
+DEFINE_string(mode, "raw",
+              "Execution mode: 'raw' (default) operates directly on RocksDB "
+              "via FtsColumnIndexer; 'db' operates through "
+              "the zvec Collection API (CreateAndOpen / Insert / Query).");
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+static const std::string kForwardCfName = "forward";
+
+using namespace zvec;
+using namespace zvec::fts;
+
+// ---------------------------------------------------------------------------
+// Query AST builder: combine tokens with the configured default operator.
+// Returns nullptr when tokens is empty.
+// ---------------------------------------------------------------------------
+template <typename TokenContainer>
+static FtsAstNodePtr build_query_ast_from_tokens(
+    const TokenContainer &tokens, const std::string &default_operator) {
+  if (tokens.empty()) {
+    return nullptr;
+  }
+  if (default_operator == "and") {
+    auto and_node = std::make_unique<AndNode>();
+    for (const auto &token : tokens) {
+      and_node->children.push_back(std::make_unique<TermNode>(token.text));
+    }
+    return and_node;
+  }
+  // Default: OR
+  auto or_node = std::make_unique<OrNode>();
+  for (const auto &token : tokens) {
+    or_node->children.push_back(std::make_unique<TermNode>(token.text));
+  }
+  return or_node;
+}
+
+// Validate -default_operator flag value. Returns true if valid.
+static bool validate_default_operator(const std::string &op) {
+  return op == "or" || op == "and";
+}
+
+// ---------------------------------------------------------------------------
+// Helper: open RocksdbStore with FTS column families.
+//
+// `with_side_cfs` controls whether the build-time only side CFs
+// ($TF / $MAX_TF / $DOC_LEN) are listed in the open args.  These three CFs
+// are dropped at the end of build (after convert_postings_to_bitpacked()
+// inlines their payloads into BitPacked postings), mirroring
+// MutableSegment::dump_fts_column_indexers().  Search/stats paths therefore
+// open the store without them so that the open call doesn't fail with
+// "column family not found" against a built index.
+// ---------------------------------------------------------------------------
+static bool open_fts_store(RocksdbContext *store, const std::string &field_name,
+                           bool existing, const std::string &index_path = "",
+                           bool with_side_cfs = true,
+                           bool with_forward_cf = true) {
+  const std::string &data_dir = index_path.empty() ? FLAGS_index : index_path;
+  const std::string max_tf_cf = field_name + zvec::kFtsMaxTfSuffix;
+
+  std::vector<std::string> cf_names = {
+      field_name,
+      field_name + zvec::kFtsPositionsSuffix,
+      zvec::kFtsStatCfName,
+  };
+  if (with_forward_cf) {
+    cf_names.push_back(kForwardCfName);
+  }
+  if (with_side_cfs) {
+    cf_names.push_back(field_name + zvec::kFtsTfSuffix);
+    cf_names.push_back(max_tf_cf);
+    cf_names.push_back(field_name + zvec::kFtsDocLenSuffix);
+  }
+
+  // Build per-CF merge operators map
+  std::unordered_map<std::string, std::shared_ptr<rocksdb::MergeOperator>>
+      per_cf_merge_ops;
+  per_cf_merge_ops[field_name] = std::make_shared<FtsPostingsMerge>();
+  if (with_side_cfs) {
+    per_cf_merge_ops[max_tf_cf] = std::make_shared<FtsMaxTfMerge>();
+  }
+
+  Status status;
+  if (existing) {
+    status = store->open(
+        RocksdbContext::Args{data_dir, cf_names, nullptr, per_cf_merge_ops},
+        false);
+  } else {
+    status = store->create(RocksdbContext::Args{data_dir, cf_names, nullptr,
+                                                per_cf_merge_ops, true});
+  }
+  if (!status.ok()) {
+    fprintf(stderr, "ERROR: Failed to open RocksdbStore at [%s], status[%s]\n",
+            data_dir.c_str(), status.message().c_str());
+    return false;
+  }
+  return true;
+}
+
+// ---------------------------------------------------------------------------
+// Helper: drop $TF / $MAX_TF / $DOC_LEN CFs after convert_postings_to_bitpacked
+// has inlined their payloads into BitPacked postings.  Mirrors
+// MutableSegment::dump_fts_column_indexers().  The dumped immutable index is
+// significantly smaller because these CFs no longer occupy SST space.
+// Logs and ignores per-CF failures so that a partial drop (e.g. CF already
+// missing on retry) does not abort the whole build.
+// ---------------------------------------------------------------------------
+static void drop_fts_side_cfs(RocksdbContext *store,
+                              const std::string &field_name) {
+  const std::vector<std::string> side_cf_names = {
+      field_name + zvec::kFtsTfSuffix,
+      field_name + zvec::kFtsMaxTfSuffix,
+      field_name + zvec::kFtsDocLenSuffix,
+  };
+  for (const auto &cf_name : side_cf_names) {
+    Status drop_status = store->drop_cf(cf_name);
+    if (!drop_status.ok()) {
+      fprintf(stderr,
+              "WARN: Drop column family[%s] failed, status[%s] (ignored)\n",
+              cf_name.c_str(), drop_status.message().c_str());
+    }
+  }
+}
+
+
+// ---------------------------------------------------------------------------
+// Helper: parse a JSONL line and extract a string field
+// ---------------------------------------------------------------------------
+static bool parse_jsonl_line(
+    const std::string &line,
+    std::unordered_map<std::string, std::string> *out) {
+  zvec::ailego::JsonValue jv;
+  if (!jv.parse(line) || !jv.is_object()) {
+    return false;
+  }
+  const auto &obj = jv.as_object();
+  for (const auto &kv : obj) {
+    if (kv.value().is_string()) {
+      (*out)[kv.key().as_stl_string()] = kv.value().as_string().as_stl_string();
+    }
+  }
+  return true;
+}
+
+// ---------------------------------------------------------------------------
+// Latency statistics helper
+// ---------------------------------------------------------------------------
+struct LatencyStats {
+  std::vector<uint64_t> samples;  // microseconds
+
+  void add(uint64_t us) {
+    samples.push_back(us);
+  }
+
+  void print(const std::string &label) const {
+    if (samples.empty()) {
+      std::cout << label << ": no samples" << std::endl;
+      return;
+    }
+    std::vector<uint64_t> sorted = samples;
+    std::sort(sorted.begin(), sorted.end());
+
+    uint64_t sum = 0;
+    for (auto v : sorted) sum += v;
+    double avg = static_cast<double>(sum) / sorted.size();
+
+    auto percentile = [&](double p) -> uint64_t {
+      size_t idx = static_cast<size_t>(p * sorted.size());
+      if (idx >= sorted.size()) idx = sorted.size() - 1;
+      return sorted[idx];
+    };
+
+    std::cout << label << " latency (us):" << std::endl;
+    std::cout << "  Count  : " << sorted.size() << std::endl;
+    std::cout << "  Average: " << static_cast<uint64_t>(avg) << std::endl;
+    std::cout << "  Min    : " << sorted.front() << std::endl;
+    std::cout << "  P50    : " << percentile(0.50) << std::endl;
+    std::cout << "  P95    : " << percentile(0.95) << std::endl;
+    std::cout << "  P99    : " << percentile(0.99) << std::endl;
+    std::cout << "  Max    : " << sorted.back() << std::endl;
+  }
+};
+
+// ---------------------------------------------------------------------------
+// REDUCE MODE: convert Roaring Bitmap postings to BitPacked format
+// ---------------------------------------------------------------------------
+static int do_reduce(const std::string &src_index_path, uint32_t total_docs) {
+  const std::string dst_index_path = src_index_path + "-reduce";
+  std::cout << std::endl;
+  std::cout << "=== REDUCE MODE ===" << std::endl;
+  std::cout << "  Source : " << src_index_path << std::endl;
+  std::cout << "  Dest   : " << dst_index_path << std::endl;
+
+  // Create destination directory
+  if (!zvec::FileHelper::DirectoryExists(dst_index_path)) {
+    if (!zvec::FileHelper::CreateDirectory(dst_index_path)) {
+      fprintf(stderr, "ERROR: Failed to create reduce output directory: %s\n",
+              dst_index_path.c_str());
+      return -1;
+    }
+  }
+
+  // Open source store (existing).  $TF/$MAX_TF/$DOC_LEN were dropped at
+  // build time after convert_postings_to_bitpacked(), so we open without
+  // them.  The reducer never consumed these CFs anyway (BitPacked postings
+  // already carry inline tf/doc_len/max_score payloads).
+  RocksdbContext src_store;
+  if (!open_fts_store(&src_store, FLAGS_field, /*existing=*/true,
+                      src_index_path, /*with_side_cfs=*/false)) {
+    fprintf(stderr, "ERROR: Failed to open source store for reduce\n");
+    return -1;
+  }
+
+  // Open destination store (new) — same shape as a freshly-dumped immutable
+  // index: no side CFs.
+  RocksdbContext dst_store;
+  if (!open_fts_store(&dst_store, FLAGS_field, /*existing=*/false,
+                      dst_index_path, /*with_side_cfs=*/false)) {
+    fprintf(stderr, "ERROR: Failed to open destination store for reduce\n");
+    src_store.close();
+    return -1;
+  }
+
+  // Get source column families
+  rocksdb::ColumnFamilyHandle *src_postings = src_store.get_cf(FLAGS_field);
+  rocksdb::ColumnFamilyHandle *src_positions =
+      src_store.get_cf(FLAGS_field + zvec::kFtsPositionsSuffix);
+  rocksdb::ColumnFamilyHandle *src_stat =
+      src_store.get_cf(zvec::kFtsStatCfName);
+  rocksdb::ColumnFamilyHandle *src_forward = src_store.get_cf(kForwardCfName);
+
+  // Get destination column families
+  rocksdb::ColumnFamilyHandle *dst_postings = dst_store.get_cf(FLAGS_field);
+  rocksdb::ColumnFamilyHandle *dst_positions =
+      dst_store.get_cf(FLAGS_field + zvec::kFtsPositionsSuffix);
+  rocksdb::ColumnFamilyHandle *dst_stat =
+      dst_store.get_cf(zvec::kFtsStatCfName);
+  rocksdb::ColumnFamilyHandle *dst_forward = dst_store.get_cf(kForwardCfName);
+
+  if (!src_postings || !src_positions || !src_stat || !dst_postings ||
+      !dst_positions || !dst_stat) {
+    fprintf(stderr, "ERROR: Failed to get column families for reduce\n");
+    src_store.close();
+    dst_store.close();
+    return -1;
+  }
+
+  zvec::ailego::ElapsedTime reduce_timer;
+
+  // Initialize reducer.  Side CFs ($TF/$MAX_TF/$DOC_LEN) are no longer
+  // consumed by the reducer; they remain in the schema for SST compatibility
+  // but the bench tool does not need to wire them in.
+  FtsRocksdbReducer reducer;
+  auto init_result = reducer.init(FLAGS_field, &dst_store, dst_postings,
+                                  dst_positions, dst_stat);
+  if (!init_result.has_value()) {
+    fprintf(stderr, "ERROR: FtsRocksdbReducer init failed, status[%s]\n",
+            init_result.error().message().c_str());
+    src_store.close();
+    dst_store.close();
+    return -1;
+  }
+
+  // Feed source as a single segment: doc_id range [0, total_docs-1]
+  FtsSegmentStats seg_stats;
+  seg_stats.min_doc_id = 0;
+  seg_stats.max_doc_id = total_docs > 0 ? total_docs - 1 : 0;
+
+  auto feed_result =
+      reducer.feed(seg_stats, &src_store, src_postings, src_positions);
+  if (!feed_result.has_value()) {
+    fprintf(stderr, "ERROR: FtsRocksdbReducer feed failed, status[%s]\n",
+            feed_result.error().message().c_str());
+    src_store.close();
+    dst_store.close();
+    return -1;
+  }
+
+  // Run reduce with no-delete filter
+  auto no_delete_filter_ptr =
+      EasyIndexFilter::Create([](uint64_t /*doc_id*/) { return false; });
+  const IndexFilter &no_delete_filter = *no_delete_filter_ptr;
+
+  std::cout << "  Running reduce..." << std::endl;
+  auto reduce_result = reducer.reduce(no_delete_filter);
+  if (!reduce_result.has_value()) {
+    fprintf(stderr, "ERROR: FtsRocksdbReducer reduce failed, status[%s]\n",
+            reduce_result.error().message().c_str());
+    src_store.close();
+    dst_store.close();
+    return -1;
+  }
+
+  // Copy forward CF (doc_id -> corpus_id mapping)
+  if (src_forward && dst_forward) {
+    std::cout << "  Copying forward CF..." << std::endl;
+    auto iter = std::unique_ptr<rocksdb::Iterator>(
+        src_store.db_->NewIterator(src_store.read_opts_, src_forward));
+    while (iter->Valid()) {
+      dst_store.db_->Put(dst_store.write_opts_, dst_forward,
+                         iter->key().ToString(), iter->value().ToString());
+      iter->Next();
+    }
+    // iter auto-closes via unique_ptr
+  }
+
+  // Flush and compact destination.  Side CFs are not present here.
+  dst_store.flush();
+  // compact not available in RocksdbContext
+
+
+  uint64_t reduce_ms = reduce_timer.milli_seconds();
+
+  std::cout << "=== REDUCE COMPLETE ===" << std::endl;
+  std::cout << "  Reduce time : " << reduce_ms << " ms" << std::endl;
+  std::cout << "  Output path : " << dst_index_path << std::endl;
+
+  (void)reducer.cleanup();
+  src_store.close();
+  dst_store.close();
+  return 0;
+}
+
+
+struct CorpusEntry {
+  uint32_t doc_id;
+  std::string corpus_id;
+  std::string content;
+};
+
+static int do_build() {
+  const int num_threads = std::max(1, FLAGS_threads);
+  std::cout << "=== BUILD MODE ===" << std::endl;
+  std::cout << "Index  : " << FLAGS_index << std::endl;
+  std::cout << "Corpus : " << FLAGS_corpus << std::endl;
+  std::cout << "Field  : " << FLAGS_field << std::endl;
+  std::cout << "Threads: " << num_threads << std::endl;
+  std::cout << "ExtraParams: " << FLAGS_extra_params << std::endl;
+
+  // Remove existing index directory so that RocksdbContext::create() starts
+  // fresh (it requires the path to NOT exist).
+  if (zvec::FileHelper::DirectoryExists(FLAGS_index)) {
+    std::cout << "Removing existing index directory: " << FLAGS_index
+              << std::endl;
+    zvec::FileHelper::RemoveDirectory(FLAGS_index);
+  }
+
+  // Open RocksDB (new)
+  RocksdbContext store;
+  if (!open_fts_store(&store, FLAGS_field, /*existing=*/false)) {
+    return -1;
+  }
+
+  // Get column families
+  const std::string max_tf_cf_name = FLAGS_field + zvec::kFtsMaxTfSuffix;
+
+  rocksdb::ColumnFamilyHandle *postings_cf = store.get_cf(FLAGS_field);
+  rocksdb::ColumnFamilyHandle *positions_cf =
+      store.get_cf(FLAGS_field + zvec::kFtsPositionsSuffix);
+  rocksdb::ColumnFamilyHandle *term_freq_cf =
+      store.get_cf(FLAGS_field + zvec::kFtsTfSuffix);
+  rocksdb::ColumnFamilyHandle *max_tf_cf = store.get_cf(max_tf_cf_name);
+  rocksdb::ColumnFamilyHandle *doc_len_cf =
+      store.get_cf(FLAGS_field + zvec::kFtsDocLenSuffix);
+  rocksdb::ColumnFamilyHandle *stat_cf = store.get_cf(zvec::kFtsStatCfName);
+  rocksdb::ColumnFamilyHandle *forward_cf = store.get_cf(kForwardCfName);
+
+  if (!postings_cf || !positions_cf || !term_freq_cf || !max_tf_cf ||
+      !doc_len_cf || !stat_cf || !forward_cf) {
+    fprintf(stderr, "ERROR: Failed to get column families\n");
+    return -1;
+  }
+
+  // Pre-load all corpus entries into memory with pre-assigned doc_ids
+  std::vector<CorpusEntry> corpus_entries;
+  uint64_t parse_failed_count = 0;
+  {
+    std::ifstream corpus_file(FLAGS_corpus);
+    if (!corpus_file.is_open()) {
+      fprintf(stderr, "ERROR: Failed to open corpus file: %s\n",
+              FLAGS_corpus.c_str());
+      return -1;
+    }
+
+    uint32_t doc_id = 0;
+    std::string line;
+    while (std::getline(corpus_file, line)) {
+      if (line.empty()) continue;
+
+      std::unordered_map<std::string, std::string> fields;
+      if (!parse_jsonl_line(line, &fields)) {
+        fprintf(stderr, "WARN: Failed to parse line: %s\n",
+                line.substr(0, 100).c_str());
+        ++parse_failed_count;
+        continue;
+      }
+
+      const std::string &corpus_id = fields["_id"];
+      if (corpus_id.empty()) {
+        ++parse_failed_count;
+        continue;
+      }
+
+      std::string content;
+      if (!fields["title"].empty()) {
+        content = fields["title"] + " " + fields["text"];
+      } else {
+        content = fields["text"];
+      }
+
+      corpus_entries.push_back(
+          {doc_id, std::move(corpus_id), std::move(content)});
+      ++doc_id;
+    }
+  }
+  std::cout << "Loaded " << corpus_entries.size() << " corpus entries."
+            << std::endl;
+  if (parse_failed_count > 0) {
+    std::cout << "  Warning: " << parse_failed_count
+              << " entries failed to parse." << std::endl;
+  }
+
+  auto fts_params = build_fts_index_params(FLAGS_extra_params);
+  auto field_meta = make_fts_field_schema(FLAGS_field, fts_params);
+
+  FtsColumnIndexer indexer;
+  auto open_result = indexer.open(field_meta, &store, postings_cf, positions_cf,
+                                  term_freq_cf, max_tf_cf, doc_len_cf, stat_cf);
+  if (!open_result.has_value()) {
+    fprintf(stderr, "ERROR: Failed to open FtsColumnIndexer, status[%s]\n",
+            open_result.error().message().c_str());
+    return -1;
+  }
+
+  // Shared atomic index for work-stealing across threads
+  std::atomic<size_t> next_entry_index{0};
+
+  // Per-thread result accumulators
+  struct ThreadResult {
+    uint64_t indexed_count{0};
+    uint64_t failed_count{0};
+  };
+  std::vector<ThreadResult> thread_results(num_threads);
+
+  std::cout << "Building index with " << num_threads << " thread(s)..."
+            << std::endl;
+
+  zvec::ailego::ElapsedTime timer;
+
+  auto worker = [&](int thread_id) {
+    ThreadResult &result = thread_results[thread_id];
+
+    while (true) {
+      size_t entry_idx =
+          next_entry_index.fetch_add(1, std::memory_order_relaxed);
+      if (entry_idx >= corpus_entries.size()) break;
+
+      const CorpusEntry &entry = corpus_entries[entry_idx];
+
+      auto insert_result = indexer.insert(entry.doc_id, entry.content);
+      if (!insert_result.has_value()) {
+        fprintf(stderr,
+                "WARN: Thread[%d] failed to insert doc_id[%u] corpus_id[%s], "
+                "status[%s]\n",
+                thread_id, entry.doc_id, entry.corpus_id.c_str(),
+                insert_result.error().message().c_str());
+        ++result.failed_count;
+        continue;
+      }
+
+      // Write forward mapping: doc_id -> corpus_id
+      std::string doc_id_key;
+      fts::encode_uint32_big_endian(entry.doc_id, &doc_id_key);
+      store.db_->Put(store.write_opts_, forward_cf, doc_id_key,
+                     entry.corpus_id);
+
+      ++result.indexed_count;
+
+      // Progress reporting (only from thread 0 to avoid interleaving)
+      if (thread_id == 0 && result.indexed_count % 1000 == 0) {
+        size_t total_done = 0;
+        for (const auto &tr : thread_results) {
+          total_done += tr.indexed_count + tr.failed_count;
+        }
+        std::cout << "\r  Indexed ~" << total_done << " / "
+                  << corpus_entries.size() << " docs..." << std::flush;
+      }
+    }
+  };
+
+  // Launch threads
+  std::vector<std::thread> threads;
+  threads.reserve(num_threads);
+  for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
+    threads.emplace_back(worker, thread_id);
+  }
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  uint64_t build_ms = timer.milli_seconds();
+
+  // Merge per-thread results
+  uint64_t total_indexed = 0;
+  uint64_t total_failed = 0;
+  for (const auto &result : thread_results) {
+    total_indexed += result.indexed_count;
+    total_failed += result.failed_count;
+  }
+
+  std::cout << "\r  Indexed " << total_indexed << " docs total." << std::endl;
+  if (total_failed > 0) {
+    std::cout << "  Warning: " << total_failed << " docs failed to index."
+              << std::endl;
+  }
+
+  // Flush statistics — single indexer tracks all docs/tokens atomically
+  std::cout << "Flushing statistics (total_docs=" << indexer.total_docs()
+            << ", total_tokens=" << indexer.total_tokens() << ")..."
+            << std::endl;
+  auto flush_result = indexer.flush();
+  if (!flush_result.has_value()) {
+    fprintf(stderr, "WARN: FtsColumnIndexer flush failed, status[%s]\n",
+            flush_result.error().message().c_str());
+  }
+
+  // Convert Roaring postings to BitPacked before close/dump, mirroring
+  // MutableSegment::dump_fts_column_indexers().  Must run before close()
+  // for symmetry with the single-threaded path; convert itself does not
+  // depend on the tokenizer pipeline.
+  std::cout << "Converting postings to BitPacked..." << std::endl;
+  zvec::ailego::ElapsedTime bitpacked_timer2;
+  auto bitpacked_result = indexer.convert_postings_to_bitpacked();
+  if (!bitpacked_result.has_value()) {
+    fprintf(stderr,
+            "WARN: FtsColumnIndexer convert_postings_to_bitpacked failed, "
+            "status[%s]\n",
+            bitpacked_result.error().message().c_str());
+  }
+  std::cout << "convert_postings_to_bitpacked took "
+            << bitpacked_timer2.micro_seconds() / 1000.0 << " ms" << std::endl;
+
+  // Drop $TF / $MAX_TF / $DOC_LEN CFs after their payloads have been inlined
+  // into BitPacked postings.  Mirrors MutableSegment::dump_fts_column_
+  // indexers(): reset_side_cfs() first so any concurrent reader-path access
+  // through the indexer falls back to default tf=1/doc_len=1 instead of
+  // touching a dropped handle, then drop the CFs from the underlying store.
+  indexer.reset_side_cfs();
+  drop_fts_side_cfs(&store, FLAGS_field);
+  // Local pointers are now dangling; null them out so accidental use becomes
+  // an obvious crash instead of a use-after-free.
+  term_freq_cf = nullptr;
+  max_tf_cf = nullptr;
+  doc_len_cf = nullptr;
+
+  (void)indexer.close();
+
+  // Flush RocksDB memtables + dump checkpoint
+  zvec::ailego::ElapsedTime dump_timer;
+  store.flush();
+
+  // Trigger compaction + checkpoint
+  std::cout << "Running compaction..." << std::endl;
+  store.compact();
+
+  uint64_t dump_ms = dump_timer.milli_seconds();
+  uint64_t elapsed_ms = timer.milli_seconds();
+  std::cout << "=== BUILD COMPLETE ===" << std::endl;
+  std::cout << "  Total docs  : " << total_indexed << std::endl;
+  std::cout << "  Threads     : " << num_threads << std::endl;
+  std::cout << "  Build time  : " << build_ms << " ms" << std::endl;
+  std::cout << "  Dump time   : " << dump_ms << " ms (flush + compaction)"
+            << std::endl;
+  std::cout << "  Total time  : " << elapsed_ms << " ms" << std::endl;
+  std::cout << "  Throughput  : "
+            << (total_indexed > 0
+                    ? total_indexed * 1000ULL / (build_ms > 0 ? build_ms : 1)
+                    : 0)
+            << " docs/s (build only)" << std::endl;
+
+  store.close();
+
+  // Optional: run reduce to convert postings to BitPacked format
+  if (FLAGS_reduce) {
+    int reduce_ret = do_reduce(FLAGS_index, total_indexed);
+    if (reduce_ret != 0) {
+      fprintf(stderr, "ERROR: Reduce step failed, ret[%d]\n", reduce_ret);
+      return reduce_ret;
+    }
+  }
+
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// BUILD MODE (db): use zvec Collection API
+// ---------------------------------------------------------------------------
+static int do_build_db() {
+  const int num_threads = std::max(1, FLAGS_threads);
+  std::cout << "=== BUILD MODE (db) ===" << std::endl;
+  std::cout << "Index  : " << FLAGS_index << std::endl;
+  std::cout << "Corpus : " << FLAGS_corpus << std::endl;
+  std::cout << "Field  : " << FLAGS_field << std::endl;
+  std::cout << "Threads: " << num_threads << std::endl;
+
+  // Remove existing collection directory
+  if (zvec::FileHelper::DirectoryExists(FLAGS_index)) {
+    std::cout << "Removing existing collection directory: " << FLAGS_index
+              << std::endl;
+    zvec::FileHelper::RemoveDirectory(FLAGS_index);
+  }
+
+  // Build schema: pk (implicit) + FTS field + dummy vector field (required
+  // by segment layer).
+  // Build FtsIndexParams from FLAGS_extra_params so that the tokenizer
+  // pipeline configuration (e.g. enable_simple_closet) matches raw mode.
+  auto db_fts_params = build_fts_index_params(FLAGS_extra_params);
+
+  CollectionSchema schema("fts_bench");
+  schema.add_field(std::make_shared<FieldSchema>(FLAGS_field, DataType::STRING,
+                                                 false, db_fts_params));
+  // Segment layer requires at least one vector field.  Do NOT set
+  // index_params: fts_bench links with PACKED mode which strips core-layer
+  // metric static registrations, so creating a vector index would fail with
+  // "Failed to create metric".  An unindexed vector field is sufficient.
+  schema.add_field(std::make_shared<FieldSchema>(
+      "__dummy_vec", DataType::VECTOR_FP32, 4, /*nullable=*/true));
+
+  CollectionOptions options;
+  options.read_only_ = false;
+
+  auto create_result = Collection::CreateAndOpen(FLAGS_index, schema, options);
+  if (!create_result.has_value()) {
+    fprintf(stderr, "ERROR: Failed to create collection at [%s]: %s\n",
+            FLAGS_index.c_str(), create_result.error().message().c_str());
+    return -1;
+  }
+  auto collection = create_result.value();
+
+  // Pre-load corpus entries
+  std::vector<CorpusEntry> corpus_entries;
+  uint64_t parse_failed_count = 0;
+  {
+    std::ifstream corpus_file(FLAGS_corpus);
+    if (!corpus_file.is_open()) {
+      fprintf(stderr, "ERROR: Failed to open corpus file: %s\n",
+              FLAGS_corpus.c_str());
+      return -1;
+    }
+    uint32_t doc_id = 0;
+    std::string line;
+    while (std::getline(corpus_file, line)) {
+      if (line.empty()) continue;
+      std::unordered_map<std::string, std::string> fields;
+      if (!parse_jsonl_line(line, &fields)) {
+        ++parse_failed_count;
+        continue;
+      }
+      const std::string &corpus_id = fields["_id"];
+      if (corpus_id.empty()) {
+        ++parse_failed_count;
+        continue;
+      }
+      std::string content;
+      if (!fields["title"].empty()) {
+        content = fields["title"] + " " + fields["text"];
+      } else {
+        content = fields["text"];
+      }
+      corpus_entries.push_back(
+          {doc_id, std::move(corpus_id), std::move(content)});
+      ++doc_id;
+    }
+  }
+  std::cout << "Loaded " << corpus_entries.size() << " corpus entries."
+            << std::endl;
+  if (parse_failed_count > 0) {
+    std::cout << "  Warning: " << parse_failed_count
+              << " entries failed to parse." << std::endl;
+  }
+
+  // Insert in batches via Collection::Insert
+  const size_t batch_size = 1000;
+  uint64_t total_indexed = 0;
+  uint64_t total_failed = 0;
+
+  std::cout << "Inserting documents via Collection API..." << std::endl;
+  zvec::ailego::ElapsedTime timer;
+
+  for (size_t offset = 0; offset < corpus_entries.size();
+       offset += batch_size) {
+    size_t end = std::min(offset + batch_size, corpus_entries.size());
+    std::vector<Doc> docs;
+    docs.reserve(end - offset);
+    for (size_t i = offset; i < end; ++i) {
+      const CorpusEntry &entry = corpus_entries[i];
+      Doc doc;
+      doc.set_pk(entry.corpus_id);
+      doc.set<std::string>(FLAGS_field, entry.content);
+      // dummy vector (nullable field still needs a value for WAL/forward)
+      doc.set<std::vector<float>>("__dummy_vec", {0.0f, 0.0f, 0.0f, 0.0f});
+      docs.push_back(std::move(doc));
+    }
+    auto insert_result = collection->Insert(docs);
+    if (!insert_result.has_value()) {
+      fprintf(stderr, "WARN: Batch insert failed at offset[%zu]: %s\n", offset,
+              insert_result.error().message().c_str());
+      total_failed += (end - offset);
+    } else {
+      total_indexed += (end - offset);
+    }
+    if (total_indexed % 10000 < batch_size) {
+      std::cout << "\r  Inserted " << total_indexed << " / "
+                << corpus_entries.size() << " docs..." << std::flush;
+    }
+  }
+
+  uint64_t build_ms = timer.milli_seconds();
+
+  // Flush collection
+  auto flush_status = collection->Flush();
+  if (!flush_status.ok()) {
+    fprintf(stderr, "WARN: Collection flush failed: %s\n",
+            flush_status.message().c_str());
+  }
+
+  // Optimize triggers segment dump which converts Roaring postings to
+  // BitPacked format (with inline tf/doc_len payloads).  Without this step
+  // the immutable reader path falls back to tf=1/doc_len=1 because the
+  // side CFs (_tf/_doc_len/_max_tf) are not opened for read-only segments.
+  auto optimize_status = collection->Optimize();
+  if (!optimize_status.ok()) {
+    fprintf(stderr, "WARN: Collection optimize failed: %s\n",
+            optimize_status.message().c_str());
+  }
+
+  std::cout << "\r  Inserted " << total_indexed << " docs total." << std::endl;
+  if (total_failed > 0) {
+    std::cout << "  Warning: " << total_failed << " docs failed to insert."
+              << std::endl;
+  }
+  std::cout << "=== BUILD COMPLETE (db) ===" << std::endl;
+  std::cout << "  Total docs  : " << total_indexed << std::endl;
+  std::cout << "  Build time  : " << build_ms << " ms" << std::endl;
+  std::cout << "  Throughput  : "
+            << (total_indexed > 0
+                    ? total_indexed * 1000ULL / (build_ms > 0 ? build_ms : 1)
+                    : 0)
+            << " docs/s" << std::endl;
+
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// SEARCH MODE
+// ---------------------------------------------------------------------------
+
+// Parse qrels TSV file: returns map of query_id -> set<corpus_id>
+static std::unordered_map<std::string, std::unordered_set<std::string>>
+load_qrels(const std::string &qrels_dir) {
+  std::unordered_map<std::string, std::unordered_set<std::string>> qrels;
+
+  // Try test.tsv first, then train.tsv
+  std::vector<std::string> candidates = {qrels_dir + "/test.tsv",
+                                         qrels_dir + "/train.tsv"};
+  std::string qrels_file;
+  for (const auto &f : candidates) {
+    if (FileHelper::FileExists(f)) {
+      qrels_file = f;
+      break;
+    }
+  }
+
+  if (qrels_file.empty()) {
+    fprintf(stderr, "ERROR: No qrels file found in directory: %s\n",
+            qrels_dir.c_str());
+    return qrels;
+  }
+
+  std::cout << "Loading qrels from: " << qrels_file << std::endl;
+
+  std::ifstream f(qrels_file);
+  if (!f.is_open()) {
+    fprintf(stderr, "ERROR: Failed to open qrels file: %s\n",
+            qrels_file.c_str());
+    return qrels;
+  }
+
+  std::string line;
+  bool first_line = true;
+  while (std::getline(f, line)) {
+    if (first_line) {
+      first_line = false;
+      continue;  // skip header
+    }
+    if (line.empty()) continue;
+
+    std::istringstream ss(line);
+    std::string query_id, corpus_id, score_str;
+    if (!std::getline(ss, query_id, '\t') ||
+        !std::getline(ss, corpus_id, '\t') ||
+        !std::getline(ss, score_str, '\t')) {
+      continue;
+    }
+    // Only include relevant docs (score > 0)
+    int score = std::stoi(score_str);
+    if (score > 0) {
+      qrels[query_id].insert(corpus_id);
+    }
+  }
+
+  std::cout << "Loaded qrels for " << qrels.size() << " queries." << std::endl;
+  return qrels;
+}
+
+// ---------------------------------------------------------------------------
+// Unified single-/multi-threaded search:
+//   * Always pre-loads queries into memory and dispatches them to
+//     FLAGS_threads workers via an atomic index counter.
+//   * FtsColumnIndexer::search() and the shared TokenizerPipeline are both
+//     read-only / fork-safe, so a single shared reader and pipeline are
+//     reused across workers.
+//   * When FLAGS_threads == 1 the path collapses to a single worker,
+//     behaving equivalently to a sequential single-threaded search.
+// ---------------------------------------------------------------------------
+
+struct QueryEntry {
+  std::string query_id;
+  std::string match_text;
+};
+
+struct RecallCounter {
+  double sum{0.0};
+  uint64_t total{0};
+  void add(double recall_value) {
+    sum += recall_value;
+    total++;
+  }
+  double ratio() const {
+    return total > 0 ? sum / static_cast<double>(total) : 0.0;
+  }
+};
+
+
+static int do_search() {
+  if (!validate_default_operator(FLAGS_default_operator)) {
+    fprintf(stderr,
+            "ERROR: Invalid -default_operator[%s]. Must be 'or' or 'and'.\n",
+            FLAGS_default_operator.c_str());
+    return -1;
+  }
+
+  const int num_threads = std::max(1, FLAGS_threads);
+
+  const std::string fts_index_path = FLAGS_index;
+
+  std::cout << "=== SEARCH MODE ===" << std::endl;
+  std::cout << "Index            : " << fts_index_path << std::endl;
+  std::cout << "Query            : " << FLAGS_query << std::endl;
+  std::cout << "Qrels            : " << FLAGS_qrels << std::endl;
+  std::cout << "TopK             : " << FLAGS_topk << std::endl;
+  std::cout << "Field            : " << FLAGS_field << std::endl;
+  std::cout << "Threads          : " << num_threads << std::endl;
+  std::cout << "Default operator : " << FLAGS_default_operator << std::endl;
+
+  // Open FTS RocksDB (existing) — shared across threads (RocksDB reads are
+  // thread-safe at the CF level).  Open without $TF/$MAX_TF/$DOC_LEN since
+  // those CFs were dropped at build time after convert_postings_to_bitpacked().
+  RocksdbContext store;
+  if (!open_fts_store(&store, FLAGS_field, /*existing=*/true,
+                      /*index_path=*/fts_index_path,
+                      /*with_side_cfs=*/false,
+                      /*with_forward_cf=*/true)) {
+    return -1;
+  }
+
+  rocksdb::ColumnFamilyHandle *postings_cf = store.get_cf(FLAGS_field);
+  rocksdb::ColumnFamilyHandle *positions_cf =
+      store.get_cf(FLAGS_field + zvec::kFtsPositionsSuffix);
+  rocksdb::ColumnFamilyHandle *stat_cf = store.get_cf(zvec::kFtsStatCfName);
+  rocksdb::ColumnFamilyHandle *forward_cf = store.get_cf(kForwardCfName);
+
+  if (!postings_cf || !positions_cf || !stat_cf || !forward_cf) {
+    fprintf(stderr, "ERROR: Failed to get column families\n");
+    return -1;
+  }
+
+  // Load qrels
+  auto qrels = load_qrels(FLAGS_qrels);
+
+  // Pre-load all queries into memory so threads can access them without I/O
+  // contention
+  std::vector<QueryEntry> queries;
+  {
+    std::ifstream query_file(FLAGS_query);
+    if (!query_file.is_open()) {
+      fprintf(stderr, "ERROR: Failed to open query file: %s\n",
+              FLAGS_query.c_str());
+      return -1;
+    }
+    std::string line;
+    while (std::getline(query_file, line)) {
+      if (line.empty()) continue;
+      std::unordered_map<std::string, std::string> fields;
+      if (!parse_jsonl_line(line, &fields)) continue;
+      const std::string &query_id = fields["_id"];
+      const std::string &query_text = fields["text"];
+      if (query_id.empty() || query_text.empty()) continue;
+      queries.push_back({query_id, query_text});
+    }
+  }
+  std::cout << "Loaded " << queries.size() << " queries." << std::endl;
+
+  // Limit the number of queries if configured: first keep only queries that
+  // have qrels entries (relevant results), then truncate to max_queries.
+  if (FLAGS_max_queries > 0) {
+    std::vector<QueryEntry> filtered;
+    for (auto &q : queries) {
+      if (qrels.count(q.query_id) > 0) {
+        filtered.push_back(std::move(q));
+      }
+    }
+    queries = std::move(filtered);
+    if (static_cast<size_t>(FLAGS_max_queries) < queries.size()) {
+      queries.resize(FLAGS_max_queries);
+    }
+    std::cout << "Limited to " << queries.size()
+              << " queries with qrels (--max_queries)." << std::endl;
+  }
+
+  // Shared atomic index for work-stealing across threads
+  std::atomic<size_t> next_query_index{0};
+
+  // Per-thread result accumulators, merged after all threads finish
+  struct ThreadResult {
+    LatencyStats latency_stats;
+    RecallCounter recall1;
+    RecallCounter recall5;
+    RecallCounter recall10;
+    RecallCounter recallK;
+    uint64_t no_result_count{0};
+    uint64_t query_count{0};
+  };
+  std::vector<ThreadResult> thread_results(num_threads);
+
+  auto query_fts_params = build_fts_index_params(FLAGS_extra_params);
+  auto pipeline_result = query_fts_params->create_pipeline();
+  if (!pipeline_result.has_value()) {
+    fprintf(stderr,
+            "ERROR: Failed to create tokenizer pipeline for "
+            "extra_params[%s]: %s\n",
+            FLAGS_extra_params.c_str(),
+            pipeline_result.error().message().c_str());
+    return -1;
+  }
+  auto &query_pipeline = pipeline_result.value();
+
+  std::cout << "Running queries with " << num_threads << " thread(s)..."
+            << std::endl;
+
+  // Create a single shared FtsColumnIndexer in read-only mode. search() is a
+  // const method that only performs read-only RocksDB lookups, so it is safe
+  // to share across threads.
+  FtsColumnIndexer reader;
+  {
+    // $TF/$MAX_TF/$DOC_LEN are dropped at build time; pass nullptr — the
+    // BitPacked path doesn't need them and the Roaring fallback degrades
+    // to default tf=1/doc_len=1 when these pointers are null.
+    auto open_result =
+        reader.open_reader(FLAGS_field, &store, postings_cf, positions_cf,
+                           /*term_freq_cf=*/nullptr,
+                           /*max_tf_cf=*/nullptr,
+                           /*doc_len_cf=*/nullptr, stat_cf);
+    if (!open_result.has_value()) {
+      fprintf(stderr, "ERROR: Failed to open FtsColumnIndexer, status[%s]\n",
+              open_result.error().message().c_str());
+      return -1;
+    }
+  }
+
+  auto worker = [&](int thread_id) {
+    ThreadResult &result = thread_results[thread_id];
+
+    while (true) {
+      size_t query_idx =
+          next_query_index.fetch_add(1, std::memory_order_relaxed);
+      if (query_idx >= queries.size()) break;
+
+      const QueryEntry &entry = queries[query_idx];
+
+      std::vector<FtsResult> results;
+      bool search_ok = true;
+      uint64_t elapsed_us = 0;
+      {
+        zvec::ailego::ElapsedTime timer;
+        // Tokenize query text (match_string style: tokenize then build AST
+        // combining tokens with the configured default operator).
+        auto tokens = query_pipeline->process(entry.match_text);
+        auto ast_root =
+            build_query_ast_from_tokens(tokens, FLAGS_default_operator);
+        if (ast_root) {
+          fts::FtsQueryParams query_params;
+          query_params.topk = static_cast<uint32_t>(FLAGS_topk);
+          auto search_result = reader.search(*ast_root, query_params);
+          if (!search_result.has_value()) {
+            fprintf(stderr,
+                    "WARN: Thread[%d] search failed for query_id[%s], "
+                    "status[%s]\n",
+                    thread_id, entry.query_id.c_str(),
+                    search_result.error().message().c_str());
+            search_ok = false;
+          } else {
+            results = std::move(search_result.value());
+          }
+        }
+        elapsed_us = timer.micro_seconds();
+      }
+
+      if (!search_ok) {
+        continue;
+      }
+
+      result.latency_stats.add(elapsed_us);
+      ++result.query_count;
+
+      if (results.empty()) {
+        ++result.no_result_count;
+      }
+
+      // Resolve doc_id -> corpus_id (a.k.a. pk) via the forward CF.
+      std::vector<std::string> retrieved_corpus_ids;
+      retrieved_corpus_ids.reserve(results.size());
+      for (const auto &r : results) {
+        std::string corpus_id;
+        std::string doc_id_key;
+        fts::encode_uint32_big_endian(r.doc_id, &doc_id_key);
+        if (!store.db_
+                 ->Get(store.read_opts_, forward_cf, doc_id_key, &corpus_id)
+                 .ok()) {
+          corpus_id = "";
+        }
+        retrieved_corpus_ids.push_back(corpus_id);
+      }
+
+      // Compute recall at various cutoffs
+      const auto qrels_it = qrels.find(entry.query_id);
+      if (qrels_it == qrels.end()) continue;
+
+      const auto &relevant = qrels_it->second;
+
+      // Standard IR Recall@K = |retrieved_topK ∩ relevant| / |relevant|
+      auto compute_recall = [&](int cutoff) -> double {
+        if (relevant.empty()) return 0.0;
+        int limit =
+            std::min(cutoff, static_cast<int>(retrieved_corpus_ids.size()));
+        int hits = 0;
+        for (int i = 0; i < limit; ++i) {
+          if (relevant.count(retrieved_corpus_ids[i]) > 0) {
+            hits++;
+          }
+        }
+        return static_cast<double>(hits) / static_cast<double>(relevant.size());
+      };
+
+      result.recall1.add(compute_recall(1));
+      result.recall5.add(compute_recall(5));
+      result.recall10.add(compute_recall(10));
+      result.recallK.add(compute_recall(FLAGS_topk));
+    }
+  };
+
+  // Launch threads and measure total wall-clock time
+  auto wall_start = std::chrono::steady_clock::now();
+
+  std::vector<std::thread> threads;
+  threads.reserve(num_threads);
+  for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
+    threads.emplace_back(worker, thread_id);
+  }
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  auto wall_end = std::chrono::steady_clock::now();
+  uint64_t wall_ms = static_cast<uint64_t>(
+      std::chrono::duration_cast<std::chrono::milliseconds>(wall_end -
+                                                            wall_start)
+          .count());
+
+  // Merge per-thread results
+  LatencyStats merged_latency;
+  RecallCounter merged_recall1, merged_recall5, merged_recall10, merged_recallK;
+  uint64_t total_query_count = 0;
+  uint64_t total_no_result_count = 0;
+
+  for (const auto &result : thread_results) {
+    for (uint64_t sample : result.latency_stats.samples) {
+      merged_latency.add(sample);
+    }
+    merged_recall1.sum += result.recall1.sum;
+    merged_recall1.total += result.recall1.total;
+    merged_recall5.sum += result.recall5.sum;
+    merged_recall5.total += result.recall5.total;
+    merged_recall10.sum += result.recall10.sum;
+    merged_recall10.total += result.recall10.total;
+    merged_recallK.sum += result.recallK.sum;
+    merged_recallK.total += result.recallK.total;
+    total_query_count += result.query_count;
+    total_no_result_count += result.no_result_count;
+  }
+
+  // Output results
+  std::cout << std::endl;
+  std::cout << "=== SEARCH RESULTS ===" << std::endl;
+  std::cout << "Threads          : " << num_threads << std::endl;
+  std::cout << "Total queries    : " << total_query_count << std::endl;
+  std::cout << "No-result queries: " << total_no_result_count << std::endl;
+  std::cout << "Wall-clock time  : " << wall_ms << " ms" << std::endl;
+  if (wall_ms > 0) {
+    std::cout << "Throughput       : " << total_query_count * 1000ULL / wall_ms
+              << " queries/s" << std::endl;
+  }
+  std::cout << std::endl;
+
+  merged_latency.print("Search (per-query)");
+  std::cout << std::endl;
+
+  if (merged_recall1.total > 0) {
+    std::cout << "=== RECALL ===" << std::endl;
+    std::cout << "  Recall@1  : " << merged_recall1.ratio() << " (evaluated on "
+              << merged_recall1.total << " queries)" << std::endl;
+    std::cout << "  Recall@5  : " << merged_recall5.ratio() << " (evaluated on "
+              << merged_recall5.total << " queries)" << std::endl;
+    std::cout << "  Recall@10 : " << merged_recall10.ratio()
+              << " (evaluated on " << merged_recall10.total << " queries)"
+              << std::endl;
+    if (FLAGS_topk > 10) {
+      std::cout << "  Recall@" << FLAGS_topk << " : " << merged_recallK.ratio()
+                << " (evaluated on " << merged_recallK.total << " queries)"
+                << std::endl;
+    }
+  } else {
+    std::cout << "No qrels matched for evaluated queries." << std::endl;
+  }
+
+  store.close();
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// SEARCH MODE (db): use zvec Collection::Query(FtsQuery)
+// ---------------------------------------------------------------------------
+static int do_search_db() {
+  const int num_threads = std::max(1, FLAGS_threads);
+
+  std::cout << "=== SEARCH MODE (db) ===" << std::endl;
+  std::cout << "Index            : " << FLAGS_index << std::endl;
+  std::cout << "Query            : " << FLAGS_query << std::endl;
+  std::cout << "Qrels            : " << FLAGS_qrels << std::endl;
+  std::cout << "TopK             : " << FLAGS_topk << std::endl;
+  std::cout << "Field            : " << FLAGS_field << std::endl;
+  std::cout << "Threads          : " << num_threads << std::endl;
+
+  // Open existing collection in read-only mode
+  CollectionOptions options;
+  options.read_only_ = true;
+
+  auto open_result = Collection::Open(FLAGS_index, options);
+  if (!open_result.has_value()) {
+    fprintf(stderr, "ERROR: Failed to open collection at [%s]: %s\n",
+            FLAGS_index.c_str(), open_result.error().message().c_str());
+    return -1;
+  }
+  auto collection = open_result.value();
+
+  // Load qrels
+  auto qrels = load_qrels(FLAGS_qrels);
+
+  // Pre-load queries
+  std::vector<QueryEntry> queries;
+  {
+    std::ifstream query_file(FLAGS_query);
+    if (!query_file.is_open()) {
+      fprintf(stderr, "ERROR: Failed to open query file: %s\n",
+              FLAGS_query.c_str());
+      return -1;
+    }
+    std::string line;
+    while (std::getline(query_file, line)) {
+      if (line.empty()) continue;
+      std::unordered_map<std::string, std::string> fields;
+      if (!parse_jsonl_line(line, &fields)) continue;
+      const std::string &query_id = fields["_id"];
+      const std::string &query_text = fields["text"];
+      if (query_id.empty() || query_text.empty()) continue;
+      queries.push_back({query_id, query_text});
+    }
+  }
+  std::cout << "Loaded " << queries.size() << " queries." << std::endl;
+
+  // Limit the number of queries if configured: first keep only queries that
+  // have qrels entries (relevant results), then truncate to max_queries.
+  if (FLAGS_max_queries > 0) {
+    std::vector<QueryEntry> filtered;
+    for (auto &q : queries) {
+      if (qrels.count(q.query_id) > 0) {
+        filtered.push_back(std::move(q));
+      }
+    }
+    queries = std::move(filtered);
+    if (static_cast<size_t>(FLAGS_max_queries) < queries.size()) {
+      queries.resize(FLAGS_max_queries);
+    }
+    std::cout << "Limited to " << queries.size()
+              << " queries with qrels (--max_queries)." << std::endl;
+  }
+
+  // Per-thread result accumulators
+  std::atomic<size_t> next_query_index{0};
+  std::atomic<bool> fatal_error{false};
+
+  struct ThreadResult {
+    LatencyStats latency_stats;
+    RecallCounter recall1;
+    RecallCounter recall5;
+    RecallCounter recall10;
+    RecallCounter recallK;
+    uint64_t no_result_count{0};
+    uint64_t query_count{0};
+  };
+  std::vector<ThreadResult> thread_results(num_threads);
+
+  std::cout << "Running queries via Collection API with " << num_threads
+            << " thread(s)..." << std::endl;
+
+  auto worker = [&](int thread_id) {
+    ThreadResult &result = thread_results[thread_id];
+
+    while (true) {
+      if (fatal_error.load(std::memory_order_relaxed)) break;
+      size_t query_idx =
+          next_query_index.fetch_add(1, std::memory_order_relaxed);
+      if (query_idx >= queries.size()) break;
+
+      const QueryEntry &entry = queries[query_idx];
+
+      VectorQuery vq;
+      vq.field_name_ = FLAGS_field;
+      vq.topk_ = FLAGS_topk;
+      FtsQuery fts_query;
+      fts_query.match_string_ = entry.match_text;
+      vq.fts_query_ = fts_query;
+
+      uint64_t elapsed_us = 0;
+      std::vector<std::string> retrieved_corpus_ids;
+      {
+        zvec::ailego::ElapsedTime query_timer;
+        auto query_result = collection->Query(vq);
+        elapsed_us = query_timer.micro_seconds();
+
+        if (query_result.has_value()) {
+          const auto &doc_list = query_result.value();
+          retrieved_corpus_ids.reserve(doc_list.size());
+          for (const auto &doc_ptr : doc_list) {
+            retrieved_corpus_ids.push_back(doc_ptr->pk());
+          }
+        } else {
+          fprintf(stderr,
+                  "ERROR: Thread[%d] FtsQuery failed for query_id[%s]: %s\n",
+                  thread_id, entry.query_id.c_str(),
+                  query_result.error().message().c_str());
+          fatal_error.store(true, std::memory_order_relaxed);
+          break;
+        }
+      }
+
+      result.latency_stats.add(elapsed_us);
+      ++result.query_count;
+
+      if (retrieved_corpus_ids.empty()) {
+        ++result.no_result_count;
+      }
+
+      // Compute recall
+      const auto qrels_it = qrels.find(entry.query_id);
+      if (qrels_it == qrels.end()) continue;
+      const auto &relevant = qrels_it->second;
+
+      auto compute_recall = [&](int cutoff) -> double {
+        if (relevant.empty()) return 0.0;
+        int limit =
+            std::min(cutoff, static_cast<int>(retrieved_corpus_ids.size()));
+        int hits = 0;
+        for (int i = 0; i < limit; ++i) {
+          if (relevant.count(retrieved_corpus_ids[i]) > 0) {
+            hits++;
+          }
+        }
+        return static_cast<double>(hits) / static_cast<double>(relevant.size());
+      };
+
+      result.recall1.add(compute_recall(1));
+      result.recall5.add(compute_recall(5));
+      result.recall10.add(compute_recall(10));
+      result.recallK.add(compute_recall(FLAGS_topk));
+    }
+  };
+
+  auto wall_start = std::chrono::steady_clock::now();
+  std::vector<std::thread> threads;
+  threads.reserve(num_threads);
+  for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
+    threads.emplace_back(worker, thread_id);
+  }
+  for (auto &thread : threads) {
+    thread.join();
+  }
+
+  if (fatal_error.load()) {
+    fprintf(stderr, "ERROR: Aborting: FtsQuery failed during search\n");
+    return -1;
+  }
+
+  auto wall_end = std::chrono::steady_clock::now();
+  uint64_t wall_ms = static_cast<uint64_t>(
+      std::chrono::duration_cast<std::chrono::milliseconds>(wall_end -
+                                                            wall_start)
+          .count());
+
+  // Merge per-thread results
+  LatencyStats merged_latency;
+  RecallCounter merged_recall1, merged_recall5, merged_recall10, merged_recallK;
+  uint64_t total_query_count = 0;
+  uint64_t total_no_result_count = 0;
+
+  for (const auto &result : thread_results) {
+    for (uint64_t sample : result.latency_stats.samples) {
+      merged_latency.add(sample);
+    }
+    merged_recall1.sum += result.recall1.sum;
+    merged_recall1.total += result.recall1.total;
+    merged_recall5.sum += result.recall5.sum;
+    merged_recall5.total += result.recall5.total;
+    merged_recall10.sum += result.recall10.sum;
+    merged_recall10.total += result.recall10.total;
+    merged_recallK.sum += result.recallK.sum;
+    merged_recallK.total += result.recallK.total;
+    total_query_count += result.query_count;
+    total_no_result_count += result.no_result_count;
+  }
+
+  std::cout << std::endl;
+  std::cout << "=== SEARCH RESULTS (db) ===" << std::endl;
+  std::cout << "Threads          : " << num_threads << std::endl;
+  std::cout << "Total queries    : " << total_query_count << std::endl;
+  std::cout << "No-result queries: " << total_no_result_count << std::endl;
+  std::cout << "Wall-clock time  : " << wall_ms << " ms" << std::endl;
+  if (wall_ms > 0) {
+    std::cout << "Throughput       : " << total_query_count * 1000ULL / wall_ms
+              << " queries/s" << std::endl;
+  }
+  std::cout << std::endl;
+
+  merged_latency.print("Search (per-query)");
+  std::cout << std::endl;
+
+  if (merged_recall1.total > 0) {
+    std::cout << "=== RECALL ===" << std::endl;
+    std::cout << "  Recall@1  : " << merged_recall1.ratio() << " (evaluated on "
+              << merged_recall1.total << " queries)" << std::endl;
+    std::cout << "  Recall@5  : " << merged_recall5.ratio() << " (evaluated on "
+              << merged_recall5.total << " queries)" << std::endl;
+    std::cout << "  Recall@10 : " << merged_recall10.ratio()
+              << " (evaluated on " << merged_recall10.total << " queries)"
+              << std::endl;
+    if (FLAGS_topk > 10) {
+      std::cout << "  Recall@" << FLAGS_topk << " : " << merged_recallK.ratio()
+                << " (evaluated on " << merged_recallK.total << " queries)"
+                << std::endl;
+    }
+  } else {
+    std::cout << "No qrels matched for evaluated queries." << std::endl;
+  }
+
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// STATS MODE
+// ---------------------------------------------------------------------------
+static int do_stats() {
+  std::cout << "=== STATS MODE ===" << std::endl;
+  std::cout << "Index : " << FLAGS_index << std::endl;
+  std::cout << "Field : " << FLAGS_field << std::endl;
+
+  // Open RocksDB (existing).  $TF/$MAX_TF/$DOC_LEN are dropped at build
+  // time, so open without them.  Sections that scan these CFs are now
+  // gated on the corresponding pointers being non-null (always null here
+  // post-drop) and simply skipped with an explanatory message.
+  RocksdbContext store;
+  if (!open_fts_store(&store, FLAGS_field, /*existing=*/true,
+                      /*index_path=*/"", /*with_side_cfs=*/false)) {
+    return -1;
+  }
+
+  rocksdb::ColumnFamilyHandle *postings_cf = store.get_cf(FLAGS_field);
+  rocksdb::ColumnFamilyHandle *stat_cf = store.get_cf(zvec::kFtsStatCfName);
+  // $MAX_TF/$DOC_LEN are not opened above; keep nullptrs so the
+  // doc-length / max-tf scan sections degrade gracefully.
+  rocksdb::ColumnFamilyHandle *max_tf_cf = nullptr;
+  rocksdb::ColumnFamilyHandle *doc_len_cf = nullptr;
+
+  if (!postings_cf || !stat_cf) {
+    fprintf(stderr, "ERROR: Failed to get required column families\n");
+    return -1;
+  }
+
+  // ---------------------------------------------------------------
+  // 1. Segment-level statistics (total_docs, total_tokens)
+  // ---------------------------------------------------------------
+  uint64_t total_docs = 0;
+  uint64_t total_tokens = 0;
+  {
+    const std::string total_docs_key = FLAGS_field + "_total_docs";
+    const std::string total_tokens_key = FLAGS_field + "_total_tokens";
+    std::string value;
+    if (store.db_->Get(store.read_opts_, stat_cf, total_docs_key, &value)
+            .ok() &&
+        value.size() >= sizeof(uint64_t)) {
+      std::memcpy(&total_docs, value.data(), sizeof(uint64_t));
+    }
+    value.clear();
+    if (store.db_->Get(store.read_opts_, stat_cf, total_tokens_key, &value)
+            .ok() &&
+        value.size() >= sizeof(uint64_t)) {
+      std::memcpy(&total_tokens, value.data(), sizeof(uint64_t));
+    }
+  }
+
+  double avg_doc_len = total_docs > 0 ? static_cast<double>(total_tokens) /
+                                            static_cast<double>(total_docs)
+                                      : 0.0;
+
+  std::cout << std::endl;
+  std::cout << "--- Segment Statistics ---" << std::endl;
+  std::cout << "  Total documents : " << total_docs << std::endl;
+  std::cout << "  Total tokens    : " << total_tokens << std::endl;
+  std::cout << "  Avg doc length  : " << avg_doc_len << std::endl;
+
+  // ---------------------------------------------------------------
+  // 2. Vocabulary & posting list statistics
+  // ---------------------------------------------------------------
+  std::cout << std::endl;
+  std::cout << "--- Vocabulary & Posting List Statistics ---" << std::endl;
+  std::cout << "  Scanning postings CF..." << std::flush;
+
+  uint64_t vocab_size = 0;
+  uint64_t total_postings_entries = 0;  // sum of all posting list lengths
+  uint64_t total_postings_bytes = 0;    // sum of serialized bitmap sizes
+  uint64_t max_posting_len = 0;
+  std::string max_posting_term;
+
+  // Posting list length distribution buckets
+  // [1], [2-10], [11-100], [101-1K], [1K-10K], [10K-100K], [100K+]
+  uint64_t bucket_1 = 0;
+  uint64_t bucket_2_10 = 0;
+  uint64_t bucket_11_100 = 0;
+  uint64_t bucket_101_1k = 0;
+  uint64_t bucket_1k_10k = 0;
+  uint64_t bucket_10k_100k = 0;
+  uint64_t bucket_100k_plus = 0;
+
+  // Format counters
+  uint64_t roaring_count = 0;
+  uint64_t bitpacked_count = 0;
+
+  {
+    auto iter = std::unique_ptr<rocksdb::Iterator>(
+        store.db_->NewIterator(store.read_opts_, postings_cf));
+    while (iter->Valid()) {
+      const std::string term = iter->key().ToString();
+      const std::string posting_data = iter->value().ToString();
+
+      ++vocab_size;
+      total_postings_bytes += posting_data.size();
+
+      uint64_t cardinality = 0;
+
+      if (BitPackedPostingList::is_bitpacked_format(posting_data.data(),
+                                                    posting_data.size())) {
+        // BitPacked format: read num_docs from FileHeader
+        ++bitpacked_count;
+        fts::BitPackedPostingIterator bp_iter;
+        if (bp_iter.open(posting_data.data(), posting_data.size()) == 0) {
+          cardinality = bp_iter.cost();
+        }
+      } else {
+        // Roaring Bitmap format
+        ++roaring_count;
+        roaring_bitmap_t *bitmap = roaring_bitmap_portable_deserialize_safe(
+            posting_data.data(), posting_data.size());
+        if (bitmap) {
+          cardinality = roaring_bitmap_get_cardinality(bitmap);
+          roaring_bitmap_free(bitmap);
+        }
+      }
+
+      total_postings_entries += cardinality;
+
+      if (cardinality > max_posting_len) {
+        max_posting_len = cardinality;
+        max_posting_term = term;
+      }
+
+      // Bucket distribution
+      if (cardinality <= 1) {
+        ++bucket_1;
+      } else if (cardinality <= 10) {
+        ++bucket_2_10;
+      } else if (cardinality <= 100) {
+        ++bucket_11_100;
+      } else if (cardinality <= 1000) {
+        ++bucket_101_1k;
+      } else if (cardinality <= 10000) {
+        ++bucket_1k_10k;
+      } else if (cardinality <= 100000) {
+        ++bucket_10k_100k;
+      } else {
+        ++bucket_100k_plus;
+      }
+
+      if (vocab_size % 10000 == 0) {
+        std::cout << "\r  Scanning postings CF... " << vocab_size << " terms"
+                  << std::flush;
+      }
+
+      iter->Next();
+    }
+    // iter auto-closes via unique_ptr
+  }
+
+  std::cout << "\r  Scanning postings CF... done.          " << std::endl;
+  std::cout << "  Posting format        : " << roaring_count << " Roaring, "
+            << bitpacked_count << " BitPacked" << std::endl;
+  std::cout << "  Vocabulary size       : " << vocab_size << std::endl;
+  std::cout << "  Total postings entries : " << total_postings_entries
+            << std::endl;
+  std::cout << "  Total postings bytes   : " << total_postings_bytes / 1024
+            << " KB" << std::endl;
+  if (vocab_size > 0) {
+    std::cout << "  Avg posting list len  : "
+              << static_cast<double>(total_postings_entries) / vocab_size
+              << std::endl;
+    std::cout << "  Avg posting bytes     : "
+              << static_cast<double>(total_postings_bytes) / vocab_size << " B"
+              << std::endl;
+  }
+  std::cout << "  Max posting list len  : " << max_posting_len;
+  if (!max_posting_term.empty()) {
+    std::cout << " (term: \"" << max_posting_term << "\")";
+  }
+  std::cout << std::endl;
+
+  std::cout << std::endl;
+  std::cout << "  Posting list length distribution:" << std::endl;
+  std::cout << "    [1]          : " << bucket_1 << std::endl;
+  std::cout << "    [2-10]       : " << bucket_2_10 << std::endl;
+  std::cout << "    [11-100]     : " << bucket_11_100 << std::endl;
+  std::cout << "    [101-1K]     : " << bucket_101_1k << std::endl;
+  std::cout << "    [1K-10K]     : " << bucket_1k_10k << std::endl;
+  std::cout << "    [10K-100K]   : " << bucket_10k_100k << std::endl;
+  std::cout << "    [100K+]      : " << bucket_100k_plus << std::endl;
+
+  // ---------------------------------------------------------------
+  // 3. Document length distribution
+  // ---------------------------------------------------------------
+  std::cout << std::endl;
+  std::cout << "--- Document Length Distribution ---" << std::endl;
+
+  uint64_t doc_count = 0;
+  uint64_t sum_doc_len = 0;
+  uint32_t min_doc_len = UINT32_MAX;
+  uint32_t max_doc_len = 0;
+  std::vector<uint32_t> all_doc_lens;
+
+  if (doc_len_cf) {
+    auto iter = std::unique_ptr<rocksdb::Iterator>(
+        store.db_->NewIterator(store.read_opts_, doc_len_cf));
+    while (iter->Valid()) {
+      const std::string value = iter->value().ToString();
+      if (value.size() >= sizeof(uint32_t)) {
+        uint32_t doc_len = 0;
+        std::memcpy(&doc_len, value.data(), sizeof(uint32_t));
+        ++doc_count;
+        sum_doc_len += doc_len;
+        if (doc_len < min_doc_len) min_doc_len = doc_len;
+        if (doc_len > max_doc_len) max_doc_len = doc_len;
+        all_doc_lens.push_back(doc_len);
+      }
+      iter->Next();
+    }
+    // iter auto-closes via unique_ptr
+  } else {
+    std::cout << "  $DOC_LEN CF was dropped at build time after"
+              << " convert_postings_to_bitpacked()." << std::endl
+              << "  Per-doc length info is now inlined in BitPacked"
+              << " postings; skipping distribution scan." << std::endl;
+  }
+
+  if (doc_count > 0) {
+    std::sort(all_doc_lens.begin(), all_doc_lens.end());
+
+    auto percentile = [&](double p) -> uint32_t {
+      size_t idx = static_cast<size_t>(p * all_doc_lens.size());
+      if (idx >= all_doc_lens.size()) idx = all_doc_lens.size() - 1;
+      return all_doc_lens[idx];
+    };
+
+    std::cout << "  Doc count     : " << doc_count << std::endl;
+    std::cout << "  Avg doc length: "
+              << static_cast<double>(sum_doc_len) / doc_count << std::endl;
+    std::cout << "  Min doc length: " << min_doc_len << std::endl;
+    std::cout << "  P25 doc length: " << percentile(0.25) << std::endl;
+    std::cout << "  P50 doc length: " << percentile(0.50) << std::endl;
+    std::cout << "  P75 doc length: " << percentile(0.75) << std::endl;
+    std::cout << "  P95 doc length: " << percentile(0.95) << std::endl;
+    std::cout << "  P99 doc length: " << percentile(0.99) << std::endl;
+    std::cout << "  Max doc length: " << max_doc_len << std::endl;
+  } else {
+    std::cout << "  No documents found in $DOC_LEN CF." << std::endl;
+  }
+
+  // ---------------------------------------------------------------
+  // 4. Max-TF statistics (top terms by max term frequency)
+  // ---------------------------------------------------------------
+  if (max_tf_cf) {
+    std::cout << std::endl;
+    std::cout << "--- Top Terms by Max Term Frequency ---" << std::endl;
+
+    struct TermMaxTf {
+      std::string term;
+      uint32_t max_tf;
+    };
+
+    // Collect all and sort by max_tf descending, show top 20
+    std::vector<TermMaxTf> term_max_tfs;
+    {
+      auto iter = std::unique_ptr<rocksdb::Iterator>(
+          store.db_->NewIterator(store.read_opts_, max_tf_cf));
+      while (iter->Valid()) {
+        const std::string term = iter->key().ToString();
+        const std::string value = iter->value().ToString();
+        uint32_t max_tf = 0;
+        if (value.size() >= sizeof(uint32_t)) {
+          std::memcpy(&max_tf, value.data(), sizeof(uint32_t));
+        }
+        term_max_tfs.push_back({term, max_tf});
+        iter->Next();
+      }
+      // iter auto-closes via unique_ptr
+    }
+
+    std::sort(term_max_tfs.begin(), term_max_tfs.end(),
+              [](const TermMaxTf &a, const TermMaxTf &b) {
+                return a.max_tf > b.max_tf;
+              });
+
+    size_t show_count = std::min<size_t>(20, term_max_tfs.size());
+    for (size_t i = 0; i < show_count; ++i) {
+      std::cout << "  " << (i + 1) << ". \"" << term_max_tfs[i].term
+                << "\" max_tf=" << term_max_tfs[i].max_tf << std::endl;
+    }
+  }
+
+  // ---------------------------------------------------------------
+  // 5. Storage size summary
+  // ---------------------------------------------------------------
+  std::cout << std::endl;
+  std::cout << "--- Storage Size Summary ---" << std::endl;
+  std::cout << "  Postings CF ($POSTINGS) : " << total_postings_bytes / 1024
+            << " KB (serialized bitmap data)" << std::endl;
+  std::cout << "  (Other CF sizes require RocksDB property queries or dump)"
+            << std::endl;
+
+  std::cout << std::endl;
+  std::cout << "=== STATS COMPLETE ===" << std::endl;
+
+  store.close();
+  return 0;
+}
+
+
+int main(int argc, char *argv[]) {
+  gflags::ParseCommandLineFlags(&argc, &argv, false);
+
+
+  if (FLAGS_index.empty()) {
+    std::cerr << "Error: -index is required." << std::endl;
+    std::cerr << "Usage:" << std::endl;
+    std::cerr << "  Build : bin/fts_bench -cmd build -index <path> -corpus "
+                 "<corpus.jsonl>"
+              << std::endl;
+    std::cerr << "  Search : bin/fts_bench -cmd search "
+                 "-index <path> -query <queries.jsonl> -qrels <qrels_dir>"
+              << std::endl;
+    std::cerr << "  Stats : bin/fts_bench -cmd stats -index <path>"
+              << std::endl;
+    return 1;
+  }
+
+  // Determine command: explicit -cmd flag takes priority, otherwise auto-detect
+  std::string cmd = FLAGS_cmd;
+  if (cmd.empty()) {
+    if (!FLAGS_corpus.empty()) {
+      cmd = "build";
+    } else if (!FLAGS_query.empty()) {
+      cmd = "search";
+    } else {
+      std::cerr << "Error: specify -cmd (build/search/stats) or -corpus/-query."
+                << std::endl;
+      return 1;
+    }
+  }
+
+
+  // Validate -mode flag
+  const bool db_mode = (FLAGS_mode == "db");
+  if (FLAGS_mode != "raw" && FLAGS_mode != "db") {
+    std::cerr << "Error: unknown -mode '" << FLAGS_mode
+              << "'. Use 'raw' or 'db'." << std::endl;
+    return 1;
+  }
+
+
+  if (cmd == "build") {
+    if (FLAGS_corpus.empty()) {
+      std::cerr << "Error: -corpus is required in build mode." << std::endl;
+      return 1;
+    }
+    return db_mode ? do_build_db() : do_build();
+  } else if (cmd == "search") {
+    if (FLAGS_query.empty()) {
+      std::cerr << "Error: -query is required in search mode." << std::endl;
+      return 1;
+    }
+    if (FLAGS_qrels.empty()) {
+      std::cerr << "Error: -qrels is required in search mode." << std::endl;
+      return 1;
+    }
+    return db_mode ? do_search_db() : do_search();
+  } else if (cmd == "stats") {
+    if (db_mode) {
+      std::cerr << "Error: stats command is not supported in db mode."
+                << std::endl;
+      return 1;
+    }
+    return do_stats();
+  } else {
+    std::cerr << "Error: unknown command '" << cmd
+              << "'. Use build, search, or stats." << std::endl;
+    return 1;
+  }
+}