Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
79 commits
Select commit Hold shift + click to select a range
fa90cb6
fixing benchmark results parsing and polars version
JoOkuma Nov 5, 2025
11b04a8
testing another fix
JoOkuma Nov 5, 2025
228224b
Merge branch 'main' of https://github.com/royerlab/tracksdata into fi…
yfukai Nov 12, 2025
31400e3
running?
yfukai Nov 12, 2025
4de0e30
updated workflow
yfukai Nov 12, 2025
19d9da3
appending comment to PR
yfukai Nov 12, 2025
3f557bf
added indexing functionality for SQLGraph
yfukai Nov 13, 2025
8468ab9
added mask regionprops computation
yfukai Nov 20, 2025
4b62a79
Merge branch 'main' of https://github.com/royerlab/tracksdata
yfukai Nov 20, 2025
13f70ae
max_id per timepoint fix
yfukai Nov 21, 2025
c874580
Merge branch 'fix_add_node_issue_sql'
yfukai Nov 21, 2025
9f828a3
patch spatial_filter
yfukai Nov 21, 2025
4195520
added test
yfukai Nov 21, 2025
6a95632
Merge branch 'sqlgraph_bbox_array'
yfukai Nov 21, 2025
1651665
Update src/tracksdata/nodes/_mask.py
yfukai Nov 22, 2025
741c15f
Update src/tracksdata/nodes/_mask.py
yfukai Nov 22, 2025
c8ed899
Update src/tracksdata/nodes/_mask.py
yfukai Nov 22, 2025
9cdff52
import change and changing to function
yfukai Nov 22, 2025
4555062
removed wrapping of regionprops
yfukai Nov 22, 2025
15a22d0
Merge branch 'mask_centroid_calc'
yfukai Nov 22, 2025
947b1de
Added caching to spatial_filter
yfukai Nov 22, 2025
73c1584
updated impl using group_by
yfukai Nov 23, 2025
cd33d22
test update
yfukai Nov 23, 2025
29d6a55
Merge branch 'fix_add_node_issue_sql'
yfukai Nov 23, 2025
e849db0
Merge branch 'cache_spatial_filter'
yfukai Nov 23, 2025
3fcc6df
Merge remote-tracking branch 'upstream/main' into fix_benchmark
yfukai Nov 25, 2025
58b06e4
Merge branch 'main' of https://github.com/royerlab/tracksdata
yfukai Nov 26, 2025
d3814e7
Merge branch 'main' of https://github.com/royerlab/tracksdata into fi…
yfukai Dec 1, 2025
da83638
removed constraint
yfukai Dec 1, 2025
800b0d7
trying conda
yfukai Dec 1, 2025
9a4297f
falling back to virtualenv
yfukai Dec 1, 2025
5de159b
removed machine infor from asv machine
yfukai Dec 1, 2025
995c792
added quick
yfukai Dec 1, 2025
d9d2c6e
ignored warnings
yfukai Dec 1, 2025
38d1744
updated tests
yfukai Dec 1, 2025
465c676
update
yfukai Dec 1, 2025
f9626c6
testing
yfukai Dec 1, 2025
1d24b0b
fix
yfukai Dec 1, 2025
7aa8861
further
yfukai Dec 1, 2025
0ffa86e
added main checkout
yfukai Dec 1, 2025
2cdb649
given up commenting on PR
yfukai Dec 1, 2025
b365c1c
reverted benchmark conds
yfukai Dec 1, 2025
2c46c1f
Update benchmarks/graph_backends.py
yfukai Dec 2, 2025
b13e7fc
udpated set_options
yfukai Dec 2, 2025
07dfa99
made benchmark faster
yfukai Dec 3, 2025
aef391e
Merge branch 'main' of https://github.com/royerlab/tracksdata
yfukai Dec 3, 2025
15c7434
Merge branch 'cache_spatial_filter'
yfukai Dec 3, 2025
5dd85ab
updating strategy
yfukai Dec 3, 2025
648ee9c
further
yfukai Dec 3, 2025
bc857ff
skipped time_points
yfukai Dec 3, 2025
f80fdac
shorten benchmark on ci
yfukai Dec 8, 2025
78463ab
updated workflow name and added tracklet_nodes benchmark
yfukai Dec 8, 2025
e15534f
changed param to save time
yfukai Dec 8, 2025
77a7e23
fixing _get_neighbors attribute order
JoOkuma Dec 9, 2025
aedc979
using modern sqlalchemy syntax
JoOkuma Dec 9, 2025
f098332
making edge and overlap table source and target ids indexible
JoOkuma Dec 9, 2025
d635039
stopped generating html report
yfukai Dec 9, 2025
32932f0
Merge remote-tracking branch 'upstream/main' into fix_benchmark
yfukai Dec 9, 2025
8fa64ee
reveerted pyproject
yfukai Dec 9, 2025
241cdd6
solved reinstallation issue
yfukai Dec 9, 2025
b5bc04c
update
yfukai Dec 9, 2025
8c614e2
Merge branch 'fix_benchmark'
yfukai Dec 9, 2025
c2588c2
Merge branch 'jookuma/sql-graph-performance-improv'
yfukai Dec 9, 2025
1f60f0e
Merge remote-tracking branch 'upstream/main' into sql_indexing
yfukai Dec 10, 2025
de518ec
updated test for adding index
yfukai Dec 10, 2025
1960079
fixed test
yfukai Dec 10, 2025
a5a4efd
Merge remote-tracking branch 'upstream/main' into sql_indexing
yfukai Dec 11, 2025
472be88
added benchmark
yfukai Dec 18, 2025
ac1dc37
Merge branch 'main' into sql_indexing
yfukai Jan 8, 2026
6dadf30
fixed bug
yfukai Jan 8, 2026
368185b
formatted
yfukai Jan 8, 2026
278f94a
Update src/tracksdata/graph/_sql_graph.py
yfukai Jan 8, 2026
f901779
Merge branch 'sql_indexing' of https://github.com/yfukai/tracksdata i…
yfukai Jan 8, 2026
bbb6b0a
update
yfukai Jan 8, 2026
673992a
fixed wrong update
yfukai Jan 8, 2026
3594699
fixed lint
yfukai Jan 8, 2026
8b43eb2
added performance to concepts.d
yfukai Jan 9, 2026
57e7ed9
renamed from ensure to create
yfukai Jan 10, 2026
72000bd
fixed test
yfukai Jan 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ A common data structure and basic tools for multi-object tracking.
- Graph-based representation of tracking problems
- In-memory (RustWorkX) and database-backed (SQL) graph backends
- Nodes and edges can take arbitrary attributes
- SQLGraph backend can index frequently queried attributes for faster filtering
- Standardize API for node operators (e.g. defining objects and their attributes)
- Standardize API for edge operators (e.g. creating edges between nodes)
- Basic tracking solvers: nearest neighbors and integer linear programming
Expand Down
10 changes: 9 additions & 1 deletion docs/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@ TracksData supports multiple graph backends for different use cases:
### SQLGraph
- **Use case**: Large datasets that don't fit in memory
- **Performance**: Good for storage and querying
- **Features**: Persistent storage, complex queries
- **Features**: Persistent storage, complex queries, database indexes on frequently filtered attributes

SQLGraph lets you create indexes on node or edge attributes to keep repeated
filters fast (~150x speedup when selecting 1k nodes out of 10M total nodes):

```python
graph.create_node_attr_index(["t", "label"]) # composite index
graph.create_edge_attr_index("score", unique=True)
```

### GraphView
- **Use case**: Result subgraphs from either backend
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ docs = [
"mike",
"traccuracy>=0.4.2",
]
bench = ["asv"]

[project.urls]
Documentation = "https://github.com/jookuma/tracksdata#readme"
Expand Down
160 changes: 160 additions & 0 deletions src/tracksdata/graph/_sql_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,166 @@ def edge_attr_keys(self) -> list[str]:
keys.remove(k)
return keys

def _resolve_attr_keys(
    self,
    table_class: type[DeclarativeBase],
    attr_keys: Sequence[str] | str,
) -> tuple[list[sa.Column], str]:
    """Validate attribute keys against a table class and resolve them.

    Checks that every requested attribute key exists as a column on
    ``table_class``, then returns the resolved column objects together
    with a deterministic index name derived from the table and column
    names.

    Parameters
    ----------
    table_class : type[DeclarativeBase]
        The SQLAlchemy table class.
    attr_keys : Sequence[str] | str
        The attribute keys to include in the index. A single string is
        treated as a one-element sequence.

    Returns
    -------
    tuple[list[sa.Column], str]
        The resolved columns and the generated index name, in the form
        ``ix_<tablename>_<col1>_<col2>...``.

    Raises
    ------
    ValueError
        If ``attr_keys`` is empty or any key is not a column on
        ``table_class``.
    """
    if isinstance(attr_keys, str):
        attr_keys = [attr_keys]

    if len(attr_keys) == 0:
        raise ValueError("attr_keys must contain at least one column name")

    missing = [key for key in attr_keys if key not in table_class.__table__.columns]
    if missing:
        raise ValueError(f"Columns {missing} do not exist on table {table_class.__tablename__}")
    resolved_columns = [getattr(table_class, key) for key in attr_keys]

    # NOTE: a redundant second str-to-list conversion was removed here;
    # attr_keys is guaranteed to be a sequence at this point.
    cols_fragment = "_".join(attr_keys)
    name = f"ix_{table_class.__tablename__.lower()}_{cols_fragment}"
    return resolved_columns, name

def _create_attr_index(
    self,
    table_class: type[DeclarativeBase],
    attr_keys: Sequence[str] | str,
    *,
    unique: bool = False,
) -> str:
    """Create an index over ``attr_keys`` on ``table_class`` if absent.

    Uses ``checkfirst=True`` so an already-existing index with the same
    name is left untouched. Returns the index name.
    """
    columns, index_name = self._resolve_attr_keys(table_class, attr_keys)

    LOG.info(
        "Ensuring index '%s' on table %s (columns=%s, unique=%s)",
        index_name,
        table_class.__tablename__,
        attr_keys,
        unique,
    )
    sa.Index(index_name, *columns, unique=unique).create(bind=self._engine, checkfirst=True)
    return index_name

def _drop_attr_index(
    self,
    table_class: type[DeclarativeBase],
    attr_keys: Sequence[str] | str,
) -> str:
    """Drop the index over ``attr_keys`` on ``table_class`` if present.

    Uses ``checkfirst=True`` so a missing index is not an error.
    Returns the (dropped) index name.
    """
    columns, index_name = self._resolve_attr_keys(table_class, attr_keys)

    LOG.info(
        "Dropping index '%s' on table %s (columns=%s)",
        index_name,
        table_class.__tablename__,
        attr_keys,
    )
    sa.Index(index_name, *columns).drop(bind=self._engine, checkfirst=True)
    return index_name

def create_node_attr_index(
    self,
    attr_keys: Sequence[str] | str,
    *,
    unique: bool = False,
) -> str:
    """Create a database index on the given node attribute columns.

    If an index with the generated name already exists it is kept as-is.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.
    unique : bool, default False
        Whether the index should enforce uniqueness.

    Returns
    -------
    str
        The name of the index.
    """
    return self._create_attr_index(self.Node, attr_keys, unique=unique)

def create_edge_attr_index(
    self,
    attr_keys: Sequence[str] | str,
    *,
    unique: bool = False,
) -> str:
    """Create a database index on the given edge attribute columns.

    If an index with the generated name already exists it is kept as-is.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.
    unique : bool, default False
        Whether the index should enforce uniqueness.

    Returns
    -------
    str
        The name of the index.
    """
    return self._create_attr_index(self.Edge, attr_keys, unique=unique)

def drop_node_attr_index(self, attr_keys: Sequence[str] | str) -> str:
    """Drop the index on the given node attribute columns.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.

    Returns
    -------
    str
        The dropped index name.
    """
    return self._drop_attr_index(self.Node, attr_keys)

def drop_edge_attr_index(self, attr_keys: Sequence[str] | str) -> str:
    """Drop the index on the given edge attribute columns.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.

    Returns
    -------
    str
        The dropped index name.
    """
    return self._drop_attr_index(self.Edge, attr_keys)

def _sqlalchemy_type_inference(self, default_value: Any) -> TypeEngine:
if np.isscalar(default_value) and hasattr(default_value, "item"):
default_value = default_value.item()
Expand Down
52 changes: 52 additions & 0 deletions src/tracksdata/graph/_test/test_graph_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import polars as pl
import pytest
import rustworkx as rx
import sqlalchemy as sa
from zarr.storage import MemoryStore

from tracksdata.attrs import EdgeAttr, NodeAttr
Expand Down Expand Up @@ -2020,6 +2021,57 @@ def test_custom_indices(graph_backend: BaseGraph) -> None:
graph_backend.bulk_add_nodes([{"t": 3, "x": 1.0, "y": 1.0}], indices=[1, 2, 3])


def test_sqlgraph_node_attr_index_create_and_drop(graph_backend: BaseGraph) -> None:
    """Composite node-attribute index can be created and later dropped (SQLGraph only)."""
    if not isinstance(graph_backend, SQLGraph):
        pytest.skip("Only SQLGraph supports explicit SQL indexes")

    graph_backend.add_node_attr_key("label", "")
    table_name = graph_backend.Node.__tablename__
    expected_name = f"ix_{table_name.lower()}_t_label"

    graph_backend.create_node_attr_index(["t", "label"], unique=False)

    created = sa.inspect(graph_backend._engine).get_indexes(table_name)
    assert len(created) == 1
    assert any(ix["name"] == expected_name and ix["column_names"] == ["t", "label"] for ix in created)

    assert graph_backend.drop_node_attr_index(["t", "label"]) == expected_name

    remaining = sa.inspect(graph_backend._engine).get_indexes(table_name)
    assert all(ix["name"] != expected_name for ix in remaining)


def test_sqlgraph_edge_attr_index_create_and_drop(graph_backend: BaseGraph) -> None:
    """Unique edge-attribute index can be created and later dropped (SQLGraph only)."""
    if not isinstance(graph_backend, SQLGraph):
        pytest.skip("Only SQLGraph supports explicit SQL indexes")

    graph_backend.add_edge_attr_key("score", 0.0)
    table_name = graph_backend.Edge.__tablename__
    expected_name = f"ix_{table_name.lower()}_score"

    graph_backend.create_edge_attr_index("score", unique=True)

    created = sa.inspect(graph_backend._engine).get_indexes(table_name)
    # The edge table ships with source_id and target_id indexes, so the
    # new one brings the total to three.
    assert len(created) == 3
    assert any(ix["name"] == expected_name and ix.get("unique") for ix in created)

    assert graph_backend.drop_edge_attr_index("score") == expected_name

    remaining = sa.inspect(graph_backend._engine).get_indexes(table_name)
    # Only the built-in source_id and target_id indexes should survive.
    assert len(remaining) == 2
    assert all(ix["name"] != expected_name for ix in remaining)


def test_sqlgraph_index_missing_column(graph_backend: BaseGraph) -> None:
    """Indexing an unknown attribute key raises a ValueError (SQLGraph only)."""
    if not isinstance(graph_backend, SQLGraph):
        pytest.skip("Only SQLGraph supports explicit SQL indexes")

    with pytest.raises(ValueError, match=r"Columns .* do not exist"):
        graph_backend.create_node_attr_index("does_not_exist")


def test_remove_node(graph_backend: BaseGraph) -> None:
"""Test removing nodes from the graph."""
# Add attribute keys
Expand Down
Loading