Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
79 commits
Select commit Hold shift + click to select a range
fa90cb6
fixing benchmark results parsing and polars version
JoOkuma Nov 5, 2025
11b04a8
testing another fix
JoOkuma Nov 5, 2025
228224b
Merge branch 'main' of https://github.com/royerlab/tracksdata into fi…
yfukai Nov 12, 2025
31400e3
running?
yfukai Nov 12, 2025
4de0e30
updated workflow
yfukai Nov 12, 2025
19d9da3
appending comment to PR
yfukai Nov 12, 2025
3f557bf
added indexing functionality for SQLGraph
yfukai Nov 13, 2025
8468ab9
added mask regionprops computation
yfukai Nov 20, 2025
4b62a79
Merge branch 'main' of https://github.com/royerlab/tracksdata
yfukai Nov 20, 2025
13f70ae
max_id per timepoint fix
yfukai Nov 21, 2025
c874580
Merge branch 'fix_add_node_issue_sql'
yfukai Nov 21, 2025
9f828a3
patch spatial_filter
yfukai Nov 21, 2025
4195520
added test
yfukai Nov 21, 2025
6a95632
Merge branch 'sqlgraph_bbox_array'
yfukai Nov 21, 2025
1651665
Update src/tracksdata/nodes/_mask.py
yfukai Nov 22, 2025
741c15f
Update src/tracksdata/nodes/_mask.py
yfukai Nov 22, 2025
c8ed899
Update src/tracksdata/nodes/_mask.py
yfukai Nov 22, 2025
9cdff52
import change and changing to function
yfukai Nov 22, 2025
4555062
removed wrapping of regionprops
yfukai Nov 22, 2025
15a22d0
Merge branch 'mask_centroid_calc'
yfukai Nov 22, 2025
947b1de
Added caching to spatial_filter
yfukai Nov 22, 2025
73c1584
updated impl using group_by
yfukai Nov 23, 2025
cd33d22
test update
yfukai Nov 23, 2025
29d6a55
Merge branch 'fix_add_node_issue_sql'
yfukai Nov 23, 2025
e849db0
Merge branch 'cache_spatial_filter'
yfukai Nov 23, 2025
3fcc6df
Merge remote-tracking branch 'upstream/main' into fix_benchmark
yfukai Nov 25, 2025
58b06e4
Merge branch 'main' of https://github.com/royerlab/tracksdata
yfukai Nov 26, 2025
d3814e7
Merge branch 'main' of https://github.com/royerlab/tracksdata into fi…
yfukai Dec 1, 2025
da83638
removed constraint
yfukai Dec 1, 2025
800b0d7
trying conda
yfukai Dec 1, 2025
9a4297f
falling back to virtualenv
yfukai Dec 1, 2025
5de159b
removed machine infor from asv machine
yfukai Dec 1, 2025
995c792
added quick
yfukai Dec 1, 2025
d9d2c6e
ignored warnings
yfukai Dec 1, 2025
38d1744
updated tests
yfukai Dec 1, 2025
465c676
update
yfukai Dec 1, 2025
f9626c6
testing
yfukai Dec 1, 2025
1d24b0b
fix
yfukai Dec 1, 2025
7aa8861
further
yfukai Dec 1, 2025
0ffa86e
added main checkout
yfukai Dec 1, 2025
2cdb649
given up commenting on PR
yfukai Dec 1, 2025
b365c1c
reverted benchmark conds
yfukai Dec 1, 2025
2c46c1f
Update benchmarks/graph_backends.py
yfukai Dec 2, 2025
b13e7fc
udpated set_options
yfukai Dec 2, 2025
07dfa99
made benchmark faster
yfukai Dec 3, 2025
aef391e
Merge branch 'main' of https://github.com/royerlab/tracksdata
yfukai Dec 3, 2025
15c7434
Merge branch 'cache_spatial_filter'
yfukai Dec 3, 2025
5dd85ab
updating strategy
yfukai Dec 3, 2025
648ee9c
further
yfukai Dec 3, 2025
bc857ff
skipped time_points
yfukai Dec 3, 2025
f80fdac
shorten benchmark on ci
yfukai Dec 8, 2025
78463ab
updated workflow name and added tracklet_nodes benchmark
yfukai Dec 8, 2025
e15534f
changed param to save time
yfukai Dec 8, 2025
77a7e23
fixing _get_neighbors attribute order
JoOkuma Dec 9, 2025
aedc979
using modern sqlalchemy syntax
JoOkuma Dec 9, 2025
f098332
making edge and overlap table source and target ids indexible
JoOkuma Dec 9, 2025
d635039
stopped generating html report
yfukai Dec 9, 2025
32932f0
Merge remote-tracking branch 'upstream/main' into fix_benchmark
yfukai Dec 9, 2025
8fa64ee
reveerted pyproject
yfukai Dec 9, 2025
241cdd6
solved reinstallation issue
yfukai Dec 9, 2025
b5bc04c
update
yfukai Dec 9, 2025
8c614e2
Merge branch 'fix_benchmark'
yfukai Dec 9, 2025
c2588c2
Merge branch 'jookuma/sql-graph-performance-improv'
yfukai Dec 9, 2025
1f60f0e
Merge remote-tracking branch 'upstream/main' into sql_indexing
yfukai Dec 10, 2025
de518ec
updated test for adding index
yfukai Dec 10, 2025
1960079
fixed test
yfukai Dec 10, 2025
a5a4efd
Merge remote-tracking branch 'upstream/main' into sql_indexing
yfukai Dec 11, 2025
472be88
added benchmark
yfukai Dec 18, 2025
ac1dc37
Merge branch 'main' into sql_indexing
yfukai Jan 8, 2026
6dadf30
fixed bug
yfukai Jan 8, 2026
368185b
formatted
yfukai Jan 8, 2026
278f94a
Update src/tracksdata/graph/_sql_graph.py
yfukai Jan 8, 2026
f901779
Merge branch 'sql_indexing' of https://github.com/yfukai/tracksdata i…
yfukai Jan 8, 2026
bbb6b0a
update
yfukai Jan 8, 2026
673992a
fixed wrong update
yfukai Jan 8, 2026
3594699
fixed lint
yfukai Jan 8, 2026
8b43eb2
added performance to concepts.d
yfukai Jan 9, 2026
57e7ed9
renamed from ensure to create
yfukai Jan 10, 2026
72000bd
fixed test
yfukai Jan 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ A common data structure and basic tools for multi-object tracking.
- Graph-based representation of tracking problems
- In-memory (RustWorkX) and database-backed (SQL) graph backends
- Nodes and edges can take arbitrary attributes
- SQLGraph backend can index frequently queried attributes for faster filtering
- Standardize API for node operators (e.g. defining objects and their attributes)
- Standardize API for edge operators (e.g. creating edges between nodes)
- Basic tracking solvers: nearest neighbors and integer linear programming
Expand Down
10 changes: 9 additions & 1 deletion docs/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@ TracksData supports multiple graph backends for different use cases:
### SQLGraph
- **Use case**: Large datasets that don't fit in memory
- **Performance**: Good for storage and querying
- **Features**: Persistent storage, complex queries
- **Features**: Persistent storage, complex queries, database indexes on frequently filtered attributes

SQLGraph lets you create indexes on node or edge attributes to keep repeated
filters fast (~150x speedup when selecting 1k nodes out of 10M total nodes):

```python
graph.create_node_attr_index(["t", "label"]) # composite index
graph.create_edge_attr_index("score", unique=True)
```

### GraphView
- **Use case**: Result subgraphs from either backend
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ docs = [
"mike",
"traccuracy>=0.4.2",
]
bench = ["asv"]

[project.urls]
Documentation = "https://github.com/jookuma/tracksdata#readme"
Expand Down
160 changes: 160 additions & 0 deletions src/tracksdata/graph/_sql_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,166 @@ def edge_attr_keys(self) -> list[str]:
keys.remove(k)
return keys

def _resolve_attr_keys(
    self,
    table_class: type[DeclarativeBase],
    attr_keys: Sequence[str] | str,
) -> tuple[list[sa.Column], str]:
    """Validate attribute keys against a table class and resolve them.

    Checks that every requested attribute key exists as a column on
    ``table_class``, then returns the resolved column objects together
    with a deterministic index name derived from the table and column
    names.

    Parameters
    ----------
    table_class : type[DeclarativeBase]
        The SQLAlchemy table class.
    attr_keys : Sequence[str] | str
        The attribute keys to include in the index. A single string is
        treated as a one-element sequence.

    Returns
    -------
    tuple[list[sa.Column], str]
        The resolved columns and the generated index name, in the form
        ``ix_<tablename>_<col1>_<col2>...``.

    Raises
    ------
    ValueError
        If ``attr_keys`` is empty or any key is not a column on
        ``table_class``.
    """
    if isinstance(attr_keys, str):
        attr_keys = [attr_keys]

    if len(attr_keys) == 0:
        raise ValueError("attr_keys must contain at least one column name")

    missing = [key for key in attr_keys if key not in table_class.__table__.columns]
    if missing:
        raise ValueError(f"Columns {missing} do not exist on table {table_class.__tablename__}")
    resolved_columns = [getattr(table_class, key) for key in attr_keys]

    # NOTE: a redundant second str-to-list conversion was removed here;
    # attr_keys is guaranteed to be a sequence at this point.
    cols_fragment = "_".join(attr_keys)
    name = f"ix_{table_class.__tablename__.lower()}_{cols_fragment}"
    return resolved_columns, name

def _create_attr_index(
    self,
    table_class: type[DeclarativeBase],
    attr_keys: Sequence[str] | str,
    *,
    unique: bool = False,
) -> str:
    """Create an index over ``attr_keys`` on ``table_class`` if absent.

    Uses ``checkfirst=True`` so an already-existing index with the same
    name is left untouched. Returns the index name.
    """
    columns, index_name = self._resolve_attr_keys(table_class, attr_keys)

    LOG.info(
        "Ensuring index '%s' on table %s (columns=%s, unique=%s)",
        index_name,
        table_class.__tablename__,
        attr_keys,
        unique,
    )
    sa.Index(index_name, *columns, unique=unique).create(bind=self._engine, checkfirst=True)
    return index_name

def _drop_attr_index(
    self,
    table_class: type[DeclarativeBase],
    attr_keys: Sequence[str] | str,
) -> str:
    """Drop the index over ``attr_keys`` on ``table_class`` if present.

    Uses ``checkfirst=True`` so a missing index is not an error.
    Returns the (dropped) index name.
    """
    columns, index_name = self._resolve_attr_keys(table_class, attr_keys)

    LOG.info(
        "Dropping index '%s' on table %s (columns=%s)",
        index_name,
        table_class.__tablename__,
        attr_keys,
    )
    sa.Index(index_name, *columns).drop(bind=self._engine, checkfirst=True)
    return index_name

def create_node_attr_index(
    self,
    attr_keys: Sequence[str] | str,
    *,
    unique: bool = False,
) -> str:
    """Create a database index on the given node attribute columns.

    If an index with the generated name already exists it is kept as-is.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.
    unique : bool, default False
        Whether the index should enforce uniqueness.

    Returns
    -------
    str
        The name of the index.
    """
    return self._create_attr_index(self.Node, attr_keys, unique=unique)

def create_edge_attr_index(
    self,
    attr_keys: Sequence[str] | str,
    *,
    unique: bool = False,
) -> str:
    """Create a database index on the given edge attribute columns.

    If an index with the generated name already exists it is kept as-is.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.
    unique : bool, default False
        Whether the index should enforce uniqueness.

    Returns
    -------
    str
        The name of the index.
    """
    return self._create_attr_index(self.Edge, attr_keys, unique=unique)

def drop_node_attr_index(self, attr_keys: Sequence[str] | str) -> str:
    """Drop the index on the given node attribute columns.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.

    Returns
    -------
    str
        The dropped index name.
    """
    return self._drop_attr_index(self.Node, attr_keys)

def drop_edge_attr_index(self, attr_keys: Sequence[str] | str) -> str:
    """Drop the index on the given edge attribute columns.

    Parameters
    ----------
    attr_keys : Sequence[str] | str
        A single column name, or a sequence of names for a composite index.

    Returns
    -------
    str
        The dropped index name.
    """
    return self._drop_attr_index(self.Edge, attr_keys)

def _sqlalchemy_type_inference(self, default_value: Any) -> TypeEngine:
if np.isscalar(default_value) and hasattr(default_value, "item"):
default_value = default_value.item()
Expand Down
52 changes: 52 additions & 0 deletions src/tracksdata/graph/_test/test_graph_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import polars as pl
import pytest
import rustworkx as rx
import sqlalchemy as sa
from zarr.storage import MemoryStore

from tracksdata.attrs import EdgeAttr, NodeAttr
Expand Down Expand Up @@ -2020,6 +2021,57 @@ def test_custom_indices(graph_backend: BaseGraph) -> None:
graph_backend.bulk_add_nodes([{"t": 3, "x": 1.0, "y": 1.0}], indices=[1, 2, 3])


def test_sqlgraph_node_attr_index_create_and_drop(graph_backend: BaseGraph) -> None:
    """Composite node-attribute index can be created and later dropped (SQLGraph only)."""
    if not isinstance(graph_backend, SQLGraph):
        pytest.skip("Only SQLGraph supports explicit SQL indexes")

    graph_backend.add_node_attr_key("label", "")
    table_name = graph_backend.Node.__tablename__
    expected_name = f"ix_{table_name.lower()}_t_label"

    graph_backend.create_node_attr_index(["t", "label"], unique=False)

    created = sa.inspect(graph_backend._engine).get_indexes(table_name)
    assert len(created) == 1
    assert any(ix["name"] == expected_name and ix["column_names"] == ["t", "label"] for ix in created)

    assert graph_backend.drop_node_attr_index(["t", "label"]) == expected_name

    remaining = sa.inspect(graph_backend._engine).get_indexes(table_name)
    assert all(ix["name"] != expected_name for ix in remaining)


def test_sqlgraph_edge_attr_index_create_and_drop(graph_backend: BaseGraph) -> None:
    """Unique edge-attribute index can be created and later dropped (SQLGraph only)."""
    if not isinstance(graph_backend, SQLGraph):
        pytest.skip("Only SQLGraph supports explicit SQL indexes")

    graph_backend.add_edge_attr_key("score", 0.0)
    table_name = graph_backend.Edge.__tablename__
    expected_name = f"ix_{table_name.lower()}_score"

    graph_backend.create_edge_attr_index("score", unique=True)

    created = sa.inspect(graph_backend._engine).get_indexes(table_name)
    # The edge table ships with source_id and target_id indexes, so the
    # new one brings the total to three.
    assert len(created) == 3
    assert any(ix["name"] == expected_name and ix.get("unique") for ix in created)

    assert graph_backend.drop_edge_attr_index("score") == expected_name

    remaining = sa.inspect(graph_backend._engine).get_indexes(table_name)
    # Only the built-in source_id and target_id indexes should survive.
    assert len(remaining) == 2
    assert all(ix["name"] != expected_name for ix in remaining)


def test_sqlgraph_index_missing_column(graph_backend: BaseGraph) -> None:
    """Indexing an unknown attribute key raises a ValueError (SQLGraph only)."""
    if not isinstance(graph_backend, SQLGraph):
        pytest.skip("Only SQLGraph supports explicit SQL indexes")

    with pytest.raises(ValueError, match=r"Columns .* do not exist"):
        graph_backend.create_node_attr_index("does_not_exist")


def test_remove_node(graph_backend: BaseGraph) -> None:
"""Test removing nodes from the graph."""
# Add attribute keys
Expand Down
Loading