From 0acb2b3c7ec1a1c9f1fb4948a6149e1e135d5a2c Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:14:54 -0400 Subject: [PATCH 1/9] Add benchmark tests and CI configuration for performance evaluation --- .github/workflows/ci.yaml | 19 +++ .github/workflows/docker-multiarch.yml | 0 pyproject.toml | 3 +- tests/test_bench.py | 167 +++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/docker-multiarch.yml create mode 100644 tests/test_bench.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 08cc6a0..e6768fd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,6 +42,25 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} + benchmarks: + runs-on: ubuntu-latest + env: + UV_NO_SYNC: "1" + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v6 + with: + python-version: "3.12" + enable-cache: true + + - name: install + run: uv sync --no-dev --group test-codspeed + + - name: Run benchmarks + uses: CodSpeedHQ/action@v3 + with: + run: uv run pytest -W ignore --codspeed -v --color=yes + deploy: name: Deploy needs: test diff --git a/.github/workflows/docker-multiarch.yml b/.github/workflows/docker-multiarch.yml new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index 7f199ce..9b7f99f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ classifiers = [ dependencies = ["witty>=v0.2.1", "CT3>=3.3.3", "numpy", "setuptools>=75.8.0"] [dependency-groups] -test = ["pytest>=8.3.5", "pytest-cov>=6.1.1"] +test = ["pytest>=8.3.5", "pytest-benchmark>=5.1.0", "pytest-cov>=6.1.1"] +test-codspeed = [{ include-group = "test" }, "pytest-codspeed >=3.2.0"] dev = [ { include-group = "test" }, "ipython>=8.18.1", diff --git a/tests/test_bench.py b/tests/test_bench.py new file mode 100644 index 0000000..1c4290f --- /dev/null +++ b/tests/test_bench.py @@ -0,0 +1,167 @@ +""" +Benchmark tests for spatial graph query performance. + +These tests extract the core spatial query functionality from the VisPy demonstration +to provide atomic, reproducible benchmarks without GUI dependencies. +""" + +import sys + +import numpy as np +import pytest + +from spatial_graph import SpatialGraph + +if all(x not in {"--codspeed", "tests/test_bench.py"} for x in sys.argv): + pytest.skip( + "use 'pytest tests/test_bench.py' to run benchmark", allow_module_level=True + ) + + +def _make_graph( + ndims=3, + node_dtype="uint64", + node_attr_dtypes=None, + edge_attr_dtypes=None, + directed=False, + n_nodes=100_000, +): + """Helper to create a SpatialGraph instance with default parameters.""" + if node_attr_dtypes is None: + node_attr_dtypes = {"position": "double[3]"} + if edge_attr_dtypes is None: + edge_attr_dtypes = {"score": "float32"} + + graph = SpatialGraph( + ndims=ndims, + node_dtype=node_dtype, + node_attr_dtypes=node_attr_dtypes, + edge_attr_dtypes=edge_attr_dtypes, + position_attr="position", + directed=directed, + ) + nodes = np.arange(n_nodes, dtype="uint64") + positions = np.random.random((n_nodes, ndims)) + graph.add_nodes(nodes, position=positions) + + return graph + + +@pytest.mark.parametrize("n_nodes", [100_000]) +def test_query_nearest_nodes_performance(n_nodes, benchmark): + """Benchmark the core nearest neighbor query operation.""" + large_graph = _make_graph(n_nodes=n_nodes) + query_point = np.array([0.5, 0.5, 0.5]) + + # Benchmark the key operation from the VisPy demo + def _run(): + return large_graph.query_nearest_nodes( + query_point, k=10_000, return_distances=True + ) + + closest, distances = benchmark(_run) + + # Verify results are reasonable + assert len(closest) == 10_000 + assert len(distances) == 10_000 + assert np.all(distances >= 0) + assert np.all(np.diff(distances) >= 0) # distances should be sorted + + +@pytest.mark.parametrize("n_nodes", [100_000]) +def test_node_attribute_access_performance(n_nodes, benchmark): + """Benchmark node attribute access as done in the VisPy demo.""" + large_graph = _make_graph(n_nodes=n_nodes) + nodes = np.arange(n_nodes, dtype="uint64") + + # This is the operation that was being timed in the demo + positions = benchmark(lambda: large_graph.node_attrs[nodes].position) + + # Verify results + assert positions.shape == (n_nodes, 3) + assert positions.dtype == np.float64 + + +@pytest.mark.parametrize("n_nodes", [10_000]) +def test_repeated_nearest_queries_performance(n_nodes, benchmark): + """Benchmark repeated nearest neighbor queries as in mouse movement.""" + + medium_graph = _make_graph(n_nodes=n_nodes) + # Simulate multiple mouse positions (like in the VisPy demo) + num_queries = 100 + query_points = np.random.random((num_queries, 3)) + + def _run(): + for i in range(num_queries): + # Query nearest nodes + closest, distances = medium_graph.query_nearest_nodes( + query_points[i], k=1000, return_distances=True + ) + positions = medium_graph.node_attrs[closest].position + return closest, distances, positions + + closest, distances, positions = benchmark(_run) + + # Verify results + assert len(closest) <= 1000 # may be fewer if graph is smaller + assert len(distances) == len(closest) + assert positions.shape[1] == 3 + + +@pytest.mark.parametrize("k_value", [1000, 10000]) +@pytest.mark.parametrize("n_nodes", [100_000, 100_000]) +def test_various_k_values_performance(n_nodes, k_value, benchmark): + """Benchmark performance across different k values.""" + medium_graph = _make_graph(n_nodes=n_nodes) + query_point = np.array([0.5, 0.5, 0.5]) + + closest, distances = benchmark( + lambda: medium_graph.query_nearest_nodes( + query_point, k=k_value, return_distances=True + ) + ) + + expected_k = min(k_value, len(medium_graph.nodes)) + assert len(closest) == expected_k + assert len(distances) == expected_k + + +@pytest.mark.parametrize("n_nodes", [100_000]) +def test_roi_query_performance(n_nodes, benchmark): + """Benchmark ROI (region of interest) queries.""" + large_graph = _make_graph(n_nodes=n_nodes) + # Define a ROI that should contain a reasonable number of nodes + roi = np.array([[0.25, 0.25, 0.25], [0.75, 0.75, 0.75]]) + + nodes_in_roi = benchmark(lambda: large_graph.query_nodes_in_roi(roi)) + + # Verify results + assert len(nodes_in_roi) > 0 + assert len(nodes_in_roi) < 100_000 # Should be subset + + # Verify nodes are actually in ROI + positions = large_graph.node_attrs[nodes_in_roi].position + assert np.all(positions >= roi[0]) + assert np.all(positions <= roi[1]) + + +@pytest.mark.parametrize("k", [1, 100, 1000, 10000]) +@pytest.mark.parametrize("n_nodes", [10_000]) +def test_nearest_query_correctness_and_performance(k: int, n_nodes: int, benchmark): + """Test both correctness and performance of nearest neighbor queries.""" + medium_graph = _make_graph(n_nodes=n_nodes) + query_point = np.array([0.0, 0.0, 0.0]) # Corner point + + closest, distances = benchmark( + lambda: medium_graph.query_nearest_nodes( + query_point, k=k, return_distances=True + ) + ) + + expected_k = min(k, len(medium_graph.nodes)) + + # Correctness checks + assert len(closest) == expected_k + assert len(distances) == expected_k + assert np.all(distances >= 0) + assert np.all(np.diff(distances) >= 0) # Should be sorted by distance From 47b9b4c24e2c86b96ea21c39c3717be5fa189a5d Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:18:42 -0400 Subject: [PATCH 2/9] exclude bench from test deps --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9b7f99f..0c635b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,13 +31,14 @@ classifiers = [ dependencies = ["witty>=v0.2.1", "CT3>=3.3.3", "numpy", "setuptools>=75.8.0"] [dependency-groups] -test = ["pytest>=8.3.5", "pytest-benchmark>=5.1.0", "pytest-cov>=6.1.1"] +test = ["pytest>=8.3.5", "pytest-cov>=6.1.1"] test-codspeed = [{ include-group = "test" }, "pytest-codspeed >=3.2.0"] dev = [ { include-group = "test" }, "ipython>=8.18.1", "mypy>=1.15.0", "pre-commit>=4.2.0", + "pytest-benchmark>=5.1.0", # specifically excluded from test group for ci "ruff>=0.11.10", ] docs = [ From 537e2fca95c3d972bc650050ea63d71923282860 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:18:58 -0400 Subject: [PATCH 3/9] pca --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8539dc9..3b25a42 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.12.2 hooks: - - id: ruff + - id: ruff-check args: [--fix, --unsafe-fixes] - id: ruff-format From ef03aa0bd60465a4072ae7c06da8e714b23aa657 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:32:11 -0400 Subject: [PATCH 4/9] rm --- .github/workflows/docker-multiarch.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .github/workflows/docker-multiarch.yml diff --git a/.github/workflows/docker-multiarch.yml b/.github/workflows/docker-multiarch.yml deleted file mode 100644 index e69de29..0000000 From dccbc4a662da9bd5921d4a7307513602d68a3b18 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:37:22 -0400 Subject: [PATCH 5/9] add warmup --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e6768fd..0706302 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -59,7 +59,7 @@ jobs: - name: Run benchmarks uses: CodSpeedHQ/action@v3 with: - run: uv run pytest -W ignore --codspeed -v --color=yes + run: uv run pytest -W ignore --codspeed -v --color=yes --codspeed-warmup-time 2 deploy: name: Deploy From 7fbea3c734a0ba2d9bbb3e9d446100291dc6ff4e Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:51:56 -0400 Subject: [PATCH 6/9] remove warmup --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0706302..e6768fd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -59,7 +59,7 @@ jobs: - name: Run benchmarks uses: CodSpeedHQ/action@v3 with: - run: uv run pytest -W ignore --codspeed -v --color=yes --codspeed-warmup-time 2 + run: uv run pytest -W ignore --codspeed -v --color=yes deploy: name: Deploy From 73654e81770403b287f4c2eaf8c75b3db0563b29 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Thu, 17 Jul 2025 21:52:06 -0400 Subject: [PATCH 7/9] bump python --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e6768fd..87462e7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -50,7 +50,7 @@ jobs: - uses: actions/checkout@v4 - uses: astral-sh/setup-uv@v6 with: - python-version: "3.12" + python-version: "3.13" enable-cache: true - name: install From f1d71ae8d5925554a9bffdc670b56d0b11c218dd Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Fri, 18 Jul 2025 13:29:55 -0400 Subject: [PATCH 8/9] less tests --- tests/test_bench.py | 107 +++++--------------------------------------- 1 file changed, 12 insertions(+), 95 deletions(-) diff --git a/tests/test_bench.py b/tests/test_bench.py index 1c4290f..0a31e36 100644 --- a/tests/test_bench.py +++ b/tests/test_bench.py @@ -1,10 +1,3 @@ -""" -Benchmark tests for spatial graph query performance. - -These tests extract the core spatial query functionality from the VisPy demonstration -to provide atomic, reproducible benchmarks without GUI dependencies. -""" - import sys import numpy as np @@ -12,6 +5,7 @@ from spatial_graph import SpatialGraph +# either run this file directly or with pytest --codspeed if all(x not in {"--codspeed", "tests/test_bench.py"} for x in sys.argv): pytest.skip( "use 'pytest tests/test_bench.py' to run benchmark", allow_module_level=True @@ -47,86 +41,31 @@ def _make_graph( return graph -@pytest.mark.parametrize("n_nodes", [100_000]) -def test_query_nearest_nodes_performance(n_nodes, benchmark): - """Benchmark the core nearest neighbor query operation.""" - large_graph = _make_graph(n_nodes=n_nodes) - query_point = np.array([0.5, 0.5, 0.5]) - - # Benchmark the key operation from the VisPy demo - def _run(): - return large_graph.query_nearest_nodes( - query_point, k=10_000, return_distances=True - ) - - closest, distances = benchmark(_run) - - # Verify results are reasonable - assert len(closest) == 10_000 - assert len(distances) == 10_000 - assert np.all(distances >= 0) - assert np.all(np.diff(distances) >= 0) # distances should be sorted - - -@pytest.mark.parametrize("n_nodes", [100_000]) -def test_node_attribute_access_performance(n_nodes, benchmark): - """Benchmark node attribute access as done in the VisPy demo.""" - large_graph = _make_graph(n_nodes=n_nodes) - nodes = np.arange(n_nodes, dtype="uint64") - - # This is the operation that was being timed in the demo - positions = benchmark(lambda: large_graph.node_attrs[nodes].position) - - # Verify results - assert positions.shape == (n_nodes, 3) - assert positions.dtype == np.float64 - - -@pytest.mark.parametrize("n_nodes", [10_000]) -def test_repeated_nearest_queries_performance(n_nodes, benchmark): - """Benchmark repeated nearest neighbor queries as in mouse movement.""" - - medium_graph = _make_graph(n_nodes=n_nodes) - # Simulate multiple mouse positions (like in the VisPy demo) - num_queries = 100 +@pytest.mark.parametrize("num_queries", [100]) +@pytest.mark.parametrize("k", [1000, 10000]) +@pytest.mark.parametrize("n_nodes", [100_000, 1_000_000]) +def test_bench_query_nearest_nodes(n_nodes: int, k: int, num_queries: int, benchmark): + """Benchmark query_nearest_nodes.""" + graph = _make_graph(n_nodes=n_nodes) query_points = np.random.random((num_queries, 3)) def _run(): for i in range(num_queries): # Query nearest nodes - closest, distances = medium_graph.query_nearest_nodes( - query_points[i], k=1000, return_distances=True + closest, distances = graph.query_nearest_nodes( + query_points[i], k=k, return_distances=True ) - positions = medium_graph.node_attrs[closest].position + positions = graph.node_attrs[closest].position return closest, distances, positions closest, distances, positions = benchmark(_run) # Verify results - assert len(closest) <= 1000 # may be fewer if graph is smaller assert len(distances) == len(closest) assert positions.shape[1] == 3 -@pytest.mark.parametrize("k_value", [1000, 10000]) -@pytest.mark.parametrize("n_nodes", [100_000, 100_000]) -def test_various_k_values_performance(n_nodes, k_value, benchmark): - """Benchmark performance across different k values.""" - medium_graph = _make_graph(n_nodes=n_nodes) - query_point = np.array([0.5, 0.5, 0.5]) - - closest, distances = benchmark( - lambda: medium_graph.query_nearest_nodes( - query_point, k=k_value, return_distances=True - ) - ) - - expected_k = min(k_value, len(medium_graph.nodes)) - assert len(closest) == expected_k - assert len(distances) == expected_k - - -@pytest.mark.parametrize("n_nodes", [100_000]) +@pytest.mark.parametrize("n_nodes", [100_000, 1_000_000]) def test_roi_query_performance(n_nodes, benchmark): """Benchmark ROI (region of interest) queries.""" large_graph = _make_graph(n_nodes=n_nodes) @@ -137,31 +76,9 @@ def test_roi_query_performance(n_nodes, benchmark): # Verify results assert len(nodes_in_roi) > 0 - assert len(nodes_in_roi) < 100_000 # Should be subset + assert len(nodes_in_roi) < n_nodes # Should be subset # Verify nodes are actually in ROI positions = large_graph.node_attrs[nodes_in_roi].position assert np.all(positions >= roi[0]) assert np.all(positions <= roi[1]) - - -@pytest.mark.parametrize("k", [1, 100, 1000, 10000]) -@pytest.mark.parametrize("n_nodes", [10_000]) -def test_nearest_query_correctness_and_performance(k: int, n_nodes: int, benchmark): - """Test both correctness and performance of nearest neighbor queries.""" - medium_graph = _make_graph(n_nodes=n_nodes) - query_point = np.array([0.0, 0.0, 0.0]) # Corner point - - closest, distances = benchmark( - lambda: medium_graph.query_nearest_nodes( - query_point, k=k, return_distances=True - ) - ) - - expected_k = min(k, len(medium_graph.nodes)) - - # Correctness checks - assert len(closest) == expected_k - assert len(distances) == expected_k - assert np.all(distances >= 0) - assert np.all(np.diff(distances) >= 0) # Should be sorted by distance From 3325dc311bf9d6014a9a7db933c070c8564d0ce7 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Fri, 18 Jul 2025 13:34:07 -0400 Subject: [PATCH 9/9] add badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d34d020..437e8b5 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![Python Version](https://img.shields.io/pypi/pyversions/spatial-graph.svg?color=green)](https://python.org) [![CI](https://github.com/funkelab/spatial_graph/actions/workflows/ci.yaml/badge.svg)](https://github.com/funkelab/spatial_graph/actions/workflows/ci.yaml) [![codecov](https://codecov.io/gh/funkelab/spatial_graph/branch/main/graph/badge.svg)](https://codecov.io/gh/funkelab/spatial_graph) +[![CodSpeed](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/funkelab/spatial_graph) `spatial_graph` provides a data structure for directed and undirected graphs, where each node has an nD position (in time or space).