From bef3eaf12bbe978fc7f688ec1f0aee65de254d0d Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Tue, 5 May 2026 16:47:30 -0400 Subject: [PATCH 1/9] Tests + dependencies for benchmarks --- pyproject.toml | 1 + tests/benchmarks/test_index.py | 114 +++++++++++++++++++++++++++++++++ tests/benchmarks/test_query.py | 88 +++++++++++++++++++++++++ uv.lock | 60 +++++++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 tests/benchmarks/test_index.py create mode 100644 tests/benchmarks/test_query.py diff --git a/pyproject.toml b/pyproject.toml index ec2ab72..6622b2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ s3 = [ pybids = ["pandas>=2.0.0"] [dependency-groups] +benchmark = ["polars>=1.40.1", "pytest>=9.0.3", "pytest-benchmark>=5.2.3"] dev = [ "pdoc>=16.0.0", "pre-commit>=4.6.0", diff --git a/tests/benchmarks/test_index.py b/tests/benchmarks/test_index.py new file mode 100644 index 0000000..9894b58 --- /dev/null +++ b/tests/benchmarks/test_index.py @@ -0,0 +1,114 @@ +"""Indexing benchmarks.""" + +import os +import shutil +from pathlib import Path +from typing import Callable + +import pyarrow.parquet as pq +import pytest +from pytest_benchmark.fixture import BenchmarkFixture + +import bids2table as b2t2 + + +def du(path: Path) -> float: + """Compute directory size in mb.""" + total = 0 + stack = [path] + while stack: + for entry in os.scandir(stack.pop()): + try: + st = entry.stat(follow_symlinks=False) + if entry.is_dir(follow_symlinks=False): + stack.append(Path(entry.path)) + else: + total += st.st_size + except OSError: + continue + return total / 1_024**2 + + +def _run_benchmark( + benchmark: BenchmarkFixture, + func: Callable, + index_fpath: Path, + version: str, + workers: int, + *args, + **kwargs, +) -> None: + sizes = [] + + def _teardown(index_fpath: Path): + size = du(index_fpath.parent) + sizes.append(size) + if index_fpath.exists(): + shutil.rmtree(index_fpath.parent) + + # Benchmark + benchmark.pedantic( + func, + teardown=_teardown(index_fpath=index_fpath), + args=args, + kwargs=kwargs, + iterations=1, + rounds=11, # Include an additional round for warmup + ) + + # Additional info + benchmark.extra_info.update( + { + "size_mb": sizes, + "version": version or "Unknown", + "workers": workers or "Unknown", + } + ) + + +@pytest.mark.benchmark +def test_openneuro(benchmark: BenchmarkFixture, tmp_path: Path) -> None: + """Benchmark b2t2 with a subset of datasets on OpenNeuro.""" + workers = 4 + index_fpath = tmp_path / "index.parquet" + + def index() -> None: + path = b2t2._pathlib.as_path("s3://openneuro.org/ds002*") + paths = list(path.parent.glob(path.name)) + schema = b2t2.get_arrow_schema() + assert len(paths) > 1, "1 or less datasets found...check the path provided" + with pq.ParquetWriter(index_fpath, schema) as writer: + for table in b2t2.batch_index_dataset( + paths, # type: ignore[arg-type] + max_workers=workers, + show_progress=False, + ): + writer.write_table(table) + + _run_benchmark( + benchmark, + index, + index_fpath=index_fpath, + version=b2t2.__version__, + workers=workers, + ) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("workers", (1, 4)) +def test_local(benchmark: BenchmarkFixture, tmp_path: Path, workers: int) -> None: + """Bids2Table v2 benchmarking on local dataset.""" + index_fpath = tmp_path / "index.parquet" + data_dir = Path("bids-examples/ds000117") + + def index() -> None: + table = b2t2.index_dataset(data_dir, max_workers=workers, show_progress=False) + pq.write_table(table, index_fpath) + + _run_benchmark( + benchmark, + index, + index_fpath=index_fpath, + version=b2t2.__version__, + workers=workers, + ) diff --git a/tests/benchmarks/test_query.py b/tests/benchmarks/test_query.py new file mode 100644 index 0000000..c93d19b --- /dev/null +++ b/tests/benchmarks/test_query.py @@ -0,0 +1,88 @@ +"""Querying benchmarks.""" + +import datetime +from pathlib import Path +from typing import Callable + +import polars as pl +import pytest +from pytest_benchmark.fixture import BenchmarkFixture + +import bids2table as b2t2 + +SUBJECTS = ["01", "10"] +NUM_VOLS = 184 +TARGET_TE = 0.00875 +TARGET_TIME = datetime.time(10).strftime("%H:%M:%S.%f") + + +def _run_benchmark( + benchmark: BenchmarkFixture, + func: Callable, + version: str, + *args, + **kwargs, +) -> None: + benchmark.pedantic(func, args=args, kwargs=kwargs, iterations=1, rounds=11) + benchmark.extra_info.update({"version": version or "Unknown"}) + + +@pytest.mark.benchmark +class TestB2TQuery: + """Benchmark different b2t queries.""" + + @pytest.fixture + def index(self) -> tuple: + """Index dataset with b2t.""" + data_dir = Path("bids-examples/ds000117") + table = b2t2.index_dataset(data_dir, show_progress=False) + df = pl.from_arrow(table) + df = df.with_columns( + pl.format("{}/{}", pl.col("root"), pl.col("path")).alias("fpath") + ) + df = df.with_columns( + pl.col("fpath") + .map_elements(b2t2.load_bids_metadata, return_dtype=pl.Object) + .alias("json") + ) + version = b2t2.__version__ + return df, version + + def test_subject_query(self, benchmark: BenchmarkFixture, index: tuple) -> None: + """Benchmark subject queries.""" + table, version = index + + def query() -> None: + table.get_column("sub").unique() + + _run_benchmark(benchmark, query, version=version) + + def test_bold_query(self, benchmark: BenchmarkFixture, index: tuple) -> None: + """Benchmark queries for bold images.""" + table, version = index + table = table.with_columns( + [pl.col("ext").cast(pl.Categorical), pl.col("suffix").cast(pl.Categorical)] + ) + + def query() -> None: + table.select(["ext", "suffix", "fpath"]).filter( + (pl.col("ext") == ".nii.gz") & (pl.col("suffix") == "bold") + ).get_column("fpath") + + _run_benchmark(benchmark, query, version=version) + + def test_metadata_query(self, benchmark: BenchmarkFixture, index: tuple) -> None: + """Benchmark query via metadata.""" + table, version = index + table = table.with_columns( + pl.col("json") + .map_elements(lambda x: x.get("EchoTime"), return_dtype=pl.Float64) + .alias("echo_time") + ) + + def query() -> None: + table.select(["sub", "echo_time", "fpath"]).filter( + (pl.col("sub").is_in(SUBJECTS)) & (pl.col("echo_time") == TARGET_TE) + ).get_column("fpath") + + _run_benchmark(benchmark, query, version=version) diff --git a/uv.lock b/uv.lock index f112a16..e4bcc26 100644 --- a/uv.lock +++ b/uv.lock @@ -43,6 +43,11 @@ s3 = [ ] [package.dev-dependencies] +benchmark = [ + { name = "polars" }, + { name = "pytest" }, + { name = "pytest-benchmark" }, +] dev = [ { name = "pdoc" }, { name = "pre-commit" }, @@ -63,6 +68,11 @@ requires-dist = [ provides-extras = ["cloud", "s3", "pybids"] [package.metadata.requires-dev] +benchmark = [ + { name = "polars", specifier = ">=1.40.1" }, + { name = "pytest", specifier = ">=9.0.3" }, + { name = "pytest-benchmark", specifier = ">=5.2.3" }, +] dev = [ { name = "pdoc", specifier = ">=16.0.0" }, { name = "pre-commit", specifier = ">=4.6.0" }, @@ -943,6 +953,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polars" +version = "1.40.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "polars-runtime-32" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/8c/bc9bc948058348ed43117cecc3007cd608f395915dae8a00974579a5dab1/polars-1.40.1.tar.gz", hash = "sha256:ab2694134b137596b5a59bfd7b4c54ebbc9b59f9403127f18e32d363777552e8", size = 733574, upload-time = "2026-04-22T19:15:55.507Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/91/74fc60d94488685a92ac9d49d7ec55f3e91fe9b77942a6235a5fa7f249c3/polars-1.40.1-py3-none-any.whl", hash = "sha256:c0f861219d1319cdea45c4ce4d30355a47176b8f98dcedf95ea8269f131b8abd", size = 828723, upload-time = "2026-04-22T19:14:25.452Z" }, +] + +[[package]] +name = "polars-runtime-32" +version = "1.40.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ba/26d40f039be9f552b5fd7365a621bdfc0f8e912ef77094ae4693491b0bae/polars_runtime_32-1.40.1.tar.gz", hash = "sha256:37f3065615d1bf90d03b5326222df4c5c1f8a5d33e50470aa588e3465e6eb814", size = 2935843, upload-time = "2026-04-22T19:15:57.26Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/46/22c8af5eed68ac2eeb556e0fa3ca8a7b798e984ceff4450888f3b5ac61fd/polars_runtime_32-1.40.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b748ef652270cc49e9e69f99a035e0eb4d5f856d42bcd6ac4d9d80a40142aa1e", size = 52098755, upload-time = "2026-04-22T19:14:28.555Z" }, + { url = "https://files.pythonhosted.org/packages/c6/3e/48599a38009ca60ff82a6f38c8a621ce3c0286aa7397c7d79e741bd9060e/polars_runtime_32-1.40.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:d249b3743e05986060cec0a7aaa542d020df6c6b876e556023a310efd581f9be", size = 46367542, upload-time = "2026-04-22T19:14:32.433Z" }, + { url = "https://files.pythonhosted.org/packages/43/e9/384bc069367a1a36ee31c13782c178dbd039b2b873b772d4a0fc23a2373d/polars_runtime_32-1.40.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5987b30e7aa1059d069498496e8dda35afd592b0ac3d46ed87e3ff8df1ad652c", size = 50252104, upload-time = "2026-04-22T19:14:35.945Z" }, + { url = "https://files.pythonhosted.org/packages/15/ef/7d57ceb0651af74194e97ed6583e148d352f03d696090221b8059cdfc90b/polars_runtime_32-1.40.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d7f42a8b3f16fc66002cc0f6516f7dd7653396886ae0ed362ab95c0b3408b59", size = 56250788, upload-time = "2026-04-22T19:14:39.743Z" }, + { url = "https://files.pythonhosted.org/packages/10/0f/e4b3ffc748827a14a474ec9c42e45c066050e440fec57e914091d9adda75/polars_runtime_32-1.40.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e5f7becc237a7ec9d9a10878dc8e54b73bbf4e2d94a2991c37d7a0b38590d8f9", size = 50432590, upload-time = "2026-04-22T19:14:43.388Z" }, + { url = "https://files.pythonhosted.org/packages/d9/0b/b8d95fbed869fa4caabe9c400e4210374913b376e925e96fdcfa9be6416b/polars_runtime_32-1.40.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:992d14cf191dde043d36fbdbc98a65e43fbc7e9a5024cecd45f838ac4988c1ee", size = 54155564, upload-time = "2026-04-22T19:14:47.239Z" }, + { url = "https://files.pythonhosted.org/packages/06/d9/d091d8fb5cbed5e9536adfed955c4c89987a4cc3b8e73ae4532402b91c74/polars_runtime_32-1.40.1-cp310-abi3-win_amd64.whl", hash = "sha256:f78bb2abd00101cbb23cc0cb068f7e36e081057a15d2ec2dde3dda280709f030", size = 51829755, upload-time = "2026-04-22T19:14:50.85Z" }, + { url = "https://files.pythonhosted.org/packages/65/ad/b33c3022a394f3eb55c3310597cec615412a8a33880055eee191d154a628/polars_runtime_32-1.40.1-cp310-abi3-win_arm64.whl", hash = "sha256:b5cbfaf6b085b420b4bfcbe24e8f665076d1cccfdb80c0484c02a023ce205537", size = 45822104, upload-time = "2026-04-22T19:14:54.192Z" }, +] + [[package]] name = "pre-commit" version = "4.6.0" @@ -986,6 +1024,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/ef/50433d346c56657a70d27f156c7b349ac59a068b01de4eb796e747eecc43/protobuf-7.35.0-py3-none-any.whl", hash = "sha256:c13f325cf242bad135c350629eeb5d54b24228eb472fb3e2e9ebbd4c5dc20ca0", size = 171659, upload-time = "2026-05-19T23:02:27.842Z" }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, +] + [[package]] name = "pyarrow" version = "24.0.0" @@ -1091,6 +1138,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] +[[package]] +name = "pytest-benchmark" +version = "5.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py-cpuinfo" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" }, +] + [[package]] name = "pytest-cov" version = "7.1.0" From fcad52b0f61342aad2f649260432ff113323b9e1 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Wed, 6 May 2026 13:15:28 -0400 Subject: [PATCH 2/9] Add scripts for benchmarking --- .github/scripts/compare_benchmarks.py | 161 ++++++++++++++++++++++++++ .github/scripts/run_benchmarks.py | 27 +++++ 2 files changed, 188 insertions(+) create mode 100644 .github/scripts/compare_benchmarks.py create mode 100644 .github/scripts/run_benchmarks.py diff --git a/.github/scripts/compare_benchmarks.py b/.github/scripts/compare_benchmarks.py new file mode 100644 index 0000000..531ccd9 --- /dev/null +++ b/.github/scripts/compare_benchmarks.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +"""Compare benchmark results across PR, main, and tag and output a markdown table.""" + +import json +import statistics +from pathlib import Path +from typing import Literal, NamedTuple + + +class BenchmarkResult(NamedTuple): + fullname: str + kind: Literal["index", "query"] + locality: Literal["local", "remote"] | None = None + workers: int | None = None + median: float = 0.0 + mean: float = 0.0 + stddev: float = 0.0 + + +def parse_file(path: Path) -> dict[str, BenchmarkResult]: + data = json.loads(path.read_text()) + results = {} + for benchmark in data["benchmarks"]: + fullname: str = benchmark["fullname"] + data_trimmed = benchmark["stats"]["data"][1:] + median = statistics.median(data_trimmed) + mean = statistics.mean(data_trimmed) + stddev = statistics.stdev(data_trimmed) + + if "query" in fullname: + result = BenchmarkResult( + fullname=fullname, kind="query", median=median, mean=mean, stddev=stddev + ) + else: + locality: Literal["local", "remote"] = ( + "remote" if "openneuro" in fullname or "s3" in fullname else "local" + ) + workers = benchmark["extra_info"].get("workers", "Unknown") + result = BenchmarkResult( + fullname=fullname, + kind="index", + locality=locality, + workers=workers, + median=median, + mean=mean, + stddev=stddev, + ) + results[fullname] = result + return results + + +def _scale(val: float) -> float: + return val * 1000 + + +def _fmt(res: BenchmarkResult) -> str: + median = _scale(res.median) + mean = _scale(res.mean) + stddev = _scale(res.stddev) + return f"{median:.3f} ({mean:.3f} ยฑ {stddev:.3f}) ms" + + +def _delta(pr: BenchmarkResult, ref: BenchmarkResult) -> str: + if ref == 0: + return "N/A" + diff = _scale(pr.median - ref.median) + pct = (pr.median / ref.median - 1) * 100 + icon = "๐Ÿ”ด" if pct > 5 else "๐ŸŸข" if pct < -5 else "โšช" + return f"{icon} {diff:+.3f} ms ({pct:+.1f}%)" + + +def _label(result: BenchmarkResult) -> str: + if result.kind == "query": + return ( + result.fullname.split("::")[-1] + .replace("test_", "") + .replace("_", " ") + .capitalize() + ) + return f"{result.locality.capitalize()} index ({result.workers} workers)" + + +def build_table( + pr: dict[str, BenchmarkResult], + main: dict[str, BenchmarkResult], + tag: dict[str, BenchmarkResult], + tag_name: str, +) -> str: + all_keys = set(pr) | set(main) | set(tag) + labels = [_label((pr.get(k) or main.get(k) or tag.get(k))) for k in all_keys] + + col_sep = " | " + header = "| |" + col_sep.join(f" **{label}** " for label in labels) + " |" + divider = "|-|" + "|".join("---" for _ in all_keys) + "|" + + def row(name: str, results: dict[str, BenchmarkResult]) -> str: + cells = [_fmt(results[k]) if k in results else "โ€”" for k in all_keys] + return "| **" + name + "** |" + col_sep.join(f" {c} " for c in cells) + " |" + + def delta_row(label: str, ref: dict[str, BenchmarkResult]) -> str: + cells = [ + _delta(pr[k], ref[k]) if k in pr and k in ref else "โ€”" for k in all_keys + ] + return "| *" + label + "* |" + col_sep.join(f" {c} " for c in cells) + " |" + + lines = [ + "## Benchmark Results", + "", + header, + divider, + row("PR", pr), + row("main", main), + row(tag_name, tag), + divider.replace("-", ""), + delta_row("PR vs main", main), + delta_row(f"PR vs {tag_name}", tag), + "", + "> `median (mean ยฑ std)`", + "> ", + "๐Ÿ”ด >5% slower   โšช within 5%   ๐ŸŸข >5% faster", + ] + return "\n".join(lines) + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--pattern", + default="benchmark-*.json", + help="Glob pattern for benchmark JSON files", + ) + parser.add_argument( + "-o", + "--output", + help="Output markdown filepath containing benchmark comparisons", + ) + args = parser.parse_args() + + files = sorted(Path(".").glob(args.pattern)) + assert len(files) == 3, f"Expected 3 files, found {len(files)}: {files}" + + # Infer pr/main/tag from directory name + parsed: dict[str, BenchmarkResult] = {} + tag = None + for f in files: + stem = f.parent.name # e.g. "benchmark-pr" + key = stem.split("-")[-1] # "pr", "main", tag + if key not in ("pr", "main"): + tag = key + parsed[key] = parse_file(f) + if tag is None: + raise ValueError("Unknown tag") + table = build_table(parsed["pr"], parsed["main"], parsed[tag], tag_name=tag) + args.output.write_text(table) + print(table) + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/run_benchmarks.py b/.github/scripts/run_benchmarks.py new file mode 100644 index 0000000..64ce1e5 --- /dev/null +++ b/.github/scripts/run_benchmarks.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +"""Perform benchmarks across PR commit, main, and previous tag.""" + +import argparse + +import pytest + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-o", "--output", required=True, help="Output JSON file path") + args = parser.parse_args() + + pytest.main( + [ + "-m", + "benchmark", + "--benchmark-save-data", + f"--benchmark-json={args.output}", + "--benchmark-time-unit=ms", + "--benchmark-warmup=on", + ] + ) + + +if __name__ == "__main__": + main() From 72411332f2381de84f74e1496c34613718fd6179 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Wed, 6 May 2026 13:15:40 -0400 Subject: [PATCH 3/9] Add benchmark CI --- .github/workflows/benchmark.yaml | 62 ++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 .github/workflows/benchmark.yaml diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml new file mode 100644 index 0000000..9b15465 --- /dev/null +++ b/.github/workflows/benchmark.yaml @@ -0,0 +1,62 @@ +name: Benchmark + +on: + pull_request: + branches: [ "main" ] + +jobs: + get-tag: + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.last_tag.outputs.tag }} + steps: + - uses: actions/checkout@v6 + with: + fetch-tags: true + fetch-depth: 0 + - id: last_tag + run: echo ="tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT + + benchmark: + needs: get-tag + runs-on: ubuntu-latest + strategy: + matrix: + target: + - name: pr + ref: ${{ github.sha }} + - name: main + ref: main + - name: ${{ needs.get_tag.outputs.tag }} + ref: ${{ needs.get_tag.outputs.tag }} + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ matrix.target.ref }} + submodules: true + - uses: astral-sh/setup-uv@v8.1.0 + - run: uv sync --group "benchmark" --extra "cloud" + - name: Run benchmarks + run: | + uv run .github/scripts/run_benchmarks.py \ + --output benchmark-${{matrix.target.name }}.json + - uses: actions/upload-artifact@v7 + with: + name: benchmark-${{ matrix.target.name }} + path: benchmark-${{ matrix.target.name }}.json + + report: + needs: [ get-tag, benchmark ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v8.1.0 + - run: uv sync --group "benchmark" + - uses: actions/download-artifact@v8 + with: + pattern: benchmark-* + - name: Generate report + run: | + uv run .github/scripts/compare_benchmarks.py \ + --output benchmarks.md \ + --pattern benchmark-*.json From 93f191400c32642bbf2f9cfdde4910bc04c338ec Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Wed, 6 May 2026 13:45:30 -0400 Subject: [PATCH 4/9] Register benchmark pytest marker --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6622b2b..a9f7380 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,4 +69,7 @@ lint.extend-select = ["I"] [tool.pytest.ini_options] log_cli = true log_cli_level = "INFO" -markers = ["cloud: Tests requiring cloud group dependencies"] +markers = [ + "benchmark: Tests used for benchmarking", + "cloud: Tests requiring cloud group dependencies", +] From 4afac6dbb548392c76a5f66d5e64bfa078b863c8 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Wed, 6 May 2026 13:51:31 -0400 Subject: [PATCH 5/9] Fix CI workflow bugs - Mark tests with "cloud" and / or "benchmark" as needed - Combine both "dev" and "benchmark" dependencies, was causing issues with the pytest due to imports (alternatively, use `try-except` block for optional dependency import) - Replace pandas with polars in dev dependency (for benchmarking) --- .github/scripts/run_benchmarks.py | 2 +- .github/workflows/benchmark.yaml | 3 +-- .github/workflows/ci.yaml | 10 +++++----- pyproject.toml | 2 +- tests/benchmarks/test_index.py | 1 + uv.lock | 18 +++++------------- 6 files changed, 14 insertions(+), 22 deletions(-) diff --git a/.github/scripts/run_benchmarks.py b/.github/scripts/run_benchmarks.py index 64ce1e5..cf870c0 100644 --- a/.github/scripts/run_benchmarks.py +++ b/.github/scripts/run_benchmarks.py @@ -14,7 +14,7 @@ def main(): pytest.main( [ "-m", - "benchmark", + "benchmark and not cloud", "--benchmark-save-data", f"--benchmark-json={args.output}", "--benchmark-time-unit=ms", diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 9b15465..cd91f8f 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -35,7 +35,7 @@ jobs: ref: ${{ matrix.target.ref }} submodules: true - uses: astral-sh/setup-uv@v8.1.0 - - run: uv sync --group "benchmark" --extra "cloud" + - run: uv sync --extra "cloud" - name: Run benchmarks run: | uv run .github/scripts/run_benchmarks.py \ @@ -51,7 +51,6 @@ jobs: steps: - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v8.1.0 - - run: uv sync --group "benchmark" - uses: actions/download-artifact@v8 with: pattern: benchmark-* diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 773ee8a..4117030 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,19 +42,19 @@ jobs: python-version: ${{ matrix.python-version }} - run: uv sync --all-extras - - name: Run tests without cloudpathlib + - name: Run non-cloud tests run: | uv run pytest \ - -m "not cloud" \ + -m "not cloud and not benchmark" \ --junitxml=pytest-cloudless.xml \ --cov-report=xml:coverage.xml \ - --cov bids2table \ + --cov=bids2table \ tests - - name: Run tests with cloudpathlib + - name: Run cloud tests run: | uv run pytest \ - -m "cloud" \ + -m "cloud and not benchmark" \ --junitxml=pytest-cloud.xml \ --cov-report=xml:coverage.xml \ --cov=bids2table \ diff --git a/pyproject.toml b/pyproject.toml index a9f7380..92f6514 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,11 +35,11 @@ s3 = [ pybids = ["pandas>=2.0.0"] [dependency-groups] -benchmark = ["polars>=1.40.1", "pytest>=9.0.3", "pytest-benchmark>=5.2.3"] dev = [ "pdoc>=16.0.0", "pre-commit>=4.6.0", "pytest>=9.0.3", + "pytest-benchmark>=5.2.3", "pytest-cov>=7.1.0", "ruff>=0.15.13", ] diff --git a/tests/benchmarks/test_index.py b/tests/benchmarks/test_index.py index 9894b58..8c3e37c 100644 --- a/tests/benchmarks/test_index.py +++ b/tests/benchmarks/test_index.py @@ -67,6 +67,7 @@ def _teardown(index_fpath: Path): @pytest.mark.benchmark +@pytest.mark.cloud def test_openneuro(benchmark: BenchmarkFixture, tmp_path: Path) -> None: """Benchmark b2t2 with a subset of datasets on OpenNeuro.""" workers = 4 diff --git a/uv.lock b/uv.lock index e4bcc26..8b75f83 100644 --- a/uv.lock +++ b/uv.lock @@ -43,15 +43,12 @@ s3 = [ ] [package.dev-dependencies] -benchmark = [ - { name = "polars" }, - { name = "pytest" }, - { name = "pytest-benchmark" }, -] dev = [ { name = "pdoc" }, + { name = "polars" }, { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-benchmark" }, { name = "pytest-cov" }, { name = "ruff" }, ] @@ -68,15 +65,12 @@ requires-dist = [ provides-extras = ["cloud", "s3", "pybids"] [package.metadata.requires-dev] -benchmark = [ - { name = "polars", specifier = ">=1.40.1" }, - { name = "pytest", specifier = ">=9.0.3" }, - { name = "pytest-benchmark", specifier = ">=5.2.3" }, -] dev = [ { name = "pdoc", specifier = ">=16.0.0" }, + { name = "polars", specifier = ">=1.40.1" }, { name = "pre-commit", specifier = ">=4.6.0" }, { name = "pytest", specifier = ">=9.0.3" }, + { name = "pytest-benchmark", specifier = ">=5.2.3" }, { name = "pytest-cov", specifier = ">=7.1.0" }, { name = "ruff", specifier = ">=0.15.13" }, ] @@ -105,9 +99,7 @@ dependencies = [ { name = "s3transfer" }, ] sdist = { url = "https://files.pythonhosted.org/packages/67/2f/c4159fa45079b41f11ad17d8c5df8e1d10169b94d1e4240df5be116d3f0a/boto3-1.43.12.tar.gz", hash = "sha256:4a60cdf02c52cb0a60f8dbc986142ce2c31e87e3df1438ffe6755b83008f3e4e", size = 113142, upload-time = "2026-05-20T19:38:13.163Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/35/b7ab4b6977811f9887405e24460640033c22f4515cf1e904480710bd6296/boto3-1.43.12-py3-none-any.whl", hash = "sha256:685c3e6093455623bfc22dac55b4946ea243095252f7f9c11a99d84b38033bcf", size = 140537, upload-time = "2026-05-20T19:38:09.995Z" }, -] + [[package]] name = "botocore" From 2b2dbcf5732160efb79f7bfa9094630c097cc7a1 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Wed, 6 May 2026 14:12:48 -0400 Subject: [PATCH 6/9] Fix benchmark workflow bugs - Switch to shortened SHA for PR - Add PR for unique output file artifact - Disable comparison against tag due to lack of dependency group - Add step to comment on PR - Sort labels for comment --- .github/scripts/compare_benchmarks.py | 46 +++++++++++++++-------- .github/scripts/run_benchmarks.py | 2 +- .github/workflows/benchmark.yaml | 54 ++++++++++++++++++++------- 3 files changed, 73 insertions(+), 29 deletions(-) diff --git a/.github/scripts/compare_benchmarks.py b/.github/scripts/compare_benchmarks.py index 531ccd9..2a8a46c 100644 --- a/.github/scripts/compare_benchmarks.py +++ b/.github/scripts/compare_benchmarks.py @@ -2,10 +2,17 @@ """Compare benchmark results across PR, main, and tag and output a markdown table.""" import json +import logging +import re import statistics from pathlib import Path from typing import Literal, NamedTuple +_logger = logging.getLogger(__name__) + + +ALERT = 250 # Value (arbitrary; in ms) to indicate difference between benchmarks + class BenchmarkResult(NamedTuple): fullname: str @@ -64,9 +71,9 @@ def _delta(pr: BenchmarkResult, ref: BenchmarkResult) -> str: if ref == 0: return "N/A" diff = _scale(pr.median - ref.median) - pct = (pr.median / ref.median - 1) * 100 - icon = "๐Ÿ”ด" if pct > 5 else "๐ŸŸข" if pct < -5 else "โšช" - return f"{icon} {diff:+.3f} ms ({pct:+.1f}%)" + # Indicator for 250ms absolute diff (arbitrary) + icon = "๐Ÿ”ด" if diff > ALERT else "๐ŸŸข" if diff < -ALERT else "โšช" + return f"{icon} {diff:+.3f}ms" def _label(result: BenchmarkResult) -> str: @@ -83,10 +90,13 @@ def _label(result: BenchmarkResult) -> str: def build_table( pr: dict[str, BenchmarkResult], main: dict[str, BenchmarkResult], - tag: dict[str, BenchmarkResult], - tag_name: str, + tag: dict[str, BenchmarkResult] = {}, + tag_name: str | None = None, ) -> str: all_keys = set(pr) | set(main) | set(tag) + all_keys = sorted( + all_keys, key=lambda x: (0 if "index" in x else 1 if "query" in x else 2, x) + ) labels = [_label((pr.get(k) or main.get(k) or tag.get(k))) for k in all_keys] col_sep = " | " @@ -110,14 +120,14 @@ def delta_row(label: str, ref: dict[str, BenchmarkResult]) -> str: divider, row("PR", pr), row("main", main), - row(tag_name, tag), + # row(tag_name, tag), divider.replace("-", ""), delta_row("PR vs main", main), - delta_row(f"PR vs {tag_name}", tag), + # delta_row(f"PR vs {tag_name}", tag), "", "> `median (mean ยฑ std)`", "> ", - "๐Ÿ”ด >5% slower   โšช within 5%   ๐ŸŸข >5% faster", + f"> ๐Ÿ”ด >{ALERT}ms slower   โšช within {ALERT}ms   ๐ŸŸข >{ALERT}ms faster", ] return "\n".join(lines) @@ -134,27 +144,33 @@ def main(): parser.add_argument( "-o", "--output", + type=Path, help="Output markdown filepath containing benchmark comparisons", ) args = parser.parse_args() files = sorted(Path(".").glob(args.pattern)) - assert len(files) == 3, f"Expected 3 files, found {len(files)}: {files}" + assert len(files) > 1, "Expected more than 1 file for benchmark comparison." # Infer pr/main/tag from directory name parsed: dict[str, BenchmarkResult] = {} tag = None for f in files: - stem = f.parent.name # e.g. "benchmark-pr" - key = stem.split("-")[-1] # "pr", "main", tag - if key not in ("pr", "main"): + stem = f.name # e.g. "benchmark-pr-PR-#" + key = stem.split("-")[1] # commit-sha, "main", tag + + # Special cases + if re.match(r"^v\d+\.\d+.\d+$", key): tag = key + elif key != "main": + key = "pr" + parsed[key] = parse_file(f) if tag is None: - raise ValueError("Unknown tag") - table = build_table(parsed["pr"], parsed["main"], parsed[tag], tag_name=tag) + _logger.warning("Tag not found") + table = build_table(parsed["pr"], parsed["main"], parsed.get(tag, {}), tag_name=tag) args.output.write_text(table) - print(table) + _logger.info(table) if __name__ == "__main__": diff --git a/.github/scripts/run_benchmarks.py b/.github/scripts/run_benchmarks.py index cf870c0..64ce1e5 100644 --- a/.github/scripts/run_benchmarks.py +++ b/.github/scripts/run_benchmarks.py @@ -14,7 +14,7 @@ def main(): pytest.main( [ "-m", - "benchmark and not cloud", + "benchmark", "--benchmark-save-data", f"--benchmark-json={args.output}", "--benchmark-time-unit=ms", diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index cd91f8f..d919a6a 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -4,31 +4,38 @@ on: pull_request: branches: [ "main" ] +permissions: + pull-requests: write jobs: - get-tag: + prep: runs-on: ubuntu-latest outputs: tag: ${{ steps.last_tag.outputs.tag }} + short_sha: ${{ steps.short.outputs.sha }} steps: - uses: actions/checkout@v6 with: fetch-tags: true fetch-depth: 0 - id: last_tag - run: echo ="tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT + run: echo "tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT + - id: short + run: echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT benchmark: - needs: get-tag + needs: prep runs-on: ubuntu-latest strategy: + fail-fast: false matrix: target: - - name: pr + - name: ${{ needs.prep.outputs.short_sha }} ref: ${{ github.sha }} - name: main ref: main - - name: ${{ needs.get_tag.outputs.tag }} - ref: ${{ needs.get_tag.outputs.tag }} + # Tag comparison disabled until next release (missing benchmark dependencies) + # - name: ${{ needs.prep.outputs.tag }} + # ref: ${{ needs.prep.outputs.tag }} steps: - uses: actions/checkout@v6 with: @@ -38,15 +45,19 @@ jobs: - run: uv sync --extra "cloud" - name: Run benchmarks run: | - uv run .github/scripts/run_benchmarks.py \ - --output benchmark-${{matrix.target.name }}.json + FILENAME="benchmark-${{ matrix.target.name }}-PR-${{ github.event.pull_request.number }}.json" + uv run .github/scripts/run_benchmarks.py --output "$FILENAME" + echo "REPORT_PATH=$FILENAME" >> $GITHUB_ENV - uses: actions/upload-artifact@v7 with: - name: benchmark-${{ matrix.target.name }} - path: benchmark-${{ matrix.target.name }}.json + name: benchmark-${{ matrix.target.name }}-PR-${{ + github.event.pull_request.number }} + path: ${{ env.REPORT_PATH }} + retention-days: 1 + overwrite: true report: - needs: [ get-tag, benchmark ] + needs: [ prep, benchmark ] runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -54,8 +65,25 @@ jobs: - uses: actions/download-artifact@v8 with: pattern: benchmark-* + merge-multiple: true + path: benchmark-results - name: Generate report run: | uv run .github/scripts/compare_benchmarks.py \ - --output benchmarks.md \ - --pattern benchmark-*.json + --output "benchmarks.md" \ + --pattern "benchmark-results/benchmark-*-PR-${{ github.event.pull_request.number }}.json" + - name: Find Comment + uses: peter-evans/find-comment@v3 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: "github-actions[bot]" + body-includes: "Benchmark Results" + + - name: Create / update comment + uses: peter-evans/create-or-update-comment@v5 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body-path: "benchmarks.md" + edit-mode: replace From cea0a0bbb8040bde52ae49493656ff5b863b3d16 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Wed, 13 May 2026 17:25:57 -0400 Subject: [PATCH 7/9] Add benchmarking script - Fold CI scripts into local benchmark script - Remove CI workflow - Use importlib for pytest for identical file names across different test modules --- .github/scripts/compare_benchmarks.py | 177 ------------- .github/scripts/run_benchmarks.py | 27 -- .github/workflows/benchmark.yaml | 82 +----- .gitignore | 1 + pyproject.toml | 2 + scripts/benchmark.py | 368 ++++++++++++++++++++++++++ uv.lock | 4 +- 7 files changed, 384 insertions(+), 277 deletions(-) delete mode 100644 .github/scripts/compare_benchmarks.py delete mode 100644 .github/scripts/run_benchmarks.py create mode 100644 scripts/benchmark.py diff --git a/.github/scripts/compare_benchmarks.py b/.github/scripts/compare_benchmarks.py deleted file mode 100644 index 2a8a46c..0000000 --- a/.github/scripts/compare_benchmarks.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python -"""Compare benchmark results across PR, main, and tag and output a markdown table.""" - -import json -import logging -import re -import statistics -from pathlib import Path -from typing import Literal, NamedTuple - -_logger = logging.getLogger(__name__) - - -ALERT = 250 # Value (arbitrary; in ms) to indicate difference between benchmarks - - -class BenchmarkResult(NamedTuple): - fullname: str - kind: Literal["index", "query"] - locality: Literal["local", "remote"] | None = None - workers: int | None = None - median: float = 0.0 - mean: float = 0.0 - stddev: float = 0.0 - - -def parse_file(path: Path) -> dict[str, BenchmarkResult]: - data = json.loads(path.read_text()) - results = {} - for benchmark in data["benchmarks"]: - fullname: str = benchmark["fullname"] - data_trimmed = benchmark["stats"]["data"][1:] - median = statistics.median(data_trimmed) - mean = statistics.mean(data_trimmed) - stddev = statistics.stdev(data_trimmed) - - if "query" in fullname: - result = BenchmarkResult( - fullname=fullname, kind="query", median=median, mean=mean, stddev=stddev - ) - else: - locality: Literal["local", "remote"] = ( - "remote" if "openneuro" in fullname or "s3" in fullname else "local" - ) - workers = benchmark["extra_info"].get("workers", "Unknown") - result = BenchmarkResult( - fullname=fullname, - kind="index", - locality=locality, - workers=workers, - median=median, - mean=mean, - stddev=stddev, - ) - results[fullname] = result - return results - - -def _scale(val: float) -> float: - return val * 1000 - - -def _fmt(res: BenchmarkResult) -> str: - median = _scale(res.median) - mean = _scale(res.mean) - stddev = _scale(res.stddev) - return f"{median:.3f} ({mean:.3f} ยฑ {stddev:.3f}) ms" - - -def _delta(pr: BenchmarkResult, ref: BenchmarkResult) -> str: - if ref == 0: - return "N/A" - diff = _scale(pr.median - ref.median) - # Indicator for 250ms absolute diff (arbitrary) - icon = "๐Ÿ”ด" if diff > ALERT else "๐ŸŸข" if diff < -ALERT else "โšช" - return f"{icon} {diff:+.3f}ms" - - -def _label(result: BenchmarkResult) -> str: - if result.kind == "query": - return ( - result.fullname.split("::")[-1] - .replace("test_", "") - .replace("_", " ") - .capitalize() - ) - return f"{result.locality.capitalize()} index ({result.workers} workers)" - - -def build_table( - pr: dict[str, BenchmarkResult], - main: dict[str, BenchmarkResult], - tag: dict[str, BenchmarkResult] = {}, - tag_name: str | None = None, -) -> str: - all_keys = set(pr) | set(main) | set(tag) - all_keys = sorted( - all_keys, key=lambda x: (0 if "index" in x else 1 if "query" in x else 2, x) - ) - labels = [_label((pr.get(k) or main.get(k) or tag.get(k))) for k in all_keys] - - col_sep = " | " - header = "| |" + col_sep.join(f" **{label}** " for label in labels) + " |" - divider = "|-|" + "|".join("---" for _ in all_keys) + "|" - - def row(name: str, results: dict[str, BenchmarkResult]) -> str: - cells = [_fmt(results[k]) if k in results else "โ€”" for k in all_keys] - return "| **" + name + "** |" + col_sep.join(f" {c} " for c in cells) + " |" - - def delta_row(label: str, ref: dict[str, BenchmarkResult]) -> str: - cells = [ - _delta(pr[k], ref[k]) if k in pr and k in ref else "โ€”" for k in all_keys - ] - return "| *" + label + "* |" + col_sep.join(f" {c} " for c in cells) + " |" - - lines = [ - "## Benchmark Results", - "", - header, - divider, - row("PR", pr), - row("main", main), - # row(tag_name, tag), - divider.replace("-", ""), - delta_row("PR vs main", main), - # delta_row(f"PR vs {tag_name}", tag), - "", - "> `median (mean ยฑ std)`", - "> ", - f"> ๐Ÿ”ด >{ALERT}ms slower   โšช within {ALERT}ms   ๐ŸŸข >{ALERT}ms faster", - ] - return "\n".join(lines) - - -def main(): - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument( - "--pattern", - default="benchmark-*.json", - help="Glob pattern for benchmark JSON files", - ) - parser.add_argument( - "-o", - "--output", - type=Path, - help="Output markdown filepath containing benchmark comparisons", - ) - args = parser.parse_args() - - files = sorted(Path(".").glob(args.pattern)) - assert len(files) > 1, "Expected more than 1 file for benchmark comparison." - - # Infer pr/main/tag from directory name - parsed: dict[str, BenchmarkResult] = {} - tag = None - for f in files: - stem = f.name # e.g. "benchmark-pr-PR-#" - key = stem.split("-")[1] # commit-sha, "main", tag - - # Special cases - if re.match(r"^v\d+\.\d+.\d+$", key): - tag = key - elif key != "main": - key = "pr" - - parsed[key] = parse_file(f) - if tag is None: - _logger.warning("Tag not found") - table = build_table(parsed["pr"], parsed["main"], parsed.get(tag, {}), tag_name=tag) - args.output.write_text(table) - _logger.info(table) - - -if __name__ == "__main__": - main() diff --git a/.github/scripts/run_benchmarks.py b/.github/scripts/run_benchmarks.py deleted file mode 100644 index 64ce1e5..0000000 --- a/.github/scripts/run_benchmarks.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -"""Perform benchmarks across PR commit, main, and previous tag.""" - -import argparse - -import pytest - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("-o", "--output", required=True, help="Output JSON file path") - args = parser.parse_args() - - pytest.main( - [ - "-m", - "benchmark", - "--benchmark-save-data", - f"--benchmark-json={args.output}", - "--benchmark-time-unit=ms", - "--benchmark-warmup=on", - ] - ) - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index d919a6a..5649216 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -2,88 +2,26 @@ name: Benchmark on: pull_request: - branches: [ "main" ] + branches: ["main"] -permissions: - pull-requests: write jobs: - prep: - runs-on: ubuntu-latest - outputs: - tag: ${{ steps.last_tag.outputs.tag }} - short_sha: ${{ steps.short.outputs.sha }} - steps: - - uses: actions/checkout@v6 - with: - fetch-tags: true - fetch-depth: 0 - - id: last_tag - run: echo "tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT - - id: short - run: echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT - benchmark: - needs: prep runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - target: - - name: ${{ needs.prep.outputs.short_sha }} - ref: ${{ github.sha }} - - name: main - ref: main - # Tag comparison disabled until next release (missing benchmark dependencies) - # - name: ${{ needs.prep.outputs.tag }} - # ref: ${{ needs.prep.outputs.tag }} + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} steps: - uses: actions/checkout@v6 with: - ref: ${{ matrix.target.ref }} + fetch-depth: 0 submodules: true + ref: ${{ github.head_ref }} - uses: astral-sh/setup-uv@v8.1.0 - - run: uv sync --extra "cloud" + - run: uv sync --frozen --all-extras - name: Run benchmarks + id: run-benchmarks run: | - FILENAME="benchmark-${{ matrix.target.name }}-PR-${{ github.event.pull_request.number }}.json" - uv run .github/scripts/run_benchmarks.py --output "$FILENAME" - echo "REPORT_PATH=$FILENAME" >> $GITHUB_ENV + uv run python scripts/benchmark.py --branch $BRANCH_NAME - uses: actions/upload-artifact@v7 with: - name: benchmark-${{ matrix.target.name }}-PR-${{ - github.event.pull_request.number }} - path: ${{ env.REPORT_PATH }} - retention-days: 1 - overwrite: true - - report: - needs: [ prep, benchmark ] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@v8.1.0 - - uses: actions/download-artifact@v8 - with: - pattern: benchmark-* - merge-multiple: true - path: benchmark-results - - name: Generate report - run: | - uv run .github/scripts/compare_benchmarks.py \ - --output "benchmarks.md" \ - --pattern "benchmark-results/benchmark-*-PR-${{ github.event.pull_request.number }}.json" - - name: Find Comment - uses: peter-evans/find-comment@v3 - id: fc - with: - issue-number: ${{ github.event.pull_request.number }} - comment-author: "github-actions[bot]" - body-includes: "Benchmark Results" - - - name: Create / update comment - uses: peter-evans/create-or-update-comment@v5 - with: - comment-id: ${{ steps.fc.outputs.comment-id }} - issue-number: ${{ github.event.pull_request.number }} - body-path: "benchmarks.md" - edit-mode: replace + name: benchmark-${{ matrix.target.name }} + path: ${{ steps.run-benchmarks.outputs.report_file }} diff --git a/.gitignore b/.gitignore index 748e2f9..2d1a0f2 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ htmlcov # Local data and scratch .scratch +benchmarks/ # Local virtual environment .venv diff --git a/pyproject.toml b/pyproject.toml index 92f6514..17e249e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ pybids = ["pandas>=2.0.0"] [dependency-groups] dev = [ "pdoc>=16.0.0", + "polars>=1.40.1", "pre-commit>=4.6.0", "pytest>=9.0.3", "pytest-benchmark>=5.2.3", @@ -69,6 +70,7 @@ lint.extend-select = ["I"] [tool.pytest.ini_options] log_cli = true log_cli_level = "INFO" +addopts = "--import-mode=importlib" markers = [ "benchmark: Tests used for benchmarking", "cloud: Tests requiring cloud group dependencies", diff --git a/scripts/benchmark.py b/scripts/benchmark.py new file mode 100644 index 0000000..2ae13b3 --- /dev/null +++ b/scripts/benchmark.py @@ -0,0 +1,368 @@ +# /// script +# requires-python = ">=3.13" +# dependencies = [] +# /// +"""Perform benchmarking of bids2table against last tag, main and feature branches. + +Run with: + uv run --with scripts/benchmark.py -b [-o ] +""" + +from __future__ import annotations + +import argparse +import json +import logging +import os +import statistics +import subprocess +import sys +from contextlib import contextmanager +from datetime import datetime, timezone +from pathlib import Path +from typing import Literal, NamedTuple + +import pytest + +logging.basicConfig(level=logging.INFO) +_logger = logging.getLogger("bids2table.benchmark") + + +# Suppression and resetting (after checkout) necessary due to streaming of outputs +@contextmanager +def _suppress_log_exceptions(): + logging.raiseExceptions = False + try: + yield + finally: + logging.raiseExceptions = True + + +def _reset_logger(): + for h in _logger.handlers[:]: + _logger.removeHandler(h) + h.close() + logging.basicConfig(stream=sys.stderr, level=logging.INFO) + + +class Git: + """Class to simplify git calls via subprocess.""" + + def __init__(self): + """Initialize the repository object, pulling in latest changes.""" + self.repo_path = self._root() + self._head_ref = self._run("rev-parse", "--abbrev-ref", "HEAD") + + def __enter__(self): + if bool(self._run("status", "--porcelain")): + _logger.error("Please stash or commit changes before benchmarking.") + sys.exit(1) + self.pull() + self.submodule_update() + return self + + def __exit__(self, *_): + """On context closure, checkout the HEAD ref.""" + self.checkout(self._head_ref) + + @staticmethod + def _root() -> Path: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], capture_output=True, text=True + ) + return Path(result.stdout.strip()) + + def _run(self, *args: str) -> str: + result = subprocess.run( + ["git", "-C", str(self.repo_path), *args], capture_output=True, text=True + ) + if result.returncode != 0: + _logger.error(result.stderr.strip()) + sys.exit(result.returncode) + return result.stdout.strip() + + def checkout(self, ref: str) -> None: + """Checkout reference. + + Args: + ref: Reference to checkout (e.g. branch, SHA, tag) + """ + self._run("checkout", ref) + + def pull(self) -> None: + """Pull from the remote repository.""" + self._run("pull") + + def submodule_update(self) -> None: + """Update submodules of the repo, initializing if necessary.""" + self._run("submodule", "update", "--init", "--recursive") + + def last_tag(self) -> str: + """Get last tag. + + Returns: + A string value of the last tag + """ + return self._run("describe", "--tags", "--abbrev=0") + + +class BenchmarkResult(NamedTuple): + fullname: str + kind: Literal["index", "query"] + locality: Literal["local", "remote"] | None = None + workers: int = 1 + median: float = 0.0 + mean: float = 0.0 + stddev: float = 0.0 + + +def parse_file(path: Path) -> dict[str, BenchmarkResult]: + data = json.loads(path.read_text()) + results = {} + for benchmark in data["benchmarks"]: + fullname: str = benchmark["fullname"] + data_trimmed = benchmark["stats"]["data"][1:] + median = statistics.median(data_trimmed) + mean = statistics.mean(data_trimmed) + stddev = statistics.stdev(data_trimmed) + + if "query" in fullname: + result = BenchmarkResult( + fullname=fullname, kind="query", median=median, mean=mean, stddev=stddev + ) + else: + locality: Literal["local", "remote"] = ( + "remote" if "openneuro" in fullname or "s3" in fullname else "local" + ) + workers = benchmark["extra_info"].get("workers", "Unknown") + result = BenchmarkResult( + fullname=fullname, + kind="index", + locality=locality, + workers=workers, + median=median, + mean=mean, + stddev=stddev, + ) + results[fullname] = result + return results + + +# Values are alwaays provided in seconds in the json outputs. +# Need to scale appropriately (also noting factor and unit to pass along for +# formatting). +class Value(NamedTuple): + value: float + factor: float + unit: str + + +def _scale(val: float) -> Value: + if val >= 1.0: + return Value(value=val, factor=1, unit="s") + elif val >= 1e-3: + return Value(value=val * 1e3, factor=1e3, unit="ms") + else: + return Value(value=val * 1e6, factor=1e6, unit="ยตs") + + +def _fmt(res: BenchmarkResult) -> str: + median = _scale(res.median) + mean = res.mean * median.factor + stddev = res.stddev * median.factor + return f"{median.value:.3f} ({mean:.3f} ยฑ {stddev:.3f}) {median.unit}" + + +def _ratio(pr: BenchmarkResult, ref: BenchmarkResult) -> str: + ratio = pr.median / ref.median + icon = "๐Ÿ”ด" if ratio > 1 else "๐ŸŸข" if ratio < 1 else "โšช" + return f"{icon} {ratio:.2f}" + + +def _label(result: BenchmarkResult) -> str: + if result.kind == "query": + return ( + result.fullname.split("::")[-1] + .replace("test_", "") + .replace("_", " ") + .capitalize() + ) + return f"{result.locality.capitalize()} index ({result.workers} workers)" + + +def build_table( + branch_name: str, + branch: dict[str, BenchmarkResult], + main: dict[str, BenchmarkResult], + tag: dict[str, BenchmarkResult] | None = None, +) -> str: + tag = tag or {} + all_keys = sorted( + set(branch) | set(main) | set(tag), + key=lambda x: (0 if "index" in x else 1 if "query" in x else 2, x), + ) + labels = [_label(branch.get(k) or main.get(k) or tag.get(k)) for k in all_keys] + + col_sep = " | " + header = "| |" + col_sep.join(f" **{label}** " for label in labels) + " |" + divider = "|-|" + "|".join("---" for _ in all_keys) + "|" + + def row(name: str, results: dict[str, BenchmarkResult]) -> str: + cells = [_fmt(results[k]) if k in results else "โ€”" for k in all_keys] + return "| **" + name + "** |" + col_sep.join(f" {c} " for c in cells) + " |" + + def ratio_row(label: str, ref: dict[str, BenchmarkResult]) -> str: + cells = [ + _ratio(branch[k], ref[k]) if k in branch and k in ref else "โ€”" + for k in all_keys + ] + return "| *" + label + "* |" + col_sep.join(f" {c} " for c in cells) + " |" + + lines = [ + "## Benchmark Results", + "", + header, + divider, + row(branch_name, branch), + row("main", main), + divider.replace("-", ""), + ratio_row(f"{branch_name} vs main ratio", main), + "", + "> `median (mean ยฑ std)`", + "> ", + "> ๐Ÿ”ด Slower   โšช No change   ๐ŸŸข Faster", + ] + return "\n".join(lines) + + +def _parser() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("-b", "--branch", required=True, help="PR branch to benchmark") + parser.add_argument( + "-o", + "--output-dir", + default="benchmarks", + type=Path, + help="Output directory to save benchmarks to", + ) + return parser.parse_args() + + +def _sanitize(s: str) -> str: + return s.replace("/", "-") + + +def run_benchmark(git: Git, branch: str, out_dir: Path) -> None: + """Perform benchmarking. + + Args: + git: Representation of current git repository for benchmarking + branch: Feature branch to benchmark + out_dir: Output directory to save benchmarks to + """ + + tag = git.last_tag() + targets = {branch: branch, "main": "main", tag: None} + + with _suppress_log_exceptions(): + for name, ref in targets.items(): + # Skip if the reference is not provided + if ref is None: + continue + git.checkout(ref) + _reset_logger() + _logger.info("Running benchmarks for '%s'", name) + + safe_name = _sanitize(name) + fname = out_dir / f"benchmark-{safe_name}.json" + if fname.exists(): + _logger.warning( + "Existing benchmarks found for %s. File will be overwritten.", fname + ) + + # Run benchmark + pytest.main( + [ + "-m", + "benchmark", + "--benchmark-save-data", + f"--benchmark-json={fname}", + "--benchmark-time-unit=ms", + "--benchmark-warmup=on", + f"{git.repo_path}/tests", + ] + ) + + +def generate_report(git: Git, branch: str, out_dir: Path) -> Path: + """Generate markdown report from benchmarks. + + Args: + git: Representation of current git repository for benchmarking + branch: Feature branch benchmarked + out_dir: Directory benchmarks are saved to / output report to + + Returns: + Path to file containing benchmark comparison table + + Raises: + AssertionError: if less than 2 benchmark files found + """ + with _suppress_log_exceptions(): + git.checkout(branch) + _reset_logger() + _logger.info("Generating benchmark report") + + files = sorted(out_dir.glob("benchmark-*.json")) + if len(files) < 2: + raise AssertionError( + "Expected 2 or more benchmark files to perform comparisons." + ) + + tag = git.last_tag() + parsed: dict[str, dict[str, BenchmarkResult]] = {} + for f in files: + if not f.exists(): + _logger.warning("File %s does not exist - skipping", f) + continue + key = f.stem.split("-")[1] + if key == tag: + pass # keep as tag name + elif key != "main": + key = branch + parsed[key] = parse_file(f) + + if tag not in parsed: + _logger.warning("Tag '%s' not found in benchmark files.", tag) + + report_contents = build_table( + branch, + parsed[branch], + parsed["main"], + None, # parsed.get(tag) + ) + dt = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M") + report_file = out_dir / f"benchmark-{_sanitize(branch)}-{dt}.md" + report_file.write_text(report_contents) + _logger.info("Report written to %s", report_file) + + return report_file + + +def main() -> None: + args = _parser() + args.output_dir.mkdir(parents=True, exist_ok=True) + + with Git() as git: + run_benchmark(git=git, branch=args.branch, out_dir=args.output_dir) + report_file = generate_report( + git=git, branch=args.branch, out_dir=args.output_dir + ) + + if "GITHUB_OUTPUT" in os.environ: + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write(f"report_file={report_file}\n") + + +if __name__ == "__main__": + main() diff --git a/uv.lock b/uv.lock index 8b75f83..1a52de3 100644 --- a/uv.lock +++ b/uv.lock @@ -99,7 +99,9 @@ dependencies = [ { name = "s3transfer" }, ] sdist = { url = "https://files.pythonhosted.org/packages/67/2f/c4159fa45079b41f11ad17d8c5df8e1d10169b94d1e4240df5be116d3f0a/boto3-1.43.12.tar.gz", hash = "sha256:4a60cdf02c52cb0a60f8dbc986142ce2c31e87e3df1438ffe6755b83008f3e4e", size = 113142, upload-time = "2026-05-20T19:38:13.163Z" } - +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/35/b7ab4b6977811f9887405e24460640033c22f4515cf1e904480710bd6296/boto3-1.43.12-py3-none-any.whl", hash = "sha256:685c3e6093455623bfc22dac55b4946ea243095252f7f9c11a99d84b38033bcf", size = 140537, upload-time = "2026-05-20T19:38:09.995Z" }, +] [[package]] name = "botocore" From d72905367c25915e4de3f34578dfe49900c660b4 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Fri, 29 May 2026 11:02:33 -0400 Subject: [PATCH 8/9] Re-add benchmark CI reporting - Rename coverage.yaml -> report.yaml and expand to handle both CI (coverage) and Benchmark results in a single workflow - Benchmark workflow: fix undefined matrix reference in artifact name, add PR number recording, record PR number and report file as artifacts - scripts/benchmark.py: add -f/--output-file flag to allow specifying a fixed output filename for CI artifact consumption - Report workflow conditionally downloads the correct artifact based on which upstream workflow triggered it, posts coverage comment for CI and creates/updates comment for benchmarks --- .github/workflows/benchmark.yaml | 15 ++++++-- .github/workflows/coverage.yaml | 33 ---------------- .github/workflows/report.yaml | 64 ++++++++++++++++++++++++++++++++ scripts/benchmark.py | 22 +++++++++-- 4 files changed, 94 insertions(+), 40 deletions(-) delete mode 100644 .github/workflows/coverage.yaml create mode 100644 .github/workflows/report.yaml diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 5649216..3b4635d 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -1,5 +1,8 @@ name: Benchmark +permissions: + contents: read + on: pull_request: branches: ["main"] @@ -20,8 +23,14 @@ jobs: - name: Run benchmarks id: run-benchmarks run: | - uv run python scripts/benchmark.py --branch $BRANCH_NAME + uv run python scripts/benchmark.py -b $BRANCH_NAME -o . -f benchmarks.md + - name: Record pr number + run: | + echo "${{ github.event.number }}" > pr-number.txt - uses: actions/upload-artifact@v7 with: - name: benchmark-${{ matrix.target.name }} - path: ${{ steps.run-benchmarks.outputs.report_file }} + name: benchmark + retention-days: 7 + path: | + benchmarks.md + pr-number.txt diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml deleted file mode 100644 index 5083e10..0000000 --- a/.github/workflows/coverage.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: Coverage report - -on: - workflow_run: - workflows: ["CI"] - types: [completed] - -permissions: - pull-requests: write - actions: read - -jobs: - coverage: - if: >- - github.event.workflow_run.event == 'pull_request' && - github.event.workflow_run.conclusion == 'success' - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v8 - with: - name: coverage - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - id: pr - run: echo "number=$(cat pr-number.txt)" >> "$GITHUB_OUTPUT" - - - name: Pytest coverage comment - uses: MishaKav/pytest-coverage-comment@v1 - with: - issue-number: ${{ steps.pr.outputs.number }} - pytest-xml-coverage-path: ./coverage.xml - junitxml-path: ./pytest.xml diff --git a/.github/workflows/report.yaml b/.github/workflows/report.yaml new file mode 100644 index 0000000..49eee9e --- /dev/null +++ b/.github/workflows/report.yaml @@ -0,0 +1,64 @@ +name: Report + +on: + workflow_run: + workflows: ["CI", "Benchmark"] + types: [completed] + +permissions: + pull-requests: write + actions: read + +jobs: + report: + if: >- + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + runs-on: ubuntu-latest + steps: + - name: Download CI artifact + if: github.event.workflow_run.name == 'CI' + continue-on-error: true + uses: actions/download-artifact@v8 + with: + name: coverage + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Download Benchmark artifact + if: github.event.workflow_run.name == 'Benchmark' + continue-on-error: true + uses: actions/download-artifact@v8 + with: + name: benchmark + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - id: pr + run: echo "number=$(cat pr-number.txt)" >> "$GITHUB_OUTPUT" + + - name: Pytest coverage comment + if: github.event.workflow_run.name == 'CI' + uses: MishaKav/pytest-coverage-comment@v1 + with: + issue-number: ${{ steps.pr.outputs.number }} + pytest-xml-coverage-path: ./coverage.xml + junitxml-path: ./pytest.xml + + - name: Benchmark find comment + if: github.event.workflow_run.name == 'Benchmark' + uses: peter-evans/find-comment@v3 + id: fc + with: + issue-number: ${{ steps.pr.outputs.number }} + comment-author: "github-actions[bot]" + body-includes: "Benchmark Results" + + - name: Benchmark create / update comment + if: github.event.workflow_run.name == 'Benchmark' + uses: peter-evans/create-or-update-comment@v5 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ steps.pr.outputs.number }} + body-path: "benchmarks.md" + edit-mode: replace diff --git a/scripts/benchmark.py b/scripts/benchmark.py index 2ae13b3..a92e049 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -245,6 +245,13 @@ def _parser() -> argparse.Namespace: type=Path, help="Output directory to save benchmarks to", ) + parser.add_argument( + "-f", + "--output-file", + required=False, + type=str, + help="Output file name", + ) return parser.parse_args() @@ -294,7 +301,9 @@ def run_benchmark(git: Git, branch: str, out_dir: Path) -> None: ) -def generate_report(git: Git, branch: str, out_dir: Path) -> Path: +def generate_report( + git: Git, branch: str, out_dir: Path, out_fname: str | None = None +) -> Path: """Generate markdown report from benchmarks. Args: @@ -341,8 +350,10 @@ def generate_report(git: Git, branch: str, out_dir: Path) -> Path: parsed["main"], None, # parsed.get(tag) ) - dt = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M") - report_file = out_dir / f"benchmark-{_sanitize(branch)}-{dt}.md" + if out_fname is None: + dt = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M") + out_fname = f"benchmark-{_sanitize(branch)}-{dt}.md" + report_file = out_dir / out_fname report_file.write_text(report_contents) _logger.info("Report written to %s", report_file) @@ -356,7 +367,10 @@ def main() -> None: with Git() as git: run_benchmark(git=git, branch=args.branch, out_dir=args.output_dir) report_file = generate_report( - git=git, branch=args.branch, out_dir=args.output_dir + git=git, + branch=args.branch, + out_dir=args.output_dir, + out_fname=args.output_file, ) if "GITHUB_OUTPUT" in os.environ: From d98efc8cf8cd24a351f04f2bfd879606abe0ae84 Mon Sep 17 00:00:00 2001 From: Jason Kai <21226986+kaitj@users.noreply.github.com> Date: Fri, 29 May 2026 13:26:16 -0400 Subject: [PATCH 9/9] Add configurable threshold for benchmark ratio - Add --threshold/-t flag to benchmark script (default 0.05) to define the minimum ratio difference before marking a change as slower/faster - Rewrite _ratio to compare % change against threshold - Display 3 decimal places in ratio output to surface small differences --- .github/workflows/benchmark.yaml | 2 +- scripts/benchmark.py | 34 +++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 3b4635d..8ef00bb 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -23,7 +23,7 @@ jobs: - name: Run benchmarks id: run-benchmarks run: | - uv run python scripts/benchmark.py -b $BRANCH_NAME -o . -f benchmarks.md + uv run python scripts/benchmark.py -b $BRANCH_NAME -o . -f benchmarks.md -t 0.05 - name: Record pr number run: | echo "${{ github.event.number }}" > pr-number.txt diff --git a/scripts/benchmark.py b/scripts/benchmark.py index a92e049..b324ee0 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -5,7 +5,8 @@ """Perform benchmarking of bids2table against last tag, main and feature branches. Run with: - uv run --with scripts/benchmark.py -b [-o ] + uv run --with scripts/benchmark.py \ + -b [-o ] [-f ] [-t ] """ from __future__ import annotations @@ -173,10 +174,15 @@ def _fmt(res: BenchmarkResult) -> str: return f"{median.value:.3f} ({mean:.3f} ยฑ {stddev:.3f}) {median.unit}" -def _ratio(pr: BenchmarkResult, ref: BenchmarkResult) -> str: +def _ratio(pr: BenchmarkResult, ref: BenchmarkResult, threshold: float) -> str: ratio = pr.median / ref.median - icon = "๐Ÿ”ด" if ratio > 1 else "๐ŸŸข" if ratio < 1 else "โšช" - return f"{icon} {ratio:.2f}" + if abs(1 - ratio) <= threshold: + icon = "โšช" + elif ratio > 1: + icon = "๐Ÿ”ด" + else: + icon = "๐ŸŸข" + return f"{icon} {ratio:.3f}" def _label(result: BenchmarkResult) -> str: @@ -191,6 +197,7 @@ def _label(result: BenchmarkResult) -> str: def build_table( + threshold: float, branch_name: str, branch: dict[str, BenchmarkResult], main: dict[str, BenchmarkResult], @@ -213,7 +220,7 @@ def row(name: str, results: dict[str, BenchmarkResult]) -> str: def ratio_row(label: str, ref: dict[str, BenchmarkResult]) -> str: cells = [ - _ratio(branch[k], ref[k]) if k in branch and k in ref else "โ€”" + _ratio(branch[k], ref[k], threshold) if k in branch and k in ref else "โ€”" for k in all_keys ] return "| *" + label + "* |" + col_sep.join(f" {c} " for c in cells) + " |" @@ -230,7 +237,7 @@ def ratio_row(label: str, ref: dict[str, BenchmarkResult]) -> str: "", "> `median (mean ยฑ std)`", "> ", - "> ๐Ÿ”ด Slower   โšช No change   ๐ŸŸข Faster", + f"> ๐Ÿ”ด Slower   โšช No change (<{threshold * 100:.0f} %)   ๐ŸŸข Faster", ] return "\n".join(lines) @@ -252,6 +259,13 @@ def _parser() -> argparse.Namespace: type=str, help="Output file name", ) + parser.add_argument( + "-t", + "--threshold", + default=0.05, + type=float, + help="Threshold for performance to be considered unchanged", + ) return parser.parse_args() @@ -302,14 +316,16 @@ def run_benchmark(git: Git, branch: str, out_dir: Path) -> None: def generate_report( - git: Git, branch: str, out_dir: Path, out_fname: str | None = None + git: Git, branch: str, threshold: float, out_dir: Path, out_fname: str | None = None ) -> Path: """Generate markdown report from benchmarks. Args: git: Representation of current git repository for benchmarking branch: Feature branch benchmarked + threshold: Threshold for performance to be considered unchanged out_dir: Directory benchmarks are saved to / output report to + out_fname: Benchmark output file name Returns: Path to file containing benchmark comparison table @@ -345,6 +361,7 @@ def generate_report( _logger.warning("Tag '%s' not found in benchmark files.", tag) report_contents = build_table( + threshold, branch, parsed[branch], parsed["main"], @@ -362,6 +379,8 @@ def generate_report( def main() -> None: args = _parser() + if abs(args.threshold) > 1: + raise ValueError(f"Threshold should be between 0 and 1, got: {args.threshold}") args.output_dir.mkdir(parents=True, exist_ok=True) with Git() as git: @@ -369,6 +388,7 @@ def main() -> None: report_file = generate_report( git=git, branch=args.branch, + threshold=args.threshold, out_dir=args.output_dir, out_fname=args.output_file, )