diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c29f34..a1e613f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_stages: [pre-commit] repos: - repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.11.0 + rev: 26.1.0 hooks: - id: black @@ -28,9 +28,9 @@ repos: language: python types: [jupyter] - - id: check-badges - name: check badges - entry: check_badges + - id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: python -m hooks.check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 922a4c2..6695bde 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -6,10 +6,10 @@ stages: [pre-commit] types: [jupyter] -- id: check-badges - name: check badges - description: check badges in Jupyter Notebook - entry: check_badges +- id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure + description: check notebook has open-atmos structure language: python stages: [pre-commit] types: [jupyter] diff --git a/hooks/check_badges.py b/hooks/check_notebook_open_atmos_structure.py similarity index 63% rename from hooks/check_badges.py rename to hooks/check_notebook_open_atmos_structure.py index dc6f19a..0d465ae 100755 --- a/hooks/check_badges.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -# pylint: disable=missing-function-docstring -""" -Checks whether notebooks contain badges.""" +"""pre-commit hook checking if badges in first cell +match pattern used in open-atmos Jupyter Notebooks""" + from __future__ import annotations import argparse @@ -9,80 +9,11 @@ import nbformat - -def _header_cell_text(repo_name, version): - if version is None: - version = "" - return f"""import os, sys -os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & 
PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" - - -HEADER_KEY_PATTERNS = [ - "install open-atmos-jupyter-utils", - "google.colab", - "pip_install_on_colab", -] - - -def is_colab_header(cell_source: str) -> bool: - """Return True if the cell looks like a Colab header.""" - return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) - - -def check_colab_header(notebook_path, repo_name, fix, version): - """Check Colab-magic cell and fix if is misspelled, in wrong position or not exists""" - nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) - - header_index = None - correct_header = _header_cell_text(repo_name, version) - modified = False - - if not fix: - if nb.cells[2].cell_type != "code" or nb.cells[2].source != correct_header: - raise ValueError("Third cell does not contain correct header") - return modified - - for idx, cell in enumerate(nb.cells): - if cell.cell_type == "code" and is_colab_header(cell.source): - header_index = idx - break - - if header_index is not None: - if nb.cells[header_index].source != correct_header: - nb.cells[header_index].source = correct_header - modified = True - if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True - else: - new_cell = nbformat.v4.new_code_cell(correct_header) - nb.cells.insert(2, new_cell) - modified = True - if modified: - nbformat.write(nb, notebook_path) - return modified - - -def print_hook_summary(reformatted_files, unchanged_files): - """Print a Black-style summary.""" - for f in reformatted_files: - print(f"\nreformatted {f}") - - total_ref = len(reformatted_files) - total_unchanged = len(unchanged_files) - if total_ref > 0: - print("\nAll done! 
✨ 🍰 ✨") - print( - f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " - f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." - ) +from .open_atmos_colab_header import check_colab_header def _preview_badge_markdown(absolute_path, repo_name): + """Markdown preview badge structure used in open-atmos notebooks""" svg_badge_url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" @@ -92,6 +23,7 @@ def _preview_badge_markdown(absolute_path, repo_name): def _mybinder_badge_markdown(absolute_path, repo_name): + """mybinder badge structure used in open-atmos notebooks""" svg_badge_url = "https://mybinder.org/badge_logo.svg" link = ( f"https://mybinder.org/v2/gh/open-atmos/{repo_name}.git/main?urlpath=lab/tree/" @@ -101,6 +33,7 @@ def _mybinder_badge_markdown(absolute_path, repo_name): def _colab_badge_markdown(absolute_path, repo_name): + """colab badge structure used in open-atmos notebooks""" svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( f"https://colab.research.google.com/github/open-atmos/{repo_name}/blob/main/" @@ -110,22 +43,25 @@ def _colab_badge_markdown(absolute_path, repo_name): def test_notebook_has_at_least_three_cells(notebook_filename): - """checks if all notebooks have at least three cells""" + """check if notebook has enough cells to have all required ones""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 4 cells") + raise ValueError("Notebook should have at least 3 cells") def test_first_cell_contains_three_badges(notebook_filename, repo_name): - """checks if all notebooks feature three badges in the first cell""" + """check if badges are in the first cell and match patterns""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) + if nb.cells[0].cell_type != 
"markdown": raise ValueError("First cell is not a markdown cell") + lines = nb.cells[0].source.split("\n") if len(lines) != 3: raise ValueError("First cell does not contain exactly 3 lines (badges)") + if lines[0] != _preview_badge_markdown(notebook_filename, repo_name): raise ValueError("First badge does not match Github preview badge") if lines[1] != _mybinder_badge_markdown(notebook_filename, repo_name): @@ -135,32 +71,49 @@ def test_first_cell_contains_three_badges(notebook_filename, repo_name): def test_second_cell_is_a_markdown_cell(notebook_filename): - """checks if all notebooks have their second cell with some markdown - (hopefully clarifying what the example is about)""" + """Test if second cell is a markdown cell + it should contain description for the notebook""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if nb.cells[1].cell_type != "markdown": raise ValueError("Second cell is not a markdown cell") +def print_hook_summary(reformatted_files, unchanged_files): + """Summary for the whole hook""" + for f in reformatted_files: + print(f"\nreformatted {f}") + + total_ref = len(reformatted_files) + total_unchanged = len(unchanged_files) + if total_ref > 0: + print("\nAll done! ✨ 🍰 ✨") + print( + f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " + f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." 
+ ) + + def main(argv: Sequence[str] | None = None) -> int: - """collect failed notebook checks""" + """collect arguments and run hook""" parser = argparse.ArgumentParser() parser.add_argument("--repo-name") parser.add_argument("--fix-header", action="store_true") parser.add_argument("--pip-install-on-colab-version") parser.add_argument("filenames", nargs="*", help="Filenames to check.") args = parser.parse_args(argv) + failed_files = False reformatted_files = [] unchanged_files = [] + for filename in args.filenames: try: modified = check_colab_header( filename, repo_name=args.repo_name, fix=args.fix_header, - version=args.pip_install_on_colab_version, + hook_version=args.pip_install_on_colab_version, ) if modified: reformatted_files.append(str(filename)) @@ -169,11 +122,11 @@ def main(argv: Sequence[str] | None = None) -> int: except ValueError as exc: print(f"[ERROR] {filename}: {exc}") failed_files = True + try: test_notebook_has_at_least_three_cells(filename) test_first_cell_contains_three_badges(filename, repo_name=args.repo_name) test_second_cell_is_a_markdown_cell(filename) - except ValueError as exc: print(f"[ERROR] {filename}: {exc}") failed_files = True diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index 870a124..b7d6d3e 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """ Checks notebook execution status for Jupyter notebooks""" + from __future__ import annotations import argparse @@ -64,10 +65,8 @@ def test_show_anim_used_instead_of_matplotlib(notebook): if "show_anim(" in cell.source: show_anim_used = True if matplot_used and not show_anim_used: - raise AssertionError( - """if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""" - ) + raise AssertionError("""if using matplotlib for animations, + please use open_atmos_jupyter_utils.show_anim()""") def test_jetbrains_bug_py_66491(notebook): diff --git a/hooks/open_atmos_colab_header.py 
b/hooks/open_atmos_colab_header.py new file mode 100644 index 0000000..d863fd5 --- /dev/null +++ b/hooks/open_atmos_colab_header.py @@ -0,0 +1,118 @@ +"""Extract version from existing header and check if header is correct""" + +from __future__ import annotations + +import re +import nbformat + +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P<examples>[^'\"]+)['\"]\s*,\s*" + r"['\"](?P<main>[^'\"]+)['\"]\s*\)" +) + + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract version info from cell source + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + text_found = _PIP_INSTALL_RE.search(cell_source) + if not text_found: + return None, None + + examples_pkg = text_found.group("examples") + main_pkg = text_found.group("main") + + if not main_pkg.startswith(repo_name) or not examples_pkg.startswith( + f"{repo_name}-examples" + ): + return None, None + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" + + +def build_header(repo_name: str, version: str) -> str: + """required header pattern in open-atmos notebooks""" + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + + +HEADER_REQUIRED_PATTERNS = [ + "google.colab", + "open-atmos-jupyter-utils", + "pip_install_on_colab", +] + + +def looks_like_header(cell_source: str) -> bool: + """check if the cell source looks like required header""" + return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) + + +def check_colab_header(notebook_path, repo_name, fix, hook_version): + """check if colab header is correct""" + nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) + + if len(nb.cells) < 3: + raise ValueError("Notebook should have at least 3 cells") + + header_index = None + for idx, cell in enumerate(nb.cells): + if cell.cell_type == 
"code" and looks_like_header(cell.source): + header_index = idx + break + + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return True + + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version is None or main_version is None: + raise ValueError("Colab header is malformed") + + if examples_version != main_version: + raise ValueError( + f"Version mismatch in header: {examples_version!r} != {main_version!r}" + ) + + final_version = resolve_version(main_version, hook_version) + correct_header = build_header(repo_name, final_version) + + modified = False + if header_cell.source != correct_header: + if not fix: + raise ValueError("Colab header is incorrect") + header_cell.source = correct_header + modified = True + + if header_index != 2: + nb.cells.insert(2, nb.cells.pop(header_index)) + modified = True + + if modified: + nbformat.write(nb, notebook_path) + return modified diff --git a/pyproject.toml b/pyproject.toml index e3dfb25..3d1851a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,4 +28,4 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" -check_badges = "hooks.check_badges:main" +check_badges = "hooks.check_notebook_open_atmos_structure:main" diff --git a/test_files/template.ipynb b/test_files/template.ipynb index be45e76..1d40fbe 100644 --- a/test_files/template.ipynb +++ b/test_files/template.ipynb @@ -22,8 +22,8 @@ "id": "72ccd23c0ab9f08e", "metadata": { "ExecuteTime": { - "end_time": "2024-10-26T12:29:32.925592Z", - "start_time": "2024-10-26T12:29:32.919920Z" + "end_time": "2026-01-27T14:51:07.477258Z", + "start_time": "2026-01-27T14:51:07.473160Z" } }, "outputs": [],