From 382006a826cde26bd260f04d3ec1c4281e64eeec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:42:52 +0100 Subject: [PATCH 1/7] update black and create separate file for colab header --- .pre-commit-config.yaml | 2 +- hooks/check_badges.py | 173 ++++++++++++++++++++++++++++------------ hooks/colab_header.py | 119 +++++++++++++++++++++++++++ 3 files changed, 240 insertions(+), 54 deletions(-) create mode 100644 hooks/colab_header.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c29f34..ba10bf0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_stages: [pre-commit] repos: - repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.11.0 + rev: 26.1.0 hooks: - id: black diff --git a/hooks/check_badges.py b/hooks/check_badges.py index dc6f19a..0ef15f7 100755 --- a/hooks/check_badges.py +++ b/hooks/check_badges.py @@ -1,24 +1,59 @@ #!/usr/bin/env python3 # pylint: disable=missing-function-docstring """ -Checks whether notebooks contain badges.""" +Checks whether notebooks contain badges and ensures a consistent Colab header. +""" + from __future__ import annotations import argparse from collections.abc import Sequence +import re import nbformat +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P[^'\"]+)['\"]\s*,\s*" + r"['\"](?P
[^'\"]+)['\"]\s*\)" +) -def _header_cell_text(repo_name, version): - if version is None: - version = "" - return f"""import os, sys -os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract versions from both arguments: + pip_install_on_colab('repo-examples{v}', 'repo{v}') + + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + m = _PIP_INSTALL_RE.search(cell_source) + if not m: + return None, None + + examples_pkg = m.group("examples") + main_pkg = m.group("main") + + if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( + repo_name + ): + return None, None + + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" HEADER_KEY_PATTERNS = [ @@ -28,58 +63,76 @@ def _header_cell_text(repo_name, version): ] -def is_colab_header(cell_source: str) -> bool: - """Return True if the cell looks like a Colab header.""" +def build_header(repo_name: str, version: str) -> str: + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}') +""" + + +def looks_like_header(cell_source: str) -> bool: return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) -def check_colab_header(notebook_path, repo_name, fix, version): - """Check Colab-magic cell and fix if is misspelled, in wrong position or not exists""" +def check_colab_header(notebook_path, repo_name, fix, hook_version): nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) - header_index = None - correct_header = _header_cell_text(repo_name, version) - modified = False - - if not fix: - if nb.cells[2].cell_type != "code" or nb.cells[2].source != correct_header: - raise ValueError("Third cell does not contain correct header") - return modified + if len(nb.cells) < 3: + raise ValueError("Notebook should have at least 3 cells") + # Find existing header if present + header_index = None for idx, cell in enumerate(nb.cells): - if cell.cell_type == "code" and is_colab_header(cell.source): + if cell.cell_type == "code" and looks_like_header(cell.source): header_index = idx break - if header_index is not None: - if nb.cells[header_index].source != correct_header: - nb.cells[header_index].source = correct_header - modified = True - if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True - else: - new_cell = nbformat.v4.new_code_cell(correct_header) - nb.cells.insert(2, new_cell) + # Build final header + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return True + + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version is None or main_version is None: + raise ValueError("Colab header is malformed") + + if examples_version != main_version: + raise ValueError( + f"Version mismatch in header: {examples_version!r} != {main_version!r}" + ) + + final_version = resolve_version(main_version, hook_version) + header_source = build_header(repo_name, final_version) + + modified = False + + if header_cell.source != header_source: + if not fix: + raise ValueError("Colab header is incorrect") + header_cell.source = header_source + modified = True + + if header_index != 2: + nb.cells.insert(2, nb.cells.pop(header_index)) modified = True + if modified: nbformat.write(nb, notebook_path) + return modified -def print_hook_summary(reformatted_files, unchanged_files): - """Print a Black-style summary.""" - for f in reformatted_files: - print(f"\nreformatted {f}") - - total_ref = len(reformatted_files) - total_unchanged = len(unchanged_files) - if total_ref > 0: - print("\nAll done! ✨ 🍰 ✨") - print( - f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " - f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." - ) +# ------------------------------------------------------------------- +# Badge checks +# ------------------------------------------------------------------- def _preview_badge_markdown(absolute_path, repo_name): @@ -110,7 +163,6 @@ def _colab_badge_markdown(absolute_path, repo_name): def test_notebook_has_at_least_three_cells(notebook_filename): - """checks if all notebooks have at least three cells""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if len(nb.cells) < 3: @@ -118,14 +170,16 @@ def test_notebook_has_at_least_three_cells(notebook_filename): def test_first_cell_contains_three_badges(notebook_filename, repo_name): - """checks if all notebooks feature three badges in the first cell""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) + if nb.cells[0].cell_type != "markdown": raise ValueError("First cell is not a markdown cell") + lines = nb.cells[0].source.split("\n") if len(lines) != 3: raise ValueError("First cell does not contain exactly 3 lines (badges)") + if lines[0] != _preview_badge_markdown(notebook_filename, repo_name): raise ValueError("First badge does not match Github preview badge") if lines[1] != _mybinder_badge_markdown(notebook_filename, repo_name): @@ -135,32 +189,45 @@ def test_first_cell_contains_three_badges(notebook_filename, repo_name): def test_second_cell_is_a_markdown_cell(notebook_filename): - """checks if all notebooks have their second cell with some markdown - (hopefully clarifying what the example is about)""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if nb.cells[1].cell_type != "markdown": raise ValueError("Second cell is not a markdown cell") +def print_hook_summary(reformatted_files, unchanged_files): + for f in reformatted_files: + print(f"\nreformatted {f}") + + total_ref = len(reformatted_files) + total_unchanged = len(unchanged_files) + if total_ref > 0: + print("\nAll done! ✨ 🍰 ✨") + print( + f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " + f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." + ) + + def main(argv: Sequence[str] | None = None) -> int: - """collect failed notebook checks""" parser = argparse.ArgumentParser() parser.add_argument("--repo-name") parser.add_argument("--fix-header", action="store_true") parser.add_argument("--pip-install-on-colab-version") parser.add_argument("filenames", nargs="*", help="Filenames to check.") args = parser.parse_args(argv) + failed_files = False reformatted_files = [] unchanged_files = [] + for filename in args.filenames: try: modified = check_colab_header( filename, repo_name=args.repo_name, fix=args.fix_header, - version=args.pip_install_on_colab_version, + hook_version=args.pip_install_on_colab_version, ) if modified: reformatted_files.append(str(filename)) @@ -169,11 +236,11 @@ def main(argv: Sequence[str] | None = None) -> int: except ValueError as exc: print(f"[ERROR] {filename}: {exc}") failed_files = True + try: test_notebook_has_at_least_three_cells(filename) test_first_cell_contains_three_badges(filename, repo_name=args.repo_name) test_second_cell_is_a_markdown_cell(filename) - except ValueError as exc: print(f"[ERROR] {filename}: {exc}") failed_files = True diff --git a/hooks/colab_header.py b/hooks/colab_header.py new file mode 100644 index 0000000..79e9263 --- /dev/null +++ b/hooks/colab_header.py @@ -0,0 +1,119 @@ +# colab_header.py +from __future__ import annotations + +import re +import nbformat + +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P[^'\"]+)['\"]\s*,\s*" + r"['\"](?P
[^'\"]+)['\"]\s*\)" +) + +HEADER_KEY_PATTERNS = [ + "install open-atmos-jupyter-utils", + "google.colab", + "pip_install_on_colab", +] + + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract version info from cell source + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + m = _PIP_INSTALL_RE.search(cell_source) + if not m: + return None, None + + examples_pkg = m.group("examples") + main_pkg = m.group("main") + + if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( + repo_name + ): + return None, None + + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" + + +def build_header(repo_name: str, version: str) -> str: + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + + +def looks_like_header(cell_source: str) -> bool: + return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) + + +def check_colab_header(notebook_path, repo_name, fix, hook_version): + nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) + + if len(nb.cells) < 3: + raise ValueError("Notebook should have at least 3 cells") + + # Find existing header if present + header_index = None + for idx, cell in enumerate(nb.cells): + if cell.cell_type == "code" and looks_like_header(cell.source): + header_index = idx + break + + # If header doesn't exist, create it + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return True + + # If header exists, validate and optionally fix + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version is None or main_version is None: + raise ValueError("Colab header is malformed") + + if examples_version != main_version: + raise ValueError( + f"Version mismatch in header: {examples_version!r} != {main_version!r}" + ) + + final_version = resolve_version(main_version, hook_version) + header_source = build_header(repo_name, final_version) + + modified = False + + if header_cell.source != header_source: + if not fix: + raise ValueError("Colab header is incorrect") + header_cell.source = header_source + modified = True + + if header_index != 2: + nb.cells.insert(2, nb.cells.pop(header_index)) + modified = True + + if modified: + nbformat.write(nb, notebook_path) + + return modified From 4cb386300a93db8f62c82bd9c2b9148ea9fd1382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:43:45 +0100 Subject: [PATCH 2/7] run new black --- hooks/check_notebooks.py | 7 +++---- test_files/template.ipynb | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index 870a124..b7d6d3e 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """ Checks notebook execution status for Jupyter notebooks""" + from __future__ import annotations import argparse @@ -64,10 +65,8 @@ def test_show_anim_used_instead_of_matplotlib(notebook): if "show_anim(" in cell.source: show_anim_used = True if matplot_used and not show_anim_used: - raise AssertionError( - """if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""" - ) + raise AssertionError("""if using matplotlib for animations, + please use open_atmos_jupyter_utils.show_anim()""") def test_jetbrains_bug_py_66491(notebook): diff --git a/test_files/template.ipynb b/test_files/template.ipynb index be45e76..5b7ef4e 100644 --- a/test_files/template.ipynb +++ b/test_files/template.ipynb @@ -29,11 +29,11 @@ "outputs": [], "source": [ "import os, sys\n", - "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS\n", + "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue'\n", "if 'google.colab' in sys.modules:\n", " !pip --quiet install open-atmos-jupyter-utils\n", " from open_atmos_jupyter_utils import pip_install_on_colab\n", - " pip_install_on_colab('devops_tests-examples', 'devops_tests')" + " pip_install_on_colab('devops_tests-examples', 'devops_tests')\n" ] } ], From 5443b729800ee59280ead962978a40851e459d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:54:33 +0100 Subject: [PATCH 3/7] update check-badges file --- hooks/check_badges.py | 130 +----------------------------------------- 1 file changed, 2 insertions(+), 128 deletions(-) diff --git a/hooks/check_badges.py b/hooks/check_badges.py index 0ef15f7..b714620 100755 --- a/hooks/check_badges.py +++ b/hooks/check_badges.py @@ -1,138 +1,12 @@ -#!/usr/bin/env python3 -# pylint: disable=missing-function-docstring -""" -Checks whether notebooks contain badges and ensures a consistent Colab header. -""" - +# notebook_checks.py from __future__ import annotations import argparse from collections.abc import Sequence -import re import nbformat -_PIP_INSTALL_RE = re.compile( - r"pip_install_on_colab\(\s*" - r"['\"](?P[^'\"]+)['\"]\s*,\s*" - r"['\"](?P
[^'\"]+)['\"]\s*\)" -) - - -def extract_versions(cell_source: str, repo_name: str): - """ - Extract versions from both arguments: - pip_install_on_colab('repo-examples{v}', 'repo{v}') - - Returns: - (examples_version, main_version) or (None, None) if invalid. - """ - m = _PIP_INSTALL_RE.search(cell_source) - if not m: - return None, None - - examples_pkg = m.group("examples") - main_pkg = m.group("main") - - if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( - repo_name - ): - return None, None - - return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] - - -def resolve_version(existing: str | None, hook_version: str | None) -> str: - """ - Precedence: - 1. Version in notebook - 2. Hook version - 3. No version - """ - if existing: - return existing - if hook_version: - return hook_version - return "" - - -HEADER_KEY_PATTERNS = [ - "install open-atmos-jupyter-utils", - "google.colab", - "pip_install_on_colab", -] - - -def build_header(repo_name: str, version: str) -> str: - return f"""import os, sys -os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}') -""" - - -def looks_like_header(cell_source: str) -> bool: - return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) - - -def check_colab_header(notebook_path, repo_name, fix, hook_version): - nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) - - if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 3 cells") - - # Find existing header if present - header_index = None - for idx, cell in enumerate(nb.cells): - if cell.cell_type == "code" and looks_like_header(cell.source): - header_index = idx - break - - # Build final header - if header_index is None: - final_version = resolve_version(None, hook_version) - header_source = build_header(repo_name, final_version) - nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) - nbformat.write(nb, notebook_path) - return True - - header_cell = nb.cells[header_index] - examples_version, main_version = extract_versions(header_cell.source, repo_name) - - if examples_version is None or main_version is None: - raise ValueError("Colab header is malformed") - - if examples_version != main_version: - raise ValueError( - f"Version mismatch in header: {examples_version!r} != {main_version!r}" - ) - - final_version = resolve_version(main_version, hook_version) - header_source = build_header(repo_name, final_version) - - modified = False - - if header_cell.source != header_source: - if not fix: - raise ValueError("Colab header is incorrect") - header_cell.source = header_source - modified = True - - if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True - - if modified: - nbformat.write(nb, notebook_path) - - return modified - - -# ------------------------------------------------------------------- -# Badge checks -# ------------------------------------------------------------------- +from .colab_header import check_colab_header def _preview_badge_markdown(absolute_path, repo_name): From 02fdfee7f38986c551ba3cb578c1c9b52bd7c01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 15:55:01 +0100 Subject: [PATCH 4/7] rerun notebook --- test_files/template.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_files/template.ipynb b/test_files/template.ipynb index 5b7ef4e..1d40fbe 100644 --- a/test_files/template.ipynb +++ b/test_files/template.ipynb @@ -22,18 +22,18 @@ "id": "72ccd23c0ab9f08e", "metadata": { "ExecuteTime": { - "end_time": "2024-10-26T12:29:32.925592Z", - "start_time": "2024-10-26T12:29:32.919920Z" + "end_time": "2026-01-27T14:51:07.477258Z", + "start_time": "2026-01-27T14:51:07.473160Z" } }, "outputs": [], "source": [ "import os, sys\n", - "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue'\n", + "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS\n", "if 'google.colab' in sys.modules:\n", " !pip --quiet install open-atmos-jupyter-utils\n", " from open_atmos_jupyter_utils import pip_install_on_colab\n", - " pip_install_on_colab('devops_tests-examples', 'devops_tests')\n" + " pip_install_on_colab('devops_tests-examples', 'devops_tests')" ] } ], From f8cb79717ea65998267cb7dfaea3808aad5721bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 16:23:13 +0100 Subject: [PATCH 5/7] rename check --- .pre-commit-config.yaml | 6 +-- .pre-commit-hooks.yaml | 8 ++-- ...=> check_notebook_open_atmos_structure.py} | 2 +- hooks/colab_header.py | 41 ++++++++----------- pyproject.toml | 2 +- 5 files changed, 27 insertions(+), 32 deletions(-) rename hooks/{check_badges.py => check_notebook_open_atmos_structure.py} (97%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba10bf0..a65d07a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,9 +28,9 @@ repos: language: python types: [jupyter] - - id: check-badges - name: check badges - entry: check_badges + - id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 922a4c2..6695bde 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -6,10 +6,10 @@ stages: [pre-commit] types: [jupyter] -- id: check-badges - name: check badges - description: check badges in Jupyter Notebook - entry: check_badges +- id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure + description: check notebook has open-atmos structure language: python stages: [pre-commit] types: [jupyter] diff --git a/hooks/check_badges.py b/hooks/check_notebook_open_atmos_structure.py similarity index 97% rename from hooks/check_badges.py rename to hooks/check_notebook_open_atmos_structure.py index b714620..965edd7 100755 --- a/hooks/check_badges.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,4 +1,4 @@ -# notebook_checks.py +# pre-commit hook checking if badges in first cell match pattern used in open-atmos Jupyter Notebooks from __future__ import annotations import argparse diff --git a/hooks/colab_header.py b/hooks/colab_header.py index 79e9263..1206b31 100644 --- a/hooks/colab_header.py +++ b/hooks/colab_header.py @@ -1,4 +1,4 @@ -# colab_header.py +# Extract version from existing header and check if header is correct from __future__ import annotations import re @@ -10,12 +10,6 @@ r"['\"](?P
[^'\"]+)['\"]\s*\)" ) -HEADER_KEY_PATTERNS = [ - "install open-atmos-jupyter-utils", - "google.colab", - "pip_install_on_colab", -] - def extract_versions(cell_source: str, repo_name: str): """ @@ -23,18 +17,17 @@ def extract_versions(cell_source: str, repo_name: str): Returns: (examples_version, main_version) or (None, None) if invalid. """ - m = _PIP_INSTALL_RE.search(cell_source) - if not m: + text_found = _PIP_INSTALL_RE.search(cell_source) + if not text_found: return None, None - examples_pkg = m.group("examples") - main_pkg = m.group("main") + examples_pkg = text_found.group("examples") + main_pkg = text_found.group("main") - if not examples_pkg.startswith(f"{repo_name}-examples") or not main_pkg.startswith( - repo_name + if not main_pkg.startswith(repo_name) or not examples_pkg.startswith( + f"{repo_name}-examples" ): return None, None - return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] @@ -61,8 +54,15 @@ def build_header(repo_name: str, version: str) -> str: pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" +HEADER_REQUIRED_PATTERNS = [ + "google.colab", + "open-atmos-jupyter-utils", + "pip_install_on_colab", +] + + def looks_like_header(cell_source: str) -> bool: - return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) + return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) def check_colab_header(notebook_path, repo_name, fix, hook_version): @@ -71,14 +71,12 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if len(nb.cells) < 3: raise ValueError("Notebook should have at least 3 cells") - # Find existing header if present header_index = None for idx, cell in enumerate(nb.cells): if cell.cell_type == "code" and looks_like_header(cell.source): header_index = idx break - # If header doesn't exist, create it if header_index is None: final_version = resolve_version(None, hook_version) header_source = build_header(repo_name, final_version) @@ -86,7 +84,6 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): nbformat.write(nb, notebook_path) return True - # If header exists, validate and optionally fix header_cell = nb.cells[header_index] examples_version, main_version = extract_versions(header_cell.source, repo_name) @@ -99,14 +96,13 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): ) final_version = resolve_version(main_version, hook_version) - header_source = build_header(repo_name, final_version) + correct_header = build_header(repo_name, final_version) modified = False - - if header_cell.source != header_source: + if header_cell.source != correct_header: if not fix: raise ValueError("Colab header is incorrect") - header_cell.source = header_source + header_cell.source = correct_header modified = True if header_index != 2: @@ -115,5 +111,4 @@ def check_colab_header(notebook_path, repo_name, fix, hook_version): if modified: nbformat.write(nb, notebook_path) - return modified diff --git a/pyproject.toml b/pyproject.toml index e3dfb25..3d1851a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,4 +28,4 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" -check_badges = "hooks.check_badges:main" +check_badges = "hooks.check_notebook_open_atmos_structure:main" From 59b3e8cb95405ca1cd5bdbd1f7d7790ececba375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 16:38:36 +0100 Subject: [PATCH 6/7] change name and entry --- .pre-commit-config.yaml | 2 +- hooks/check_notebook_open_atmos_structure.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a65d07a..a1e613f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: - id: check-notebook-open-atmos-structure name: check notebook has open-atmos structure - entry: check_notebook_open_atmos_structure + entry: python -m hooks.check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index 965edd7..9f4f28b 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + # pre-commit hook checking if badges in first cell match pattern used in open-atmos Jupyter Notebooks from __future__ import annotations From 83ed0231180dd1370026c07cdab004ad4079acdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agnieszka=20=C5=BBaba?= Date: Tue, 27 Jan 2026 16:57:50 +0100 Subject: [PATCH 7/7] add missing docstrings --- hooks/check_notebook_open_atmos_structure.py | 16 +++++++++++++--- ...olab_header.py => open_atmos_colab_header.py} | 6 +++++- 2 files changed, 18 insertions(+), 4 deletions(-) rename hooks/{colab_header.py => open_atmos_colab_header.py} (93%) diff --git a/hooks/check_notebook_open_atmos_structure.py b/hooks/check_notebook_open_atmos_structure.py index 9f4f28b..0d465ae 100755 --- a/hooks/check_notebook_open_atmos_structure.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 +"""pre-commit hook checking if badges in first cell +match pattern used in open-atmos Jupyter Notebooks""" -# pre-commit hook checking if badges in first cell match pattern used in open-atmos Jupyter Notebooks from __future__ import annotations import argparse @@ -8,10 +9,11 @@ import nbformat -from .colab_header import check_colab_header +from .open_atmos_colab_header import check_colab_header def _preview_badge_markdown(absolute_path, repo_name): + """Markdown preview badge structure used in open-atmos notebooks""" svg_badge_url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" @@ -21,6 +23,7 @@ def _preview_badge_markdown(absolute_path, repo_name): def _mybinder_badge_markdown(absolute_path, repo_name): + """mybinder badge structure used in open-atmos notebooks""" svg_badge_url = "https://mybinder.org/badge_logo.svg" link = ( f"https://mybinder.org/v2/gh/open-atmos/{repo_name}.git/main?urlpath=lab/tree/" @@ -30,6 +33,7 @@ def _mybinder_badge_markdown(absolute_path, repo_name): def _colab_badge_markdown(absolute_path, repo_name): + """colab badge structure used in open-atmos notebooks""" svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( f"https://colab.research.google.com/github/open-atmos/{repo_name}/blob/main/" @@ -39,13 +43,15 @@ def _colab_badge_markdown(absolute_path, repo_name): def test_notebook_has_at_least_three_cells(notebook_filename): + """check if notebook has enough cells to have all required ones""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 4 cells") + raise ValueError("Notebook should have at least 3 cells") def test_first_cell_contains_three_badges(notebook_filename, repo_name): + """check if badges are in the first cell and match patterns""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) @@ -65,6 +71,8 @@ def test_first_cell_contains_three_badges(notebook_filename, repo_name): def test_second_cell_is_a_markdown_cell(notebook_filename): + """Test if second cell is a markdown cell + it should contain description for the notebook""" with open(notebook_filename, encoding="utf8") as fp: nb = nbformat.read(fp, nbformat.NO_CONVERT) if nb.cells[1].cell_type != "markdown": @@ -72,6 +80,7 @@ def test_second_cell_is_a_markdown_cell(notebook_filename): def print_hook_summary(reformatted_files, unchanged_files): + """Summary for the whole hook""" for f in reformatted_files: print(f"\nreformatted {f}") @@ -86,6 +95,7 @@ def print_hook_summary(reformatted_files, unchanged_files): def main(argv: Sequence[str] | None = None) -> int: + """collect arguments and run hook""" parser = argparse.ArgumentParser() parser.add_argument("--repo-name") parser.add_argument("--fix-header", action="store_true") diff --git a/hooks/colab_header.py b/hooks/open_atmos_colab_header.py similarity index 93% rename from hooks/colab_header.py rename to hooks/open_atmos_colab_header.py index 1206b31..d863fd5 100644 --- a/hooks/colab_header.py +++ b/hooks/open_atmos_colab_header.py @@ -1,4 +1,5 @@ -# Extract version from existing header and check if header is correct +"""Extract version from existing header and check if header is correct""" + from __future__ import annotations import re @@ -46,6 +47,7 @@ def resolve_version(existing: str | None, hook_version: str | None) -> str: def build_header(repo_name: str, version: str) -> str: + """required header pattern in open-atmos notebooks""" return f"""import os, sys os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS if 'google.colab' in sys.modules: @@ -62,10 +64,12 @@ def build_header(repo_name: str, version: str) -> str: def looks_like_header(cell_source: str) -> bool: + """check if the cell source looks like required header""" return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) def check_colab_header(notebook_path, repo_name, fix, hook_version): + """check if colab header is correct""" nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) if len(nb.cells) < 3: