Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ default_stages: [pre-commit]

repos:
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.11.0
rev: 26.1.0
hooks:
- id: black

Expand All @@ -28,9 +28,9 @@ repos:
language: python
types: [jupyter]

- id: check-badges
name: check badges
entry: check_badges
- id: check-notebook-open-atmos-structure
name: check notebook has open-atmos structure
entry: python -m hooks.check_notebook_open_atmos_structure
additional_dependencies:
- nbformat
- pytest
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
stages: [pre-commit]
types: [jupyter]

- id: check-badges
name: check badges
description: check badges in Jupyter Notebook
entry: check_badges
- id: check-notebook-open-atmos-structure
name: check notebook has open-atmos structure
entry: check_notebook_open_atmos_structure
description: check notebook has open-atmos structure
language: python
stages: [pre-commit]
types: [jupyter]
117 changes: 35 additions & 82 deletions hooks/check_badges.py → hooks/check_notebook_open_atmos_structure.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,19 @@
#!/usr/bin/env python3
# pylint: disable=missing-function-docstring
"""
Checks whether notebooks contain badges."""
"""pre-commit hook checking if badges in first cell
match pattern used in open-atmos Jupyter Notebooks"""

from __future__ import annotations

import argparse
from collections.abc import Sequence

import nbformat


def _header_cell_text(repo_name, version):
"""Build the source text of the Colab setup cell for ``repo_name``.

``version`` is appended verbatim to both package names passed to
``pip_install_on_colab``; ``None`` is treated as an empty suffix.
"""
# a missing version means "no suffix", not the literal text "None"
if version is None:
version = ""
# NOTE(review): the template lines after the `if` show no leading whitespace in this view - confirm against the repository that the generated cell indents them
return f"""import os, sys
os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS
if 'google.colab' in sys.modules:
!pip --quiet install open-atmos-jupyter-utils
from open_atmos_jupyter_utils import pip_install_on_colab
pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')"""


HEADER_KEY_PATTERNS = [
"install open-atmos-jupyter-utils",
"google.colab",
"pip_install_on_colab",
]


def is_colab_header(cell_source: str) -> bool:
"""Return True if the cell looks like a Colab header."""
return all(pat in cell_source for pat in HEADER_KEY_PATTERNS)


def check_colab_header(notebook_path, repo_name, fix, version):
    """Check the Colab-magic cell and optionally repair it.

    The header is expected to be the third cell (index 2) of the notebook
    and to match the text produced by ``_header_cell_text``.

    Args:
        notebook_path: path of the notebook to read (and rewrite when fixing).
        repo_name: repository name substituted into the header template.
        fix: when false, only verify; when true, rewrite a misspelled header,
            move a misplaced one, or insert a missing one.
        version: version suffix substituted into the header template.

    Returns:
        True if the notebook file was rewritten, False otherwise.

    Raises:
        ValueError: in check-only mode, if the third cell is absent or is not
            the expected header.
    """
    nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT)
    correct_header = _header_cell_text(repo_name, version)

    if not fix:
        # A notebook with fewer than three cells has no third cell at all;
        # report it with the same ValueError callers already handle instead
        # of letting an IndexError escape.
        if len(nb.cells) < 3:
            raise ValueError("Third cell does not contain correct header")
        third_cell = nb.cells[2]
        if third_cell.cell_type != "code" or third_cell.source != correct_header:
            raise ValueError("Third cell does not contain correct header")
        return False

    # First code cell that carries all header markers, if any.
    header_index = next(
        (
            idx
            for idx, cell in enumerate(nb.cells)
            if cell.cell_type == "code" and is_colab_header(cell.source)
        ),
        None,
    )

    modified = False
    if header_index is None:
        nb.cells.insert(2, nbformat.v4.new_code_cell(correct_header))
        modified = True
    else:
        if nb.cells[header_index].source != correct_header:
            nb.cells[header_index].source = correct_header
            modified = True
        if header_index != 2:
            nb.cells.insert(2, nb.cells.pop(header_index))
            modified = True

    # Touch the file only when something actually changed.
    if modified:
        nbformat.write(nb, notebook_path)
    return modified


def print_hook_summary(reformatted_files, unchanged_files):
"""Print a Black-style summary.

Emits one ``reformatted <name>`` line per entry of ``reformatted_files``,
an "All done!" banner when at least one file was reformatted, and a line
with the number of reformatted and unchanged files.
"""
for f in reformatted_files:
print(f"\nreformatted {f}")

total_ref = len(reformatted_files)
total_unchanged = len(unchanged_files)
# NOTE(review): indentation is not visible here - confirm whether the counts line below is meant to sit under this guard together with the banner
if total_ref > 0:
print("\nAll done! ✨ 🍰 ✨")
# "file" is pluralised independently for each of the two counts
print(
f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, "
f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged."
)
from .open_atmos_colab_header import check_colab_header


def _preview_badge_markdown(absolute_path, repo_name):
"""Markdown preview badge structure used in open-atmos notebooks"""
svg_badge_url = (
"https://img.shields.io/static/v1?"
+ "label=render%20on&logo=github&color=87ce3e&message=GitHub"
Expand All @@ -92,6 +23,7 @@ def _preview_badge_markdown(absolute_path, repo_name):


def _mybinder_badge_markdown(absolute_path, repo_name):
"""mybinder badge structure used in open-atmos notebooks"""
svg_badge_url = "https://mybinder.org/badge_logo.svg"
link = (
f"https://mybinder.org/v2/gh/open-atmos/{repo_name}.git/main?urlpath=lab/tree/"
Expand All @@ -101,6 +33,7 @@ def _mybinder_badge_markdown(absolute_path, repo_name):


def _colab_badge_markdown(absolute_path, repo_name):
"""colab badge structure used in open-atmos notebooks"""
svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg"
link = (
f"https://colab.research.google.com/github/open-atmos/{repo_name}/blob/main/"
Expand All @@ -110,22 +43,25 @@ def _colab_badge_markdown(absolute_path, repo_name):


def test_notebook_has_at_least_three_cells(notebook_filename):
    """Check that the notebook has enough cells to hold all required ones.

    Three cells are required: the badge cell, the description cell and the
    Colab header cell.

    Args:
        notebook_filename: path of the notebook file to inspect.

    Raises:
        ValueError: if the notebook has fewer than three cells.
    """
    with open(notebook_filename, encoding="utf8") as fp:
        nb = nbformat.read(fp, nbformat.NO_CONVERT)
    # The message states the same threshold the condition enforces.
    if len(nb.cells) < 3:
        raise ValueError("Notebook should have at least 3 cells")


def test_first_cell_contains_three_badges(notebook_filename, repo_name):
"""checks if all notebooks feature three badges in the first cell"""
"""check if badges are in the first cell and match patterns"""
with open(notebook_filename, encoding="utf8") as fp:
nb = nbformat.read(fp, nbformat.NO_CONVERT)

if nb.cells[0].cell_type != "markdown":
raise ValueError("First cell is not a markdown cell")

lines = nb.cells[0].source.split("\n")
if len(lines) != 3:
raise ValueError("First cell does not contain exactly 3 lines (badges)")

if lines[0] != _preview_badge_markdown(notebook_filename, repo_name):
raise ValueError("First badge does not match Github preview badge")
if lines[1] != _mybinder_badge_markdown(notebook_filename, repo_name):
Expand All @@ -135,32 +71,49 @@ def test_first_cell_contains_three_badges(notebook_filename, repo_name):


def test_second_cell_is_a_markdown_cell(notebook_filename):
    """Check that the second cell is a markdown cell.

    That cell should contain the description of the notebook (hopefully
    clarifying what the example is about); only its type is verified here.

    Args:
        notebook_filename: path of the notebook file to inspect.

    Raises:
        ValueError: if the notebook has no second cell or if that cell is
            not a markdown cell.
    """
    with open(notebook_filename, encoding="utf8") as fp:
        nb = nbformat.read(fp, nbformat.NO_CONVERT)
    # A missing second cell is reported like any other structural failure
    # rather than surfacing as an IndexError.
    if len(nb.cells) < 2 or nb.cells[1].cell_type != "markdown":
        raise ValueError("Second cell is not a markdown cell")


def print_hook_summary(reformatted_files, unchanged_files):
"""Summary for the whole hook.

Emits one ``reformatted <name>`` line per entry of ``reformatted_files``,
an "All done!" banner when at least one file was reformatted, and a line
with the number of reformatted and unchanged files.
"""
for f in reformatted_files:
print(f"\nreformatted {f}")

total_ref = len(reformatted_files)
total_unchanged = len(unchanged_files)
# NOTE(review): indentation is not visible here - confirm whether the counts line below is meant to sit under this guard together with the banner
if total_ref > 0:
print("\nAll done! ✨ 🍰 ✨")
# "file" is pluralised independently for each of the two counts
print(
f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, "
f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged."
)


def main(argv: Sequence[str] | None = None) -> int:
"""collect failed notebook checks"""
"""collect arguments and run hook"""
parser = argparse.ArgumentParser()
parser.add_argument("--repo-name")
parser.add_argument("--fix-header", action="store_true")
parser.add_argument("--pip-install-on-colab-version")
parser.add_argument("filenames", nargs="*", help="Filenames to check.")
args = parser.parse_args(argv)

failed_files = False
reformatted_files = []
unchanged_files = []

for filename in args.filenames:
try:
modified = check_colab_header(
filename,
repo_name=args.repo_name,
fix=args.fix_header,
version=args.pip_install_on_colab_version,
hook_version=args.pip_install_on_colab_version,
)
if modified:
reformatted_files.append(str(filename))
Expand All @@ -169,11 +122,11 @@ def main(argv: Sequence[str] | None = None) -> int:
except ValueError as exc:
print(f"[ERROR] {filename}: {exc}")
failed_files = True

try:
test_notebook_has_at_least_three_cells(filename)
test_first_cell_contains_three_badges(filename, repo_name=args.repo_name)
test_second_cell_is_a_markdown_cell(filename)

except ValueError as exc:
print(f"[ERROR] {filename}: {exc}")
failed_files = True
Expand Down
7 changes: 3 additions & 4 deletions hooks/check_notebooks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Checks notebook execution status for Jupyter notebooks"""

from __future__ import annotations

import argparse
Expand Down Expand Up @@ -64,10 +65,8 @@ def test_show_anim_used_instead_of_matplotlib(notebook):
if "show_anim(" in cell.source:
show_anim_used = True
if matplot_used and not show_anim_used:
raise AssertionError(
"""if using matplotlib for animations,
please use open_atmos_jupyter_utils.show_anim()"""
)
raise AssertionError("""if using matplotlib for animations,
please use open_atmos_jupyter_utils.show_anim()""")


def test_jetbrains_bug_py_66491(notebook):
Expand Down
118 changes: 118 additions & 0 deletions hooks/open_atmos_colab_header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Extract version from existing header and check if header is correct"""

from __future__ import annotations

import re
import nbformat

# Matches ``pip_install_on_colab('<examples pkg>', '<main pkg>')`` with either
# quote style and optional whitespace around the two arguments.
_PIP_INSTALL_RE = re.compile(
    r"pip_install_on_colab\(\s*"
    r"['\"](?P<examples>[^'\"]+)['\"]\s*,\s*"
    r"['\"](?P<main>[^'\"]+)['\"]\s*\)"
)


def extract_versions(cell_source: str, repo_name: str):
    """Pull the version suffixes out of a header cell.

    The suffix is whatever follows ``<repo_name>-examples`` in the first
    argument and ``<repo_name>`` in the second argument of the
    ``pip_install_on_colab`` call found in ``cell_source``.

    Returns:
        ``(examples_version, main_version)``; ``(None, None)`` when the call
        is not found or a package name does not start with the expected
        prefix.
    """
    call = _PIP_INSTALL_RE.search(cell_source)
    if call is None:
        return None, None

    examples_prefix = f"{repo_name}-examples"
    examples_pkg, main_pkg = call.group("examples", "main")

    prefixes_ok = main_pkg.startswith(repo_name) and examples_pkg.startswith(
        examples_prefix
    )
    if prefixes_ok:
        return examples_pkg[len(examples_prefix) :], main_pkg[len(repo_name) :]
    return None, None


def resolve_version(existing: str | None, hook_version: str | None) -> str:
    """Pick the version suffix to put into the header.

    The first non-empty candidate wins, in this order: the version already
    present in the notebook, the version supplied to the hook, and finally
    the empty string (no version).
    """
    return existing or hook_version or ""


def build_header(repo_name: str, version: str) -> str:
"""Required header pattern in open-atmos notebooks.

Returns the source of the Colab setup cell, with ``version`` appended
verbatim to both package names given to ``pip_install_on_colab``.
"""
# NOTE(review): the template lines after the `if` show no leading whitespace in this view - confirm against the repository that the generated cell indents them
return f"""import os, sys
os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS
if 'google.colab' in sys.modules:
!pip --quiet install open-atmos-jupyter-utils
from open_atmos_jupyter_utils import pip_install_on_colab
pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')"""


# Substrings that must all occur in a code cell for it to be treated as the header.
HEADER_REQUIRED_PATTERNS = [
    "google.colab",
    "open-atmos-jupyter-utils",
    "pip_install_on_colab",
]


def looks_like_header(cell_source: str) -> bool:
    """Tell whether ``cell_source`` carries every required header marker.

    This is a plain substring test; it does not validate the header text,
    it only identifies the cell that is meant to be the header.
    """
    for marker in HEADER_REQUIRED_PATTERNS:
        if marker not in cell_source:
            return False
    return True


def check_colab_header(notebook_path, repo_name, fix, hook_version):
    """Check that the Colab header is correct and, if asked, repair it.

    The header is the code cell recognised by ``looks_like_header``. It must
    be the third cell (index 2) and its source must equal the text produced
    by ``build_header`` for the resolved version. A version already present
    in the notebook takes precedence over ``hook_version``.

    Args:
        notebook_path: path of the notebook to read (and rewrite when fixing).
        repo_name: repository name substituted into the header template.
        fix: when false the notebook is never written and every deviation is
            reported as ``ValueError``; when true a missing header is
            inserted, an incorrect one rewritten and a misplaced one moved.
        hook_version: version suffix used when the notebook header does not
            carry one.

    Returns:
        True if the notebook file was rewritten, False otherwise.

    Raises:
        ValueError: if the notebook has fewer than three cells, the header is
            malformed, its two version suffixes differ, or (check-only mode)
            the header is missing, incorrect or not in the third cell.
    """
    nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT)

    if len(nb.cells) < 3:
        raise ValueError("Notebook should have at least 3 cells")

    # First code cell that carries all header markers, if any.
    header_index = next(
        (
            idx
            for idx, cell in enumerate(nb.cells)
            if cell.cell_type == "code" and looks_like_header(cell.source)
        ),
        None,
    )

    if header_index is None:
        # Check-only mode must report the problem, not silently edit the file.
        if not fix:
            raise ValueError("Colab header is missing")
        header_source = build_header(repo_name, resolve_version(None, hook_version))
        nb.cells.insert(2, nbformat.v4.new_code_cell(header_source))
        nbformat.write(nb, notebook_path)
        return True

    header_cell = nb.cells[header_index]
    examples_version, main_version = extract_versions(header_cell.source, repo_name)

    if examples_version is None or main_version is None:
        raise ValueError("Colab header is malformed")

    if examples_version != main_version:
        raise ValueError(
            f"Version mismatch in header: {examples_version!r} != {main_version!r}"
        )

    final_version = resolve_version(main_version, hook_version)
    correct_header = build_header(repo_name, final_version)

    modified = False
    if header_cell.source != correct_header:
        if not fix:
            raise ValueError("Colab header is incorrect")
        header_cell.source = correct_header
        modified = True

    if header_index != 2:
        # Moving the cell rewrites the notebook, so it also requires `fix`.
        if not fix:
            raise ValueError("Colab header is not in the third cell")
        nb.cells.insert(2, nb.cells.pop(header_index))
        modified = True

    # Touch the file only when something actually changed.
    if modified:
        nbformat.write(nb, notebook_path)
    return modified
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ dynamic = ['version']

[project.scripts]
check_notebooks = "hooks.check_notebooks:main"
# Legacy name kept so configurations that still call `check_badges` keep working.
check_badges = "hooks.check_notebook_open_atmos_structure:main"
# Console script matching the `entry` of the check-notebook-open-atmos-structure hook.
check_notebook_open_atmos_structure = "hooks.check_notebook_open_atmos_structure:main"
4 changes: 2 additions & 2 deletions test_files/template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
"id": "72ccd23c0ab9f08e",
"metadata": {
"ExecuteTime": {
"end_time": "2024-10-26T12:29:32.925592Z",
"start_time": "2024-10-26T12:29:32.919920Z"
"end_time": "2026-01-27T14:51:07.477258Z",
"start_time": "2026-01-27T14:51:07.473160Z"
}
},
"outputs": [],
Expand Down
Loading