diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c29f34..01895ac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_stages: [pre-commit] repos: - repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.11.0 + rev: 25.12.0 hooks: - id: black @@ -13,10 +13,10 @@ repos: - id: end-of-file-fixer - id: debug-statements - - repo: https://github.com/christopher-hacker/enforce-notebook-run-order - rev: 2.1.1 - hooks: - - id: enforce-notebook-run-order +# - repo: https://github.com/christopher-hacker/enforce-notebook-run-order +# rev: 2.1.1 +# hooks: +# - id: enforce-notebook-run-order - repo: local hooks: @@ -27,6 +27,7 @@ repos: - nbformat language: python types: [jupyter] + exclude: tests/examples/bad.ipynb - id: check-badges name: check badges @@ -38,3 +39,21 @@ repos: args: [--fix-header, --repo-name=devops_tests] language: python types: [jupyter] + exclude: tests/examples/bad.ipynb + + - id: notebooks-output + name: notebooks output + entry: notebooks_output + additional_dependencies: + - nbformat + language: python + types: [jupyter] + exclude: tests/examples/bad.ipynb + + - id: notebooks-using-jupyter-utils + name: notebooks using open-atmos-jupyter-utils + entry: notebooks_using_jupyter_utils + additional_dependencies: + - nbformat + language: python + types: [ jupyter ] diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 922a4c2..29786dc 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -13,3 +13,19 @@ language: python stages: [pre-commit] types: [jupyter] + +- id: notebooks-output + name: notebooks output + description: check if notebooks are executed and without errors and warnings + entry: notebooks_output + language: python + stages: [pre-commit] + types: [jupyter] + +- id: notebooks-using-jupyter-utils + name: notebooks using open-atmos-jupyter-utils + description: check if notebook use show_anim() and show_plot() methods from open-atmos-jupyter-utils + entry: notebooks_using_jupyter_utils + language: python + stages: [pre-commit] + types: [jupyter] diff --git a/hooks/check_badges.py b/hooks/check_badges.py index dc6f19a..0d36695 100755 --- a/hooks/check_badges.py +++ b/hooks/check_badges.py @@ -1,185 +1,276 @@ #!/usr/bin/env python3 # pylint: disable=missing-function-docstring """ -Checks whether notebooks contain badges.""" +Checks/repairs notebook badge headers. + +This version optionally uses Git to discover the repository root (to build +repo-relative notebook URLs) and uses Python's logging module instead of print() +for structured messages. + +Behavior: +- By default it will attempt to detect the git repo root (if GitPython is + installed and the file is in a git working tree) and fall back to Path.cwd(). +- Use --no-git to force using Path.cwd(). +- Use --repo-root PATH to explicitly set repository root. +- Use --verbose to enable debug logging. + +Usage: + check_badges --repo-name=devops_tests [--repo-owner=open-atmos] [--fix-header] + [--no-git] [--repo-root PATH] [--verbose] FILES... +""" from __future__ import annotations import argparse +import logging from collections.abc import Sequence +from pathlib import Path +from typing import Iterable, List, Tuple, Optional import nbformat +from nbformat import NotebookNode +from .utils import NotebookTestError -def _header_cell_text(repo_name, version): - if version is None: - version = "" - return f"""import os, sys -os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS -if 'google.colab' in sys.modules: - !pip --quiet install open-atmos-jupyter-utils - from open_atmos_jupyter_utils import pip_install_on_colab - pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" - - -HEADER_KEY_PATTERNS = [ - "install open-atmos-jupyter-utils", - "google.colab", - "pip_install_on_colab", -] - - -def is_colab_header(cell_source: str) -> bool: - """Return True if the cell looks like a Colab header.""" - return all(pat in cell_source for pat in HEADER_KEY_PATTERNS) - - -def check_colab_header(notebook_path, repo_name, fix, version): - """Check Colab-magic cell and fix if is misspelled, in wrong position or not exists""" - nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) +REPO_OWNER_DEFAULT = "open-atmos" - header_index = None - correct_header = _header_cell_text(repo_name, version) - modified = False - if not fix: - if nb.cells[2].cell_type != "code" or nb.cells[2].source != correct_header: - raise ValueError("Third cell does not contain correct header") - return modified +logger = logging.getLogger(__name__) - for idx, cell in enumerate(nb.cells): - if cell.cell_type == "code" and is_colab_header(cell.source): - header_index = idx - break - if header_index is not None: - if nb.cells[header_index].source != correct_header: - nb.cells[header_index].source = correct_header - modified = True - if header_index != 2: - nb.cells.insert(2, nb.cells.pop(header_index)) - modified = True - else: - new_cell = nbformat.v4.new_code_cell(correct_header) - nb.cells.insert(2, new_cell) - modified = True - if modified: - nbformat.write(nb, notebook_path) - return modified - - -def print_hook_summary(reformatted_files, unchanged_files): - """Print a Black-style summary.""" - for f in reformatted_files: - print(f"\nreformatted {f}") - - total_ref = len(reformatted_files) - total_unchanged = len(unchanged_files) - if total_ref > 0: - print("\nAll done! ✨ 🍰 ✨") - print( - f"{total_ref} file{'s' if total_ref != 1 else ''} reformatted, " - f"{total_unchanged} file{'s' if total_unchanged != 1 else ''} left unchanged." - ) - - -def _preview_badge_markdown(absolute_path, repo_name): +def preview_badge_markdown(absolute_path: str, repo_name: str, repo_owner: str) -> str: svg_badge_url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" ) - link = f"https://github.com/open-atmos/{repo_name}/blob/main/" + f"{absolute_path}" + link = f"https://github.com/{repo_owner}/{repo_name}/blob/main/{absolute_path}" return f"[![preview notebook]({svg_badge_url})]({link})" -def _mybinder_badge_markdown(absolute_path, repo_name): +def mybinder_badge_markdown(absolute_path: str, repo_name: str, repo_owner: str) -> str: svg_badge_url = "https://mybinder.org/badge_logo.svg" link = ( - f"https://mybinder.org/v2/gh/open-atmos/{repo_name}.git/main?urlpath=lab/tree/" + f"https://mybinder.org/v2/gh/{repo_owner}/{repo_name}.git/main?urlpath=lab/tree/" + f"{absolute_path}" ) return f"[![launch on mybinder.org]({svg_badge_url})]({link})" -def _colab_badge_markdown(absolute_path, repo_name): +def colab_badge_markdown(absolute_path: str, repo_name: str, repo_owner: str) -> str: svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( - f"https://colab.research.google.com/github/open-atmos/{repo_name}/blob/main/" + f"https://colab.research.google.com/github/{repo_owner}/{repo_name}/blob/main/" + f"{absolute_path}" ) return f"[![launch on Colab]({svg_badge_url})]({link})" -def test_notebook_has_at_least_three_cells(notebook_filename): +def find_repo_root(start_path: Path, prefer_git: bool = True) -> Path: + """ + Find repository root for the given start_path. + + If prefer_git is True, attempt to use GitPython to locate the repository root + (searching parent directories). If that fails, fall back to cwd(). + """ + if prefer_git: + try: + # Import locally so the module doesn't hard-depend on GitPython at import time + from git import Repo # pylint: disable=import-outside-toplevel + + try: + repo = Repo(start_path, search_parent_directories=True) + if repo.working_tree_dir: + root = Path(repo.working_tree_dir) + logger.debug("Discovered git repository root: %s", root) + return root + except Exception as exc: # pylint: disable=broad-exception-caught + logger.debug("Git repo detection failed for %s: %s", start_path, exc) + except ImportError as exc: + logger.debug("GitPython not available or import failed: %s", exc) + + cwd = Path.cwd() + logger.debug("Using current working directory as repo root: %s", cwd) + return cwd + + +def expected_badges_for( + notebook_path: Path, + repo_name: str, + repo_owner: str, + repo_root: Optional[Path] = None, +) -> List[str]: + """ + Return the canonical badge lines expected for notebook_path. + If repo_root is provided, attempt to build a relative path from it; otherwise + find repository root automatically (using find_repo_root). + """ + if repo_root is None: + repo_root = find_repo_root(notebook_path) + try: + rel = notebook_path.relative_to(repo_root) + except ValueError: + # fallback to just the given path + rel = notebook_path + absolute_path = rel.as_posix() + return [ + preview_badge_markdown(absolute_path, repo_name, repo_owner), + mybinder_badge_markdown(absolute_path, repo_name, repo_owner), + colab_badge_markdown(absolute_path, repo_name, repo_owner), + ] + + +def read_notebook(path: Path) -> NotebookNode: + with path.open(encoding="utf8") as fp: + return nbformat.read(fp, nbformat.NO_CONVERT) + + +def write_notebook(path: Path, nb: NotebookNode) -> None: + with path.open("w", encoding="utf8") as fp: + nbformat.write(nb, fp) + + +def first_cell_lines(nb: NotebookNode) -> List[str]: + """Return list of stripped lines from the first cell if it's markdown, else []""" + if not nb.cells: + return [] + first = nb.cells[0] + if first.cell_type != "markdown": + return [] + return [ln.strip() for ln in str(first.source).splitlines() if ln.strip() != ""] + + +def badges_match( + actual_lines: Iterable[str], expected_lines: Iterable[str] +) -> Tuple[bool, str]: + """ + Check whether the expected badge lines are present in actual_lines. + Tolerant: ignores order, strips whitespace. + Returns (matches, message). Message empty on match else explains which badges missing. + """ + actual_set = {ln.strip() for ln in actual_lines} + expected_list = list(expected_lines) + missing = [exp for exp in expected_list if exp.strip() not in actual_set] + if not missing: + return True, "" + return False, f"Missing badges: {missing}" + + +def test_notebook_has_at_least_three_cells(notebook_filename: str) -> None: """checks if all notebooks have at least three cells""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - if len(nb.cells) < 3: - raise ValueError("Notebook should have at least 4 cells") - - -def test_first_cell_contains_three_badges(notebook_filename, repo_name): - """checks if all notebooks feature three badges in the first cell""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) - if nb.cells[0].cell_type != "markdown": - raise ValueError("First cell is not a markdown cell") - lines = nb.cells[0].source.split("\n") - if len(lines) != 3: - raise ValueError("First cell does not contain exactly 3 lines (badges)") - if lines[0] != _preview_badge_markdown(notebook_filename, repo_name): - raise ValueError("First badge does not match Github preview badge") - if lines[1] != _mybinder_badge_markdown(notebook_filename, repo_name): - raise ValueError("Second badge does not match MyBinder badge") - if lines[2] != _colab_badge_markdown(notebook_filename, repo_name): - raise ValueError("Third badge does not match Colab badge") - - -def test_second_cell_is_a_markdown_cell(notebook_filename): - """checks if all notebooks have their second cell with some markdown - (hopefully clarifying what the example is about)""" - with open(notebook_filename, encoding="utf8") as fp: - nb = nbformat.read(fp, nbformat.NO_CONVERT) + nb = read_notebook(Path(notebook_filename)) + if len(nb.cells) < 3: + raise ValueError("Notebook should have at least 3 cells") + + +def test_first_cell_contains_three_badges( + notebook_filename: str, + repo_name: str, + repo_owner: str = REPO_OWNER_DEFAULT, + repo_root: Optional[Path] = None, +) -> None: + """ + checks if the notebook's first cell contains the three badges. + Raises ValueError on failure. + + The optional repo_root can be provided to control how the notebook path is + converted into the remote URL. If None, the module will attempt to detect + a git repo root and fall back to cwd(). + """ + nb = read_notebook(Path(notebook_filename)) + lines = first_cell_lines(nb) + expected = expected_badges_for( + Path(notebook_filename), repo_name, repo_owner, repo_root + ) + ok, msg = badges_match(lines, expected) + if not ok: + raise ValueError(msg) + + +def test_second_cell_is_a_markdown_cell(notebook_filename: str) -> None: + """checks if all notebooks have their second cell as markdown""" + nb = read_notebook(Path(notebook_filename)) + if len(nb.cells) < 2: + raise ValueError("Notebook has no second cell") if nb.cells[1].cell_type != "markdown": raise ValueError("Second cell is not a markdown cell") +def fix_header_inplace( + path: Path, + repo_name: str, + repo_owner: str = REPO_OWNER_DEFAULT, + repo_root: Optional[Path] = None, +) -> None: + """ + Replace the first cell with the canonical 3-badge header if the header is missing + or malformed. If the notebook has no cells, a new markdown cell is inserted. + """ + nb = read_notebook(path) + expected = expected_badges_for(path, repo_name, repo_owner, repo_root) + new_first = {"cell_type": "markdown", "metadata": {}, "source": "\n".join(expected)} + if not nb.cells: + nb.cells.insert(0, nbformat.from_dict(new_first)) + else: + nb.cells[0] = nbformat.from_dict(new_first) + write_notebook(path, nb) + + def main(argv: Sequence[str] | None = None) -> int: - """collect failed notebook checks""" parser = argparse.ArgumentParser() - parser.add_argument("--repo-name") - parser.add_argument("--fix-header", action="store_true") - parser.add_argument("--pip-install-on-colab-version") + parser.add_argument("--repo-name", required=True) + parser.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) + parser.add_argument( + "--fix-header", + action="store_true", + help="If set, attempt to fix notebooks missing the header.", + ) + parser.add_argument( + "--no-git", + action="store_true", + help="Do not attempt to detect git repo root; use cwd()", + ) + parser.add_argument( + "--repo-root", help="Explicit repository root to use when building URLs" + ) + parser.add_argument("--verbose", action="store_true", help="Enable debug logging") parser.add_argument("filenames", nargs="*", help="Filenames to check.") args = parser.parse_args(argv) - failed_files = False - reformatted_files = [] - unchanged_files = [] + + # configure logging + level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig(level=level, format="%(levelname)s: %(message)s") + + prefer_git = not args.no_git + repo_root_path: Optional[Path] = Path(args.repo_root) if args.repo_root else None + retval = 0 for filename in args.filenames: + path = Path(filename) try: - modified = check_colab_header( - filename, - repo_name=args.repo_name, - fix=args.fix_header, - version=args.pip_install_on_colab_version, + effective_repo_root = repo_root_path or ( + find_repo_root(path, prefer_git) if prefer_git else Path.cwd() ) - if modified: - reformatted_files.append(str(filename)) - else: - unchanged_files.append(str(filename)) - except ValueError as exc: - print(f"[ERROR] {filename}: {exc}") - failed_files = True - try: + test_notebook_has_at_least_three_cells(filename) - test_first_cell_contains_three_badges(filename, repo_name=args.repo_name) + test_first_cell_contains_three_badges( + filename, args.repo_name, args.repo_owner, effective_repo_root + ) test_second_cell_is_a_markdown_cell(filename) - except ValueError as exc: - print(f"[ERROR] {filename}: {exc}") - failed_files = True - - print_hook_summary(reformatted_files, unchanged_files) - return 1 if (reformatted_files or failed_files) else 0 + logger.info("%s: OK", filename) + retval = retval or 0 + except NotebookTestError as exc: + logger.error("%s: %s", filename, exc) + retval = 1 + if args.fix_header: + try: + fix_header_inplace( + path, args.repo_name, args.repo_owner, effective_repo_root + ) + logger.info("%s: header fixed", filename) + retval = 0 + except NotebookTestError as fix_exc: + logger.exception("%s: failed to fix header: %s", filename, fix_exc) + retval = 2 + return retval if __name__ == "__main__": diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index 870a124..c23648b 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -1,73 +1,9 @@ #!/usr/bin/env python3 -""" -Checks notebook execution status for Jupyter notebooks""" +# pylint: disable=missing-module-docstring from __future__ import annotations -import argparse from collections.abc import Sequence - -import nbformat - - -class NotebookTestError(Exception): - """Raised when a notebook validation test fails.""" - - -def test_cell_contains_output(notebook): - """checks if all notebook cells have an output present""" - for cell in notebook.cells: - if cell.cell_type == "code" and cell.source != "": - if cell.execution_count is None: - raise ValueError("Cell does not contain output!") - - -def test_no_errors_or_warnings_in_output(notebook): - """checks if all example Jupyter notebooks have clear std-err output - (i.e., no errors or warnings) visible; except acceptable - diagnostics from the joblib package""" - for cell in notebook.cells: - if cell.cell_type == "code": - for output in cell.outputs: - if "name" in output and output["name"] == "stderr": - if not output["text"].startswith("[Parallel(n_jobs="): - raise ValueError(output["text"]) - - -def test_show_plot_used_instead_of_matplotlib(notebook): - """checks if plotting is done with open_atmos_jupyter_utils show_plot()""" - matplot_used = False - show_plot_used = False - for cell in notebook.cells: - if cell.cell_type == "code": - if "pyplot.show(" in cell.source or "plt.show(" in cell.source: - matplot_used = True - if "show_plot(" in cell.source: - show_plot_used = True - if matplot_used and not show_plot_used: - raise ValueError( - "if using matplotlib, please use open_atmos_jupyter_utils.show_plot()" - ) - - -def test_show_anim_used_instead_of_matplotlib(notebook): - """checks if animation generation is done with open_atmos_jupyter_utils show_anim()""" - matplot_used = False - show_anim_used = False - for cell in notebook.cells: - if cell.cell_type == "code": - if ( - "funcAnimation" in cell.source - or "matplotlib.animation" in cell.source - or "from matplotlib import animation" in cell.source - ): - matplot_used = True - if "show_anim(" in cell.source: - show_anim_used = True - if matplot_used and not show_anim_used: - raise AssertionError( - """if using matplotlib for animations, - please use open_atmos_jupyter_utils.show_anim()""" - ) +from .utils import open_and_test_notebooks def test_jetbrains_bug_py_66491(notebook): @@ -84,28 +20,12 @@ def test_jetbrains_bug_py_66491(notebook): def main(argv: Sequence[str] | None = None) -> int: """test all notebooks""" - parser = argparse.ArgumentParser() - parser.add_argument("filenames", nargs="*", help="Filenames to check.") - args = parser.parse_args(argv) - - retval = 0 - test_functions = [ - test_cell_contains_output, - test_no_errors_or_warnings_in_output, - test_jetbrains_bug_py_66491, - test_show_anim_used_instead_of_matplotlib, - test_show_plot_used_instead_of_matplotlib, - ] - for filename in args.filenames: - with open(filename, encoding="utf8") as notebook_file: - notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) - for func in test_functions: - try: - func(notebook) - except NotebookTestError as e: - print(f"{filename} : {e}") - retval = 1 - return retval + return open_and_test_notebooks( + argv=argv, + test_functions=[ + test_jetbrains_bug_py_66491, + ], + ) if __name__ == "__main__": diff --git a/hooks/notebooks_output.py b/hooks/notebooks_output.py new file mode 100755 index 0000000..20dcdec --- /dev/null +++ b/hooks/notebooks_output.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""checks if notebook is executed and do not contain 'stderr""" + +from __future__ import annotations + +from collections.abc import Sequence +from .utils import open_and_test_notebooks + + +def test_cell_contains_output(notebook): + """checks if all notebook cells have an output present""" + for cell_idx, cell in enumerate(notebook.cells): + if cell.cell_type == "code" and cell.source != "": + if cell.execution_count is None: + raise ValueError(f"Cell {cell_idx} does not contain output") + + +def test_no_errors_or_warnings_in_output(notebook): + """checks if all example Jupyter notebooks have clear std-err output + (i.e., no errors or warnings) visible; except acceptable + diagnostics from the joblib package""" + for cell_idx, cell in enumerate(notebook.cells): + if cell.cell_type == "code": + for output in cell.outputs: + ot = output.get("output_type") + if ot == "error": + raise ValueError( + f"Cell [{cell_idx}] contain error or warning. \n\n" + f"Cell [{cell_idx}] output:\n{output}\n" + ) + if ot == "stream" and output.get("name") == "stderr": + out_text = output.get("text") + if out_text and not out_text.startswith("[Parallel(n_jobs="): + raise ValueError(f" Cell [{cell_idx}]: {out_text}") + + +def main(argv: Sequence[str] | None = None) -> int: + """test all notebooks""" + return open_and_test_notebooks( + argv=argv, + test_functions=[ + test_cell_contains_output, + test_no_errors_or_warnings_in_output, + ], + ) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/hooks/notebooks_using_jupyter_utils.py b/hooks/notebooks_using_jupyter_utils.py new file mode 100755 index 0000000..8d69044 --- /dev/null +++ b/hooks/notebooks_using_jupyter_utils.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +""" +Checks notebook execution status for Jupyter notebooks""" +from __future__ import annotations + +from collections.abc import Sequence +from .utils import open_and_test_notebooks + + +def test_show_plot_used_instead_of_matplotlib(notebook): + """checks if plotting is done with open_atmos_jupyter_utils show_plot()""" + matplot_used = False + show_plot_used = False + for cell in notebook.cells: + if cell.cell_type == "code": + if "pyplot.show(" in cell.source or "plt.show(" in cell.source: + matplot_used = True + if "show_plot(" in cell.source: + show_plot_used = True + if matplot_used and not show_plot_used: + raise ValueError( + "if using matplotlib, please use open_atmos_jupyter_utils.show_plot()" + ) + + +def test_show_anim_used_instead_of_matplotlib(notebook): + """checks if animation generation is done with open_atmos_jupyter_utils show_anim()""" + matplot_used = False + show_anim_used = False + for cell in notebook.cells: + if cell.cell_type == "code": + if ( + "funcAnimation" in cell.source + or "matplotlib.animation" in cell.source + or "from matplotlib import animation" in cell.source + ): + matplot_used = True + if "show_anim(" in cell.source: + show_anim_used = True + if matplot_used and not show_anim_used: + raise AssertionError( + """if using matplotlib for animations, + please use open_atmos_jupyter_utils.show_anim()""" + ) + + +def main(argv: Sequence[str] | None = None) -> int: + """test all notebooks""" + return open_and_test_notebooks( + argv=argv, + test_functions=[ + test_show_anim_used_instead_of_matplotlib, + test_show_plot_used_instead_of_matplotlib, + ], + ) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/hooks/utils.py b/hooks/utils.py index 54b83c0..4ffa6aa 100644 --- a/hooks/utils.py +++ b/hooks/utils.py @@ -4,8 +4,13 @@ import os import pathlib - +import argparse from git import Git +import nbformat + + +class NotebookTestError(BaseException): + """Raised when a notebook validation test fails.""" def find_files(path_to_folder_from_project_root=".", file_extension=None): @@ -43,3 +48,22 @@ def repo_path(): while not (path.is_dir() and Git(path).rev_parse("--git-dir") == ".git"): path = path.parent return path + + +def open_and_test_notebooks(argv, test_functions): + """Create argparser and run notebook tests""" + parser = argparse.ArgumentParser() + parser.add_argument("filenames", nargs="*", help="Filenames to check.") + args = parser.parse_args(argv) + + retval = 0 + for filename in args.filenames: + with open(filename, encoding="utf8") as notebook_file: + notebook = nbformat.read(notebook_file, nbformat.NO_CONVERT) + for func in test_functions: + try: + func(notebook) + except NotebookTestError as e: + print(f"{filename} : {e}") + retval = 1 + return retval diff --git a/pyproject.toml b/pyproject.toml index e3dfb25..7fea13a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,3 +29,5 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" check_badges = "hooks.check_badges:main" +notebooks_output = "hooks.notebooks_output:main" +notebooks_using_jupyter_utils = "hooks.notebooks_using_jupyter_utils:main" diff --git a/test_files/empty.pdf b/tests/empty.pdf similarity index 100% rename from test_files/empty.pdf rename to tests/empty.pdf diff --git a/tests/examples/bad.ipynb b/tests/examples/bad.ipynb new file mode 100644 index 0000000..1c10614 --- /dev/null +++ b/tests/examples/bad.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a deliberately failing notebook for tests: it contains two failing patterns\n", + "- a code cell missing the execution_count key (should trigger `Cell does not contain output!` / jetbrains check)\n", + "- a stderr output (should trigger `test_no_errors_or_warnings_in_output` if reached)\n" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "outputs": [], + "source": [ + "print('this cell lacks the execution_count key')" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": "Example error on stderr\n" + } + ], + "source": [ + "import sys\n", + "print('error to stderr', file=sys.stderr)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test_files/template.ipynb b/tests/examples/good.ipynb similarity index 69% rename from test_files/template.ipynb rename to tests/examples/good.ipynb index be45e76..a4277ca 100644 --- a/test_files/template.ipynb +++ b/tests/examples/good.ipynb @@ -2,28 +2,30 @@ "cells": [ { "cell_type": "markdown", - "id": "29186ad9e7311ae0", + "id": "18b1047b", "metadata": {}, "source": [ - "[![preview notebook](https://img.shields.io/static/v1?label=render%20on&logo=github&color=87ce3e&message=GitHub)](https://github.com/open-atmos/devops_tests/blob/main/test_files/template.ipynb)\n", - "[![launch on mybinder.org](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/open-atmos/devops_tests.git/main?urlpath=lab/tree/test_files/template.ipynb)\n", - "[![launch on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/open-atmos/devops_tests/blob/main/test_files/template.ipynb)" + "[![preview notebook](https://img.shields.io/static/v1?label=render%20on&logo=github&color=87ce3e&message=GitHub)](https://github.com/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb)\n", + "[![launch on mybinder.org](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/open-atmos/devops_tests.git/main?urlpath=lab/tree/tests/examples/good.ipynb)\n", + "[![launch on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb)" ] }, { "cell_type": "markdown", - "id": "7a729b2624b70eae", + "id": "59bad8ebb84ac4ce", "metadata": {}, - "source": [] + "source": [ + "Notebook description" + ] }, { "cell_type": "code", - "execution_count": 1, - "id": "72ccd23c0ab9f08e", + "execution_count": 3, + "id": "cc05fb4d", "metadata": { "ExecuteTime": { - "end_time": "2024-10-26T12:29:32.925592Z", - "start_time": "2024-10-26T12:29:32.919920Z" + "end_time": "2026-01-10T01:02:24.187099Z", + "start_time": "2026-01-10T01:02:24.179982Z" } }, "outputs": [], @@ -35,6 +37,19 @@ " from open_atmos_jupyter_utils import pip_install_on_colab\n", " pip_install_on_colab('devops_tests-examples', 'devops_tests')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "405d8abe602ec659", + "metadata": { + "ExecuteTime": { + "end_time": "2026-01-10T01:02:24.193663Z", + "start_time": "2026-01-10T01:02:24.192050Z" + } + }, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/tests/test_check_badges_examples.py b/tests/test_check_badges_examples.py new file mode 100644 index 0000000..068572b --- /dev/null +++ b/tests/test_check_badges_examples.py @@ -0,0 +1,82 @@ +# pylint: disable=missing-function-docstring + +""" +Unit tests for hooks/check_badges.py: good and bad notebook examples. + +These tests write small notebooks to temporary files and call the +badge-check functions that expect filenames. +""" + +import nbformat +from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell +import pytest + +from hooks import check_badges as cb + + +def _write_nb_and_return_path(tmp_path, nb, name="nb.ipynb"): + p = tmp_path / name + nbformat.write(nb, str(p)) + return str(p) + + +def test_good_notebook_header_and_second_cell(tmp_path): + # arrange + nb_path = tmp_path / "good.ipynb" + repo_name = "devops_tests" + repo_owner = "open-atmos" + first_cell = "\n".join( + [ + cb.preview_badge_markdown(str(nb_path), repo_name, repo_owner), + cb.mybinder_badge_markdown(str(nb_path), repo_name, repo_owner), + cb.colab_badge_markdown(str(nb_path), repo_name, repo_owner), + ] + ) + + # act + nb = new_notebook( + cells=[ + new_markdown_cell(first_cell), + new_markdown_cell("Some description"), + new_code_cell(source="print('ok')", execution_count=1, outputs=[]), + ] + ) + path = _write_nb_and_return_path(tmp_path, nb, name="good.ipynb") + + # assert + cb.test_notebook_has_at_least_three_cells(path) + cb.test_first_cell_contains_three_badges(path, repo_name) + cb.test_second_cell_is_a_markdown_cell(path) + + +def test_too_few_cells_raises(tmp_path): + nb = new_notebook(cells=[new_markdown_cell("only one cell")]) + path = _write_nb_and_return_path(tmp_path, nb, name="few.ipynb") + with pytest.raises(ValueError): + cb.test_notebook_has_at_least_three_cells(path) + + +def test_first_cell_bad_badges_raises(tmp_path): + nb = new_notebook( + cells=[ + new_markdown_cell("not the right badges\nline2\nline3"), + new_markdown_cell("desc"), + new_code_cell(source="print(1)", execution_count=1, outputs=[]), + ] + ) + path = _write_nb_and_return_path(tmp_path, nb, name="badbadges.ipynb") + with pytest.raises(ValueError): + cb.test_first_cell_contains_three_badges(path, "devops_tests") + + +def test_second_cell_not_markdown_raises(tmp_path): + nb = new_notebook( + cells=[ + new_markdown_cell("badge1\nbadge2\nbadge3"), + new_code_cell(source="print('I am code')", execution_count=1, outputs=[]), + new_code_cell(source="print('more')", execution_count=2, outputs=[]), + ] + ) + path = _write_nb_and_return_path(tmp_path, nb, name="second_not_md.ipynb") + with pytest.raises(ValueError): + cb.test_second_cell_is_a_markdown_cell(path) diff --git a/tests/test_check_notebooks_examples.py b/tests/test_check_notebooks_examples.py new file mode 100644 index 0000000..d32a5fe --- /dev/null +++ b/tests/test_check_notebooks_examples.py @@ -0,0 +1,95 @@ +# pylint: disable=missing-function-docstring +""" +Unit tests for hooks/check_notebooks.py: good and bad notebook examples. + +These tests create in-memory notebooks with nbformat and call the +validation functions exported by hooks.check_notebooks to ensure they +accept valid notebooks and raise on invalid ones. +""" + +import pytest +from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell + +from hooks import notebooks_output as no +from hooks import check_notebooks as cn +from hooks import notebooks_using_jupyter_utils as nuju + + +def test_good_notebook_passes_all_checks(): + nb = new_notebook( + cells=[ + new_markdown_cell("Intro"), + new_code_cell( + source="x = 1\nprint(x)", + execution_count=1, + outputs=[{"output_type": "stream", "name": "stdout", "text": "1\n"}], + ), + ] + ) + + no.test_cell_contains_output(nb) + no.test_no_errors_or_warnings_in_output(nb) + cn.test_jetbrains_bug_py_66491(nb) + nuju.test_show_plot_used_instead_of_matplotlib(nb) + nuju.test_show_anim_used_instead_of_matplotlib(nb) + + +def test_cell_missing_execution_count_raises(): + nb = new_notebook( + cells=[new_code_cell(source="print(1)", execution_count=None, outputs=[])] + ) + with pytest.raises(ValueError): + no.test_cell_contains_output(nb) + + +def test_stderr_output_raises(): + nb = new_notebook( + cells=[ + new_code_cell( + source="print('oops')", + execution_count=1, + outputs=[ + {"output_type": "stream", "name": "stderr", "text": "Traceback..."} + ], + ) + ] + ) + with pytest.raises(ValueError): + no.test_no_errors_or_warnings_in_output(nb) + + +def test_using_matplotlib_show_without_show_plot_raises(): + nb = new_notebook( + cells=[ + new_code_cell( + source="import matplotlib.pyplot as plt\nplt.plot([1,2,3])\nplt.show()", + execution_count=1, + outputs=[], + ) + ] + ) + with pytest.raises(ValueError): + nuju.test_show_plot_used_instead_of_matplotlib(nb) + + +def test_animation_without_show_anim_raises(): + nb = new_notebook( + cells=[ + new_code_cell( + source="from matplotlib import animation\nanimation.FuncAnimation(...)", + execution_count=1, + outputs=[], + ) + ] + ) + with pytest.raises(AssertionError): + nuju.test_show_anim_used_instead_of_matplotlib(nb) + + +def test_missing_execution_count_key_raises(): + nb = new_notebook( + cells=[new_code_cell(source="1+1", execution_count=1, outputs=[])] + ) + del nb.cells[0]["execution_count"] + with pytest.raises(ValueError): + cn.test_jetbrains_bug_py_66491(nb) diff --git a/tests/test_run_hooks_on_examples.py b/tests/test_run_hooks_on_examples.py new file mode 100644 index 0000000..cfb11a4 --- /dev/null +++ b/tests/test_run_hooks_on_examples.py @@ -0,0 +1,121 @@ +# pylint: disable=missing-function-docstring + +""" +Run the hooks on good and bad example notebooks and assert behavior + logs. + +This test executes the hook modules the same way pre-commit would: + python -m hooks.check_badges --repo-name=... PATH + python -m hooks.check_notebooks PATH + +It captures stdout/stderr (which is where logging.basicConfig writes), asserts +expected exit codes, and checks stderr for expected failure substrings. + +Adjust the candidate paths in _find_example() if your examples are stored elsewhere. +""" +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import pytest + + +def _find_example(basename: str) -> Path | None: + """ + Try a set of common locations for example notebooks and return the first + existing Path or None if not found. + """ + candidates = [ + Path("tests") / "examples" / basename, + Path(basename), + ] + for p in candidates: + if p.exists(): + return p + return None + + +def _run_module(module: str, args: list[str]) -> subprocess.CompletedProcess: + cmd = [sys.executable, "-m", module] + args + return subprocess.run(cmd, capture_output=True, text=True, check=True) + + +@pytest.mark.parametrize( + "name, expected_badge_exit, expected_badge_msg", + [ + ("good.ipynb", 0, ""), + ("bad.ipynb", 1, "Missing badges"), + ], +) +def test_check_badges_on_examples( + name: str, expected_badge_exit: int, expected_badge_msg: str +): + nb_path = _find_example(name) + if nb_path is None: + pytest.skip(f"No example notebook found for {name};") + + res = _run_module("hooks.check_badges", ["--repo-name=devops_tests", str(nb_path)]) + combined_msgs = (res.stderr or "") + "\n" + (res.stdout or "") + + # Check exit code first + if expected_badge_exit == 0: + assert ( + res.returncode == 0 + ), f"Expected success; stderr:\n{res.stderr}\nstdout:\n{res.stdout}" + # For success, ensure no obvious error strings are present + assert "Missing badges" not in combined_msgs + assert "ERROR" not in combined_msgs + assert "Traceback" not in combined_msgs + else: + assert ( + res.returncode != 0 + ), f"Expected failure; got exit {res.returncode}\nstdout:\n{res.stdout}" + + assert expected_badge_msg in combined_msgs, ( + f"Expected to find {expected_badge_msg!r} in output" + f"\n---STDERR---\n{res.stderr}" + f"\n---STDOUT---\n{res.stdout}" + ) + + +@pytest.mark.parametrize( + "name, should_fail, expected_msg_substr", + [ + ("good.ipynb", False, ""), + ("bad.ipynb", True, "Cell does not contain output!"), + ("bad.ipynb", True, "Cell does not contain output!"), + ], +) +def test_check_notebooks_on_examples( + name: str, should_fail: bool, expected_msg_substr: str +): + nb_path = _find_example(name) + if nb_path is None: + pytest.skip(f"No example notebook found for {name}") + + res = _run_module("hooks.check_notebooks", [str(nb_path)]) + + if should_fail: + assert ( + res.returncode != 0 + ), f"Expected check_notebooks to fail on {nb_path}, but it succeeded" + combined = (res.stderr or "") + "\n" + (res.stdout or "") + + possible_substrings = [ + expected_msg_substr, + "Notebook cell missing execution_count attribute", + "Cell does not contain output!", + "Traceback", + "stderr", + ] + assert any(s and s in combined for s in possible_substrings), ( + f"Expected one of {possible_substrings!r} in output for failing notebook" + f"\nSTDERR:\n{res.stderr}" + f"\nSTDOUT:\n{res.stdout}" + ) + else: + assert res.returncode == 0, ( + f"Expected check_notebooks to succeed on {nb_path};" + f" stderr:\n{res.stderr}\nstdout:\n{res.stdout}" + )