From 6aeeee1601e30a73229b251ab1d683d95d7cabed Mon Sep 17 00:00:00 2001 From: Paul Molodowitch Date: Wed, 8 Apr 2026 14:02:04 -0700 Subject: [PATCH] bundle images and use relative paths for .md output NOTE: while this same filter could also be used for html, we currently embed all images as base-64 encoded strings to make completely self-contained html with `--embed-resources`, so there is no need. --- doc_build/doc_builder.py | 15 ++- doc_build/filters/filter_bundle_images.py | 100 ++++++++++++++++++ doc_build/filters/filter_railroad.py | 6 +- tests/build_scripts/build_docs.py | 28 ++++- tests/specification/Inlined.md | 6 ++ .../specification/inlined/images/octagon.png | Bin 0 -> 391 bytes .../inlined/images/rectangle.svg | 4 + 7 files changed, 150 insertions(+), 9 deletions(-) create mode 100755 doc_build/filters/filter_bundle_images.py create mode 100644 tests/specification/inlined/images/octagon.png create mode 100644 tests/specification/inlined/images/rectangle.svg diff --git a/doc_build/doc_builder.py b/doc_build/doc_builder.py index 2c9e96f..4a263e7 100644 --- a/doc_build/doc_builder.py +++ b/doc_build/doc_builder.py @@ -124,7 +124,8 @@ def build_docs(self, args): elif len(args.diff) > 2: raise ValueError(f"At most 2 arguments for --diff - got {len(args.diff)}") args.output.mkdir(parents=True, exist_ok=True) - self.get_artifacts_dir(args.output).mkdir(parents=True, exist_ok=True) + artifacts_dir = self.get_artifacts_dir(args.output) + artifacts_dir.mkdir(parents=True, exist_ok=True) if args.diff: combined = self.generate_combined_diff( @@ -146,7 +147,7 @@ def build_docs(self, args): doc_build_filters.extend(["-F", doc_filter]) # Set the cwd to the artifacts dir because it's easier for some filters to work relatively to it - os.chdir(self.get_artifacts_dir(args.output)) + os.chdir(artifacts_dir) shared_command = [ "--defaults", spec, @@ -176,7 +177,7 @@ def build_docs(self, args): # "-V", # "monofontoptions=Scale=0.8", # scale down a bit for better sizing of listings and PEG "-V", - f"AOUSD_ARTIFACTS_ROOT={self.get_artifacts_dir(args.output)}", + f"AOUSD_ARTIFACTS_ROOT={artifacts_dir}", "-V", "colorlinks=true", "-V", "linkcolor=OliveGreen", "-V", "toccolor=OliveGreen", @@ -203,8 +204,14 @@ def build_docs(self, args): if not args.no_md: md = args.output / f"{filename}.md" md_template = self.get_scripts_root() / "template" / "default.md" + bundle_images_filter = self.get_filter("bundle_images") + bundle_images_args = [ + "-M", f"AOUSD_OUTPUT_DIR={args.output}", + "-M", f"AOUSD_IMAGES_ROOT={artifacts_dir}", + "-F", bundle_images_filter, + ] log(f"\tBuilding Markdown to {md}...") - pandoc(shared_command + ["-o", md, "--to", MARKDOWN_OUTPUT_FORMAT, f"--template={md_template}"]) + pandoc(shared_command + bundle_images_args + ["-o", md, "--to", MARKDOWN_OUTPUT_FORMAT, f"--template={md_template}"]) if not args.no_html: html = args.output / f"{filename}.html" diff --git a/doc_build/filters/filter_bundle_images.py b/doc_build/filters/filter_bundle_images.py new file mode 100755 index 0000000..9004f28 --- /dev/null +++ b/doc_build/filters/filter_bundle_images.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +"""Pandoc filter to bundle images into output/images/ and rewrite paths to be relative. + +For each image path (assumed to be under AOUSD_IMAGES_ROOT): + 1. Compute the path relative to AOUSD_IMAGES_ROOT. + 2. Remove any path components named "images". + 3. Copy the image to AOUSD_OUTPUT_DIR/images/. + 4. Rewrite the AST image path to images/ (relative from output/ to output/images/). + +Both absolute and relative image paths are processed. Relative paths are +resolved against the images root directory (the pandoc input file's directory). + +Required pandoc metadata: + AOUSD_IMAGES_ROOT: absolute path to the images root directory + AOUSD_OUTPUT_DIR: absolute path to the output directory + +An in-process dict tracks which source files have been copied to each destination, +detecting collisions where two different sources map to the same destination path. +""" + +import shutil +from pathlib import Path + +from pandocfilters import toJSONFilter, Image + +# Maps rel_key -> str(src_abs) for collision detection within a single pandoc run. +_seen: dict[str, str] = {} + + +def _get_metadata_str(metadata: dict, key: str) -> str: + """Extract a string value from pandoc filter metadata. + + Handles both MetaString (produced by -M on the command line) and + MetaInlines (produced by --metadata-file YAML). + """ + try: + entry = metadata[key] + if entry.get("t") == "MetaString": + return entry["c"] + return entry["c"][0]["c"] + except (KeyError, IndexError, TypeError) as e: + raise KeyError(f"Missing or malformed metadata key {key!r}: {e}") from e + + +def _get_image_rel(src_abs: Path, images_root: Path) -> Path: + """Compute destination relative path under images/, stripping 'images' components.""" + try: + rel = src_abs.relative_to(images_root) + except ValueError: + raise ValueError( + f"Image path {src_abs} is not under images_root {images_root}" + ) + parts = [p for p in rel.parts if p != "images"] + if not parts: + raise ValueError( + f"Image {src_abs} reduces to an empty path after removing 'images' components" + ) + return Path(*parts) + + +def bundle_image(key, value, _format, metadata): + if key != "Image": + return + + image_path = value[2][0] + + images_root = Path(_get_metadata_str(metadata, "AOUSD_IMAGES_ROOT")) + output_dir = Path(_get_metadata_str(metadata, "AOUSD_OUTPUT_DIR")) + + src = Path(image_path) + if not src.is_absolute(): + # Relative paths are relative to the images root (pandoc input file location) + src = images_root / src + + image_rel = _get_image_rel(src, images_root) + + dest = output_dir / "images" / image_rel + rel_key = image_rel.as_posix() + + if rel_key in _seen: + if _seen[rel_key] != str(src): + raise RuntimeError( + f"Image name collision at {rel_key!r}: already mapped from " + f"{_seen[rel_key]!r}, cannot also map from {str(src)!r}" + ) + # Already copied earlier in this run; skip + else: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dest) + _seen[rel_key] = str(src) + + # Relative from output/ (where the .md output file lives) to output/images/. + new_path = (Path("images") / image_rel).as_posix() + + value[2][0] = new_path + return Image(value[0], value[1], value[2]) + + +if __name__ == "__main__": + toJSONFilter(bundle_image) diff --git a/doc_build/filters/filter_railroad.py b/doc_build/filters/filter_railroad.py index a79d68e..b571659 100755 --- a/doc_build/filters/filter_railroad.py +++ b/doc_build/filters/filter_railroad.py @@ -96,8 +96,8 @@ def create_diagram(key, value, format, metadata): while (new := rule.simplify()) != rule: rule = new if not isinstance(rule, Nothing): - filename = f"{build_directory}/{part_name}_{counter}.svg" - f = open(filename, "w") + abs_filename = f"{build_directory}/{part_name}_{counter}.svg" + f = open(abs_filename, "w") structured = split_for_stack(rule.as_railroad()) diagram = railroad.Diagram(structured) diagram.writeStandalone(f.write) @@ -126,7 +126,7 @@ def pixels_to_points(pixels, dpi=96*1.2): # scaling to fit better with the font return [ CodeBlock([ident, classes, keyvals_code], code), - Para([Image([ident, [], keyvals], caption, [filename, typef])]), + Para([Image([ident, [], keyvals], caption, [abs_filename, typef])]), ] diff --git a/tests/build_scripts/build_docs.py b/tests/build_scripts/build_docs.py index 72c3dbb..c8361e1 100644 --- a/tests/build_scripts/build_docs.py +++ b/tests/build_scripts/build_docs.py @@ -1,12 +1,36 @@ #! /usr/bin/env python3 -from doc_build.doc_builder import DocBuilder +import re from pathlib import Path +from doc_build.doc_builder import DocBuilder + test_root = Path(__file__).parent.parent + +def check_no_absolute_image_paths(output_dir: Path): + """Assert that HTML and MD outputs contain no absolute image paths.""" + absolute_path_pattern = re.compile(r'!\[.*?\]\((/[^)]+)\)|src="(/[^"]+\.(svg|png|jpg|jpeg|gif))"') + errors = [] + for suffix in (".html", ".md"): + for output_file in output_dir.glob(f"*{suffix}"): + content = output_file.read_text(encoding="utf-8") + for match in absolute_path_pattern.finditer(content): + abs_path = match.group(1) or match.group(2) + errors.append(f"{output_file}: absolute image path found: {abs_path!r}") + if errors: + raise AssertionError( + "Absolute image paths found in output (should be relative):\n" + + "\n".join(f" {e}" for e in errors) + ) + + class MyDocBuilder(DocBuilder): - pass + + def build_docs(self, args): + result = super().build_docs(args) + check_no_absolute_image_paths(args.output) + return result if __name__ == "__main__": diff --git a/tests/specification/Inlined.md b/tests/specification/Inlined.md index 9f0a3b9..7d8b662 100644 --- a/tests/specification/Inlined.md +++ b/tests/specification/Inlined.md @@ -3,4 +3,10 @@ This section belongs in an external Markdown file and should get inlined during the build preprocess. +Here are test images to verify image bundling (path stripping and subdir preservation): + +![Blue rectangle SVG](inlined/images/rectangle.svg) + +![Steel blue octagon PNG](inlined/images/octagon.png) + TODO: check the todo implementation. \ No newline at end of file diff --git a/tests/specification/inlined/images/octagon.png b/tests/specification/inlined/images/octagon.png new file mode 100644 index 0000000000000000000000000000000000000000..06ce3e0482a89fa05ef9e5096e37453e6256f2bf GIT binary patch literal 391 zcmeAS@N?(olHy`uVBq!ia0vp^DImq`|0^R3E*`fBDYkY$hMyJv%PDPoaU8TTCZngTR3%AY)DT_iz6!zkWUV z>-U-C*S}vWZw{~WxchjjS&(_f(#cnH)|*V8^=fKe(C0mug0d!s@A5kP!7F*?!?>5a zrq{IJE@i7qWnUQ>che=SBmCRMUGJu@U7}Z&YQ9oC?qY`Rp zeYW1W+8W>bsV%N`8CO^JROy2nuR}}XmM#c?`EXuP9Jh~?=*OtEoHvE*kEXqm`w~^8 z@KJDm>vHq_Z&7tuzM8F9D?a!?u5E?66~rBH`njxg HN@xNAI8d)K literal 0 HcmV?d00001 diff --git a/tests/specification/inlined/images/rectangle.svg b/tests/specification/inlined/images/rectangle.svg new file mode 100644 index 0000000..30d7cee --- /dev/null +++ b/tests/specification/inlined/images/rectangle.svg @@ -0,0 +1,4 @@ + + + Test +