Skip to content

Commit 9b90137

Browse files
authored
Merge pull request #50 from aousd/pmolodowitch/bundle-images
bundle images and use relative paths for .md output
2 parents 3e513d4 + f28cbf5 commit 9b90137

7 files changed

Lines changed: 150 additions & 9 deletions

File tree

doc_build/doc_builder.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ def build_docs(self, args):
124124
elif len(args.diff) > 2:
125125
raise ValueError(f"At most 2 arguments for --diff - got {len(args.diff)}")
126126
args.output.mkdir(parents=True, exist_ok=True)
127-
self.get_artifacts_dir(args.output).mkdir(parents=True, exist_ok=True)
127+
artifacts_dir = self.get_artifacts_dir(args.output)
128+
artifacts_dir.mkdir(parents=True, exist_ok=True)
128129

129130
if args.diff:
130131
combined = self.generate_combined_diff(
@@ -146,7 +147,7 @@ def build_docs(self, args):
146147
doc_build_filters.extend(["-F", doc_filter])
147148

148149
# Set the cwd to the artifacts dir because it's easier for some filters to work relatively to it
149-
os.chdir(self.get_artifacts_dir(args.output))
150+
os.chdir(artifacts_dir)
150151
shared_command = [
151152
"--defaults",
152153
spec,
@@ -176,7 +177,7 @@ def build_docs(self, args):
176177
# "-V",
177178
# "monofontoptions=Scale=0.8", # scale down a bit for better sizing of listings and PEG
178179
"-V",
179-
f"AOUSD_ARTIFACTS_ROOT={self.get_artifacts_dir(args.output)}",
180+
f"AOUSD_ARTIFACTS_ROOT={artifacts_dir}",
180181
"-V", "colorlinks=true",
181182
"-V", "linkcolor=OliveGreen",
182183
"-V", "toccolor=OliveGreen",
@@ -203,8 +204,14 @@ def build_docs(self, args):
203204
if not args.no_md:
204205
md = args.output / f"{filename}.md"
205206
md_template = self.get_scripts_root() / "template" / "default.md"
207+
bundle_images_filter = self.get_filter("bundle_images")
208+
bundle_images_args = [
209+
"-M", f"AOUSD_OUTPUT_DIR={args.output}",
210+
"-M", f"AOUSD_IMAGES_ROOT={artifacts_dir}",
211+
"-F", bundle_images_filter,
212+
]
206213
log(f"\tBuilding Markdown to {md}...")
207-
pandoc(shared_command + ["-o", md, "--to", MARKDOWN_OUTPUT_FORMAT, f"--template={md_template}"])
214+
pandoc(shared_command + bundle_images_args + ["-o", md, "--to", MARKDOWN_OUTPUT_FORMAT, f"--template={md_template}"])
208215

209216
if not args.no_html:
210217
html = args.output / f"{filename}.html"
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#!/usr/bin/env python3
2+
"""Pandoc filter to bundle images into output/images/ and rewrite paths to be relative.
3+
4+
For each image path (assumed to be under AOUSD_IMAGES_ROOT):
5+
1. Compute the path relative to AOUSD_IMAGES_ROOT.
6+
2. Remove any path components named "images".
7+
3. Copy the image to AOUSD_OUTPUT_DIR/images/<relative>.
8+
4. Rewrite the AST image path to images/<relative> (relative from output/ to output/images/).
9+
10+
Both absolute and relative image paths are processed. Relative paths are
11+
resolved against the images root directory (the pandoc input file's directory).
12+
13+
Required pandoc metadata:
14+
AOUSD_IMAGES_ROOT: absolute path to the images root directory
15+
AOUSD_OUTPUT_DIR: absolute path to the output directory
16+
17+
An in-process dict tracks which source files have been copied to each destination,
18+
detecting collisions where two different sources map to the same destination path.
19+
"""
20+
21+
import shutil
22+
from pathlib import Path
23+
24+
from pandocfilters import toJSONFilter, Image
25+
26+
# Maps rel_key -> str(src_abs) for collision detection within a single pandoc run.
27+
_seen: dict[str, str] = {}
28+
29+
30+
def _get_metadata_str(metadata: dict, key: str) -> str:
31+
"""Extract a string value from pandoc filter metadata.
32+
33+
Handles both MetaString (produced by -M on the command line) and
34+
MetaInlines (produced by --metadata-file YAML).
35+
"""
36+
try:
37+
entry = metadata[key]
38+
if entry.get("t") == "MetaString":
39+
return entry["c"]
40+
return entry["c"][0]["c"]
41+
except (KeyError, IndexError, TypeError) as e:
42+
raise KeyError(f"Missing or malformed metadata key {key!r}: {e}") from e
43+
44+
45+
def _get_image_rel(src_abs: Path, images_root: Path) -> Path:
46+
"""Compute destination relative path under images/, stripping 'images' components."""
47+
try:
48+
rel = src_abs.relative_to(images_root)
49+
except ValueError:
50+
raise ValueError(
51+
f"Image path {src_abs} is not under images_root {images_root}"
52+
)
53+
parts = [p for p in rel.parts if p != "images"]
54+
if not parts:
55+
raise ValueError(
56+
f"Image {src_abs} reduces to an empty path after removing 'images' components"
57+
)
58+
return Path(*parts)
59+
60+
61+
def bundle_image(key, value, _format, metadata):
    """Pandoc filter action that bundles a local image and rewrites its path.

    For every ``Image`` element whose target is a local file, the file is
    copied to ``AOUSD_OUTPUT_DIR/images/<relative>`` and the element's target
    is rewritten to ``images/<relative>``, where ``<relative>`` is the path
    computed by ``_get_image_rel``.

    Targets that are remote URLs (``scheme://...``) or ``data:`` URIs are
    left untouched: there is no local file to copy, and treating them as
    relative filesystem paths would fail in ``shutil.copy2`` with a
    misleading "file not found" error.

    Args:
        key: Type name of the AST element being visited.
        value: Element content; for an Image this is ``[attr, caption, target]``
            with ``target[0]`` being the image path.
        _format: Output format name (unused).
        metadata: Document metadata; must provide ``AOUSD_IMAGES_ROOT`` and
            ``AOUSD_OUTPUT_DIR``.

    Returns:
        A replacement ``Image`` element, or ``None`` to leave the element
        unchanged (non-image elements and non-local image targets).

    Raises:
        RuntimeError: If two different source files map to the same
            destination path within one run.
    """
    if key != "Image":
        return None

    attr, caption, target = value[0], value[1], value[2]
    image_path = target[0]

    # Nothing to bundle for remote or inline-encoded images.
    if image_path.startswith("data:") or "://" in image_path:
        return None

    images_root = Path(_get_metadata_str(metadata, "AOUSD_IMAGES_ROOT"))
    output_dir = Path(_get_metadata_str(metadata, "AOUSD_OUTPUT_DIR"))

    src = Path(image_path)
    if not src.is_absolute():
        # Relative paths are relative to the images root (pandoc input file location)
        src = images_root / src

    image_rel = _get_image_rel(src, images_root)
    rel_key = image_rel.as_posix()
    src_key = str(src)

    previous = _seen.get(rel_key)
    if previous is None:
        # First time this destination is requested in this run: copy it.
        dest = output_dir / "images" / image_rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
        _seen[rel_key] = src_key
    elif previous != src_key:
        raise RuntimeError(
            f"Image name collision at {rel_key!r}: already mapped from "
            f"{previous!r}, cannot also map from {src_key!r}"
        )
    # Otherwise the same source was already copied earlier in this run.

    # Relative from output/ (where the .md output file lives) to output/images/.
    new_path = (Path("images") / image_rel).as_posix()

    # Keep the remaining target fields (the title) as they were.
    return Image(attr, caption, [new_path, *target[1:]])
97+
98+
99+
if __name__ == "__main__":
100+
toJSONFilter(bundle_image)

doc_build/filters/filter_railroad.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ def create_diagram(key, value, format, metadata):
9696
while (new := rule.simplify()) != rule:
9797
rule = new
9898
if not isinstance(rule, Nothing):
99-
filename = f"{build_directory}/{part_name}_{counter}.svg"
100-
f = open(filename, "w")
99+
abs_filename = f"{build_directory}/{part_name}_{counter}.svg"
100+
f = open(abs_filename, "w")
101101
structured = split_for_stack(rule.as_railroad())
102102
diagram = railroad.Diagram(structured)
103103
diagram.writeStandalone(f.write)
@@ -126,7 +126,7 @@ def pixels_to_points(pixels, dpi=96*1.2): # scaling to fit better with the font
126126

127127
return [
128128
CodeBlock([ident, classes, keyvals_code], code),
129-
Para([Image([ident, [], keyvals], caption, [filename, typef])]),
129+
Para([Image([ident, [], keyvals], caption, [abs_filename, typef])]),
130130
]
131131

132132

tests/build_scripts/build_docs.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,36 @@
11
#! /usr/bin/env python3
22

3-
from doc_build.doc_builder import DocBuilder
3+
import re
44
from pathlib import Path
55

6+
from doc_build.doc_builder import DocBuilder
7+
68
test_root = Path(__file__).parent.parent
79

10+
11+
def check_no_absolute_image_paths(output_dir: Path):
    """Fail if any generated HTML or Markdown file references an image by absolute path.

    Scans the ``*.html`` and then the ``*.md`` files directly inside
    ``output_dir`` for Markdown image links whose target starts with ``/`` and
    for ``src="..."`` attributes pointing at an absolute path ending in a
    known image extension.

    Raises:
        AssertionError: Listing every offending file and path found.
    """
    finder = re.compile(r'!\[.*?\]\((/[^)]+)\)|src="(/[^"]+\.(svg|png|jpg|jpeg|gif))"')

    # HTML files first, then Markdown, so the report order is stable per kind.
    candidates = [
        path
        for extension in (".html", ".md")
        for path in output_dir.glob(f"*{extension}")
    ]

    problems = []
    for path in candidates:
        text = path.read_text(encoding="utf-8")
        for hit in finder.finditer(text):
            # Group 1 is the Markdown link target, group 2 the HTML src value.
            offending = hit.group(1) or hit.group(2)
            problems.append(f"{path}: absolute image path found: {offending!r}")

    if not problems:
        return

    listing = "\n".join(f" {problem}" for problem in problems)
    raise AssertionError(
        "Absolute image paths found in output (should be relative):\n" + listing
    )
26+
27+
828
class MyDocBuilder(DocBuilder):
9-
pass
29+
30+
def build_docs(self, args):
31+
result = super().build_docs(args)
32+
check_no_absolute_image_paths(args.output)
33+
return result
1034

1135

1236
if __name__ == "__main__":

tests/specification/Inlined.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,10 @@
33
This section belongs in an external Markdown file and should get inlined
44
during the build preprocess.
55

6+
Here are test images to verify image bundling (path stripping and subdir preservation):
7+
8+
![Blue rectangle SVG](inlined/images/rectangle.svg)
9+
10+
![Steel blue octagon PNG](inlined/images/octagon.png)
11+
612
TODO: check the todo implementation.
391 Bytes
Loading
Lines changed: 4 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)