Skip to content

Commit e707541

Browse files
authored
Add PR change-validation workflow for wiki/docs/assets content (#260)
Add PR change validation workflow and CI validator scripts.
1 parent fd67069 commit e707541

5 files changed

Lines changed: 460 additions & 0 deletions

File tree

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env python3
2+
"""CI orchestration for incoming-change validation."""
3+
4+
from __future__ import annotations
5+
6+
import argparse
7+
import subprocess
8+
import sys
9+
from pathlib import Path
10+
11+
# Repository root, taken as the third ancestor (parents[2]) of this script's
# resolved location.
ROOT = Path(__file__).resolve().parents[2]
# Directory containing the validator scripts that main() launches.
CI_SCRIPTS = ROOT.joinpath(".github", "scripts")
# Only paths starting with one of these prefixes are validated.
ALLOWED_PREFIXES = ("wiki/", "docs/", "assets/")
14+
15+
16+
def _git_changed(base: str, head: str) -> list[str]:
    """Return the paths git reports as changed between ``base`` and ``head``.

    Runs ``git diff --name-only -z`` on the three-dot range ``base...head``
    from the repository root, so the result does not depend on the caller's
    working directory (the validators are launched from ``ROOT`` as well).

    ``-z`` makes git emit NUL-terminated, unquoted path names.  Without it,
    names containing non-ASCII or special characters are printed C-quoted
    (``"wiki/\\303\\251.md"``), which never match the scope prefixes and
    would silently escape validation.

    Args:
        base: Revision to compare against.
        head: Revision containing the incoming changes.

    Returns:
        The changed paths exactly as git prints them, in git's order.

    Raises:
        RuntimeError: If git exits with a non-zero status; the message is
            git's stderr (or stdout when stderr is empty).
    """
    proc = subprocess.run(
        ["git", "diff", "--name-only", "-z", f"{base}...{head}"],
        capture_output=True,
        text=True,
        cwd=ROOT,
    )
    if proc.returncode != 0:
        raise RuntimeError((proc.stderr or proc.stdout).strip())
    # Entries are NUL-terminated, so the final split element is empty.
    return [name for name in proc.stdout.split("\0") if name]
21+
22+
23+
def _run(cmd: list[str]) -> int:
    """Execute ``cmd`` with ``ROOT`` as working directory and return its exit status.

    The child's output is not captured, so it goes straight to this
    process's stdout/stderr.
    """
    completed = subprocess.run(cmd, cwd=ROOT)
    return completed.returncode
25+
26+
27+
def _in_scope(path: str) -> bool:
    """Tell whether ``path`` begins with one of ``ALLOWED_PREFIXES``."""
    return any(path.startswith(prefix) for prefix in ALLOWED_PREFIXES)
29+
30+
31+
def _all_files_in_scope() -> list[str]:
    """List every file found under the allowed top-level directories.

    Each prefix in ``ALLOWED_PREFIXES`` is mapped to a directory under
    ``ROOT``; directories that do not exist are skipped.  The result holds
    paths relative to ``ROOT``, as strings, in sorted order.
    """
    top_dirs = (ROOT / prefix.rstrip("/") for prefix in ALLOWED_PREFIXES)
    found = [
        str(entry.relative_to(ROOT))
        for top in top_dirs
        if top.exists()
        for entry in top.rglob("*")
        if entry.is_file()
    ]
    found.sort()
    return found
41+
42+
43+
def main() -> int:
    """Pick the files to check and hand them to the validator scripts.

    File selection, in priority order:
      1. ``--all``: every file under the allowed directories;
      2. positional ``paths``, filtered to the allowed directories;
      3. otherwise the git diff between ``--base`` and ``--head``, filtered
         the same way.

    Markdown files go to the markdown and image validators, YAML files to the
    YAML validator.  Every applicable validator runs even if an earlier one
    failed.

    Returns:
        0 when every validator that ran exited with status 0, otherwise 1.
    """
    parser = argparse.ArgumentParser(description="Validate changed files in CI")
    parser.add_argument("--base", default="origin/master")
    parser.add_argument("--head", default="HEAD")
    parser.add_argument("--all", action="store_true")
    parser.add_argument("paths", nargs="*")
    args = parser.parse_args()

    if args.all:
        changed = _all_files_in_scope()
    else:
        # Explicit paths win; an empty list falls back to the git diff.
        candidates = args.paths or _git_changed(args.base, args.head)
        changed = [p for p in candidates if _in_scope(p)]

    markdown_paths = [p for p in changed if p.lower().endswith((".md", ".markdown"))]
    yaml_paths = [p for p in changed if p.lower().endswith((".yml", ".yaml"))]

    exit_codes: list[int] = []

    if not markdown_paths:
        print("Skipping markdown/image validation (no markdown files changed).")
    else:
        for script in ("validate_markdown_ci.py", "validate_images_ci.py"):
            exit_codes.append(_run([sys.executable, str(CI_SCRIPTS / script), *markdown_paths]))

    if not yaml_paths:
        print("Skipping YAML validation (no yaml files changed).")
    else:
        exit_codes.append(_run([sys.executable, str(CI_SCRIPTS / "validate_yaml_ci.py"), *yaml_paths]))

    if any(code != 0 for code in exit_codes):
        print("\nIncoming change validation failed.")
        return 1

    print("\nIncoming change validation passed.")
    return 0
80+
81+
82+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/usr/bin/env python3
2+
"""CI image validation for changed markdown files only."""
3+
4+
from __future__ import annotations
5+
6+
import argparse
7+
import re
8+
import sys
9+
from pathlib import Path
10+
from urllib.parse import unquote
11+
12+
# Markdown image syntax ``![alt](target)``; group 1 is everything between the
# parentheses (destination plus any title).
IMG_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
# Lower-cased file suffixes that count as markdown.
MD_EXTS = {".md", ".markdown"}
# Matches text that starts with a triple-backtick fence marker.
FENCE_RE = re.compile(r"^```")
15+
16+
17+
def _clean_target(target: str) -> str:
    """Extract the bare destination from the text inside ``(...)`` of a link.

    Surrounding whitespace, an optional title after the destination, angle
    brackets around the destination, and any ``#fragment`` are removed.

    Args:
        target: Raw text captured between the parentheses.

    Returns:
        The destination without title or fragment.  An empty string is
        returned for a blank target or a fragment-only target such as
        ``#section``.
    """
    stripped = target.strip()
    if not stripped:
        # "".split()[0] would raise IndexError on input like "![x]( )".
        return ""
    if stripped.startswith("<") and ">" in stripped:
        # Angle-bracketed destinations may contain spaces, so take everything
        # up to the closing bracket instead of the first whitespace token.
        destination = stripped[1:stripped.index(">")]
    else:
        destination = stripped.split()[0].strip("<>")
    return destination.split("#", 1)[0]
19+
20+
21+
def _iter_markdown(root: Path, inputs: list[str]) -> list[Path]:
    """Turn command-line path arguments into a sorted list of markdown files.

    Relative inputs are joined to ``root`` and resolved; absolute inputs are
    used as given.  Only existing files whose lower-cased suffix is in
    ``MD_EXTS`` are kept, and duplicates are dropped.
    """
    selected: set[Path] = set()
    for item in inputs:
        candidate = Path(item)
        if not candidate.is_absolute():
            candidate = (root / item).resolve()
        if candidate.is_file() and candidate.suffix.lower() in MD_EXTS:
            selected.add(candidate)
    return sorted(selected)
28+
29+
30+
def _mask_fenced_code_blocks(text: str) -> str:
    """Blank out fenced code blocks while keeping the number of lines.

    Each fence marker line, and each line between an opening and a closing
    fence, is replaced by a bare newline.  Line numbers computed on the
    result therefore still correspond to the original text.
    """
    pieces: list[str] = []
    inside = False
    for raw_line in text.splitlines(keepends=True):
        is_fence = FENCE_RE.match(raw_line.strip()) is not None
        if is_fence:
            inside = not inside
        # The fence line itself is masked regardless of the new state.
        pieces.append("\n" if (is_fence or inside) else raw_line)
    return "".join(pieces)
41+
42+
43+
44+
45+
def _candidate_targets(base: Path) -> list[Path]:
    """List the filesystem paths a reference to ``base`` may stand for.

    A path that already has a suffix is the only candidate.  For a
    suffix-less path the candidates are, in order: the path itself, the path
    with ``.md`` / ``.markdown`` appended, and ``index.md`` /
    ``index.markdown`` inside it.
    """
    if base.suffix:
        return [base]
    stem = str(base)
    return [
        base,
        Path(stem + ".md"),
        Path(stem + ".markdown"),
        base / "index.md",
        base / "index.markdown",
    ]
54+
55+
56+
def _resolve_target(raw: str, md: Path, root: Path) -> Path | None:
    """Find the existing path that ``raw`` refers to, if any.

    ``raw`` is percent-decoded first.  A target starting with ``/`` is looked
    up relative to ``root``; any other target is relative to the directory of
    the markdown file ``md``.  The first existing candidate is returned, or
    ``None`` when none exists.
    """
    decoded = unquote(raw)
    if decoded.startswith("/"):
        anchor = root / decoded.lstrip("/")
    else:
        anchor = md.parent / decoded
    return next((option for option in _candidate_targets(anchor) if option.exists()), None)
63+
64+
def main() -> int:
    """Check that local image references in the given markdown files exist.

    Positional arguments are file paths; ``--root`` (default ``.``) is the
    directory that relative inputs and ``/``-prefixed image targets are
    resolved against.  Fenced code blocks are masked before scanning.
    Targets starting with ``http://``, ``https://`` or ``data:`` are not
    checked.

    A file that cannot be read or decoded as UTF-8 is reported as an issue
    instead of aborting the run, so the remaining files are still checked.

    Returns:
        0 when there is nothing to check or every image resolves, 1 when at
        least one issue was found.
    """
    parser = argparse.ArgumentParser(description="Validate image references in changed markdown")
    parser.add_argument("paths", nargs="*", help="Changed file paths")
    parser.add_argument("--root", default=".", help="Repository root")
    args = parser.parse_args()

    root = Path(args.root).resolve()
    md_files = _iter_markdown(root, args.paths)
    if not md_files:
        print("Image CI validation skipped (no markdown files changed).")
        return 0

    issues: list[str] = []
    for md in md_files:
        try:
            source = md.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            issues.append(f"{md}:1 unreadable markdown: {exc}")
            continue
        text = _mask_fenced_code_blocks(source)
        for m in IMG_RE.finditer(text):
            raw = _clean_target(m.group(1))
            if not raw or raw.startswith(("http://", "https://", "data:")):
                continue
            if _resolve_target(raw, md, root) is None:
                # Masking keeps the line count, so this is the real line number.
                line = text.count("\n", 0, m.start()) + 1
                issues.append(f"{md}:{line} missing image: {raw}")

    if issues:
        print("Image CI validation failed:")
        for issue in issues:
            print(f"- {issue}")
        return 1

    print(f"Image CI validation passed ({len(md_files)} markdown file(s)).")
    return 0
96+
97+
98+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#!/usr/bin/env python3
2+
"""CI markdown validation for changed markdown files."""
3+
4+
from __future__ import annotations
5+
6+
import argparse
7+
import re
8+
import subprocess
9+
import sys
10+
from pathlib import Path
11+
from urllib.parse import unquote
12+
13+
# Lower-cased file suffixes that count as markdown.
MD_EXTS = {".md", ".markdown"}
# Inline link ``[text](target)`` with non-empty text; group 1 is the text
# between the parentheses.
LINK_RE = re.compile(r"\[[^\]]+\]\(([^)]+)\)")
# Image ``![alt](target)``; the alt text may be empty.
IMAGE_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
# Matches text that starts with a triple-backtick fence marker.
FENCE_RE = re.compile(r"^```")
# ``$...$`` on a single line; backslash-escaped characters are allowed inside.
INLINE_MATH_RE = re.compile(r"\$(?:\\.|[^\n$])+\$")
# ``$$...$$``, possibly spanning several lines (shortest match).
BLOCK_MATH_RE = re.compile(r"\$\$(?:.|\n)*?\$\$", re.MULTILINE)
# Lower-case alphanumeric words joined by single hyphens, ending in ``.md``.
VALID_MD_BASENAME_RE = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*\.md$")
20+
21+
22+
def _clean_target(target: str) -> str:
    """Extract the bare destination from the text inside ``(...)`` of a link.

    Surrounding whitespace, an optional title after the destination, angle
    brackets around the destination, and any ``#fragment`` are removed.

    Args:
        target: Raw text captured between the parentheses.

    Returns:
        The destination without title or fragment.  An empty string is
        returned for a blank target or a fragment-only target such as
        ``#section``.
    """
    stripped = target.strip()
    if not stripped:
        # "".split()[0] would raise IndexError on input like "[x]( )".
        return ""
    if stripped.startswith("<") and ">" in stripped:
        # Angle-bracketed destinations may contain spaces, so take everything
        # up to the closing bracket instead of the first whitespace token.
        destination = stripped[1:stripped.index(">")]
    else:
        destination = stripped.split()[0].strip("<>")
    return destination.split("#", 1)[0]
24+
25+
26+
def _is_external(target: str) -> bool:
    """Tell whether ``target`` is a URI that must not be resolved on disk.

    Recognised schemes are ``http``, ``https``, ``mailto``, ``tel`` and
    ``data``.  ``data:`` is included because an inline data URI is not a
    file path and would otherwise be reported as a broken target.  URI
    schemes are case-insensitive, so the comparison ignores case.
    """
    return target.lower().startswith(("http://", "https://", "mailto:", "tel:", "data:"))
28+
29+
30+
def _iter_markdown_paths(root: Path, inputs: list[str]) -> list[Path]:
    """Map path arguments to a sorted, de-duplicated list of markdown files.

    A relative argument is joined to ``root`` and resolved; an absolute one
    is taken as is.  Arguments that are not existing files, or whose
    lower-cased suffix is not in ``MD_EXTS``, are ignored.
    """
    def _locate(arg: str) -> Path:
        given = Path(arg)
        return given if given.is_absolute() else (root / arg).resolve()

    located = (_locate(arg) for arg in inputs)
    unique = {p for p in located if p.is_file() and p.suffix.lower() in MD_EXTS}
    return sorted(unique)
37+
38+
39+
def _validate_front_matter(lines: list[str], path: Path) -> list[str]:
    """Check the front matter block at the top of a markdown file.

    Nothing is checked unless the first line is ``---``.  When it is, the
    next ``---`` line closes the block: an issue is reported if that closing
    line immediately follows the opening one (empty block) or if no closing
    line exists.

    Args:
        lines: The file content split into lines.
        path: File path, used only in the issue messages.

    Returns:
        Zero or one issue message, always attributed to line 1.
    """
    if not lines or lines[0].strip() != "---":
        return []
    for idx, line in enumerate(lines[1:], start=1):
        if line.strip() != "---":
            continue
        if idx == 1:
            return [f"{path}:1 empty front matter block"]
        return []
    return [f"{path}:1 missing closing front matter delimiter '---'"]
49+
50+
51+
def _validate_filename(path: Path, root: Path) -> list[str]:
    """Check that a markdown file name is lower-case kebab-case.

    ``index.md`` and ``__all_subsections.md`` are exempt.  Any other name
    must be entirely lower-case and match ``VALID_MD_BASENAME_RE``.

    Args:
        path: The markdown file being validated.
        root: Directory the reported path is made relative to.

    Returns:
        An empty list when the name is acceptable, otherwise a single issue
        message attributed to line 1.
    """
    name = path.name
    if name in {"index.md", "__all_subsections.md"}:
        return []
    try:
        rel = path.relative_to(root)
    except ValueError:
        # The file lies outside root (absolute argument, or a symlink that
        # resolved elsewhere); report the full path rather than crash.
        rel = path
    if name.lower() != name:
        return [f"{path}:1 invalid markdown filename casing: {rel}"]
    # NOTE(review): the pattern only accepts a ".md" suffix, so ".markdown"
    # files always fail this check -- confirm that is intended.
    if not VALID_MD_BASENAME_RE.match(name):
        return [f"{path}:1 invalid markdown filename format (use kebab-case): {rel}"]
    return []
61+
62+
63+
def _candidate_targets(base: Path) -> list[Path]:
    """Return the paths to probe when resolving a link to ``base``.

    ``base`` always comes first.  When it has no suffix, four fallbacks
    follow in this order: ``base`` + ``.md``, ``base`` + ``.markdown``,
    ``base/index.md`` and ``base/index.markdown``.
    """
    options = [base]
    if base.suffix:
        return options
    text_form = str(base)
    options.extend(Path(text_form + ext) for ext in (".md", ".markdown"))
    options.extend(base / index_name for index_name in ("index.md", "index.markdown"))
    return options
72+
73+
74+
def _resolve_target(raw: str, path: Path, root: Path) -> Path | None:
    """Locate the existing path a link target points at.

    The target is percent-decoded.  A leading ``/`` anchors it at ``root``;
    otherwise it is relative to the directory containing ``path``.  Returns
    the first candidate that exists, or ``None`` when none does.
    """
    decoded = unquote(raw)
    if decoded.startswith("/"):
        start = root / decoded.lstrip("/")
    else:
        start = path.parent / decoded
    existing = [option for option in _candidate_targets(start) if option.exists()]
    return existing[0] if existing else None
81+
82+
83+
def _validate_links(text: str, path: Path, root: Path) -> list[str]:
    """Report link and image targets in ``text`` that do not resolve.

    Fragment-only targets, empty targets and external URIs are skipped.
    Every other target must resolve via ``_resolve_target``.

    Args:
        text: Markdown content to scan (callers pass it already masked).
        path: The markdown file, used for relative resolution and messages.
        root: Directory that ``/``-prefixed targets are resolved against.

    Returns:
        One message per broken target: all links first, then all images,
        each in document order.
    """
    issues: list[str] = []
    # LINK_RE also matches the "[alt](target)" tail of image syntax.  Record
    # where image targets start so the link pass can leave them to the image
    # pass; otherwise a broken image is reported twice.
    image_target_starts = {m.start(1) for m in IMAGE_RE.finditer(text)}
    for regex, kind in ((LINK_RE, "link"), (IMAGE_RE, "image")):
        for m in regex.finditer(text):
            if kind == "link" and m.start(1) in image_target_starts:
                continue
            raw = _clean_target(m.group(1))
            if not raw or raw.startswith("#") or _is_external(raw):
                continue
            if _resolve_target(raw, path, root) is None:
                line = text.count("\n", 0, m.start()) + 1
                issues.append(f"{path}:{line} broken {kind} target: {raw}")
    return issues
94+
95+
96+
def _mask_fenced_code_blocks(text: str) -> str:
    """Replace fenced code blocks with blank lines, preserving the line count.

    A line whose stripped form matches ``FENCE_RE`` toggles the "inside a
    fence" state.  Fence lines and all lines inside a fence become a bare
    newline; every other line is kept unchanged.
    """
    out: list[str] = []
    fenced = False
    for segment in text.splitlines(keepends=True):
        if FENCE_RE.match(segment.strip()):
            fenced = not fenced
            keep = False
        else:
            keep = not fenced
        out.append(segment if keep else "\n")
    return "".join(out)
107+
108+
109+
def _mask_math_spans(text: str) -> str:
    """Overwrite math spans with spaces, leaving newlines where they were.

    Spans matching ``BLOCK_MATH_RE`` are blanked first, then spans matching
    ``INLINE_MATH_RE``.  The result has the same length and the same line
    breaks as the input.
    """
    def _blank(match: re.Match[str]) -> str:
        # Blank each line of the span separately so newlines survive.
        return "\n".join(" " * len(part) for part in match.group(0).split("\n"))

    without_blocks = BLOCK_MATH_RE.sub(_blank, text)
    return INLINE_MATH_RE.sub(_blank, without_blocks)
116+
117+
118+
def main() -> int:
    """Validate the markdown files named on the command line.

    Each file is checked for an acceptable file name, a well-formed front
    matter block and resolvable link/image targets.  Fenced code blocks and
    math spans are masked before links are scanned.  Paths are resolved
    against the current working directory.  A file that cannot be read is
    reported as an issue and counted as failed.

    Returns:
        0 when there is nothing to check or no issue was found, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Validate changed markdown files")
    parser.add_argument("paths", nargs="*", help="Changed file paths")
    args = parser.parse_args()

    root = Path(".").resolve()
    md_files = _iter_markdown_paths(root, args.paths)
    if not md_files:
        print("Markdown CI validation skipped (no markdown files changed).")
        return 0

    problems: list[str] = []
    bad_files: set[Path] = set()
    for md_path in md_files:
        try:
            content = md_path.read_text(encoding="utf-8")
        except Exception as exc:
            found = [f"{md_path}:1 unreadable markdown: {exc}"]
        else:
            found = [
                *_validate_filename(md_path, root),
                *_validate_front_matter(content.splitlines(), md_path),
                *_validate_links(
                    _mask_math_spans(_mask_fenced_code_blocks(content)), md_path, root
                ),
            ]
        if found:
            problems.extend(found)
            bad_files.add(md_path)

    failed = len(bad_files)
    passed = len(md_files) - failed
    if problems:
        print("Markdown validation failed:")
        for problem in problems:
            print(f"- {problem}")
        print(f"\nSummary: {passed} file(s) passed, {failed} file(s) failed.")
        return 1

    print(f"Markdown validation passed ({len(md_files)} file(s)).")
    print(f"Summary: {passed} file(s) passed, {failed} file(s) failed.")
    return 0
159+
160+
161+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)