diff --git a/.github/scripts/validate_changed_files_ci.py b/.github/scripts/validate_changed_files_ci.py
new file mode 100755
index 00000000..25f4e256
--- /dev/null
+++ b/.github/scripts/validate_changed_files_ci.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""CI orchestration for incoming-change validation."""
+
+from __future__ import annotations
+
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[2]
+CI_SCRIPTS = ROOT / ".github" / "scripts"
+ALLOWED_PREFIXES = ("wiki/", "docs/", "assets/")
+
+
+def _git_changed(base: str, head: str) -> list[str]:
+    """Return the paths changed on *head* since it diverged from *base*.
+
+    Deleted paths are left out because there is nothing left to validate.
+
+    Raises:
+        RuntimeError: if ``git diff`` exits non-zero; the message is git's output.
+    """
+    proc = subprocess.run(
+        ["git", "diff", "--name-only", "--diff-filter=d", f"{base}...{head}"],
+        capture_output=True,
+        text=True,
+        cwd=ROOT,  # same working directory as _run(); do not rely on the caller's cwd
+    )
+    if proc.returncode != 0:
+        raise RuntimeError((proc.stderr or proc.stdout).strip())
+    return [line.strip() for line in proc.stdout.splitlines() if line.strip()]
+
+
+def _run(cmd: list[str]) -> int:
+    """Run *cmd* from the repository root and return its exit status."""
+    return subprocess.run(cmd, cwd=ROOT).returncode
+
+
+def _in_scope(path: str) -> bool:
+    """Return True when *path* starts with one of ALLOWED_PREFIXES."""
+    return path.startswith(ALLOWED_PREFIXES)
+
+
+def _all_files_in_scope() -> list[str]:
+    """Return every file under the allowed directories, relative to ROOT, sorted.
+
+    Paths use forward slashes on every platform so they match ALLOWED_PREFIXES
+    and look like ``git diff --name-only`` output.
+    """
+    files: list[str] = []
+    for prefix in ALLOWED_PREFIXES:
+        base = ROOT / prefix.rstrip("/")
+        if not base.exists():
+            continue
+        files.extend(
+            path.relative_to(ROOT).as_posix() for path in base.rglob("*") if path.is_file()
+        )
+    return sorted(files)
+
+
+def main() -> int:
+    """Pick the files to check and run the per-type validators on them.
+
+    Returns:
+        0 when every validator that ran exited with 0, otherwise 1.
+    """
+    parser = argparse.ArgumentParser(description="Validate changed files in CI")
+    parser.add_argument("--base", default="origin/master")
+    parser.add_argument("--head", default="HEAD")
+    parser.add_argument("--all", action="store_true")
+    parser.add_argument("paths", nargs="*")
+    args = parser.parse_args()
+
+    if args.all:
+        changed = _all_files_in_scope()
+    elif args.paths:
+        changed = [p for p in args.paths if _in_scope(p)]
+    else:
+        changed = [p for p in _git_changed(args.base, args.head) if _in_scope(p)]
+
+    md_files = [p for p in changed if p.lower().endswith((".md", ".markdown"))]
+    yaml_files = [p for p in changed if p.lower().endswith((".yml", ".yaml"))]
+
+    status = 0
+
+    if md_files:
+        status |= _run([sys.executable, str(CI_SCRIPTS / "validate_markdown_ci.py"), *md_files])
+        status |= _run([sys.executable, str(CI_SCRIPTS / "validate_images_ci.py"), *md_files])
+    else:
+        print("Skipping markdown/image validation (no markdown files changed).")
+
+    if yaml_files:
+        status |= _run([sys.executable, str(CI_SCRIPTS / "validate_yaml_ci.py"), *yaml_files])
+    else:
+        print("Skipping YAML validation (no yaml files changed).")
+
+    if status != 0:
+        print("\nIncoming change validation failed.")
+        return 1
+
+    print("\nIncoming change validation passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/validate_images_ci.py b/.github/scripts/validate_images_ci.py
new file mode 100755
index 00000000..f07aa593
--- /dev/null
+++ b/.github/scripts/validate_images_ci.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""CI image validation for changed markdown files only."""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+from urllib.parse import unquote
+
+IMG_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
+MD_EXTS = {".md", ".markdown"}
+FENCE_RE = re.compile(r"^```")
+
+
+def _clean_target(target: str) -> str:
+    """Reduce the text inside ``(...)`` to a bare path.
+
+    Keeps the first whitespace-separated token, strips surrounding ``<>`` and
+    drops any ``#fragment``. A blank body such as ``![alt]( )`` yields an empty
+    string rather than raising IndexError.
+    """
+    tokens = target.split()
+    if not tokens:
+        return ""
+    return tokens[0].strip("<>").split("#", 1)[0]
+
+
+def _iter_markdown(root: Path, inputs: list[str]) -> list[Path]:
+    """Return the existing markdown files among *inputs*, sorted and de-duplicated.
+
+    Relative inputs are resolved against *root*; absolute inputs are used as given.
+    """
+    out: list[Path] = []
+    for raw in inputs:
+        p = (root / raw).resolve() if not Path(raw).is_absolute() else Path(raw)
+        if p.is_file() and p.suffix.lower() in MD_EXTS:
+            out.append(p)
+    return sorted(set(out))
+
+
+def _mask_fenced_code_blocks(text: str) -> str:
+    """Replace fence lines and fenced content with bare newlines.
+
+    Lines outside fences are kept unchanged, so counting newlines in the result
+    still gives the original line numbers.
+    """
+    lines = text.splitlines(keepends=True)
+    masked: list[str] = []
+    in_fence = False
+    for line in lines:
+        if FENCE_RE.match(line.strip()):
+            in_fence = not in_fence
+            masked.append("\n")
+            continue
+        masked.append("\n" if in_fence else line)
+    return "".join(masked)
+
+
+def _candidate_targets(base: Path) -> list[Path]:
+    """Return the paths to try for *base*, with *base* itself first.
+
+    Suffix-less targets also get ``.md``/``.markdown`` siblings and the
+    ``index`` pages inside *base* as candidates.
+    """
+    s = str(base)
+    cands = [base]
+    if not base.suffix:
+        cands.append(Path(s + ".md"))
+        cands.append(Path(s + ".markdown"))
+        cands.append(base / "index.md")
+        cands.append(base / "index.markdown")
+    return cands
+
+
+def _resolve_target(raw: str, md: Path, root: Path) -> Path | None:
+    """Return the first existing candidate for *raw*, or None.
+
+    Targets starting with ``/`` are looked up under *root*; all others are
+    relative to the directory containing *md*. Percent-escapes are decoded first.
+    """
+    cleaned = unquote(raw)
+    base = (root / cleaned.lstrip("/")) if cleaned.startswith("/") else (md.parent / cleaned)
+    for cand in _candidate_targets(base):
+        if cand.exists():
+            return cand
+    return None
+
+
+def main() -> int:
+    """Report image references in the given markdown files that do not resolve.
+
+    Returns:
+        0 when nothing is missing (or no markdown file was given), 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Validate image references in changed markdown")
+    parser.add_argument("paths", nargs="*", help="Changed file paths")
+    parser.add_argument("--root", default=".", help="Repository root")
+    args = parser.parse_args()
+
+    root = Path(args.root).resolve()
+    md_files = _iter_markdown(root, args.paths)
+    if not md_files:
+        print("Image CI validation skipped (no markdown files changed).")
+        return 0
+
+    issues: list[str] = []
+    for md in md_files:
+        try:
+            source = md.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            # Report the file and carry on instead of dying with a traceback.
+            issues.append(f"{md}:1 unreadable markdown: {exc}")
+            continue
+        text = _mask_fenced_code_blocks(source)
+        for m in IMG_RE.finditer(text):
+            raw = _clean_target(m.group(1))
+            if not raw or raw.startswith(("http://", "https://", "data:")):
+                continue
+            target = _resolve_target(raw, md, root)
+            if target is None:
+                line = text.count("\n", 0, m.start()) + 1
+                issues.append(f"{md}:{line} missing image: {raw}")
+
+    if issues:
+        print("Image CI validation failed:")
+        for issue in issues:
+            print(f"- {issue}")
+        return 1
+
+    print(f"Image CI validation passed ({len(md_files)} markdown file(s)).")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/validate_markdown_ci.py b/.github/scripts/validate_markdown_ci.py
new file mode 100755
index 00000000..aa2c5fef
--- /dev/null
+++ b/.github/scripts/validate_markdown_ci.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""CI markdown validation for changed markdown files."""
+
+from __future__ import annotations
+
+import argparse
+import re
+import subprocess
+import sys
+from pathlib import Path
+from urllib.parse import unquote
+
+MD_EXTS = {".md", ".markdown"}
+# The (?<!!) lookbehind keeps image syntax out of LINK_RE so that a broken image
+# is reported once (by IMAGE_RE) instead of once as a link and once as an image.
+LINK_RE = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
+IMAGE_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
+FENCE_RE = re.compile(r"^```")
+INLINE_MATH_RE = re.compile(r"\$(?:\\.|[^\n$])+\$")
+BLOCK_MATH_RE = re.compile(r"\$\$(?:.|\n)*?\$\$", re.MULTILINE)
+# Basenames must be kebab-case; both extensions listed in MD_EXTS are accepted.
+VALID_MD_BASENAME_RE = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*\.(?:md|markdown)$")
+
+
+def _clean_target(target: str) -> str:
+    """Reduce the text inside ``(...)`` to a bare path.
+
+    Keeps the first whitespace-separated token, strips surrounding ``<>`` and
+    drops any ``#fragment``. A blank body such as ``[text]( )`` yields an empty
+    string rather than raising IndexError.
+    """
+    tokens = target.split()
+    if not tokens:
+        return ""
+    return tokens[0].strip("<>").split("#", 1)[0]
+
+
+def _is_external(target: str) -> bool:
+    """Return True for http(s), mailto and tel targets, which are not checked."""
+    return target.startswith(("http://", "https://", "mailto:", "tel:"))
+
+
+def _iter_markdown_paths(root: Path, inputs: list[str]) -> list[Path]:
+    """Return the existing markdown files among *inputs*, sorted and de-duplicated.
+
+    Relative inputs are resolved against *root*; absolute inputs are used as given.
+    """
+    out: list[Path] = []
+    for raw in inputs:
+        p = (root / raw).resolve() if not Path(raw).is_absolute() else Path(raw)
+        if p.is_file() and p.suffix.lower() in MD_EXTS:
+            out.append(p)
+    return sorted(set(out))
+
+
+def _validate_front_matter(lines: list[str], path: Path) -> list[str]:
+    """Check a leading ``---`` front matter block for a closing delimiter and content.
+
+    Files that do not start with ``---`` are not checked and yield no issues.
+    """
+    issues = []
+    if lines and lines[0].strip() == "---":
+        try:
+            end_idx = next(i for i, line in enumerate(lines[1:], start=1) if line.strip() == "---")
+            if end_idx == 1:
+                issues.append(f"{path}:1 empty front matter block")
+        except StopIteration:
+            issues.append(f"{path}:1 missing closing front matter delimiter '---'")
+    return issues
+
+
+def _validate_filename(path: Path, root: Path) -> list[str]:
+    """Check that the basename of *path* is lowercase kebab-case.
+
+    ``index.md`` and ``__all_subsections.md`` are exempt.
+    """
+    try:
+        rel = path.relative_to(root)
+    except ValueError:
+        # An absolute input outside *root* has no relative form; show it as given.
+        rel = path
+    name = path.name
+    if name in {"index.md", "__all_subsections.md"}:
+        return []
+    if name.lower() != name:
+        return [f"{path}:1 invalid markdown filename casing: {rel}"]
+    if not VALID_MD_BASENAME_RE.match(name):
+        return [f"{path}:1 invalid markdown filename format (use kebab-case): {rel}"]
+    return []
+
+
+def _candidate_targets(base: Path) -> list[Path]:
+    """Return the paths to try for *base*, with *base* itself first.
+
+    Suffix-less targets also get ``.md``/``.markdown`` siblings and the
+    ``index`` pages inside *base* as candidates.
+    """
+    s = str(base)
+    cands = [base]
+    if not base.suffix:
+        cands.append(Path(s + ".md"))
+        cands.append(Path(s + ".markdown"))
+        cands.append(base / "index.md")
+        cands.append(base / "index.markdown")
+    return cands
+
+
+def _resolve_target(raw: str, path: Path, root: Path) -> Path | None:
+    """Return the first existing candidate for *raw*, or None.
+
+    Targets starting with ``/`` are looked up under *root*; all others are
+    relative to the directory containing *path*. Percent-escapes are decoded first.
+    """
+    cleaned = unquote(raw)
+    base = (root / cleaned.lstrip("/")) if cleaned.startswith("/") else (path.parent / cleaned)
+    for cand in _candidate_targets(base):
+        if cand.exists():
+            return cand
+    return None
+
+
+def _validate_links(text: str, path: Path, root: Path) -> list[str]:
+    """Return one issue per local link or image target in *text* that does not resolve."""
+    issues = []
+    for regex, kind in ((LINK_RE, "link"), (IMAGE_RE, "image")):
+        for m in regex.finditer(text):
+            raw = _clean_target(m.group(1))
+            if not raw or raw.startswith("#") or _is_external(raw):
+                continue
+            if _resolve_target(raw, path, root) is None:
+                line = text.count("\n", 0, m.start()) + 1
+                issues.append(f"{path}:{line} broken {kind} target: {raw}")
+    return issues
+
+
+def _mask_fenced_code_blocks(text: str) -> str:
+    """Replace fence lines and fenced content with bare newlines.
+
+    Lines outside fences are kept unchanged, so counting newlines in the result
+    still gives the original line numbers.
+    """
+    lines = text.splitlines(keepends=True)
+    masked: list[str] = []
+    in_fence = False
+    for line in lines:
+        if FENCE_RE.match(line.strip()):
+            in_fence = not in_fence
+            masked.append("\n")
+            continue
+        masked.append("\n" if in_fence else line)
+    return "".join(masked)
+
+
+def _mask_math_spans(text: str) -> str:
+    """Overwrite ``$$...$$`` and ``$...$`` spans with spaces, keeping newlines."""
+
+    def _blank(match: re.Match[str]) -> str:
+        return "".join("\n" if ch == "\n" else " " for ch in match.group(0))
+
+    out = BLOCK_MATH_RE.sub(_blank, text)
+    out = INLINE_MATH_RE.sub(_blank, out)
+    return out
+
+
+def main() -> int:
+    """Validate the given markdown files and print a per-run summary.
+
+    Returns:
+        0 when no issue was found (or no markdown file was given), 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Validate changed markdown files")
+    parser.add_argument("paths", nargs="*", help="Changed file paths")
+    args = parser.parse_args()
+
+    root = Path('.').resolve()
+    md_files = _iter_markdown_paths(root, args.paths)
+    if not md_files:
+        print("Markdown CI validation skipped (no markdown files changed).")
+        return 0
+
+    issues: list[str] = []
+    failed_files: set[Path] = set()
+    for path in md_files:
+        try:
+            text = path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            issues.append(f"{path}:1 unreadable markdown: {exc}")
+            failed_files.add(path)
+            continue
+        path_issues = []
+        path_issues.extend(_validate_filename(path, root))
+        path_issues.extend(_validate_front_matter(text.splitlines(), path))
+        scan_text = _mask_math_spans(_mask_fenced_code_blocks(text))
+        path_issues.extend(_validate_links(scan_text, path, root))
+        if path_issues:
+            issues.extend(path_issues)
+            failed_files.add(path)
+
+    passed = len(md_files) - len(failed_files)
+    failed = len(failed_files)
+    if issues:
+        print("Markdown validation failed:")
+        for issue in issues:
+            print(f"- {issue}")
+        print(f"\nSummary: {passed} file(s) passed, {failed} file(s) failed.")
+        return 1
+
+    print(f"Markdown validation passed ({len(md_files)} file(s)).")
+    print(f"Summary: {passed} file(s) passed, {failed} file(s) failed.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/validate_yaml_ci.py b/.github/scripts/validate_yaml_ci.py
new file mode 100755
index 00000000..2fc4b4ac
--- /dev/null
+++ b/.github/scripts/validate_yaml_ci.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""CI YAML validation for changed yaml files."""
+
+from __future__ import annotations
+
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+YAML_EXTS = {".yml", ".yaml"}
+# Ruby one-liner that parses ARGV[0]. It rescues inside Ruby and prints only the
+# exception message: an uncaught exception prints its message first and the
+# backtrace after it, so the last line of stderr would be a stack frame.
+_RUBY_CHECK = (
+    "require 'yaml'; "
+    "begin; "
+    "YAML.safe_load(File.read(ARGV[0]), permitted_classes: [], aliases: true); "
+    "rescue StandardError => e; "
+    "warn e.message; "
+    "exit 1; "
+    "end"
+)
+
+
+def _iter_yaml_paths(root: Path, inputs: list[str]) -> list[Path]:
+    """Return the existing YAML files among *inputs*, sorted and de-duplicated.
+
+    Relative inputs are resolved against *root*; absolute inputs are used as given.
+    """
+    out: list[Path] = []
+    for raw in inputs:
+        p = (root / raw).resolve() if not Path(raw).is_absolute() else Path(raw)
+        if p.is_file() and p.suffix.lower() in YAML_EXTS:
+            out.append(p)
+    return sorted(set(out))
+
+
+def _validate_yaml(path: Path) -> str | None:
+    """Parse *path* with Ruby's ``YAML.safe_load`` and describe any failure.
+
+    Returns:
+        None when the file parses, otherwise ``"<path>: <reason>"`` where the
+        reason is the first line Ruby printed, or a fallback when Ruby printed
+        nothing or could not be started.
+    """
+    cmd = ["ruby", "-e", _RUBY_CHECK, str(path)]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True)
+    except FileNotFoundError:
+        # No ruby on PATH: report it per file instead of crashing the validator.
+        return f"{path}: ruby executable not found; cannot validate YAML"
+    if proc.returncode != 0:
+        output = (proc.stderr or proc.stdout).strip()
+        # Ruby may exit without printing anything (e.g. killed by a signal).
+        err = output.splitlines()[0] if output else f"ruby exited with status {proc.returncode}"
+        return f"{path}: {err}"
+    return None
+
+
+def main() -> int:
+    """Validate the given YAML files and print the outcome.
+
+    Returns:
+        0 when every file parses (or no YAML file was given), 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Validate changed yaml files")
+    parser.add_argument("paths", nargs="*", help="Changed file paths")
+    args = parser.parse_args()
+
+    root = Path('.').resolve()
+    yaml_files = _iter_yaml_paths(root, args.paths)
+    if not yaml_files:
+        print("YAML CI validation skipped (no yaml files changed).")
+        return 0
+
+    issues: list[str] = []
+    for path in yaml_files:
+        err = _validate_yaml(path)
+        if err:
+            issues.append(err)
+
+    if issues:
+        print("YAML validation failed:")
+        for issue in issues:
+            print(f"- {issue}")
+        return 1
+
+    print(f"YAML validation passed ({len(yaml_files)} file(s)).")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/pr-change-validation.yml b/.github/workflows/pr-change-validation.yml
new file mode 100644
index 00000000..7d6e75f1
--- /dev/null
+++ b/.github/workflows/pr-change-validation.yml
@@ -0,0 +1,58 @@
+name: PR Change Validation
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+    inputs:
+      validate_all:
+        description: "Run validators against full repository"
+        required: false
+        default: false
+        type: boolean
+
+permissions:
+  contents: read
+
+jobs:
+  validate-incoming-changes:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Setup Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: "3.2"
+
+      - name: Run change validators (PR scope)
+        if: github.event_name == 'pull_request'
+        # Pass expression values through env instead of expanding them inside the
+        # script, so their content is never parsed as shell code.
+        env:
+          BASE_REF: ${{ github.base_ref }}
+          HEAD_SHA: ${{ github.sha }}
+        run: |
+          python .github/scripts/validate_changed_files_ci.py \
+            --base "origin/${BASE_REF}" \
+            --head "${HEAD_SHA}"
+
+      - name: Run change validators (manual)
+        if: github.event_name == 'workflow_dispatch'
+        env:
+          VALIDATE_ALL: ${{ inputs.validate_all }}
+        run: |
+          if [ "${VALIDATE_ALL}" = "true" ]; then
+            python .github/scripts/validate_changed_files_ci.py --all
+          else
+            python .github/scripts/validate_changed_files_ci.py --base origin/master --head HEAD
+          fi