Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions .github/scripts/validate_changed_files_ci.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""CI orchestration for incoming-change validation."""

from __future__ import annotations

import argparse
import subprocess
import sys
from pathlib import Path

# Third ancestor of this file's resolved path; with the script stored under
# .github/scripts/ that is the repository root.
ROOT = Path(__file__).resolve().parents[2]
# Directory that holds the individual validator scripts.
CI_SCRIPTS = ROOT / ".github" / "scripts"
# Repo-relative path prefixes (forward slashes, trailing "/") eligible for validation.
ALLOWED_PREFIXES = ("wiki/", "docs/", "assets/")


def _git_changed(base: str, head: str) -> list[str]:
    """Return the paths that differ between *base* and *head*.

    Runs ``git diff --name-only -z base...head`` from the repository root.

    Args:
        base: Revision on the left side of the three-dot range.
        head: Revision on the right side of the three-dot range.

    Returns:
        The path names reported by git, in git's order.

    Raises:
        RuntimeError: If git exits with a non-zero status; the message is
            git's stderr (or stdout when stderr is empty).
    """
    proc = subprocess.run(
        # -z emits NUL-terminated, unquoted names. Without it git wraps paths
        # containing non-ASCII or special characters in quotes with escapes,
        # which would never match the allowed prefixes.
        ["git", "diff", "--name-only", "-z", f"{base}...{head}", "--"],
        cwd=ROOT,  # same working directory as the validators started by _run
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
    if proc.returncode != 0:
        raise RuntimeError((proc.stderr or proc.stdout).strip())
    return [name for name in proc.stdout.split("\0") if name]


def _run(cmd: list[str]) -> int:
    """Execute *cmd* with the repository root as working directory.

    Returns:
        The exit status of the finished process.
    """
    completed = subprocess.run(cmd, cwd=ROOT)
    return completed.returncode


def _in_scope(path: str) -> bool:
    """Tell whether *path* begins with one of ``ALLOWED_PREFIXES``."""
    return any(path.startswith(prefix) for prefix in ALLOWED_PREFIXES)


def _all_files_in_scope() -> list[str]:
    """List every regular file below the allowed top-level directories.

    Directories named by ``ALLOWED_PREFIXES`` that do not exist under ``ROOT``
    are skipped.

    Returns:
        Paths relative to ``ROOT`` as strings, sorted.
    """
    top_dirs = (ROOT / prefix.rstrip("/") for prefix in ALLOWED_PREFIXES)
    collected = [
        str(entry.relative_to(ROOT))
        for top in top_dirs
        if top.exists()
        for entry in top.rglob("*")
        if entry.is_file()
    ]
    collected.sort()
    return collected


def main() -> int:
    """Select the files to validate and run the matching validator scripts.

    File selection, in priority order:
      * ``--all``: every file under the allowed prefixes;
      * positional ``paths``: those paths, filtered to the allowed prefixes;
      * otherwise: the ``git diff`` between ``--base`` and ``--head``,
        filtered to the allowed prefixes.

    Markdown files are passed to the markdown and image validators, YAML
    files to the YAML validator.

    Returns:
        0 when every validator that ran exited with status 0, otherwise 1
        (also 1 when the list of changed files cannot be obtained from git).
    """
    parser = argparse.ArgumentParser(description="Validate changed files in CI")
    parser.add_argument("--base", default="origin/master")
    parser.add_argument("--head", default="HEAD")
    parser.add_argument("--all", action="store_true")
    parser.add_argument("paths", nargs="*")
    args = parser.parse_args()

    if args.all:
        changed = _all_files_in_scope()
    elif args.paths:
        changed = [p for p in args.paths if _in_scope(p)]
    else:
        try:
            diffed = _git_changed(args.base, args.head)
        except (RuntimeError, OSError) as exc:
            # A missing base ref or a missing git binary is an environment
            # problem; report it plainly instead of dumping a traceback.
            print(
                f"Unable to list changed files for {args.base}...{args.head}: {exc}",
                file=sys.stderr,
            )
            return 1
        changed = [p for p in diffed if _in_scope(p)]

    md_files = [p for p in changed if p.lower().endswith((".md", ".markdown"))]
    yaml_files = [p for p in changed if p.lower().endswith((".yml", ".yaml"))]

    status = 0

    if md_files:
        status |= _run([sys.executable, str(CI_SCRIPTS / "validate_markdown_ci.py"), *md_files])
        status |= _run([sys.executable, str(CI_SCRIPTS / "validate_images_ci.py"), *md_files])
    else:
        print("Skipping markdown/image validation (no markdown files changed).")

    if yaml_files:
        status |= _run([sys.executable, str(CI_SCRIPTS / "validate_yaml_ci.py"), *yaml_files])
    else:
        print("Skipping YAML validation (no yaml files changed).")

    if status != 0:
        print("\nIncoming change validation failed.")
        return 1

    print("\nIncoming change validation passed.")
    return 0


# Script entry point: exit with main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
99 changes: 99 additions & 0 deletions .github/scripts/validate_images_ci.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""CI image validation for changed markdown files only."""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path
from urllib.parse import unquote

# Inline markdown image ``![alt](target)``; group 1 is the text between the parentheses.
IMG_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
# File suffixes (lowercase, with dot) treated as markdown.
MD_EXTS = {".md", ".markdown"}
# Matches text that begins with three backticks (a code fence marker).
FENCE_RE = re.compile(r"^```")


def _clean_target(target: str) -> str:
return target.strip().split()[0].strip("<>").split("#", 1)[0]


def _iter_markdown(root: Path, inputs: list[str]) -> list[Path]:
    """Turn the given path strings into a sorted, de-duplicated list of markdown files.

    Relative inputs are joined onto *root* and resolved; absolute inputs are
    used as given. Entries that are not existing files, or whose lowercase
    suffix is not in ``MD_EXTS``, are dropped.
    """
    selected: set[Path] = set()
    for raw in inputs:
        candidate = Path(raw)
        if not candidate.is_absolute():
            candidate = (root / raw).resolve()
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() in MD_EXTS:
            selected.add(candidate)
    return sorted(selected)


def _mask_fenced_code_blocks(text: str) -> str:
lines = text.splitlines(keepends=True)
masked: list[str] = []
in_fence = False
for line in lines:
if FENCE_RE.match(line.strip()):
in_fence = not in_fence
masked.append("\n")
continue
masked.append("\n" if in_fence else line)
return "".join(masked)




def _candidate_targets(base: Path) -> list[Path]:
s = str(base)
cands = [base]
if not base.suffix:
cands.append(Path(s + ".md"))
cands.append(Path(s + ".markdown"))
cands.append(base / "index.md")
cands.append(base / "index.markdown")
return cands


def _resolve_target(raw: str, md: Path, root: Path) -> Path | None:
    """Find the existing path that *raw* refers to, if any.

    The target is percent-decoded first. A target starting with ``/`` is
    looked up under *root*; anything else is looked up relative to the
    directory of *md*.

    Returns:
        The first existing candidate, or ``None`` when none exists.
    """
    decoded = unquote(raw)
    if decoded.startswith("/"):
        anchor = root / decoded.lstrip("/")
    else:
        anchor = md.parent / decoded
    return next((option for option in _candidate_targets(anchor) if option.exists()), None)

def main() -> int:
    """Check that local image references in the given markdown files exist.

    Command line: positional ``paths`` (changed files) and ``--root``
    (repository root, default ``"."``). Targets starting with ``http://``,
    ``https://`` or ``data:`` are not checked. Fenced code blocks are masked
    before scanning.

    Returns:
        0 when no markdown file was given or every image target resolves,
        1 when a file cannot be read or an image target is missing.
    """
    parser = argparse.ArgumentParser(description="Validate image references in changed markdown")
    parser.add_argument("paths", nargs="*", help="Changed file paths")
    parser.add_argument("--root", default=".", help="Repository root")
    args = parser.parse_args()

    root = Path(args.root).resolve()
    md_files = _iter_markdown(root, args.paths)
    if not md_files:
        print("Image CI validation skipped (no markdown files changed).")
        return 0

    issues: list[str] = []
    for md in md_files:
        try:
            content = md.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            # Record an unreadable or non-UTF-8 file as an issue and keep
            # checking the remaining files instead of aborting the whole run.
            issues.append(f"{md}:1 unreadable markdown: {exc}")
            continue
        text = _mask_fenced_code_blocks(content)
        for m in IMG_RE.finditer(text):
            raw = _clean_target(m.group(1))
            if not raw or raw.startswith(("http://", "https://", "data:")):
                continue
            target = _resolve_target(raw, md, root)
            if target is None:
                # 1-based line number of the match start.
                line = text.count("\n", 0, m.start()) + 1
                issues.append(f"{md}:{line} missing image: {raw}")

    if issues:
        print("Image CI validation failed:")
        for issue in issues:
            print(f"- {issue}")
        return 1

    print(f"Image CI validation passed ({len(md_files)} markdown file(s)).")
    return 0


# Script entry point: exit with main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
162 changes: 162 additions & 0 deletions .github/scripts/validate_markdown_ci.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""CI markdown validation for changed markdown files."""

from __future__ import annotations

import argparse
import re
import subprocess
import sys
from pathlib import Path
from urllib.parse import unquote

# File suffixes (lowercase, with dot) treated as markdown.
MD_EXTS = {".md", ".markdown"}
# Inline ``[text](target)`` with non-empty text; group 1 is the text between the parentheses.
LINK_RE = re.compile(r"\[[^\]]+\]\(([^)]+)\)")
# Inline image ``![alt](target)``; group 1 is the text between the parentheses.
IMAGE_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")
# Matches text that begins with three backticks (a code fence marker).
FENCE_RE = re.compile(r"^```")
# Single-line ``$...$`` span; a backslash escapes the following character.
INLINE_MATH_RE = re.compile(r"\$(?:\\.|[^\n$])+\$")
# ``$$...$$`` span, shortest match, may extend over several lines.
BLOCK_MATH_RE = re.compile(r"\$\$(?:.|\n)*?\$\$", re.MULTILINE)
# Lowercase letters/digits in groups joined by single hyphens, ending in ``.md``.
VALID_MD_BASENAME_RE = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*\.md$")


def _clean_target(target: str) -> str:
return target.strip().split()[0].strip("<>").split("#", 1)[0]


def _is_external(target: str) -> bool:
return target.startswith(("http://", "https://", "mailto:", "tel:"))


def _iter_markdown_paths(root: Path, inputs: list[str]) -> list[Path]:
    """Turn the given path strings into a sorted, de-duplicated list of markdown files.

    Relative inputs are joined onto *root* and resolved; absolute inputs are
    used as given. Entries that are not existing files, or whose lowercase
    suffix is not in ``MD_EXTS``, are dropped.
    """
    selected: set[Path] = set()
    for raw in inputs:
        candidate = Path(raw)
        if not candidate.is_absolute():
            candidate = (root / raw).resolve()
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() in MD_EXTS:
            selected.add(candidate)
    return sorted(selected)


def _validate_front_matter(lines: list[str], path: Path) -> list[str]:
issues = []
if lines and lines[0].strip() == "---":
try:
end_idx = next(i for i, line in enumerate(lines[1:], start=1) if line.strip() == "---")
if end_idx == 1:
issues.append(f"{path}:1 empty front matter block")
except StopIteration:
issues.append(f"{path}:1 missing closing front matter delimiter '---'")
return issues


def _validate_filename(path: Path, root: Path) -> list[str]:
rel = path.relative_to(root)
name = path.name
if name in {"index.md", "__all_subsections.md"}:
return []
if name.lower() != name:
return [f"{path}:1 invalid markdown filename casing: {rel}"]
if not VALID_MD_BASENAME_RE.match(name):
return [f"{path}:1 invalid markdown filename format (use kebab-case): {rel}"]
return []


def _candidate_targets(base: Path) -> list[Path]:
s = str(base)
cands = [base]
if not base.suffix:
cands.append(Path(s + ".md"))
cands.append(Path(s + ".markdown"))
cands.append(base / "index.md")
cands.append(base / "index.markdown")
return cands


def _resolve_target(raw: str, path: Path, root: Path) -> Path | None:
    """Find the existing path that *raw* refers to, if any.

    The target is percent-decoded first. A target starting with ``/`` is
    looked up under *root*; anything else is looked up relative to the
    directory of *path*.

    Returns:
        The first existing candidate, or ``None`` when none exists.
    """
    decoded = unquote(raw)
    if decoded.startswith("/"):
        anchor = root / decoded.lstrip("/")
    else:
        anchor = path.parent / decoded
    return next((option for option in _candidate_targets(anchor) if option.exists()), None)


def _validate_links(text: str, path: Path, root: Path) -> list[str]:
    """Report local link and image targets in *text* that do not resolve.

    Empty targets, pure ``#anchor`` targets and external targets are skipped.

    Args:
        text: Markdown content (callers may pass a masked version).
        path: File the text belongs to; relative targets resolve against its directory.
        root: Directory that ``/``-prefixed targets resolve against.

    Returns:
        One ``"<path>:<line> broken <kind> target: <target>"`` message per
        unresolved target, links first, then images.
    """
    issues: list[str] = []
    # LINK_RE also matches the "[alt](src)" part of image syntax. Remember
    # where image targets sit so the link pass does not report the same
    # target a second time under the wrong kind.
    # NOTE(review): for a linked image "[![alt](img)](page)" the outer
    # "(page)" target is still not examined by either pattern.
    image_target_spans = {m.span(1) for m in IMAGE_RE.finditer(text)}
    for regex, kind in ((LINK_RE, "link"), (IMAGE_RE, "image")):
        for m in regex.finditer(text):
            if kind == "link" and m.span(1) in image_target_spans:
                continue
            raw = _clean_target(m.group(1))
            if not raw or raw.startswith("#") or _is_external(raw):
                continue
            if _resolve_target(raw, path, root) is None:
                # 1-based line number of the match start.
                line = text.count("\n", 0, m.start()) + 1
                issues.append(f"{path}:{line} broken {kind} target: {raw}")
    return issues


def _mask_fenced_code_blocks(text: str) -> str:
lines = text.splitlines(keepends=True)
masked: list[str] = []
in_fence = False
for line in lines:
if FENCE_RE.match(line.strip()):
in_fence = not in_fence
masked.append("\n")
continue
masked.append("\n" if in_fence else line)
return "".join(masked)


def _mask_math_spans(text: str) -> str:
    """Overwrite math spans with spaces so their content is not scanned.

    Spans matched by ``BLOCK_MATH_RE`` are masked first, then spans matched
    by ``INLINE_MATH_RE``. Newlines inside a span are kept and every other
    character becomes one space, so the text length and line breaks are
    unchanged.
    """

    def _spaces_keeping_newlines(found: re.Match[str]) -> str:
        pieces = found.group(0).split("\n")
        return "\n".join(" " * len(piece) for piece in pieces)

    without_blocks = BLOCK_MATH_RE.sub(_spaces_keeping_newlines, text)
    return INLINE_MATH_RE.sub(_spaces_keeping_newlines, without_blocks)


def main() -> int:
    """Validate the markdown files named on the command line.

    Each file is checked for its file name, its front matter block and its
    local link/image targets (after masking fenced code and math spans).
    Paths are resolved against the current working directory.

    Returns:
        0 when no markdown file was given or no issue was found, otherwise 1.
    """
    cli = argparse.ArgumentParser(description="Validate changed markdown files")
    cli.add_argument("paths", nargs="*", help="Changed file paths")
    options = cli.parse_args()

    root = Path(".").resolve()
    md_files = _iter_markdown_paths(root, options.paths)
    if not md_files:
        print("Markdown CI validation skipped (no markdown files changed).")
        return 0

    problems: list[str] = []
    bad_files: set[Path] = set()
    for md_path in md_files:
        try:
            content = md_path.read_text(encoding="utf-8")
        except Exception as exc:
            problems.append(f"{md_path}:1 unreadable markdown: {exc}")
            bad_files.add(md_path)
            continue
        # Elements are evaluated top to bottom: filename, front matter, links.
        found = [
            *_validate_filename(md_path, root),
            *_validate_front_matter(content.splitlines(), md_path),
            *_validate_links(
                _mask_math_spans(_mask_fenced_code_blocks(content)), md_path, root
            ),
        ]
        if found:
            problems += found
            bad_files.add(md_path)

    failed = len(bad_files)
    passed = len(md_files) - failed
    if not problems:
        print(f"Markdown validation passed ({len(md_files)} file(s)).")
        print(f"Summary: {passed} file(s) passed, {failed} file(s) failed.")
        return 0

    print("Markdown validation failed:")
    for issue in problems:
        print(f"- {issue}")
    print(f"\nSummary: {passed} file(s) passed, {failed} file(s) failed.")
    return 1


# Script entry point: exit with main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
Loading
Loading