Skip to content

Commit db4f874

Browse files
ci(papers): validate docs/papers.yml format
1 parent 9ad9802 commit db4f874

2 files changed

Lines changed: 165 additions & 0 deletions

File tree

.github/workflows/CI.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,24 @@ permissions:
1818
contents: write
1919

2020
jobs:
21+
# Fast pre-check: validates the structure of docs/papers.yml with
# scripts/validate_papers_yml.py.
validate_papers_yml:
  name: Validate docs/papers.yml
  runs-on: ubuntu-latest
  timeout-minutes: 5
  # This job only reads the checkout, so override the workflow-level
  # `contents: write` grant and run with a read-only token.
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@v6
    - name: "Set up Python"
      uses: actions/setup-python@v6
      with:
        python-version: '3.x'
        cache: pip
    - name: "Install validator deps"
      run: |
        python -m pip install --upgrade pip
        pip install pyyaml
    - name: "Validate papers.yml"
      run: python scripts/validate_papers_yml.py
38+
2139
test:
2240
runs-on: ${{ matrix.os }}
2341
timeout-minutes: 60

scripts/validate_papers_yml.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/env python3
2+
"""Validate docs/papers.yml.
3+
4+
Goal: a fast, deterministic PR check that catches the common formatting issues
5+
maintainers would otherwise check manually.
6+
7+
This is intentionally "simple yet robust": it validates structure and a few
8+
key invariants, without trying to enforce stylistic preferences.
9+
10+
Rules:
11+
- YAML must parse.
12+
- Top-level must be a mapping with key `papers` as a list.
13+
- Each paper must be a mapping with required keys:
14+
- title: non-empty string
15+
- authors: non-empty list of strings
16+
- link: non-empty string
17+
- date: YYYY-MM-DD (ISO 8601 calendar date), given either as a quoted string or as an unquoted YAML date
18+
- image: string that is either:
19+
- absolute http(s) URL, OR
20+
- a basename filename that exists under docs/src/public/images/
21+
22+
We also reject a common footgun:
23+
- image URLs pointing at the *temporary* bot branch `paper-images/pr-...`.
24+
"""
25+
26+
from __future__ import annotations

import datetime as _dt
import os
import re
import sys
from pathlib import Path
from typing import NoReturn

import yaml
35+
36+
# Paths are relative, so they resolve against the current working directory.
PAPERS_PATH = Path("docs/papers.yml")
# Directory in which local (basename-only) paper images are looked up.
IMAGES_DIR = Path("docs/src/public/images")

# re.ASCII restricts `\d` to 0-9; in the default Unicode mode `\d` also matches
# other decimal digits (e.g. full-width digits), which are not valid in a date.
DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$", re.ASCII)
# Accepted file extensions for local images (case-insensitive).
IMAGE_EXT_RE = re.compile(r"\.(png|jpe?g|webp)$", re.IGNORECASE)
# Matches URLs that contain a `/paper-images/pr-<number>` path segment.
TEMP_BOT_BRANCH_RE = re.compile(r"/paper-images/pr-\d+(/|$)")
42+
43+
44+
def fail(msg: str) -> NoReturn:
    """Print ``ERROR: <msg>`` to stdout and abort the script.

    Args:
        msg: Human-readable description of the fatal problem.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print(f"ERROR: {msg}")
    raise SystemExit(1)
47+
48+
49+
def is_nonempty_str(x: object) -> bool:
    """Return True only if *x* is a str with at least one non-whitespace character."""
    if not isinstance(x, str):
        return False
    return bool(x.strip())
51+
52+
53+
def _validate_paper(paper: object, prefix: str) -> list[str]:
    """Return the validation errors for one ``papers`` entry (empty list if valid).

    Args:
        paper: The parsed YAML value of the entry.
        prefix: Label such as ``papers[3]`` used to start every error message.
    """
    if not isinstance(paper, dict):
        return [f"{prefix}: must be a mapping"]

    errors: list[str] = []

    if not is_nonempty_str(paper.get("title")):
        errors.append(f"{prefix}.title: required non-empty string")

    authors = paper.get("authors")
    if not (
        isinstance(authors, list)
        and authors
        and all(is_nonempty_str(a) for a in authors)
    ):
        errors.append(f"{prefix}.authors: required non-empty list of strings")

    if not is_nonempty_str(paper.get("link")):
        errors.append(f"{prefix}.link: required non-empty string")

    date = paper.get("date")
    # PyYAML parses unquoted ISO dates as datetime.date
    if isinstance(date, _dt.date):
        date_s = date.isoformat()
    elif isinstance(date, str):
        date_s = date.strip()
    else:
        date_s = ""

    if not DATE_RE.match(date_s):
        errors.append(f"{prefix}.date: required YYYY-MM-DD")
    else:
        # The regex only checks the shape; a quoted value such as
        # "2024-13-45" must still be rejected as an impossible date.
        try:
            _dt.date.fromisoformat(date_s)
        except ValueError:
            errors.append(f"{prefix}.date: {date_s!r} is not a valid calendar date")

    image = paper.get("image")
    if not is_nonempty_str(image):
        errors.append(f"{prefix}.image: required string")
        return errors

    image_s = image.strip()

    if image_s.startswith(("http://", "https://")):
        # Reject temporary bot-branch URLs (these are not stable and shouldn't
        # be used in papers.yml).
        if TEMP_BOT_BRANCH_RE.search(image_s):
            errors.append(
                f"{prefix}.image: points to temporary bot branch URL; use a stable URL (e.g. PySR_Docs master)"
            )
        return errors

    # Must be a basename only (no paths)
    if os.path.basename(image_s) != image_s or "/" in image_s or "\\" in image_s:
        errors.append(
            f"{prefix}.image: local image must be a basename (e.g. myfig.jpg), not a path"
        )
        return errors

    if not IMAGE_EXT_RE.search(image_s):
        errors.append(
            f"{prefix}.image: local image must end in .png/.jpg/.jpeg/.webp"
        )
    img_path = IMAGES_DIR / image_s
    # is_file() rather than exists(): a directory with that name is not an image.
    if not img_path.is_file():
        errors.append(
            f"{prefix}.image: local image {image_s!r} not found at {img_path}"
        )
    return errors


def main() -> None:
    """Validate ``PAPERS_PATH`` and report every problem found.

    Structural problems (missing/unreadable file, YAML that does not parse,
    wrong top-level shape) abort immediately via ``fail``. Per-paper problems
    are collected and printed together.

    Raises:
        SystemExit: With exit code 1 if any validation problem is found.
    """
    try:
        raw = PAPERS_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        fail(f"Missing {PAPERS_PATH}")
    except (OSError, UnicodeDecodeError) as e:
        fail(f"Cannot read {PAPERS_PATH}: {e}")

    try:
        obj = yaml.safe_load(raw)
    except Exception as e:
        # Deliberately broad: besides yaml.YAMLError, the loader may raise
        # other exceptions (e.g. ValueError while building an impossible
        # unquoted date); all of them mean the file is unusable.
        fail(f"YAML parse error in {PAPERS_PATH}: {e}")

    if not isinstance(obj, dict):
        fail(f"{PAPERS_PATH} must be a mapping at top level")

    papers = obj.get("papers")
    if not isinstance(papers, list):
        fail(f"{PAPERS_PATH} must contain `papers:` as a list")

    errors: list[str] = []
    for i, paper in enumerate(papers):
        errors.extend(_validate_paper(paper, f"papers[{i}]"))

    if errors:
        print("Papers.yml validation failed:\n")
        for e in errors:
            print(f"- {e}")
        raise SystemExit(1)

    print("OK: docs/papers.yml looks valid")
144+
145+
146+
# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)