diff --git a/lychee.toml b/lychee.toml index f0e5da1..f601600 100644 --- a/lychee.toml +++ b/lychee.toml @@ -26,6 +26,7 @@ exclude = [ "^https://github\\.com/gardenlinux/python-gardenlinux-lib/blob/main/_static/", "^https://github\\.com/gardenlinux/docs-ng/blob/main/src/README\\.md$", "^https://github\\.com/gardenlinux/docs-ng/blob/main/tests/README\\.md$", + "^http://packages\\.gardenlinux\\.io/gardenlinux$", ] # Accept these status codes as valid diff --git a/repos-config.json b/repos-config.json index e725c8d..6e8803e 100644 --- a/repos-config.json +++ b/repos-config.json @@ -6,7 +6,7 @@ "docs_path": "docs", "target_path": "projects/gardenlinux", "ref": "docs-ng", - "commit": "063253164fffb481c5d1400f92c5455218f27897", + "commit": "3d3392ef4f115a2c37d0963200b6086c4e972f3d", "root_files": [ "CONTRIBUTING.md", "SECURITY.md", @@ -21,11 +21,7 @@ "reference": "reference", "contributing": "contributing" }, - "media_directories": [ - ".media", - "assets", - "_static" - ] + "media_directories": [".media", "assets"] }, { "name": "builder", @@ -34,11 +30,7 @@ "target_path": "projects/builder", "ref": "docs-ng", "commit": "9a9a7e0ed3b0507e360c37dc2acd5d4d9ce36020", - "media_directories": [ - ".media", - "assets", - "_static" - ] + "media_directories": [".media", "assets"] }, { "name": "python-gardenlinux-lib", @@ -46,13 +38,13 @@ "docs_path": "docs", "target_path": "projects/python-gardenlinux-lib", "ref": "docs-ng", - "commit": "73a16adcc674e43ad9b811c399fa5af1b698d62f", + "commit": "cf301b55dd89f5989ef626f3605b59c69b48f3cb", "structure": "sphinx", - "media_directories": [ - ".media", - "assets", - "_static" - ] + "target_map": { + "cli.md": "reference/python-gardenlinux-lib-cli.md", + "api.md": "reference/python-gardenlinux-lib-api.md" + }, + "media_directories": [".media", "assets"] }, { "name": "package-linux", @@ -61,11 +53,7 @@ "target_path": "projects/package-linux", "ref": "docs-ng", "commit": "9570d0f81eae4e1e500dd3f36ea845a2dce14357", - "media_directories": [ - 
".media", - "assets", - "_static" - ] + "media_directories": [".media", "assets"] }, { "name": "package-build", @@ -74,11 +62,7 @@ "target_path": "projects/package-build", "ref": "docs-ng", "commit": "b4bce8b1dbb48cfedefdd866f3bc28d1c395c060", - "media_directories": [ - ".media", - "assets", - "_static" - ] + "media_directories": [".media", "assets"] }, { "name": "repo", @@ -87,11 +71,7 @@ "target_path": "projects/repo", "ref": "docs-ng", "commit": "4953feba27c5efd31421a883dec7c5bd61ac40f5", - "media_directories": [ - ".media", - "assets", - "_static" - ] + "media_directories": [".media", "assets"] }, { "name": "glrd", @@ -100,11 +80,7 @@ "target_path": "projects/glrd", "ref": "docs-ng", "commit": "b5262e0bf409065ee1b9d62f4d0270695d9081a2", - "media_directories": [ - ".media", - "assets", - "_static" - ] + "media_directories": [".media", "assets"] } ] } diff --git a/repos-config.local.json b/repos-config.local.json index 0f4c3f5..2bbaff6 100644 --- a/repos-config.local.json +++ b/repos-config.local.json @@ -20,7 +20,7 @@ "contributing": "contributing" }, "special_files": {}, - "media_directories": [".media", "assets", "_static"] + "media_directories": [".media", "assets"] }, { "name": "builder", @@ -28,7 +28,7 @@ "docs_path": "docs", "target_path": "projects/builder", "structure": "flat", - "media_directories": [".media", "assets", "_static"] + "media_directories": [".media", "assets"] }, { "name": "python-gardenlinux-lib", @@ -36,7 +36,11 @@ "docs_path": "docs", "target_path": "projects/python-gardenlinux-lib", "structure": "sphinx", - "media_directories": [".media", "assets", "_static"] + "target_map": { + "cli.md": "reference/python-gardenlinux-lib-cli.md", + "api.md": "reference/python-gardenlinux-lib-api.md" + }, + "media_directories": [".media", "assets"] }, { "name": "package-linux", @@ -44,7 +48,7 @@ "docs_path": "docs", "target_path": "projects/package-linux", "structure": "flat", - "media_directories": [".media", "assets", "_static"] + 
"media_directories": [".media", "assets"] }, { "name": "package-build", @@ -52,7 +56,7 @@ "docs_path": "docs", "target_path": "projects/package-build", "structure": "flat", - "media_directories": [".media", "assets", "_static"] + "media_directories": [".media", "assets"] }, { "name": "repo", @@ -60,7 +64,7 @@ "docs_path": "docs", "target_path": "projects/repo", "structure": "flat", - "media_directories": [".media", "assets", "_static"] + "media_directories": [".media", "assets"] }, { "name": "glrd", @@ -68,7 +72,7 @@ "docs_path": "docs", "target_path": "projects/glrd", "structure": "flat", - "media_directories": [".media", "assets", "_static"] + "media_directories": [".media", "assets"] } ] } diff --git a/requirements.txt b/requirements.txt index 2db0426..68a74e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,8 @@ pytest pyyaml # glrd @ git+https://github.com/gardenlinux/glrd.git@v4.2.0 # gardenlinux @ git+https://github.com/gardenlinux/python-gardenlinux-lib.git@0.10.20 +sphinx +sphinx-markdown-builder +sphinx-rtd-theme +sphinx-click +sphinxcontrib-autoprogram diff --git a/src/aggregation/__init__.py b/src/aggregation/__init__.py index 5c4e302..964df22 100644 --- a/src/aggregation/__init__.py +++ b/src/aggregation/__init__.py @@ -18,6 +18,7 @@ ) from .releases import generate_release_docs from .flavor_matrix import generate_flavor_matrix_docs +from .sphinx_builder import build_sphinx_markdown __all__ = [ # Models @@ -41,4 +42,6 @@ "generate_release_docs", # Flavor Matrix "generate_flavor_matrix_docs", + # Sphinx builder + "build_sphinx_markdown", ] \ No newline at end of file diff --git a/src/aggregation/fetcher.py b/src/aggregation/fetcher.py index b34dd4f..c911d27 100644 --- a/src/aggregation/fetcher.py +++ b/src/aggregation/fetcher.py @@ -8,6 +8,7 @@ from typing import Tuple, Optional from .models import RepoConfig, AggregateResult +from .sphinx_builder import build_sphinx_markdown def _convert_to_git_pattern(pattern: str) -> str: @@ -119,11 
+120,21 @@ def _fetch_remote( resolved_commit = result.stdout.strip() print(f" Resolved commit: {resolved_commit}") - # Copy docs to output directory + # Copy docs to output directory (or build Sphinx Markdown first) docs_source = temp_dir / repo.docs_path if docs_source.exists(): - print(f" Copying docs to {output_dir}") - self._copy_docs(docs_source, output_dir) + if repo.structure == "sphinx": + print(f" Building Sphinx Markdown from {repo.docs_path}/") + sphinx_ok = build_sphinx_markdown( + temp_dir, repo.docs_path, output_dir, + target_map=repo.target_map or None, + ) + if not sphinx_ok: + print(f" Warning: Sphinx build failed; falling back to raw docs copy") + self._copy_docs(docs_source, output_dir) + else: + print(f" Copying docs to {output_dir}") + self._copy_docs(docs_source, output_dir) else: print(f" Warning: docs_path '{repo.docs_path}' not found in repository") @@ -168,11 +179,21 @@ def _fetch_local( print(f" Error: Local repository not found: {repo_abs_path}", file=sys.stderr) return False - # Copy docs directory + # Copy docs directory (or build Sphinx Markdown first) docs_source = repo_abs_path / repo.docs_path if docs_source.exists(): - print(f" Copying docs from {repo.docs_path}/") - self._copy_docs(docs_source, output_dir) + if repo.structure == "sphinx": + print(f" Building Sphinx Markdown from {repo.docs_path}/") + sphinx_ok = build_sphinx_markdown( + repo_abs_path, repo.docs_path, output_dir, + target_map=repo.target_map or None, + ) + if not sphinx_ok: + print(f" Warning: Sphinx build failed; falling back to raw docs copy") + self._copy_docs(docs_source, output_dir) + else: + print(f" Copying docs from {repo.docs_path}/") + self._copy_docs(docs_source, output_dir) else: print(f" Warning: docs_path '{repo.docs_path}' not found in local repository") diff --git a/src/aggregation/models.py b/src/aggregation/models.py index de4133e..912c3f8 100644 --- a/src/aggregation/models.py +++ b/src/aggregation/models.py @@ -18,6 +18,7 @@ class RepoConfig: 
structure: Union[str, Dict[str, str]] = "flat" special_files: Dict[str, str] = field(default_factory=dict) media_directories: List[str] = field(default_factory=list) + target_map: Dict[str, str] = field(default_factory=dict) @property def is_local(self) -> bool: @@ -56,6 +57,7 @@ def from_dict(cls, data: Dict) -> "RepoConfig": structure=data.get("structure", "flat"), special_files=data.get("special_files", {}), media_directories=data.get("media_directories", []), + target_map=data.get("target_map", {}), ) diff --git a/src/aggregation/sphinx_builder.py b/src/aggregation/sphinx_builder.py new file mode 100644 index 0000000..f82e969 --- /dev/null +++ b/src/aggregation/sphinx_builder.py @@ -0,0 +1,382 @@ +"""Sphinx Markdown builder support for documentation aggregation. + +When a repository uses ``"structure": "sphinx"`` in repos-config.json, this +module runs ``sphinx-build -M markdown`` to produce plain Markdown output that +can be consumed by VitePress via the normal aggregation pipeline. + +Sphinx is invoked via the current Python interpreter (``python -m sphinx``), +so ``sphinx``, ``sphinx-markdown-builder``, and all Sphinx extensions required +by the documented project must be installed in the same Python environment that +runs the aggregator. +""" + +import os +import re +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Dict, Optional + + +def build_sphinx_markdown( + repo_dir: Path, + docs_path: str, + output_dir: Path, + target_map: Optional[Dict[str, str]] = None, +) -> bool: + """ + Build Sphinx Markdown output from a fetched repository and copy it to + *output_dir*. + + Steps performed: + + 1. Run ``python -m sphinx -M markdown <docs_source> <build_dir>`` using + the current Python interpreter. + 2. Copy the resulting ``<build_dir>/markdown/`` contents to *output_dir*. + 3. Copy hand-written Markdown files from the raw docs source that carry a + ``github_target_path`` frontmatter field (e.g. how-to guides). + 4. 
Inject VitePress frontmatter (``title``, ``description``, and optionally + ``github_target_path`` from *target_map*) into each generated file. + 5. Strip sphinx-style artifacts that break VitePress compatibility. + + Args: + repo_dir: Root directory of the cloned/copied repository. + docs_path: Relative path to the Sphinx docs source within the repo. + output_dir: Destination directory for the built Markdown files. + target_map: Optional mapping of generated filename (e.g. ``"cli.md"``) + to its desired VitePress path (e.g. + ``"reference/python-gardenlinux-lib-cli.md"``). When provided, + the matching files receive a ``github_target_path`` frontmatter + field so the existing ``copy_targeted_docs`` mechanism places them + at the correct URL in the docs site. + + Returns: + ``True`` on success, ``False`` on any failure. + """ + docs_source = repo_dir / docs_path + conf_py = docs_source / "conf.py" + + if not conf_py.exists(): + print( + f" [sphinx] No conf.py found at {conf_py} — skipping Sphinx build", + file=sys.stderr, + ) + return False + + print(f" [sphinx] Building Markdown documentation from {docs_source}") + + with tempfile.TemporaryDirectory() as tmp: + build_dir = Path(tmp) / "build" + build_dir.mkdir() + + # Step 1: Run sphinx-build -M markdown via the current Python interpreter. + # Using sys.executable ensures we run sphinx from the same environment + # that is running the aggregator, avoiding stale PATH entries. + # + # Prepend the repo's src/ directory to PYTHONPATH so sphinx imports + # the documented project's source code from the fetched repo rather + # than from any system-installed release version. This is necessary + # when the repo's docs reference APIs (e.g. get_parser()) that were + # added after the last published release. 
+ print(" [sphinx] Running sphinx-build -M markdown...") + env = os.environ.copy() + # Prevent Python 3.13+ argparse from emitting ANSI color codes in + # help/usage text that sphinxcontrib-autoprogram captures and includes + # verbatim in the generated Markdown. NO_COLOR is the standard + # convention (https://no-color.org/) respected unconditionally by + # Python 3.13's argparse regardless of TERM or FORCE_COLOR. + env["NO_COLOR"] = "1" + src_dir = repo_dir / "src" + if src_dir.is_dir(): + env["PYTHONPATH"] = ( + str(src_dir) + os.pathsep + env.get("PYTHONPATH", "") + ) + print(f" [sphinx] PYTHONPATH prepended with: {src_dir}") + + result = subprocess.run( + [sys.executable, "-m", "sphinx", "-M", "markdown", + str(docs_source), str(build_dir)], + capture_output=True, + text=True, + cwd=str(docs_source), + env=env, + ) + + if result.stdout: + for line in result.stdout.splitlines(): + print(f" [sphinx] {line}") + if result.stderr: + for line in result.stderr.splitlines(): + print(f" [sphinx] {line}") + + if result.returncode != 0: + print( + f" [sphinx] sphinx-build failed (exit {result.returncode})", + file=sys.stderr, + ) + return False + + # Step 2: Copy built Markdown files to output_dir + markdown_build = build_dir / "markdown" + if not markdown_build.exists(): + print( + f" [sphinx] Expected output directory not found: {markdown_build}", + file=sys.stderr, + ) + return False + + print(f" [sphinx] Copying Markdown output to {output_dir}") + output_dir.mkdir(parents=True, exist_ok=True) + for item in markdown_build.iterdir(): + # Skip Sphinx build artifact directories (_static/, _sources/, etc.) + # — these contain theme JS/CSS that VitePress does not need and that + # cause codespell false positives. 
+ if item.is_dir() and item.name.startswith("_"): + continue + target = output_dir / item.name + if item.is_file(): + shutil.copy2(item, target) + elif item.is_dir(): + shutil.copytree(item, target, dirs_exist_ok=True) + + # Step 3: Copy hand-written Markdown files from the raw docs source + # that carry a ``github_target_path`` frontmatter field. These are + # manually authored docs-ng pages (how-to guides, overviews, etc.) that + # live alongside the RST source in the repo. The sphinx builder only + # produces output from RST sources, so these files would be silently + # dropped without this step. + _copy_manual_markdown(docs_source, output_dir, target_map or {}) + + # Step 4: Inject VitePress frontmatter into all generated Markdown files + _inject_frontmatter_in_dir(output_dir, target_map or {}) + + # Step 5: Strip sphinx-style HTML anchor tags and fix heading content + # that causes VitePress/Vue compatibility issues. + _strip_sphinx_anchors(output_dir) + + md_count = sum(1 for _ in output_dir.rglob("*.md")) + print(f" [sphinx] ✓ Sphinx Markdown build complete ({md_count} .md files)") + return True + + +def _copy_manual_markdown( + docs_source: Path, + output_dir: Path, + target_map: Dict[str, str], +) -> None: + """ + Copy hand-written Markdown files from *docs_source* into the root of + *output_dir* (flat, no subdirectory), but **only** for files that carry a + ``github_target_path:`` frontmatter field and whose target path is + **not** already covered by a sphinx-built file in *target_map*. + + These files are manually authored docs-ng pages (how-to guides, overview + pages, etc.) that live alongside the RST sources in the repository. The + sphinx builder ignores them because they are not RST, so without this step + they would be silently dropped when the sphinx builder takes over the full + ``output_dir``. 
+ + Files are copied flat (preserving only the filename, not the source + subdirectory path) because placement is determined entirely by the + ``github_target_path`` frontmatter value, which ``copy_targeted_docs`` + reads in Step 1 of the aggregation pipeline. Preserving subdirectory + structure would cause the files to be included in the sphinx + ``target_path`` (e.g. ``projects/python-gardenlinux-lib/how-to/``) with + relative asset links that cannot be resolved. + + **Exclusion rules:** + * Files inside ``_build/`` subdirectories are skipped (stale pre-built + artifacts that may exist in the working tree). + * Files whose ``github_target_path`` value resolves to the same destination + as an entry in *target_map* are skipped — the sphinx-built version takes + precedence over any hand-written placeholder. + """ + _gtp_re = re.compile(r"^github_target_path\s*:\s*(.+)$", re.MULTILINE) + + # Normalise target_map values to bare paths (strip leading "docs/") + # so we can compare them against github_target_path values. 
+ covered_targets = set() + for dest in target_map.values(): + if dest.startswith("docs/"): + dest = dest[5:] + covered_targets.add(dest.strip()) + + copied = 0 + for md_file in docs_source.rglob("*.md"): + # Skip anything inside a _build/ directory + if "_build" in md_file.parts: + continue + + try: + content = md_file.read_text(encoding="utf-8") + except OSError: + continue + + gtp_match = _gtp_re.search(content) + if not gtp_match: + continue + + # Normalise the target path from the frontmatter (strip quotes and + # optional leading "docs/" prefix) + gtp_value = gtp_match.group(1).strip().strip("\"'") + if gtp_value.startswith("docs/"): + gtp_value = gtp_value[5:] + + # Skip if the target is already covered by a sphinx-built file + if gtp_value in covered_targets: + print( + f" [sphinx] Skipping manual markdown (covered by target_map): " + f"{md_file.relative_to(docs_source)}" + ) + continue + + # Copy flat into output_dir root — placement is driven by github_target_path + dest = output_dir / md_file.name + shutil.copy2(md_file, dest) + print(f" [sphinx] Carried over manual markdown: {md_file.relative_to(docs_source)}") + copied += 1 + + if copied: + print(f" [sphinx] Carried over {copied} manual markdown file(s)") + + +def _extract_title(content: str) -> Optional[str]: + """Extract the first H1 heading from Markdown content.""" + for line in content.splitlines(): + line = line.strip() + if line.startswith("# "): + return line[2:].strip() + return None + + +def _extract_description(content: str) -> Optional[str]: + """ + Extract the first plain-text paragraph that immediately follows the H1 + heading in sphinx-markdown-builder output. + + The function skips blank lines, raw HTML lines, and new headings, returning + the first non-empty plain-text line found after the H1. 
+ """ + lines = content.splitlines() + found_h1 = False + for line in lines: + stripped = line.strip() + if not found_h1: + if stripped.startswith("# "): + found_h1 = True + continue + if not stripped: + continue + if stripped.startswith("<") or stripped.startswith("#"): + continue + return stripped + return None + + +def _inject_frontmatter_in_dir( + directory: Path, + target_map: Optional[Dict[str, str]] = None, +) -> None: + """ + Inject minimal VitePress frontmatter into every Markdown file under + *directory* that does not already have frontmatter. + + Fields written: + * ``title`` — from the first ``# Heading`` in the file. + * ``description`` — from the first paragraph after the H1. + * ``github_target_path`` — only when the filename matches a *target_map* + key, so ``copy_targeted_docs`` places the file at the correct VitePress URL. + """ + target_map = target_map or {} + for md_file in directory.rglob("*.md"): + try: + content = md_file.read_text(encoding="utf-8") + except OSError: + continue + + # Skip files that already have frontmatter + if content.startswith("---\n"): + continue + + title = _extract_title(content) + if not title: + continue + + safe_title = title.replace('"', '\\"') + fm_lines = [f'title: "{safe_title}"'] + + description = _extract_description(content) + if description: + safe_desc = description.replace('"', '\\"') + fm_lines.append(f'description: "{safe_desc}"') + + github_target = target_map.get(md_file.name) + if github_target: + if github_target.startswith("docs/"): + github_target = github_target[5:] + fm_lines.append(f"github_target_path: {github_target}") + print(f" [sphinx] target_map: {md_file.name} → {github_target}") + + frontmatter = "---\n" + "\n".join(fm_lines) + "\n---\n\n" + md_file.write_text(frontmatter + content, encoding="utf-8") + + +def _strip_sphinx_anchors(directory: Path) -> None: + """ + Fix sphinx-markdown-builder output for VitePress compatibility. + + Four issues are addressed: + + 1. 
**Standalone anchor tags** — ``<a id="..."></a>`` lines emitted + before every heading. VitePress inlines these into the adjacent heading, + causing it to use the ``id`` attribute as the permalink anchor instead of + the slugified heading text. Multiple symbols sharing a dotted prefix + then produce duplicate MiniSearch section IDs, crashing the dev server. + + 2. **Intra-page links in headings** — ``[TypeName](#qualified.Name)`` links + in heading lines. VitePress's ``headingContentRegex`` extracts the + ``href`` fragment as the section anchor for MiniSearch, so multiple + method headings referencing the same type produce colliding IDs. + + 3. **Angle-bracket metavars in headings** — ``<metavar>`` placeholders from + sphinxcontrib-autoprogram (e.g. ``### --arch <arch>``). Vue's template + compiler interprets them as unclosed HTML elements and raises + "Element is missing end tag". + + 4. **``_static/`` image references** — Sphinx copies theme assets into + ``_static/`` relative to the docs build output. When a generated file + is placed at a different URL via ``github_target_path`` the relative path + ``_static/...`` cannot be resolved by Vite/Rollup and breaks the build. + """ + _anchor_tag_re = re.compile(r"^\s*$", re.MULTILINE) + _heading_link_re = re.compile(r"\[([^\]]+)\]\(#[^)]+\)") + _heading_metavar_re = re.compile(r"<([a-z][a-z0-9_]*)>") + # Sphinx copies theme assets into _static/; the relative path breaks when + # the generated file is placed at a different URL by github_target_path. 
+ _static_image_re = re.compile(r"!\[[^\]]*\]\(_static/[^)]+\)\n?") + + for md_file in directory.rglob("*.md"): + try: + content = md_file.read_text(encoding="utf-8") + except OSError: + continue + + # Pass 1: remove standalone anchor tags and _static/ image refs + new_content = _anchor_tag_re.sub("", content) + new_content = _static_image_re.sub("", new_content) + + # Pass 2 & 3: heading-line fixups + lines = new_content.splitlines(keepends=True) + fixed_lines = [] + for line in lines: + if re.match(r"^#{1,6}\s", line): + line = _heading_link_re.sub(r"\1", line) + line = _heading_metavar_re.sub(r"`<\1>`", line) + fixed_lines.append(line) + new_content = "".join(fixed_lines) + + if new_content != content: + new_content = re.sub(r"\n{3,}", "\n\n", new_content) + md_file.write_text(new_content, encoding="utf-8") diff --git a/src/aggregation/structure.py b/src/aggregation/structure.py index 500120a..750215d 100644 --- a/src/aggregation/structure.py +++ b/src/aggregation/structure.py @@ -31,6 +31,13 @@ def transform_directory_structure( """ source_path = Path(source_dir) target_path = Path(target_dir) + + # For sphinx structure: clean the target directory first so stale source + # files from previous runs do not accumulate alongside the built Markdown. 
+ if structure_map == "sphinx" and target_path.exists(): + print(f" Cleaning target directory for sphinx build: {target_path}") + shutil.rmtree(target_path) + target_path.mkdir(parents=True, exist_ok=True) special_files = special_files or {} @@ -75,7 +82,26 @@ def transform_directory_structure( else: # Flat/sphinx structure - copy all files as-is (merged logic) print(f" Copying {structure_map} structure") + # Files generated by Sphinx that are not useful in VitePress + _sphinx_artifacts = {"genindex.md", "py-modindex.md", "search.md"} for item in source_path.glob("*"): + if structure_map == "sphinx" and item.name in _sphinx_artifacts: + print(f" Skipping Sphinx artifact: {item.name}") + continue + # For sphinx structure: skip Markdown files that already have a + # github_target_path frontmatter field — they were placed at their + # canonical VitePress location by copy_targeted_docs (Step 1) and + # must not be duplicated here, as duplicate HTML anchor IDs across + # two pages break VitePress's MiniSearch indexer. + if structure_map == "sphinx" and item.is_file() and item.suffix == ".md": + try: + content = item.read_text(encoding="utf-8") + frontmatter, _ = parse_frontmatter(content) + if frontmatter and "github_target_path" in frontmatter: + print(f" Skipping targeted file (already placed): {item.name}") + continue + except Exception: + pass target_item = target_path / item.name if item.is_file(): shutil.copy2(item, target_item)