From 6ec1c259e210d958f3e431dbed6fc39b3a279168 Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 00:09:12 -0400 Subject: [PATCH 1/8] fix: make Discourse sync idempotent and PR preview accurate PR preview reported all 87 topics as modified regardless of how many docs the PR actually changed, because dry-run counted every cache-miss as "Would update" without consulting Discourse. Live sync also rewrote sidebar About topics unconditionally, even when nav was unchanged, and reissued update_topic whenever a post body changed even if title and category were unchanged. Changes: - sync_to_discourse.py: add --only PATH... filter; run remote get_post_raw match check in dry-run, live, and title-fallback paths so matching content is classified as Skipped (Discourse Match) instead of Updated. - _generate_sidebars now fetches remote raw before update_post and skips unchanged sidebars (Skipped_sidebar_match stat). - update_topic is gated behind _topic_metadata_differs, avoiding unnecessary title/category writes. - Skip sidebar generation entirely on --only runs; CI falls back to a full sync preview when zensical.toml, tools/, or docs/**/index.md changed. - discourse_client.py: add get_topic helper returning full topic metadata. - ci.yml preview job: compute changed doc paths via git diff ...HEAD, classify structural vs leaf changes, and pass --only to the dry run only when the PR is leaf-only. Write a no-change preview when nothing relevant changed. - tests: cover --only filtering, dry-run match classification, sidebar match skip, and metadata-gated update_topic. --- .github/workflows/ci.yml | 98 ++++++++- tools/discourse_client.py | 15 ++ tools/sync_to_discourse.py | 325 +++++++++++++++++++---------- tools/test_sync_to_discourse.py | 355 ++++++++++++++++++++++++++++++++ 4 files changed, 675 insertions(+), 118 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dfc4a5f..7dde6e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,9 @@ jobs: if: github.event_name == 'pull_request' steps: - uses: actions/checkout@v6 + with: + # Need history to compute diff vs base branch + fetch-depth: 0 - name: Restore Discourse sync cache uses: actions/cache/restore@v4 @@ -87,28 +90,93 @@ jobs: pip install uv uv pip install --system zensical + - name: Determine changed paths + id: diff + env: + BASE_REF: ${{ github.base_ref }} + run: | + set -euo pipefail + git fetch --depth=1 origin "$BASE_REF" + BASE_SHA=$(git rev-parse "origin/$BASE_REF") + echo "Base: $BASE_REF ($BASE_SHA)" + + # Docs changed (added or modified only — deletions not handled by sync) + CHANGED_DOCS=$(git diff --name-only --diff-filter=AM \ + "$BASE_SHA"...HEAD -- 'docs/**/*.md' \ + | sed 's|^docs/||' | sort -u) || CHANGED_DOCS="" + + # Structural changes that require a full sync: nav definition, + # sync tooling itself, or any index.md (affects sidebars). + STRUCTURAL=$(git diff --name-only \ + "$BASE_SHA"...HEAD -- zensical.toml 'tools/**' 'docs/**/index.md' \ + | sort -u) || STRUCTURAL="" + + echo "CHANGED_DOCS:" + printf '%s\n' "$CHANGED_DOCS" + echo "STRUCTURAL:" + printf '%s\n' "$STRUCTURAL" + + ANY_CHANGE="" + if [ -n "$CHANGED_DOCS" ] || [ -n "$STRUCTURAL" ]; then + ANY_CHANGE="1" + fi + + # Space-separated doc paths for CLI --only + ONLY_ARGS=$(printf '%s ' $CHANGED_DOCS) + ONLY_ARGS=${ONLY_ARGS% } + + # Structural collapsed to single-line marker (presence only) + STRUCTURAL_LINE=$(printf '%s ' $STRUCTURAL) + STRUCTURAL_LINE=${STRUCTURAL_LINE% } + + DOC_COUNT=0 + if [ -n "$CHANGED_DOCS" ]; then + DOC_COUNT=$(printf '%s\n' "$CHANGED_DOCS" | wc -l | tr -d ' ') + fi + + { + echo "any_change=$ANY_CHANGE" + echo "structural=$STRUCTURAL_LINE" + echo "only_args=$ONLY_ARGS" + echo "doc_count=$DOC_COUNT" + } >> "$GITHUB_OUTPUT" + - name: Dry-run Discourse sync + if: steps.diff.outputs.any_change == '1' env: DISCOURSE_URL: ${{ secrets.DISCOURSE_URL }} DISCOURSE_API_KEY: ${{ secrets.DISCOURSE_API_KEY }} DISCOURSE_API_USER: ${{ secrets.DISCOURSE_API_USER }} DISCOURSE_CATEGORY_MAP: ${{ vars.DISCOURSE_CATEGORY_MAP }} + STRUCTURAL: ${{ steps.diff.outputs.structural }} + ONLY_ARGS: ${{ steps.diff.outputs.only_args }} + run: | + ARGS="--dry-run --verbose" + # Structural changes → full sync preview (sidebar regen eligible). + # Leaf-only doc changes → filter via --only for accurate preview. + if [ -z "$STRUCTURAL" ] && [ -n "$ONLY_ARGS" ]; then + ARGS="$ARGS --only $ONLY_ARGS" + fi + uv run --python 3.12 python tools/sync_to_discourse.py $ARGS 2>&1 | tee sync_preview.txt + + - name: Write no-change preview + if: steps.diff.outputs.any_change != '1' run: | - uv run --python 3.12 python tools/sync_to_discourse.py --dry-run --verbose 2>&1 | tee sync_preview.txt + cat > sync_preview.txt <<'EOF' + No documentation or sync tooling changes detected in this PR. + Skipping Discourse dry-run. + EOF - name: Build PR comment id: comment + env: + ANY_CHANGE: ${{ steps.diff.outputs.any_change }} + DOC_COUNT: ${{ steps.diff.outputs.doc_count }} run: | - # Extract the summary table (everything after the === line) - SUMMARY=$(sed -n '/^=\{10,\}/,/^=\{10,\}$/p' sync_preview.txt | head -20) - - # Count actions from the summary CREATED=$(grep -oP 'Created:\s+\K\d+' sync_preview.txt || echo "0") UPDATED=$(grep -oP 'Updated \(Normal\):\s+\K\d+' sync_preview.txt || echo "0") UPDATED_IDX=$(grep -oP 'Updated \(Index\):\s+\K\d+' sync_preview.txt || echo "0") - SKIPPED_CACHE=$(grep -oP 'Skipped \(Cached\):\s+\K\d+' sync_preview.txt || echo "0") - SKIPPED_MATCH=$(grep -oP 'Skipped \(Discourse Match\):\s+\K\d+' sync_preview.txt || echo "0") - SKIPPED_CAT=$(grep -oP 'Skipped \(No Category\):\s+\K\d+' sync_preview.txt || echo "0") + SIDEBARS=$(grep -oP 'Sidebars Updated:\s+\K\d+' sync_preview.txt || echo "0") FAILED=$(grep -oP 'Failed:\s+\K\d+' sync_preview.txt || echo "0") TOTAL=$(grep -oP 'Total Parsed:\s+\K\d+' sync_preview.txt || echo "0") @@ -118,10 +186,20 @@ jobs: echo 'body< int | None: return None return posts[0].get("id") + def get_topic(self, topic_id: int) -> dict[str, Any] | None: + """Fetch full topic metadata including title and category_id. + + Used to detect whether a topic's title or category differs from + the desired values so we can skip unnecessary update_topic calls. + + Args: + topic_id: The Discourse topic ID. + + Returns: + Topic dict with keys including 'title', 'category_id', + 'post_stream', or None if the request failed. + """ + return self._get(f"/t/{topic_id}.json") + def get_post_raw(self, post_id: int) -> str | None: """Fetch the current raw markdown content of a post. diff --git a/tools/sync_to_discourse.py b/tools/sync_to_discourse.py index eb2c662..2cd297c 100644 --- a/tools/sync_to_discourse.py +++ b/tools/sync_to_discourse.py @@ -94,6 +94,44 @@ def _normalize(s: str) -> str: return _normalize(local_body) == _normalize(remote_raw) +def _post_body_matches_remote( + client: DiscourseClient, post_id: int, local_body: str, +) -> bool: + """Fetch remote post raw and return True if it matches local_body. + + Centralizes the fetch-and-compare pattern so both dry-run and live + paths make the same authoritative no-op decision. + """ + time.sleep(1.0) + remote_raw = client.get_post_raw(post_id) + return _body_matches_discourse(local_body, remote_raw) + + +def _topic_metadata_differs( + client: DiscourseClient, + topic_id: int, + new_title: str, + new_category_id: int, +) -> bool: + """Return True if topic's current title or category differs from target. + + Used to gate ``update_topic`` calls so Discourse is only mutated + when metadata truly needs to change. A failed fetch returns True + (update defensively) so transient read errors do not silently drop + legitimate metadata updates. + """ + time.sleep(1.0) + topic = client.get_topic(topic_id) + if topic is None: + return True + current_title = (topic.get("title") or "").strip() + current_category = topic.get("category_id") + return ( + current_title != new_title.strip() + or current_category != new_category_id + ) + + def build_footer(doc_path: str) -> str: """Build the sync footer with GitHub link and sync ID.""" gh_url = ( @@ -332,14 +370,6 @@ def _generate_sidebars( # Discourse requires at least one paragraph in category descriptions combined = f"Browse the **{section_title}** documentation:\n\n{sidebar_content}\n" - if dry_run: - print( - f" [DRY RUN] Would update sidebar: {section_title} " - f"(category {category_id}, {len(sidebar_lines)} items)" - ) - stats["sidebars_updated"] += 1 - continue - about_topic_id = client.get_category_about_topic_id(category_id) if about_topic_id is None: print( @@ -356,6 +386,23 @@ def _generate_sidebars( ) continue + if _post_body_matches_remote(client, post_id, combined): + if verbose: + print( + f" SKIP sidebar (discourse up-to-date): " + f"{section_title} (category {category_id})" + ) + stats["skipped_sidebar_match"] += 1 + continue + + if dry_run: + print( + f" [DRY RUN] Would update sidebar: {section_title} " + f"(category {category_id}, {len(sidebar_lines)} items)" + ) + stats["sidebars_updated"] += 1 + continue + time.sleep(1.0) result = client.update_post( post_id, combined, @@ -458,7 +505,33 @@ def sync_docs(args: argparse.Namespace) -> None: client = DiscourseClient(config) cache = ContentCache() - entries = parse_all(ZENSICAL_TOML) + all_entries = parse_all(ZENSICAL_TOML) + + # Optional path filter (e.g. from PR preview) — restrict which docs + # Pass 2 and sidebar generation consider. Pass 1 still resolves all + # topic IDs so internal .md -> /t/ link rewriting remains correct. + only_paths: set[str] | None = None + raw_only = getattr(args, "only", None) + if raw_only: + only_paths = {p.strip() for p in raw_only if p.strip()} + + if only_paths is not None: + entries = [e for e in all_entries if e.path in only_paths] + missing = sorted(only_paths - {e.path for e in all_entries}) + if missing: + print( + " WARN: --only paths not present in zensical.toml, ignoring: " + f"{missing}" + ) + else: + entries = all_entries + + # Sidebar regeneration rebuilds all 8 section About topics from the + # full index_contents map. A filtered run cannot populate every + # section without reading the whole nav, so we always skip sidebar + # generation when --only is set. Nav/index changes must trigger a + # full run (no --only) to refresh sidebars. + skip_sidebars = only_paths is not None stats: dict[str, int] = { "total": len(entries), @@ -470,20 +543,32 @@ def sync_docs(args: argparse.Namespace) -> None: "skipped_no_category": 0, "failed": 0, "sidebars_updated": 0, + "skipped_sidebar_match": 0, + "skipped_metadata_match": 0, } # Store converted index.md content for sidebar combination index_contents: dict[str, str] = {} - print(f"Sync: {len(entries)} entries from zensical.toml") + filter_note = ( + f" (filtered to {len(entries)} of {len(all_entries)})" + if only_paths is not None + else "" + ) + print(f"Sync: {len(entries)} entries from zensical.toml{filter_note}") print(f" Discourse: {config.base_url}") print(f" Dry run: {args.dry_run}") print(f" Force: {args.force}") + if only_paths is not None: + print(f" Only: {sorted(only_paths)}") print() # --- Pass 1: Resolve all topic IDs --- + # Resolve the full nav so dry-run link rewriting and sidebar lookups + # still find every topic when the caller only supplied a subset via + # --only. synced_topics = _resolve_all_topic_ids( - entries, config=config, client=client, verbose=args.verbose, + all_entries, config=config, client=client, verbose=args.verbose, ) # --- Pass 2: Convert and push --- @@ -540,72 +625,85 @@ def sync_docs(args: argparse.Namespace) -> None: # Each index.md maps to the most specific matching category # (e.g. features/cruise/index.md -> sub-category 140). about_topic_id = synced_topics.get(entry.path) - if args.dry_run: - if about_topic_id is not None: - print( - f" [DRY RUN] Would update index: {label}\n" - f" -> About topic {about_topic_id} " - f"(category {category_id})\n" - f" -> {config.base_url}/t/{about_topic_id}" - ) - else: + + if about_topic_id is None: + if args.dry_run: print( f" [DRY RUN] WARN: No About topic for " f"category {category_id}: {label}" ) - stats["updated_index"] += 1 - else: - if about_topic_id is None: + stats["updated_index"] += 1 + else: print( f" FAIL: No About topic for " f"category {category_id}: {label}" ) stats["failed"] += 1 - continue + continue - post_id = client.first_post_id(about_topic_id) - if post_id is None: - print( - f" FAIL: No first post for " - f"About topic {about_topic_id}: {label}" - ) - stats["failed"] += 1 - continue + post_id = client.first_post_id(about_topic_id) + if post_id is None: + print( + f" FAIL: No first post for " + f"About topic {about_topic_id}: {label}" + ) + stats["failed"] += 1 + continue - time.sleep(1.0) - current_raw = client.get_post_raw(post_id) - if _body_matches_discourse(body, current_raw): - if args.verbose: - print(f" SKIP (discourse up-to-date): {label}") - stats["skipped_discourse_match"] += 1 + if _post_body_matches_remote(client, post_id, body): + if args.verbose: + print(f" SKIP (discourse up-to-date): {label}") + stats["skipped_discourse_match"] += 1 + if not args.dry_run: cache.save(entry.path, raw_content) - continue + continue - time.sleep(1.0) - result = client.update_post( - post_id, body, - edit_reason="Docs sync: index page", + if args.dry_run: + print( + f" [DRY RUN] Would update index: {label}\n" + f" -> About topic {about_topic_id} " + f"(category {category_id})\n" + f" -> {config.base_url}/t/{about_topic_id}" ) - if result is None: - print( - f" FAIL: Could not update " - f"About topic {about_topic_id}: {label}" - ) - stats["failed"] += 1 - continue - - print(f" Updated index: {label} -> About topic {about_topic_id}") stats["updated_index"] += 1 + continue + + time.sleep(1.0) + result = client.update_post( + post_id, body, + edit_reason="Docs sync: index page", + ) + if result is None: + print( + f" FAIL: Could not update " + f"About topic {about_topic_id}: {label}" + ) + stats["failed"] += 1 + continue - # Update cache for index pages - if not args.dry_run: - cache.save(entry.path, raw_content) + print(f" Updated index: {label} -> About topic {about_topic_id}") + stats["updated_index"] += 1 + cache.save(entry.path, raw_content) continue # --- Normal doc: find or create --- topic_id = synced_topics.get(entry.path) if topic_id is not None: + post_id = client.first_post_id(topic_id) + if post_id is None: + print(f" FAIL: No first post for topic {topic_id}: {label}") + stats["failed"] += 1 + continue + + if _post_body_matches_remote(client, post_id, body): + if args.verbose: + print(f" SKIP (discourse up-to-date): {label}") + stats["skipped_discourse_match"] += 1 + if not args.dry_run: + cache.save(entry.path, raw_content) + continue + if args.dry_run: print( f" [DRY RUN] Would update: {label}\n" @@ -615,21 +713,6 @@ def sync_docs(args: argparse.Namespace) -> None: ) stats["updated"] += 1 else: - post_id = client.first_post_id(topic_id) - if post_id is None: - print(f" FAIL: No first post for topic {topic_id}: {label}") - stats["failed"] += 1 - continue - - time.sleep(1.0) - current_raw = client.get_post_raw(post_id) - if _body_matches_discourse(body, current_raw): - if args.verbose: - print(f" SKIP (discourse up-to-date): {label}") - stats["skipped_discourse_match"] += 1 - cache.save(entry.path, raw_content) - continue - time.sleep(1.0) result = client.update_post( post_id, body, @@ -640,12 +723,15 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - # Update topic title and category - time.sleep(1.0) - client.update_topic( - topic_id, title, - category_id=category_id, - ) + # Update topic title and category only when drift detected + if _topic_metadata_differs(client, topic_id, title, category_id): + time.sleep(1.0) + client.update_topic( + topic_id, title, + category_id=category_id, + ) + else: + stats["skipped_metadata_match"] += 1 print(f" Updated: {label} -> topic {topic_id}") stats["updated"] += 1 @@ -660,6 +746,22 @@ def sync_docs(args: argparse.Namespace) -> None: found_id = existing_by_title["id"] synced_topics[entry.path] = found_id + post_id = client.first_post_id(found_id) + if post_id is None: + print( + f" FAIL: No first post for topic {found_id}: {label}" + ) + stats["failed"] += 1 + continue + + if _post_body_matches_remote(client, post_id, body): + if args.verbose: + print(f" SKIP (discourse up-to-date): {label}") + stats["skipped_discourse_match"] += 1 + if not args.dry_run: + cache.save(entry.path, raw_content) + continue + if args.dry_run: print( f" [DRY RUN] Would update (found by title): " @@ -670,23 +772,6 @@ def sync_docs(args: argparse.Namespace) -> None: ) stats["updated"] += 1 else: - post_id = client.first_post_id(found_id) - if post_id is None: - print( - f" FAIL: No first post for topic {found_id}: {label}" - ) - stats["failed"] += 1 - continue - - time.sleep(1.0) - current_raw = client.get_post_raw(post_id) - if _body_matches_discourse(body, current_raw): - if args.verbose: - print(f" SKIP (discourse up-to-date): {label}") - stats["skipped_discourse_match"] += 1 - cache.save(entry.path, raw_content) - continue - time.sleep(1.0) result = client.update_post( post_id, body, @@ -699,11 +784,14 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - time.sleep(1.0) - client.update_topic( - found_id, title, - category_id=category_id, - ) + if _topic_metadata_differs(client, found_id, title, category_id): + time.sleep(1.0) + client.update_topic( + found_id, title, + category_id=category_id, + ) + else: + stats["skipped_metadata_match"] += 1 print( f" Updated (found by title): {label} -> topic {found_id}" @@ -739,17 +827,27 @@ def sync_docs(args: argparse.Namespace) -> None: cache.save(entry.path, raw_content) # --- Sidebar generation --- + # Skip entirely when --only is set: sidebar regeneration depends on + # full index_contents for every section, and regenerating with a + # partial set would overwrite untouched About topics with placeholder + # text. Nav structure changes should run without --only (driven by + # CI detecting zensical.toml or index.md changes). print() - print("Generating sidebars...") - _generate_sidebars( - config=config, - client=client, - synced_topics=synced_topics, - index_contents=index_contents, - dry_run=args.dry_run, - verbose=args.verbose, - stats=stats, - ) + if skip_sidebars: + print( + "Skipping sidebar generation (filtered run without nav or index changes)" + ) + else: + print("Generating sidebars...") + _generate_sidebars( + config=config, + client=client, + synced_topics=synced_topics, + index_contents=index_contents, + dry_run=args.dry_run, + verbose=args.verbose, + stats=stats, + ) # --- Summary --- print() @@ -762,9 +860,11 @@ def sync_docs(args: argparse.Namespace) -> None: print(f" Updated (Index): {stats['updated_index']}") print(f" Skipped (Cached): {stats['skipped_cached']}") print(f" Skipped (Discourse Match): {stats['skipped_discourse_match']}") + print(f" Skipped (Metadata Match): {stats['skipped_metadata_match']}") print(f" Skipped (No Category): {stats['skipped_no_category']}") print(f" Failed: {stats['failed']}") print(f" Sidebars Updated: {stats['sidebars_updated']}") + print(f" Sidebars Skipped (Match): {stats['skipped_sidebar_match']}") print("=" * 60) if stats["failed"] > 0: @@ -776,6 +876,15 @@ def main() -> None: parser.add_argument("--dry-run", action="store_true", help="Preview changes without making API calls.") parser.add_argument("--verbose", "-v", action="store_true", help="Print detailed progress for skipped entries.") parser.add_argument("--force", action="store_true", help="Bypass content cache and re-sync all entries.") + parser.add_argument( + "--only", + nargs="+", + metavar="PATH", + help=( + "Restrict Pass 2 to the given doc paths (relative to docs/). " + "Sidebar generation is skipped when this is set." + ), + ) args = parser.parse_args() sync_docs(args) diff --git a/tools/test_sync_to_discourse.py b/tools/test_sync_to_discourse.py index cb5e736..e8e9bed 100644 --- a/tools/test_sync_to_discourse.py +++ b/tools/test_sync_to_discourse.py @@ -463,6 +463,355 @@ def side_effect(req: object) -> MagicMock: print(" PASS: sync_docs_skips_when_discourse_matches") +# --------------------------------------------------------------------------- +# --only filter + idempotency guarantees +# --------------------------------------------------------------------------- + + +def _build_two_doc_workspace(tmpdir: str) -> tuple[Path, Path]: + """Create a docs tree with two docs under getting-started. + + Returns ``(docs_dir, toml_path)``. + """ + docs_dir = Path(tmpdir) / "docs" + docs_dir.mkdir() + gs_dir = docs_dir / "getting-started" + gs_dir.mkdir() + + (gs_dir / "doc-a.md").write_text( + "---\ntitle: Doc A\n---\n\n# Doc A\n\nAlpha content.\n" + ) + (gs_dir / "doc-b.md").write_text( + "---\ntitle: Doc B\n---\n\n# Doc B\n\nBravo content.\n" + ) + + toml_path = Path(tmpdir) / "zensical.toml" + toml_path.write_text(textwrap.dedent("""\ + [project] + docs_dir = "docs" + nav = [ + { "Getting Started" = [ + { "Doc A" = "getting-started/doc-a.md" }, + { "Doc B" = "getting-started/doc-b.md" }, + ]}, + ] + """)) + return docs_dir, toml_path + + +def _patch_cache_dir(tmpdir: str) -> patch: + """Redirect the content cache to an isolated per-test directory.""" + cache_dir = Path(tmpdir) / ".cache" + return patch("sync_to_discourse.ContentCache", lambda: _ephemeral_cache(cache_dir)) + + +def _ephemeral_cache(cache_dir: Path): + from content_cache import ContentCache + return ContentCache(cache_dir=cache_dir) + + +def test_sync_docs_only_filter_restricts_pass2(): + """--only restricts Pass 2 to listed paths; others are not processed.""" + with tempfile.TemporaryDirectory() as tmpdir: + docs_dir, toml_path = _build_two_doc_workspace(tmpdir) + + env = { + "DISCOURSE_URL": "https://community.sunnypilot.ai", + "DISCOURSE_API_KEY": "test-key", + "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', + } + args = argparse.Namespace( + dry_run=True, verbose=True, force=True, + only=["getting-started/doc-a.md"], + ) + + with ( + patch.dict("os.environ", env, clear=False), + patch("sync_to_discourse.DOCS_DIR", docs_dir), + patch("sync_to_discourse.ZENSICAL_TOML", toml_path), + _patch_cache_dir(tmpdir), + patch("urllib.request.urlopen") as mock_urlopen, + ): + mock_urlopen.return_value = _mock_response({"topics": []}) + + # Capture printed output to verify only doc-a is mentioned in Pass 2 + import io + import contextlib + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + sync_docs(args) + output = buf.getvalue() + + assert "doc-a.md" in output, "Filtered doc must appear in dry-run output" + # doc-b.md must not appear in Pass 2 processing lines + pass2_section = output.split("Pass 2: Syncing content...", 1)[1] + pass2_end = pass2_section.split("Sync Summary", 1)[0] + assert "doc-b.md" not in pass2_end, ( + f"Excluded doc must not be processed in Pass 2. Found in:\n{pass2_end}" + ) + assert "Skipping sidebar generation" in output, ( + "Filtered runs must skip sidebar generation" + ) + + print(" PASS: sync_docs_only_filter_restricts_pass2") + + +def test_sync_docs_only_filter_warns_on_unknown_path(): + """--only paths not in zensical.toml produce a WARN, not an error.""" + with tempfile.TemporaryDirectory() as tmpdir: + docs_dir, toml_path = _build_two_doc_workspace(tmpdir) + + env = { + "DISCOURSE_URL": "https://community.sunnypilot.ai", + "DISCOURSE_API_KEY": "test-key", + "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', + } + args = argparse.Namespace( + dry_run=True, verbose=True, force=True, + only=["getting-started/doc-a.md", "getting-started/ghost.md"], + ) + + with ( + patch.dict("os.environ", env, clear=False), + patch("sync_to_discourse.DOCS_DIR", docs_dir), + patch("sync_to_discourse.ZENSICAL_TOML", toml_path), + _patch_cache_dir(tmpdir), + patch("urllib.request.urlopen") as mock_urlopen, + ): + mock_urlopen.return_value = _mock_response({"topics": []}) + + import io + import contextlib + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + sync_docs(args) + output = buf.getvalue() + + assert "WARN: --only paths not present" in output + assert "getting-started/ghost.md" in output + assert "doc-a.md" in output + + print(" PASS: sync_docs_only_filter_warns_on_unknown_path") + + +def test_sync_docs_dry_run_counts_match_as_skip_not_update(): + """Dry-run must fetch remote and classify matching content as skipped.""" + with tempfile.TemporaryDirectory() as tmpdir: + docs_dir, toml_path = _build_two_doc_workspace(tmpdir) + + env = { + "DISCOURSE_URL": "https://community.sunnypilot.ai", + "DISCOURSE_API_KEY": "test-key", + "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', + } + # Force=True bypasses content cache so remote check becomes authoritative. + args = argparse.Namespace( + dry_run=True, verbose=True, force=True, + only=["getting-started/doc-a.md"], + ) + + def side_effect(req): + url = req.full_url + method = req.method + if method == "GET" and "/search.json" in url: + # Resolve via sync-id + return _mock_response({"topics": [{"id": 500, "title": "Doc A"}]}) + if method == "GET" and url.endswith("/t/500.json"): + return _mock_response({ + "id": 500, + "title": "Doc A", + "category_id": 133, + "post_stream": {"posts": [{"id": 900, "post_number": 1}]}, + }) + if method == "GET" and url.endswith("/posts/900.json"): + # Return what _post_body_matches_remote will compare against. + # Patching _body_matches_discourse below makes this trivial. + return _mock_response({"id": 900, "raw": "whatever"}) + return _mock_response({}) + + with ( + patch.dict("os.environ", env, clear=False), + patch("sync_to_discourse.DOCS_DIR", docs_dir), + patch("sync_to_discourse.ZENSICAL_TOML", toml_path), + _patch_cache_dir(tmpdir), + patch("sync_to_discourse.time") as mock_time, + patch("urllib.request.urlopen") as mock_urlopen, + patch("sync_to_discourse._body_matches_discourse", return_value=True), + ): + mock_time.sleep = MagicMock() + mock_urlopen.side_effect = side_effect + + import io + import contextlib + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + sync_docs(args) + output = buf.getvalue() + + # Should skip, not "Would update" + assert "Would update" not in output, ( + f"Dry-run must not count matching remote as update:\n{output}" + ) + assert "SKIP (discourse up-to-date)" in output, ( + f"Must log skip for matching remote:\n{output}" + ) + # Summary line must show 1 skipped discourse match + assert "Skipped (Discourse Match): 1" in output + assert "Updated (Normal): 0" in output + + print(" PASS: sync_docs_dry_run_counts_match_as_skip_not_update") + + +def test_sidebar_skips_update_when_remote_matches(): + """_generate_sidebars must fetch remote and skip unchanged sidebars.""" + from sync_to_discourse import _generate_sidebars + + with tempfile.TemporaryDirectory() as tmpdir: + docs_dir, toml_path = _build_two_doc_workspace(tmpdir) + # Add an index so there's something to render + (docs_dir / "getting-started" / "index.md").write_text( + "# Getting Started\n\nIntro text.\n" + ) + + env = { + "DISCOURSE_URL": "https://community.sunnypilot.ai", + "DISCOURSE_API_KEY": "test-key", + "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', + } + config = DiscourseConfig( + base_url=env["DISCOURSE_URL"], + api_key=env["DISCOURSE_API_KEY"], + category_mapping={"getting-started": 133}, + ) + client = DiscourseClient(config) + stats = { + "sidebars_updated": 0, + "skipped_sidebar_match": 0, + } + synced_topics = { + "getting-started/doc-a.md": 501, + "getting-started/doc-b.md": 502, + } + + def side_effect(req): + url = req.full_url + method = req.method + if method == "GET" and "/c/133/show.json" in url: + return _mock_response({ + "category": {"topic_url": "/t/about-getting-started/700"}, + }) + if method == "GET" and url.endswith("/t/700.json"): + return _mock_response({ + "post_stream": {"posts": [{"id": 800, "post_number": 1}]}, + }) + if method == "GET" and url.endswith("/posts/800.json"): + return _mock_response({"id": 800, "raw": "matches"}) + # Any PUT/POST would indicate an unwanted update + raise AssertionError(f"Unexpected {method} to {url}") + + with ( + patch.dict("os.environ", env, clear=False), + patch("sync_to_discourse.ZENSICAL_TOML", toml_path), + patch("sync_to_discourse.time") as mock_time, + patch("urllib.request.urlopen") as mock_urlopen, + patch("sync_to_discourse._body_matches_discourse", return_value=True), + ): + mock_time.sleep = MagicMock() + mock_urlopen.side_effect = side_effect + + _generate_sidebars( + config=config, + client=client, + synced_topics=synced_topics, + index_contents={"getting-started/index.md": "Intro.\n"}, + dry_run=False, + verbose=True, + stats=stats, + ) + + assert stats["skipped_sidebar_match"] == 1, ( + f"Matching sidebar must be skipped, got stats={stats}" + ) + assert stats["sidebars_updated"] == 0, ( + f"No sidebars should have updated, got stats={stats}" + ) + + print(" PASS: sidebar_skips_update_when_remote_matches") + + +def test_update_topic_skipped_when_metadata_matches(): + """update_topic PUT must be suppressed when title + category already match.""" + with tempfile.TemporaryDirectory() as tmpdir: + docs_dir, toml_path = _build_two_doc_workspace(tmpdir) + + env = { + "DISCOURSE_URL": "https://community.sunnypilot.ai", + "DISCOURSE_API_KEY": "test-key", + "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', + } + args = argparse.Namespace( + dry_run=False, verbose=True, force=True, + only=["getting-started/doc-a.md"], + ) + + calls: list[tuple[str, str, bytes | None]] = [] + + def side_effect(req): + url = req.full_url + method = req.method + data = req.data + calls.append((method, url, data)) + + if method == "GET" and "/search.json" in url: + return _mock_response({"topics": [{"id": 500, "title": "Doc A"}]}) + if method == "GET" and url.endswith("/t/500.json"): + # Metadata check + first_post_id both hit this endpoint; + # return matching metadata + post stream + return _mock_response({ + "id": 500, + "title": "Doc A", + "category_id": 133, + "post_stream": {"posts": [{"id": 900, "post_number": 1}]}, + }) + if method == "GET" and url.endswith("/posts/900.json"): + return _mock_response({"id": 900, "raw": "body-differs"}) + if method == "PUT" and url.endswith("/posts/900.json"): + return _mock_response({"post": {"id": 900}}) + return _mock_response({}) + + with ( + patch.dict("os.environ", env, clear=False), + patch("sync_to_discourse.DOCS_DIR", docs_dir), + patch("sync_to_discourse.ZENSICAL_TOML", toml_path), + _patch_cache_dir(tmpdir), + patch("sync_to_discourse.time") as mock_time, + patch("urllib.request.urlopen") as mock_urlopen, + ): + mock_time.sleep = MagicMock() + mock_urlopen.side_effect = side_effect + # Force body mismatch so update_post fires; metadata check then runs + with patch( + "sync_to_discourse._body_matches_discourse", return_value=False, + ): + sync_docs(args) + + # Must PUT the post (body changed), but MUST NOT PUT the topic + put_topic_calls = [ + (m, u) for m, u, _ in calls + if m == "PUT" and "/t/-/" in u + ] + put_post_calls = [ + (m, u) for m, u, _ in calls + if m == "PUT" and "/posts/" in u + ] + assert put_post_calls, f"Post should be updated when body differs: {calls}" + assert not put_topic_calls, ( + f"Topic metadata matches — no update_topic PUT expected. Got: {put_topic_calls}" + ) + + print(" PASS: update_topic_skipped_when_metadata_matches") + + # --------------------------------------------------------------------------- # Runner # --------------------------------------------------------------------------- @@ -503,6 +852,12 @@ def side_effect(req: object) -> MagicMock: # Integration test_sync_docs_dry_run, test_sync_docs_skips_when_discourse_matches, + # --only filter + idempotency + test_sync_docs_only_filter_restricts_pass2, + test_sync_docs_only_filter_warns_on_unknown_path, + test_sync_docs_dry_run_counts_match_as_skip_not_update, + test_sidebar_skips_update_when_remote_matches, + test_update_topic_skipped_when_metadata_matches, ] passed = 0 failed = 0 From 7b28a9d7971900f5f909630c290829b1df175a08 Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 00:33:11 -0400 Subject: [PATCH 2/8] fix(ci): pin GITHUB_REF_NAME to base_ref in PR preview build_footer embeds GITHUB_REF_NAME in the GitHub "Suggest changes" link. On PR runs GitHub sets GITHUB_REF_NAME to "/merge", which makes the rendered footer differ from the master-synced raw stored on Discourse for every topic. _body_matches_discourse then returns False even when the doc body itself is untouched, so the preview shows every topic as "Would update". Override GITHUB_REF_NAME to github.base_ref in the preview step so the footer URL matches what the last master sync wrote. --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7dde6e7..f1dea8e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -150,6 +150,12 @@ jobs: DISCOURSE_CATEGORY_MAP: ${{ vars.DISCOURSE_CATEGORY_MAP }} STRUCTURAL: ${{ steps.diff.outputs.structural }} ONLY_ARGS: ${{ steps.diff.outputs.only_args }} + # Force footer GitHub link to base branch so dry-run body + # matches what sync_to_discourse.py wrote from the last master + # push. Otherwise build_footer embeds the PR ref (e.g. + # "14/merge"), making every remote match check fail and + # inflating the preview count. + GITHUB_REF_NAME: ${{ github.base_ref }} run: | ARGS="--dry-run --verbose" # Structural changes → full sync preview (sidebar regen eligible). From 785dfc69cc58387e47f0fc36b9b5ee198281fba5 Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 00:45:56 -0400 Subject: [PATCH 3/8] debug(ci): log first divergent line on remote match failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR preview still reports all topics as modified even with GITHUB_REF_NAME pinned to master. Local rendering of the same docs matches the stored Discourse raw, so the drift must be introduced somewhere inside the CI environment (auth payload, converter output, zensical install, or something else). Add an opt-in SYNC_DEBUG_DIFF env flag that prints the first divergent line pair whenever _post_body_matches_remote returns False. Enable it in the CI preview step only — not the production sync — so the next PR run surfaces what the mismatch actually is. --- .github/workflows/ci.yml | 3 ++ tools/sync_to_discourse.py | 63 ++++++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1dea8e..440ff4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -156,6 +156,9 @@ jobs: # "14/merge"), making every remote match check fail and # inflating the preview count. GITHUB_REF_NAME: ${{ github.base_ref }} + # Print first divergence on mismatch so CI logs reveal what + # drifted between local render and stored Discourse raw. + SYNC_DEBUG_DIFF: "1" run: | ARGS="--dry-run --verbose" # Structural changes → full sync preview (sidebar regen eligible). diff --git a/tools/sync_to_discourse.py b/tools/sync_to_discourse.py index 2cd297c..b94905a 100644 --- a/tools/sync_to_discourse.py +++ b/tools/sync_to_discourse.py @@ -95,16 +95,57 @@ def _normalize(s: str) -> str: def _post_body_matches_remote( - client: DiscourseClient, post_id: int, local_body: str, + client: DiscourseClient, + post_id: int, + local_body: str, + *, + debug_label: str | None = None, ) -> bool: """Fetch remote post raw and return True if it matches local_body. Centralizes the fetch-and-compare pattern so both dry-run and live paths make the same authoritative no-op decision. + + When ``SYNC_DEBUG_DIFF`` is set in the environment and a mismatch is + detected, print the first divergent line pair to aid diagnosis of + unexpected drift (e.g. forum URL differences, hidden whitespace, or + Discourse-side reformatting). """ time.sleep(1.0) remote_raw = client.get_post_raw(post_id) - return _body_matches_discourse(local_body, remote_raw) + matches = _body_matches_discourse(local_body, remote_raw) + if ( + not matches + and os.environ.get("SYNC_DEBUG_DIFF") + and debug_label is not None + ): + _print_first_divergence(debug_label, local_body, remote_raw) + return matches + + +def _print_first_divergence( + label: str, local_body: str, remote_raw: str | None, +) -> None: + """Print the first point where normalized local and remote diverge.""" + if remote_raw is None: + print(f" DEBUG {label}: remote is None (get_post_raw failed)") + return + + def _norm(s: str) -> list[str]: + return [line.rstrip() for line in s.splitlines()] + + local_lines = _norm(local_body) + remote_lines = _norm(remote_raw) + limit = max(len(local_lines), len(remote_lines)) + for i in range(limit): + local = local_lines[i] if i < len(local_lines) else "" + remote = remote_lines[i] if i < len(remote_lines) else "" + if local != remote: + print(f" DEBUG {label}: first diff at line {i + 1}") + print(f" local : {local[:200]!r}") + print(f" remote: {remote[:200]!r}") + return + print(f" DEBUG {label}: normalized lines equal but match returned False") def _topic_metadata_differs( @@ -386,7 +427,10 @@ def _generate_sidebars( ) continue - if _post_body_matches_remote(client, post_id, combined): + if _post_body_matches_remote( + client, post_id, combined, + debug_label=f"sidebar {section_title} (cat {category_id})", + ): if verbose: print( f" SKIP sidebar (discourse up-to-date): " @@ -650,7 +694,9 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - if _post_body_matches_remote(client, post_id, body): + if _post_body_matches_remote( + client, post_id, body, debug_label=f"index {label}", + ): if args.verbose: print(f" SKIP (discourse up-to-date): {label}") stats["skipped_discourse_match"] += 1 @@ -696,7 +742,9 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - if _post_body_matches_remote(client, post_id, body): + if _post_body_matches_remote( + client, post_id, body, debug_label=f"topic {topic_id} {label}", + ): if args.verbose: print(f" SKIP (discourse up-to-date): {label}") stats["skipped_discourse_match"] += 1 @@ -754,7 +802,10 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - if _post_body_matches_remote(client, post_id, body): + if _post_body_matches_remote( + client, post_id, body, + debug_label=f"title-fallback {found_id} {label}", + ): if args.verbose: print(f" SKIP (discourse up-to-date): {label}") stats["skipped_discourse_match"] += 1 From 84bd73ca33be6d6d82f157cf073a58ab1aef183a Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 01:09:04 -0400 Subject: [PATCH 4/8] fix: use DOCS_SYNC_REF_NAME to override footer ref in PR preview GitHub Actions reserves GITHUB_REF_NAME and silently ignores step-level env overrides, so the earlier attempt to pin it to github.base_ref had no effect: build_footer still embedded the PR merge ref (e.g. "14/merge") and every remote match check in dry-run saw a divergent footer line and counted the topic as modified. Read the ref at call time via a new DOCS_SYNC_REF_NAME env var, falling back to GITHUB_REF_NAME and then "master". The CI preview step sets DOCS_SYNC_REF_NAME to the base branch so the dry-run body matches what the last master sync wrote. Diagnostic SYNC_DEBUG_DIFF output that identified the drift remains available behind the env flag; the CI preview no longer sets it. --- .github/workflows/ci.yml | 10 +++++----- tools/sync_to_discourse.py | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 440ff4c..83a2cf4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -154,11 +154,11 @@ jobs: # matches what sync_to_discourse.py wrote from the last master # push. Otherwise build_footer embeds the PR ref (e.g. # "14/merge"), making every remote match check fail and - # inflating the preview count. - GITHUB_REF_NAME: ${{ github.base_ref }} - # Print first divergence on mismatch so CI logs reveal what - # drifted between local render and stored Discourse raw. - SYNC_DEBUG_DIFF: "1" + # inflating the preview count. GitHub Actions reserves + # GITHUB_REF_NAME and ignores step-level env overrides, so + # we use a dedicated env var that _resolve_ref_name consults + # first. + DOCS_SYNC_REF_NAME: ${{ github.base_ref }} run: | ARGS="--dry-run --verbose" # Structural changes → full sync preview (sidebar regen eligible). diff --git a/tools/sync_to_discourse.py b/tools/sync_to_discourse.py index b94905a..9936bdf 100644 --- a/tools/sync_to_discourse.py +++ b/tools/sync_to_discourse.py @@ -35,7 +35,22 @@ REPO_ROOT = Path(__file__).resolve().parent.parent DOCS_DIR = REPO_ROOT / "docs" ZENSICAL_TOML = REPO_ROOT / "zensical.toml" -GITHUB_REF_NAME = os.environ.get("GITHUB_REF_NAME", "master") + + +def _resolve_ref_name() -> str: + """Return the Git ref used in the footer "Suggest changes" link. + + ``DOCS_SYNC_REF_NAME`` takes precedence so callers (notably the PR + preview CI step) can force the ref to the base branch; the built-in + ``GITHUB_REF_NAME`` is reserved by GitHub Actions and step-level + ``env:`` blocks cannot override it, so we cannot rely on it alone in + pull-request runs. Falls back to ``"master"`` for local dev. + """ + return ( + os.environ.get("DOCS_SYNC_REF_NAME") + or os.environ.get("GITHUB_REF_NAME") + or "master" + ) SKIP_INDEX_FILES = frozenset({"index.md", "README.md"}) @@ -177,7 +192,7 @@ def build_footer(doc_path: str) -> str: """Build the sync footer with GitHub link and sync ID.""" gh_url = ( f"https://github.com/sunnypilot/user-docs/blob/" - f"{GITHUB_REF_NAME}/docs/{doc_path}" + f"{_resolve_ref_name()}/docs/{doc_path}" ) return ( "\n\n---\n" From 1e494b19465e7c7748b62464bac17a3365d7f9fc Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 01:21:19 -0400 Subject: [PATCH 5/8] fix: defer About topic updates to sidebar generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every index.md was being double-counted as "Would update" in the PR preview because two different code paths tried to maintain the category About topic: 1. is_about_topic branch wrote body+footer via update_post. 2. _generate_sidebars wrote body+footer+sidebar_lines via another update_post immediately after. The pre-update idempotency check in branch 1 fetched the stored raw (which always includes the sidebar lines from branch 2's previous run) and compared it against local body+footer. The comparison always diverged at the sidebar block, so every About topic was classified as "Would update" even when its authoritative combined body matched Discourse perfectly — which _generate_sidebars then confirmed by reporting Sidebars Updated: 0. Remove the redundant branch 1 update. _generate_sidebars now owns the full lifecycle of every About topic. Drop the obsolete "Updated (Index)" stat and fold the CI preview's sidebar counter into the overall changes total so the comment still surfaces index changes. For PR 14 the preview now reports 4 (1 create + 3 normal updates) — the genuine drift from the last master sync — instead of 13. --- .github/workflows/ci.yml | 14 +++---- tools/sync_to_discourse.py | 78 +++++++------------------------------- 2 files changed, 19 insertions(+), 73 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83a2cf4..5f691e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -184,12 +184,14 @@ jobs: run: | CREATED=$(grep -oP 'Created:\s+\K\d+' sync_preview.txt || echo "0") UPDATED=$(grep -oP 'Updated \(Normal\):\s+\K\d+' sync_preview.txt || echo "0") - UPDATED_IDX=$(grep -oP 'Updated \(Index\):\s+\K\d+' sync_preview.txt || echo "0") SIDEBARS=$(grep -oP 'Sidebars Updated:\s+\K\d+' sync_preview.txt || echo "0") FAILED=$(grep -oP 'Failed:\s+\K\d+' sync_preview.txt || echo "0") TOTAL=$(grep -oP 'Total Parsed:\s+\K\d+' sync_preview.txt || echo "0") - CHANGES=$((CREATED + UPDATED + UPDATED_IDX)) + # Sidebar updates include every About topic (index.md) that + # actually differs from Discourse, so they count toward the + # "modified" total. + CHANGES=$((CREATED + UPDATED + SIDEBARS)) { echo 'body< None: "total": len(entries), "created": 0, "updated": 0, - "updated_index": 0, "skipped_cached": 0, "skipped_discourse_match": 0, "skipped_no_category": 0, @@ -680,71 +679,21 @@ def sync_docs(args: argparse.Namespace) -> None: ) if is_about_topic: - # Index files update the "About" topic for their category. - # Each index.md maps to the most specific matching category - # (e.g. features/cruise/index.md -> sub-category 140). - about_topic_id = synced_topics.get(entry.path) - - if about_topic_id is None: - if args.dry_run: - print( - f" [DRY RUN] WARN: No About topic for " - f"category {category_id}: {label}" - ) - stats["updated_index"] += 1 - else: - print( - f" FAIL: No About topic for " - f"category {category_id}: {label}" - ) - stats["failed"] += 1 - continue - - post_id = client.first_post_id(about_topic_id) - if post_id is None: - print( - f" FAIL: No first post for " - f"About topic {about_topic_id}: {label}" - ) - stats["failed"] += 1 - continue - - if _post_body_matches_remote( - client, post_id, body, debug_label=f"index {label}", - ): - if args.verbose: - print(f" SKIP (discourse up-to-date): {label}") - stats["skipped_discourse_match"] += 1 - if not args.dry_run: - cache.save(entry.path, raw_content) - continue - - if args.dry_run: - print( - f" [DRY RUN] Would update index: {label}\n" - f" -> About topic {about_topic_id} " - f"(category {category_id})\n" - f" -> {config.base_url}/t/{about_topic_id}" - ) - stats["updated_index"] += 1 - continue - - time.sleep(1.0) - result = client.update_post( - post_id, body, - edit_reason="Docs sync: index page", - ) - if result is None: + # Index pages are rendered by _generate_sidebars, which appends + # the sidebar list after the converted body. Updating the About + # topic here with just body+footer would be overwritten by the + # sidebar pass immediately after, and the pre-update idempotency + # check would always see a mismatch (remote has sidebar lines, + # local doesn't), double-counting every index as "Would update". + # Defer the update to sidebar generation — it holds the authoritative + # combined body. + if args.verbose: print( - f" FAIL: Could not update " - f"About topic {about_topic_id}: {label}" + f" DEFER to sidebar gen: {label} " + f"(about topic {synced_topics.get(entry.path)})" ) - stats["failed"] += 1 - continue - - print(f" Updated index: {label} -> About topic {about_topic_id}") - stats["updated_index"] += 1 - cache.save(entry.path, raw_content) + if not args.dry_run: + cache.save(entry.path, raw_content) continue # --- Normal doc: find or create --- @@ -923,7 +872,6 @@ def sync_docs(args: argparse.Namespace) -> None: print(f" Total Parsed: {stats['total']}") print(f" Created: {stats['created']}") print(f" Updated (Normal): {stats['updated']}") - print(f" Updated (Index): {stats['updated_index']}") print(f" Skipped (Cached): {stats['skipped_cached']}") print(f" Skipped (Discourse Match): {stats['skipped_discourse_match']}") print(f" Skipped (Metadata Match): {stats['skipped_metadata_match']}") From 3b0439c1ab9bcb95292852f4d03364bfd83ebef3 Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 01:25:34 -0400 Subject: [PATCH 6/8] fix(ci): detect PR-affected docs via rendered-body hash diff Previous preview logic ran either an --only filter on git-diff-detected doc changes or a full sync preview on structural changes (tools/, zensical.toml, index.md). Full-sync mode surfaced every pre-existing drift between master and Discourse, including docs PRs that only touch CI/sync tooling but happen to expose unhealed state from earlier master syncs. Replace that heuristic with a deterministic render-diff: - tools/render_docs.py renders every nav entry the same way the sync orchestrator does (convert + build_footer, empty topic_id_map for consistency) and prints a sha256-per-path map. - The preview job renders at HEAD and at the base commit via git worktree, diffs the hashes, and feeds only paths whose rendering differs into sync_to_discourse.py --only. If no doc renders differently the script is skipped entirely. - Drops the STRUCTURAL bucket; tools/converter/nav changes that affect rendering are now caught directly by the hash diff, while tooling changes that leave rendered output untouched no longer trigger a false-positive preview. For PR 14, which only changes CI/tests/sync plumbing, the preview now reports zero affected docs. --- .github/workflows/ci.yml | 80 ++++++++++++++++++++++------------------ tools/render_docs.py | 51 +++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 36 deletions(-) create mode 100644 tools/render_docs.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f691e5..e54dce9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,53 +90,67 @@ jobs: pip install uv uv pip install --system zensical - - name: Determine changed paths + - name: Compute docs affected by this PR id: diff env: BASE_REF: ${{ github.base_ref }} + DOCS_SYNC_REF_NAME: ${{ github.base_ref }} + DISCOURSE_URL: ${{ secrets.DISCOURSE_URL }} run: | set -euo pipefail git fetch --depth=1 origin "$BASE_REF" BASE_SHA=$(git rev-parse "origin/$BASE_REF") echo "Base: $BASE_REF ($BASE_SHA)" - # Docs changed (added or modified only — deletions not handled by sync) - CHANGED_DOCS=$(git diff --name-only --diff-filter=AM \ - "$BASE_SHA"...HEAD -- 'docs/**/*.md' \ - | sed 's|^docs/||' | sort -u) || CHANGED_DOCS="" - - # Structural changes that require a full sync: nav definition, - # sync tooling itself, or any index.md (affects sidebars). - STRUCTURAL=$(git diff --name-only \ - "$BASE_SHA"...HEAD -- zensical.toml 'tools/**' 'docs/**/index.md' \ - | sort -u) || STRUCTURAL="" - - echo "CHANGED_DOCS:" - printf '%s\n' "$CHANGED_DOCS" - echo "STRUCTURAL:" - printf '%s\n' "$STRUCTURAL" + # Render every nav doc at HEAD (PR) and at the base commit, + # then diff the content hashes. This surfaces exactly the + # docs whose sync-rendered body would differ between master + # and this PR merged — including changes introduced by + # edits to zensical.toml, tools/converter.py, or anything + # else that influences rendering. + uv run --python 3.12 python tools/render_docs.py > /tmp/head_hashes.json + + WORKTREE=$(mktemp -d) + git worktree add --detach "$WORKTREE" "$BASE_SHA" + # render_docs.py may not exist on the base branch yet; copy + # the current HEAD version in so we can render base content + # with whichever render entry point exists today. The imports + # it pulls (converter, nav_parser, sync_to_discourse) still + # resolve to the worktree copies, so tools changes on HEAD + # vs base are captured in the hash diff. + cp tools/render_docs.py "$WORKTREE/tools/render_docs.py" + ( + cd "$WORKTREE" + uv run --python 3.12 python tools/render_docs.py > /tmp/base_hashes.json + ) + git worktree remove --force "$WORKTREE" + + python3 - <<'PY' > /tmp/affected.txt + import json + head = json.load(open('/tmp/head_hashes.json')) + base = json.load(open('/tmp/base_hashes.json')) + for path in sorted(set(head) | set(base)): + if head.get(path) != base.get(path): + print(path) + PY + + AFFECTED=$(cat /tmp/affected.txt || true) + echo "AFFECTED_DOCS:" + printf '%s\n' "$AFFECTED" ANY_CHANGE="" - if [ -n "$CHANGED_DOCS" ] || [ -n "$STRUCTURAL" ]; then - ANY_CHANGE="1" - fi + [ -n "$AFFECTED" ] && ANY_CHANGE="1" - # Space-separated doc paths for CLI --only - ONLY_ARGS=$(printf '%s ' $CHANGED_DOCS) + ONLY_ARGS=$(printf '%s ' $AFFECTED) ONLY_ARGS=${ONLY_ARGS% } - # Structural collapsed to single-line marker (presence only) - STRUCTURAL_LINE=$(printf '%s ' $STRUCTURAL) - STRUCTURAL_LINE=${STRUCTURAL_LINE% } - DOC_COUNT=0 - if [ -n "$CHANGED_DOCS" ]; then - DOC_COUNT=$(printf '%s\n' "$CHANGED_DOCS" | wc -l | tr -d ' ') + if [ -n "$AFFECTED" ]; then + DOC_COUNT=$(printf '%s\n' "$AFFECTED" | wc -l | tr -d ' ') fi { echo "any_change=$ANY_CHANGE" - echo "structural=$STRUCTURAL_LINE" echo "only_args=$ONLY_ARGS" echo "doc_count=$DOC_COUNT" } >> "$GITHUB_OUTPUT" @@ -148,7 +162,6 @@ jobs: DISCOURSE_API_KEY: ${{ secrets.DISCOURSE_API_KEY }} DISCOURSE_API_USER: ${{ secrets.DISCOURSE_API_USER }} DISCOURSE_CATEGORY_MAP: ${{ vars.DISCOURSE_CATEGORY_MAP }} - STRUCTURAL: ${{ steps.diff.outputs.structural }} ONLY_ARGS: ${{ steps.diff.outputs.only_args }} # Force footer GitHub link to base branch so dry-run body # matches what sync_to_discourse.py wrote from the last master @@ -160,13 +173,8 @@ jobs: # first. DOCS_SYNC_REF_NAME: ${{ github.base_ref }} run: | - ARGS="--dry-run --verbose" - # Structural changes → full sync preview (sidebar regen eligible). - # Leaf-only doc changes → filter via --only for accurate preview. - if [ -z "$STRUCTURAL" ] && [ -n "$ONLY_ARGS" ]; then - ARGS="$ARGS --only $ONLY_ARGS" - fi - uv run --python 3.12 python tools/sync_to_discourse.py $ARGS 2>&1 | tee sync_preview.txt + uv run --python 3.12 python tools/sync_to_discourse.py \ + --dry-run --verbose --only $ONLY_ARGS 2>&1 | tee sync_preview.txt - name: Write no-change preview if: steps.diff.outputs.any_change != '1' diff --git a/tools/render_docs.py b/tools/render_docs.py new file mode 100644 index 0000000..bad0258 --- /dev/null +++ b/tools/render_docs.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +"""Render each nav doc the same way sync_to_discourse.py would, emit JSON. + +Writes ``{: }`` to stdout. Used by +the PR preview CI job to detect which docs actually render +differently between the base branch and the PR HEAD without touching +Discourse. An empty topic_id_map is passed to converter.convert so +internal .md links always fall back to search URLs — the goal is a +consistent, reproducible hash between refs, not a real sync body. + +Env: + DOCS_SYNC_REF_NAME — ref used in the footer "Suggest changes" link. + DISCOURSE_URL — forum base URL used by converter for link rewrites. +""" + +from __future__ import annotations + +import hashlib +import json +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) + +from converter import convert +from nav_parser import parse_all +from sync_to_discourse import DOCS_DIR, ZENSICAL_TOML, build_footer + + +def _render(doc_path: str) -> str | None: + file_path = DOCS_DIR / doc_path + if not file_path.exists(): + return None + content = file_path.read_text(encoding="utf-8") + converted = convert(content, file_path=doc_path, topic_id_map={}) + return converted.rstrip("\n") + build_footer(doc_path) + + +def main() -> None: + out: dict[str, str] = {} + for entry in parse_all(ZENSICAL_TOML): + body = _render(entry.path) + if body is None: + continue + out[entry.path] = hashlib.sha256(body.encode("utf-8")).hexdigest() + json.dump(out, sys.stdout, indent=2, sort_keys=True) + sys.stdout.write("\n") + + +if __name__ == "__main__": + main() From 2b4894ba6b786f32be2c6b8f0288c5c835ae6821 Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 01:35:50 -0400 Subject: [PATCH 7/8] fix(ci): unset GITHUB_REF_NAME before render-diff GitHub Actions auto-populates GITHUB_REF_NAME with the PR merge ref (e.g. "14/merge") in every step and ignores step-level env overrides for reserved names. The base-commit worktree still holds the previous sync_to_discourse.py, which reads GITHUB_REF_NAME at import time and embeds it in build_footer. Every base render therefore produced a footer URL containing "14/merge" while the HEAD render used DOCS_SYNC_REF_NAME=master, so every nav doc's hash differed and all 87 entries got passed to --only. Unset GITHUB_REF_NAME at the top of the compute-affected step so both renders fall through to DOCS_SYNC_REF_NAME=master and only docs whose content actually differs between refs surface in the diff. --- .github/workflows/ci.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e54dce9..411af93 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -98,6 +98,15 @@ jobs: DISCOURSE_URL: ${{ secrets.DISCOURSE_URL }} run: | set -euo pipefail + # GitHub Actions auto-populates GITHUB_REF_NAME with the PR + # merge ref (e.g. "14/merge") and silently ignores step-level + # env overrides for reserved names. The base worktree's copy + # of sync_to_discourse.py (pre-DOCS_SYNC_REF_NAME) reads it at + # import time, which would embed the PR ref in every base + # render's footer and make every doc hash differ. Unset it + # so both renders fall through to DOCS_SYNC_REF_NAME=master. + unset GITHUB_REF_NAME + git fetch --depth=1 origin "$BASE_REF" BASE_SHA=$(git rev-parse "origin/$BASE_REF") echo "Base: $BASE_REF ($BASE_SHA)" From ef57211e3b99aff92ba0da31060c0f80d5cd296b Mon Sep 17 00:00:00 2001 From: Jason Wen Date: Sat, 18 Apr 2026 01:50:38 -0400 Subject: [PATCH 8/8] refactor: trim verbose comments and docstrings No functional change. Shortens docstrings, drops narrative comments that duplicate code, removes the SYNC_DEBUG_DIFF diagnostic helper, consolidates tests via shared _run_sync helper, and tightens the CI workflow's prose. --- .github/workflows/ci.yml | 66 +----- tools/discourse_client.py | 13 +- tools/render_docs.py | 38 ++-- tools/sync_to_discourse.py | 178 +++------------ tools/test_sync_to_discourse.py | 371 ++++++++++---------------------- 5 files changed, 163 insertions(+), 503 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 411af93..53c98e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,6 @@ jobs: steps: - uses: actions/checkout@v6 with: - # Need history to compute diff vs base branch fetch-depth: 0 - name: Restore Discourse sync cache @@ -98,65 +97,39 @@ jobs: DISCOURSE_URL: ${{ secrets.DISCOURSE_URL }} run: | set -euo pipefail - # GitHub Actions auto-populates GITHUB_REF_NAME with the PR - # merge ref (e.g. "14/merge") and silently ignores step-level - # env overrides for reserved names. The base worktree's copy - # of sync_to_discourse.py (pre-DOCS_SYNC_REF_NAME) reads it at - # import time, which would embed the PR ref in every base - # render's footer and make every doc hash differ. Unset it - # so both renders fall through to DOCS_SYNC_REF_NAME=master. + # GHA reserves GITHUB_REF_NAME; unset so base worktree's + # pre-DOCS_SYNC_REF_NAME sync_to_discourse.py also falls back to master. unset GITHUB_REF_NAME git fetch --depth=1 origin "$BASE_REF" BASE_SHA=$(git rev-parse "origin/$BASE_REF") echo "Base: $BASE_REF ($BASE_SHA)" - # Render every nav doc at HEAD (PR) and at the base commit, - # then diff the content hashes. This surfaces exactly the - # docs whose sync-rendered body would differ between master - # and this PR merged — including changes introduced by - # edits to zensical.toml, tools/converter.py, or anything - # else that influences rendering. uv run --python 3.12 python tools/render_docs.py > /tmp/head_hashes.json WORKTREE=$(mktemp -d) git worktree add --detach "$WORKTREE" "$BASE_SHA" - # render_docs.py may not exist on the base branch yet; copy - # the current HEAD version in so we can render base content - # with whichever render entry point exists today. The imports - # it pulls (converter, nav_parser, sync_to_discourse) still - # resolve to the worktree copies, so tools changes on HEAD - # vs base are captured in the hash diff. cp tools/render_docs.py "$WORKTREE/tools/render_docs.py" - ( - cd "$WORKTREE" - uv run --python 3.12 python tools/render_docs.py > /tmp/base_hashes.json - ) + (cd "$WORKTREE" && uv run --python 3.12 python tools/render_docs.py > /tmp/base_hashes.json) git worktree remove --force "$WORKTREE" python3 - <<'PY' > /tmp/affected.txt import json - head = json.load(open('/tmp/head_hashes.json')) - base = json.load(open('/tmp/base_hashes.json')) - for path in sorted(set(head) | set(base)): - if head.get(path) != base.get(path): - print(path) + h = json.load(open('/tmp/head_hashes.json')) + b = json.load(open('/tmp/base_hashes.json')) + for p in sorted(set(h) | set(b)): + if h.get(p) != b.get(p): + print(p) PY AFFECTED=$(cat /tmp/affected.txt || true) echo "AFFECTED_DOCS:" printf '%s\n' "$AFFECTED" - ANY_CHANGE="" - [ -n "$AFFECTED" ] && ANY_CHANGE="1" - ONLY_ARGS=$(printf '%s ' $AFFECTED) ONLY_ARGS=${ONLY_ARGS% } - - DOC_COUNT=0 - if [ -n "$AFFECTED" ]; then - DOC_COUNT=$(printf '%s\n' "$AFFECTED" | wc -l | tr -d ' ') - fi + DOC_COUNT=$([ -n "$AFFECTED" ] && printf '%s\n' "$AFFECTED" | wc -l | tr -d ' ' || echo 0) + ANY_CHANGE=$([ -n "$AFFECTED" ] && echo 1 || echo "") { echo "any_change=$ANY_CHANGE" @@ -172,14 +145,6 @@ jobs: DISCOURSE_API_USER: ${{ secrets.DISCOURSE_API_USER }} DISCOURSE_CATEGORY_MAP: ${{ vars.DISCOURSE_CATEGORY_MAP }} ONLY_ARGS: ${{ steps.diff.outputs.only_args }} - # Force footer GitHub link to base branch so dry-run body - # matches what sync_to_discourse.py wrote from the last master - # push. Otherwise build_footer embeds the PR ref (e.g. - # "14/merge"), making every remote match check fail and - # inflating the preview count. GitHub Actions reserves - # GITHUB_REF_NAME and ignores step-level env overrides, so - # we use a dedicated env var that _resolve_ref_name consults - # first. DOCS_SYNC_REF_NAME: ${{ github.base_ref }} run: | uv run --python 3.12 python tools/sync_to_discourse.py \ @@ -203,11 +168,6 @@ jobs: UPDATED=$(grep -oP 'Updated \(Normal\):\s+\K\d+' sync_preview.txt || echo "0") SIDEBARS=$(grep -oP 'Sidebars Updated:\s+\K\d+' sync_preview.txt || echo "0") FAILED=$(grep -oP 'Failed:\s+\K\d+' sync_preview.txt || echo "0") - TOTAL=$(grep -oP 'Total Parsed:\s+\K\d+' sync_preview.txt || echo "0") - - # Sidebar updates include every About topic (index.md) that - # actually differs from Discourse, so they count toward the - # "modified" total. CHANGES=$((CREATED + UPDATED + SIDEBARS)) { @@ -219,11 +179,7 @@ jobs: elif [ "$CHANGES" -eq 0 ]; then echo "Docs are up to date. No forum topic changes needed." else - SUFFIX="of ${TOTAL} total" - if [ -n "${{ steps.diff.outputs.only_args }}" ] && [ -z "${{ steps.diff.outputs.structural }}" ]; then - SUFFIX="(filtered to ${DOC_COUNT} PR-changed doc(s))" - fi - echo "**${CHANGES} topic(s) would be modified** (${CREATED} created, ${UPDATED} updated, ${SIDEBARS} index/sidebar) ${SUFFIX}." + echo "**${CHANGES} topic(s) would be modified** (${CREATED} created, ${UPDATED} updated, ${SIDEBARS} index/sidebar) across ${DOC_COUNT} affected doc(s)." fi if [ "$FAILED" -ne 0 ]; then echo "" diff --git a/tools/discourse_client.py b/tools/discourse_client.py index 8258c26..0e6b022 100644 --- a/tools/discourse_client.py +++ b/tools/discourse_client.py @@ -299,18 +299,7 @@ def first_post_id(self, topic_id: int) -> int | None: return posts[0].get("id") def get_topic(self, topic_id: int) -> dict[str, Any] | None: - """Fetch full topic metadata including title and category_id. - - Used to detect whether a topic's title or category differs from - the desired values so we can skip unnecessary update_topic calls. - - Args: - topic_id: The Discourse topic ID. - - Returns: - Topic dict with keys including 'title', 'category_id', - 'post_stream', or None if the request failed. - """ + """Fetch topic metadata (title, category_id, post_stream).""" return self._get(f"/t/{topic_id}.json") def get_post_raw(self, post_id: int) -> str | None: diff --git a/tools/render_docs.py b/tools/render_docs.py index bad0258..158afff 100644 --- a/tools/render_docs.py +++ b/tools/render_docs.py @@ -1,16 +1,10 @@ #!/usr/bin/env python3 -"""Render each nav doc the same way sync_to_discourse.py would, emit JSON. - -Writes ``{: }`` to stdout. Used by -the PR preview CI job to detect which docs actually render -differently between the base branch and the PR HEAD without touching -Discourse. An empty topic_id_map is passed to converter.convert so -internal .md links always fall back to search URLs — the goal is a -consistent, reproducible hash between refs, not a real sync body. - -Env: - DOCS_SYNC_REF_NAME — ref used in the footer "Suggest changes" link. - DISCOURSE_URL — forum base URL used by converter for link rewrites. +"""Render each nav doc and emit {path: sha256} JSON. + +Used by the PR preview CI job to detect which docs render differently +between the base branch and the PR HEAD without touching Discourse. +Link rewriting uses an empty topic_id_map so the hash is reproducible +across refs. """ from __future__ import annotations @@ -27,23 +21,19 @@ from sync_to_discourse import DOCS_DIR, ZENSICAL_TOML, build_footer -def _render(doc_path: str) -> str | None: - file_path = DOCS_DIR / doc_path - if not file_path.exists(): - return None - content = file_path.read_text(encoding="utf-8") - converted = convert(content, file_path=doc_path, topic_id_map={}) - return converted.rstrip("\n") + build_footer(doc_path) - - def main() -> None: out: dict[str, str] = {} for entry in parse_all(ZENSICAL_TOML): - body = _render(entry.path) - if body is None: + fp = DOCS_DIR / entry.path + if not fp.exists(): continue + converted = convert( + fp.read_text(encoding="utf-8"), + file_path=entry.path, topic_id_map={}, + ) + body = converted.rstrip("\n") + build_footer(entry.path) out[entry.path] = hashlib.sha256(body.encode("utf-8")).hexdigest() - json.dump(out, sys.stdout, indent=2, sort_keys=True) + json.dump(out, sys.stdout, sort_keys=True) sys.stdout.write("\n") diff --git a/tools/sync_to_discourse.py b/tools/sync_to_discourse.py index 3a3862b..3c2f732 100644 --- a/tools/sync_to_discourse.py +++ b/tools/sync_to_discourse.py @@ -37,15 +37,9 @@ ZENSICAL_TOML = REPO_ROOT / "zensical.toml" +# DOCS_SYNC_REF_NAME overrides GITHUB_REF_NAME because GitHub Actions +# reserves GITHUB_* and ignores step-level env overrides in PR runs. def _resolve_ref_name() -> str: - """Return the Git ref used in the footer "Suggest changes" link. - - ``DOCS_SYNC_REF_NAME`` takes precedence so callers (notably the PR - preview CI step) can force the ref to the base branch; the built-in - ``GITHUB_REF_NAME`` is reserved by GitHub Actions and step-level - ``env:`` blocks cannot override it, so we cannot rely on it alone in - pull-request runs. Falls back to ``"master"`` for local dev. - """ return ( os.environ.get("DOCS_SYNC_REF_NAME") or os.environ.get("GITHUB_REF_NAME") @@ -110,81 +104,23 @@ def _normalize(s: str) -> str: def _post_body_matches_remote( - client: DiscourseClient, - post_id: int, - local_body: str, - *, - debug_label: str | None = None, + client: DiscourseClient, post_id: int, local_body: str, ) -> bool: - """Fetch remote post raw and return True if it matches local_body. - - Centralizes the fetch-and-compare pattern so both dry-run and live - paths make the same authoritative no-op decision. - - When ``SYNC_DEBUG_DIFF`` is set in the environment and a mismatch is - detected, print the first divergent line pair to aid diagnosis of - unexpected drift (e.g. forum URL differences, hidden whitespace, or - Discourse-side reformatting). - """ time.sleep(1.0) - remote_raw = client.get_post_raw(post_id) - matches = _body_matches_discourse(local_body, remote_raw) - if ( - not matches - and os.environ.get("SYNC_DEBUG_DIFF") - and debug_label is not None - ): - _print_first_divergence(debug_label, local_body, remote_raw) - return matches - - -def _print_first_divergence( - label: str, local_body: str, remote_raw: str | None, -) -> None: - """Print the first point where normalized local and remote diverge.""" - if remote_raw is None: - print(f" DEBUG {label}: remote is None (get_post_raw failed)") - return - - def _norm(s: str) -> list[str]: - return [line.rstrip() for line in s.splitlines()] - - local_lines = _norm(local_body) - remote_lines = _norm(remote_raw) - limit = max(len(local_lines), len(remote_lines)) - for i in range(limit): - local = local_lines[i] if i < len(local_lines) else "" - remote = remote_lines[i] if i < len(remote_lines) else "" - if local != remote: - print(f" DEBUG {label}: first diff at line {i + 1}") - print(f" local : {local[:200]!r}") - print(f" remote: {remote[:200]!r}") - return - print(f" DEBUG {label}: normalized lines equal but match returned False") + return _body_matches_discourse(local_body, client.get_post_raw(post_id)) def _topic_metadata_differs( - client: DiscourseClient, - topic_id: int, - new_title: str, - new_category_id: int, + client: DiscourseClient, topic_id: int, + new_title: str, new_category_id: int, ) -> bool: - """Return True if topic's current title or category differs from target. - - Used to gate ``update_topic`` calls so Discourse is only mutated - when metadata truly needs to change. A failed fetch returns True - (update defensively) so transient read errors do not silently drop - legitimate metadata updates. - """ time.sleep(1.0) topic = client.get_topic(topic_id) if topic is None: return True - current_title = (topic.get("title") or "").strip() - current_category = topic.get("category_id") return ( - current_title != new_title.strip() - or current_category != new_category_id + (topic.get("title") or "").strip() != new_title.strip() + or topic.get("category_id") != new_category_id ) @@ -442,10 +378,7 @@ def _generate_sidebars( ) continue - if _post_body_matches_remote( - client, post_id, combined, - debug_label=f"sidebar {section_title} (cat {category_id})", - ): + if _post_body_matches_remote(client, post_id, combined): if verbose: print( f" SKIP sidebar (discourse up-to-date): " @@ -566,30 +499,18 @@ def sync_docs(args: argparse.Namespace) -> None: all_entries = parse_all(ZENSICAL_TOML) - # Optional path filter (e.g. from PR preview) — restrict which docs - # Pass 2 and sidebar generation consider. Pass 1 still resolves all - # topic IDs so internal .md -> /t/ link rewriting remains correct. - only_paths: set[str] | None = None raw_only = getattr(args, "only", None) - if raw_only: - only_paths = {p.strip() for p in raw_only if p.strip()} + only_paths = {p.strip() for p in raw_only if p.strip()} if raw_only else None if only_paths is not None: entries = [e for e in all_entries if e.path in only_paths] missing = sorted(only_paths - {e.path for e in all_entries}) if missing: - print( - " WARN: --only paths not present in zensical.toml, ignoring: " - f"{missing}" - ) + print(f" WARN: --only paths not in zensical.toml, ignoring: {missing}") else: entries = all_entries - # Sidebar regeneration rebuilds all 8 section About topics from the - # full index_contents map. A filtered run cannot populate every - # section without reading the whole nav, so we always skip sidebar - # generation when --only is set. Nav/index changes must trigger a - # full run (no --only) to refresh sidebars. + # Sidebar gen requires the full index_contents map; filtered runs skip it. skip_sidebars = only_paths is not None stats: dict[str, int] = { @@ -608,11 +529,7 @@ def sync_docs(args: argparse.Namespace) -> None: # Store converted index.md content for sidebar combination index_contents: dict[str, str] = {} - filter_note = ( - f" (filtered to {len(entries)} of {len(all_entries)})" - if only_paths is not None - else "" - ) + filter_note = f" (filtered to {len(entries)} of {len(all_entries)})" if only_paths else "" print(f"Sync: {len(entries)} entries from zensical.toml{filter_note}") print(f" Discourse: {config.base_url}") print(f" Dry run: {args.dry_run}") @@ -622,9 +539,8 @@ def sync_docs(args: argparse.Namespace) -> None: print() # --- Pass 1: Resolve all topic IDs --- - # Resolve the full nav so dry-run link rewriting and sidebar lookups - # still find every topic when the caller only supplied a subset via - # --only. + # Resolve the full nav so link rewriting and sidebar lookups still work + # when the caller supplied a subset via --only. synced_topics = _resolve_all_topic_ids( all_entries, config=config, client=client, verbose=args.verbose, ) @@ -679,19 +595,11 @@ def sync_docs(args: argparse.Namespace) -> None: ) if is_about_topic: - # Index pages are rendered by _generate_sidebars, which appends - # the sidebar list after the converted body. Updating the About - # topic here with just body+footer would be overwritten by the - # sidebar pass immediately after, and the pre-update idempotency - # check would always see a mismatch (remote has sidebar lines, - # local doesn't), double-counting every index as "Would update". - # Defer the update to sidebar generation — it holds the authoritative - # combined body. + # About topics are owned by _generate_sidebars (body + sidebar lines). + # Updating here would be clobbered and the body-only idempotency check + # would always diverge on the sidebar block. if args.verbose: - print( - f" DEFER to sidebar gen: {label} " - f"(about topic {synced_topics.get(entry.path)})" - ) + print(f" DEFER to sidebar gen: {label}") if not args.dry_run: cache.save(entry.path, raw_content) continue @@ -706,9 +614,7 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - if _post_body_matches_remote( - client, post_id, body, debug_label=f"topic {topic_id} {label}", - ): + if _post_body_matches_remote(client, post_id, body): if args.verbose: print(f" SKIP (discourse up-to-date): {label}") stats["skipped_discourse_match"] += 1 @@ -735,13 +641,9 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - # Update topic title and category only when drift detected if _topic_metadata_differs(client, topic_id, title, category_id): time.sleep(1.0) - client.update_topic( - topic_id, title, - category_id=category_id, - ) + client.update_topic(topic_id, title, category_id=category_id) else: stats["skipped_metadata_match"] += 1 @@ -766,10 +668,7 @@ def sync_docs(args: argparse.Namespace) -> None: stats["failed"] += 1 continue - if _post_body_matches_remote( - client, post_id, body, - debug_label=f"title-fallback {found_id} {label}", - ): + if _post_body_matches_remote(client, post_id, body): if args.verbose: print(f" SKIP (discourse up-to-date): {label}") stats["skipped_discourse_match"] += 1 @@ -801,10 +700,7 @@ def sync_docs(args: argparse.Namespace) -> None: if _topic_metadata_differs(client, found_id, title, category_id): time.sleep(1.0) - client.update_topic( - found_id, title, - category_id=category_id, - ) + client.update_topic(found_id, title, category_id=category_id) else: stats["skipped_metadata_match"] += 1 @@ -842,26 +738,15 @@ def sync_docs(args: argparse.Namespace) -> None: cache.save(entry.path, raw_content) # --- Sidebar generation --- - # Skip entirely when --only is set: sidebar regeneration depends on - # full index_contents for every section, and regenerating with a - # partial set would overwrite untouched About topics with placeholder - # text. Nav structure changes should run without --only (driven by - # CI detecting zensical.toml or index.md changes). print() if skip_sidebars: - print( - "Skipping sidebar generation (filtered run without nav or index changes)" - ) + print("Skipping sidebar generation (filtered run)") else: print("Generating sidebars...") _generate_sidebars( - config=config, - client=client, - synced_topics=synced_topics, - index_contents=index_contents, - dry_run=args.dry_run, - verbose=args.verbose, - stats=stats, + config=config, client=client, synced_topics=synced_topics, + index_contents=index_contents, dry_run=args.dry_run, + verbose=args.verbose, stats=stats, ) # --- Summary --- @@ -891,13 +776,8 @@ def main() -> None: parser.add_argument("--verbose", "-v", action="store_true", help="Print detailed progress for skipped entries.") parser.add_argument("--force", action="store_true", help="Bypass content cache and re-sync all entries.") parser.add_argument( - "--only", - nargs="+", - metavar="PATH", - help=( - "Restrict Pass 2 to the given doc paths (relative to docs/). " - "Sidebar generation is skipped when this is set." - ), + "--only", nargs="+", metavar="PATH", + help="Restrict Pass 2 to these doc paths; skips sidebar gen.", ) args = parser.parse_args() sync_docs(args) diff --git a/tools/test_sync_to_discourse.py b/tools/test_sync_to_discourse.py index e8e9bed..ebb8b4d 100644 --- a/tools/test_sync_to_discourse.py +++ b/tools/test_sync_to_discourse.py @@ -5,6 +5,8 @@ """ import argparse +import contextlib +import io import json import sys import tempfile @@ -467,250 +469,142 @@ def side_effect(req: object) -> MagicMock: # --only filter + idempotency guarantees # --------------------------------------------------------------------------- - -def _build_two_doc_workspace(tmpdir: str) -> tuple[Path, Path]: - """Create a docs tree with two docs under getting-started. - - Returns ``(docs_dir, toml_path)``. - """ - docs_dir = Path(tmpdir) / "docs" - docs_dir.mkdir() - gs_dir = docs_dir / "getting-started" - gs_dir.mkdir() - - (gs_dir / "doc-a.md").write_text( - "---\ntitle: Doc A\n---\n\n# Doc A\n\nAlpha content.\n" - ) - (gs_dir / "doc-b.md").write_text( - "---\ntitle: Doc B\n---\n\n# Doc B\n\nBravo content.\n" - ) - - toml_path = Path(tmpdir) / "zensical.toml" - toml_path.write_text(textwrap.dedent("""\ - [project] - docs_dir = "docs" - nav = [ - { "Getting Started" = [ - { "Doc A" = "getting-started/doc-a.md" }, - { "Doc B" = "getting-started/doc-b.md" }, - ]}, - ] - """)) - return docs_dir, toml_path +_ENV = { + "DISCOURSE_URL": "https://community.sunnypilot.ai", + "DISCOURSE_API_KEY": "test-key", + "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', +} + +_TWO_DOC_TOML = textwrap.dedent("""\ + [project] + docs_dir = "docs" + nav = [ + { "Getting Started" = [ + { "Doc A" = "getting-started/doc-a.md" }, + { "Doc B" = "getting-started/doc-b.md" }, + ]}, + ] +""") -def _patch_cache_dir(tmpdir: str) -> patch: - """Redirect the content cache to an isolated per-test directory.""" - cache_dir = Path(tmpdir) / ".cache" - return patch("sync_to_discourse.ContentCache", lambda: _ephemeral_cache(cache_dir)) +def _build_two_doc_workspace(tmpdir: str) -> tuple[Path, Path]: + docs = Path(tmpdir) / "docs" / "getting-started" + docs.mkdir(parents=True) + (docs / "doc-a.md").write_text("---\ntitle: Doc A\n---\n\n# Doc A\n\nAlpha.\n") + (docs / "doc-b.md").write_text("---\ntitle: Doc B\n---\n\n# Doc B\n\nBravo.\n") + toml = Path(tmpdir) / "zensical.toml" + toml.write_text(_TWO_DOC_TOML) + return docs.parent, toml -def _ephemeral_cache(cache_dir: Path): +def _run_sync(tmpdir, docs_dir, toml_path, args, side_effect): from content_cache import ContentCache - return ContentCache(cache_dir=cache_dir) + cache = ContentCache(cache_dir=Path(tmpdir) / ".cache") + with ( + patch.dict("os.environ", _ENV, clear=False), + patch("sync_to_discourse.DOCS_DIR", docs_dir), + patch("sync_to_discourse.ZENSICAL_TOML", toml_path), + patch("sync_to_discourse.ContentCache", lambda: cache), + patch("sync_to_discourse.time") as mock_time, + patch("urllib.request.urlopen") as mock_urlopen, + ): + mock_time.sleep = MagicMock() + mock_urlopen.side_effect = side_effect + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + sync_docs(args) + return buf.getvalue(), mock_urlopen def test_sync_docs_only_filter_restricts_pass2(): - """--only restricts Pass 2 to listed paths; others are not processed.""" with tempfile.TemporaryDirectory() as tmpdir: docs_dir, toml_path = _build_two_doc_workspace(tmpdir) - - env = { - "DISCOURSE_URL": "https://community.sunnypilot.ai", - "DISCOURSE_API_KEY": "test-key", - "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', - } args = argparse.Namespace( dry_run=True, verbose=True, force=True, only=["getting-started/doc-a.md"], ) - - with ( - patch.dict("os.environ", env, clear=False), - patch("sync_to_discourse.DOCS_DIR", docs_dir), - patch("sync_to_discourse.ZENSICAL_TOML", toml_path), - _patch_cache_dir(tmpdir), - patch("urllib.request.urlopen") as mock_urlopen, - ): - mock_urlopen.return_value = _mock_response({"topics": []}) - - # Capture printed output to verify only doc-a is mentioned in Pass 2 - import io - import contextlib - buf = io.StringIO() - with contextlib.redirect_stdout(buf): - sync_docs(args) - output = buf.getvalue() - - assert "doc-a.md" in output, "Filtered doc must appear in dry-run output" - # doc-b.md must not appear in Pass 2 processing lines - pass2_section = output.split("Pass 2: Syncing content...", 1)[1] - pass2_end = pass2_section.split("Sync Summary", 1)[0] - assert "doc-b.md" not in pass2_end, ( - f"Excluded doc must not be processed in Pass 2. Found in:\n{pass2_end}" - ) - assert "Skipping sidebar generation" in output, ( - "Filtered runs must skip sidebar generation" + output, _ = _run_sync( + tmpdir, docs_dir, toml_path, args, + lambda req: _mock_response({"topics": []}), ) + assert "doc-a.md" in output + pass2 = output.split("Pass 2:", 1)[1].split("Sync Summary", 1)[0] + assert "doc-b.md" not in pass2 + assert "Skipping sidebar generation" in output print(" PASS: sync_docs_only_filter_restricts_pass2") def test_sync_docs_only_filter_warns_on_unknown_path(): - """--only paths not in zensical.toml produce a WARN, not an error.""" with tempfile.TemporaryDirectory() as tmpdir: docs_dir, toml_path = _build_two_doc_workspace(tmpdir) - - env = { - "DISCOURSE_URL": "https://community.sunnypilot.ai", - "DISCOURSE_API_KEY": "test-key", - "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', - } args = argparse.Namespace( dry_run=True, verbose=True, force=True, only=["getting-started/doc-a.md", "getting-started/ghost.md"], ) + output, _ = _run_sync( + tmpdir, docs_dir, toml_path, args, + lambda req: _mock_response({"topics": []}), + ) - with ( - patch.dict("os.environ", env, clear=False), - patch("sync_to_discourse.DOCS_DIR", docs_dir), - patch("sync_to_discourse.ZENSICAL_TOML", toml_path), - _patch_cache_dir(tmpdir), - patch("urllib.request.urlopen") as mock_urlopen, - ): - mock_urlopen.return_value = _mock_response({"topics": []}) - - import io - import contextlib - buf = io.StringIO() - with contextlib.redirect_stdout(buf): - sync_docs(args) - output = buf.getvalue() - - assert "WARN: --only paths not present" in output - assert "getting-started/ghost.md" in output - assert "doc-a.md" in output - + assert "WARN: --only paths not in zensical.toml" in output + assert "getting-started/ghost.md" in output print(" PASS: sync_docs_only_filter_warns_on_unknown_path") def test_sync_docs_dry_run_counts_match_as_skip_not_update(): - """Dry-run must fetch remote and classify matching content as skipped.""" + def side_effect(req): + url, method = req.full_url, req.method + if method == "GET" and "/search.json" in url: + return _mock_response({"topics": [{"id": 500, "title": "Doc A"}]}) + if method == "GET" and url.endswith("/t/500.json"): + return _mock_response({ + "id": 500, "title": "Doc A", "category_id": 133, + "post_stream": {"posts": [{"id": 900, "post_number": 1}]}, + }) + if method == "GET" and url.endswith("/posts/900.json"): + return _mock_response({"id": 900, "raw": "whatever"}) + return _mock_response({}) + with tempfile.TemporaryDirectory() as tmpdir: docs_dir, toml_path = _build_two_doc_workspace(tmpdir) - - env = { - "DISCOURSE_URL": "https://community.sunnypilot.ai", - "DISCOURSE_API_KEY": "test-key", - "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', - } - # Force=True bypasses content cache so remote check becomes authoritative. args = argparse.Namespace( dry_run=True, verbose=True, force=True, only=["getting-started/doc-a.md"], ) + with patch("sync_to_discourse._body_matches_discourse", return_value=True): + output, _ = _run_sync(tmpdir, docs_dir, toml_path, args, side_effect) - def side_effect(req): - url = req.full_url - method = req.method - if method == "GET" and "/search.json" in url: - # Resolve via sync-id - return _mock_response({"topics": [{"id": 500, "title": "Doc A"}]}) - if method == "GET" and url.endswith("/t/500.json"): - return _mock_response({ - "id": 500, - "title": "Doc A", - "category_id": 133, - "post_stream": {"posts": [{"id": 900, "post_number": 1}]}, - }) - if method == "GET" and url.endswith("/posts/900.json"): - # Return what _post_body_matches_remote will compare against. - # Patching _body_matches_discourse below makes this trivial. - return _mock_response({"id": 900, "raw": "whatever"}) - return _mock_response({}) - - with ( - patch.dict("os.environ", env, clear=False), - patch("sync_to_discourse.DOCS_DIR", docs_dir), - patch("sync_to_discourse.ZENSICAL_TOML", toml_path), - _patch_cache_dir(tmpdir), - patch("sync_to_discourse.time") as mock_time, - patch("urllib.request.urlopen") as mock_urlopen, - patch("sync_to_discourse._body_matches_discourse", return_value=True), - ): - mock_time.sleep = MagicMock() - mock_urlopen.side_effect = side_effect - - import io - import contextlib - buf = io.StringIO() - with contextlib.redirect_stdout(buf): - sync_docs(args) - output = buf.getvalue() - - # Should skip, not "Would update" - assert "Would update" not in output, ( - f"Dry-run must not count matching remote as update:\n{output}" - ) - assert "SKIP (discourse up-to-date)" in output, ( - f"Must log skip for matching remote:\n{output}" - ) - # Summary line must show 1 skipped discourse match - assert "Skipped (Discourse Match): 1" in output - assert "Updated (Normal): 0" in output - + assert "Would update" not in output + assert "SKIP (discourse up-to-date)" in output + assert "Skipped (Discourse Match): 1" in output print(" PASS: sync_docs_dry_run_counts_match_as_skip_not_update") def test_sidebar_skips_update_when_remote_matches(): - """_generate_sidebars must fetch remote and skip unchanged sidebars.""" from sync_to_discourse import _generate_sidebars with tempfile.TemporaryDirectory() as tmpdir: docs_dir, toml_path = _build_two_doc_workspace(tmpdir) - # Add an index so there's something to render - (docs_dir / "getting-started" / "index.md").write_text( - "# Getting Started\n\nIntro text.\n" - ) - - env = { - "DISCOURSE_URL": "https://community.sunnypilot.ai", - "DISCOURSE_API_KEY": "test-key", - "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', - } + (docs_dir / "getting-started" / "index.md").write_text("# GS\n\nIntro.\n") config = DiscourseConfig( - base_url=env["DISCOURSE_URL"], - api_key=env["DISCOURSE_API_KEY"], + base_url=_ENV["DISCOURSE_URL"], api_key="k", category_mapping={"getting-started": 133}, ) - client = DiscourseClient(config) - stats = { - "sidebars_updated": 0, - "skipped_sidebar_match": 0, - } - synced_topics = { - "getting-started/doc-a.md": 501, - "getting-started/doc-b.md": 502, - } + stats = {"sidebars_updated": 0, "skipped_sidebar_match": 0} def side_effect(req): - url = req.full_url - method = req.method + url, method = req.full_url, req.method if method == "GET" and "/c/133/show.json" in url: - return _mock_response({ - "category": {"topic_url": "/t/about-getting-started/700"}, - }) + return _mock_response({"category": {"topic_url": "/t/x/700"}}) if method == "GET" and url.endswith("/t/700.json"): - return _mock_response({ - "post_stream": {"posts": [{"id": 800, "post_number": 1}]}, - }) + return _mock_response({"post_stream": {"posts": [{"id": 800}]}}) if method == "GET" and url.endswith("/posts/800.json"): return _mock_response({"id": 800, "raw": "matches"}) - # Any PUT/POST would indicate an unwanted update - raise AssertionError(f"Unexpected {method} to {url}") + raise AssertionError(f"Unexpected {method} {url}") with ( - patch.dict("os.environ", env, clear=False), + patch.dict("os.environ", _ENV, clear=False), patch("sync_to_discourse.ZENSICAL_TOML", toml_path), patch("sync_to_discourse.time") as mock_time, patch("urllib.request.urlopen") as mock_urlopen, @@ -718,97 +612,48 @@ def side_effect(req): ): mock_time.sleep = MagicMock() mock_urlopen.side_effect = side_effect - _generate_sidebars( - config=config, - client=client, - synced_topics=synced_topics, + config=config, client=DiscourseClient(config), + synced_topics={ + "getting-started/doc-a.md": 501, + "getting-started/doc-b.md": 502, + }, index_contents={"getting-started/index.md": "Intro.\n"}, - dry_run=False, - verbose=True, - stats=stats, + dry_run=False, verbose=True, stats=stats, ) - assert stats["skipped_sidebar_match"] == 1, ( - f"Matching sidebar must be skipped, got stats={stats}" - ) - assert stats["sidebars_updated"] == 0, ( - f"No sidebars should have updated, got stats={stats}" - ) - + assert stats == {"sidebars_updated": 0, "skipped_sidebar_match": 1} print(" PASS: sidebar_skips_update_when_remote_matches") def test_update_topic_skipped_when_metadata_matches(): - """update_topic PUT must be suppressed when title + category already match.""" + calls: list[tuple[str, str]] = [] + + def side_effect(req): + url, method = req.full_url, req.method + calls.append((method, url)) + if method == "GET" and "/search.json" in url: + return _mock_response({"topics": [{"id": 500, "title": "Doc A"}]}) + if method == "GET" and url.endswith("/t/500.json"): + return _mock_response({ + "id": 500, "title": "Doc A", "category_id": 133, + "post_stream": {"posts": [{"id": 900}]}, + }) + if method == "GET" and url.endswith("/posts/900.json"): + return _mock_response({"id": 900, "raw": "body-differs"}) + return _mock_response({"post": {"id": 900}}) + with tempfile.TemporaryDirectory() as tmpdir: docs_dir, toml_path = _build_two_doc_workspace(tmpdir) - - env = { - "DISCOURSE_URL": "https://community.sunnypilot.ai", - "DISCOURSE_API_KEY": "test-key", - "DISCOURSE_CATEGORY_MAP": '{"getting-started": 133}', - } args = argparse.Namespace( dry_run=False, verbose=True, force=True, only=["getting-started/doc-a.md"], ) + with patch("sync_to_discourse._body_matches_discourse", return_value=False): + _run_sync(tmpdir, docs_dir, toml_path, args, side_effect) - calls: list[tuple[str, str, bytes | None]] = [] - - def side_effect(req): - url = req.full_url - method = req.method - data = req.data - calls.append((method, url, data)) - - if method == "GET" and "/search.json" in url: - return _mock_response({"topics": [{"id": 500, "title": "Doc A"}]}) - if method == "GET" and url.endswith("/t/500.json"): - # Metadata check + first_post_id both hit this endpoint; - # return matching metadata + post stream - return _mock_response({ - "id": 500, - "title": "Doc A", - "category_id": 133, - "post_stream": {"posts": [{"id": 900, "post_number": 1}]}, - }) - if method == "GET" and url.endswith("/posts/900.json"): - return _mock_response({"id": 900, "raw": "body-differs"}) - if method == "PUT" and url.endswith("/posts/900.json"): - return _mock_response({"post": {"id": 900}}) - return _mock_response({}) - - with ( - patch.dict("os.environ", env, clear=False), - patch("sync_to_discourse.DOCS_DIR", docs_dir), - patch("sync_to_discourse.ZENSICAL_TOML", toml_path), - _patch_cache_dir(tmpdir), - patch("sync_to_discourse.time") as mock_time, - patch("urllib.request.urlopen") as mock_urlopen, - ): - mock_time.sleep = MagicMock() - mock_urlopen.side_effect = side_effect - # Force body mismatch so update_post fires; metadata check then runs - with patch( - "sync_to_discourse._body_matches_discourse", return_value=False, - ): - sync_docs(args) - - # Must PUT the post (body changed), but MUST NOT PUT the topic - put_topic_calls = [ - (m, u) for m, u, _ in calls - if m == "PUT" and "/t/-/" in u - ] - put_post_calls = [ - (m, u) for m, u, _ in calls - if m == "PUT" and "/posts/" in u - ] - assert put_post_calls, f"Post should be updated when body differs: {calls}" - assert not put_topic_calls, ( - f"Topic metadata matches — no update_topic PUT expected. Got: {put_topic_calls}" - ) - + assert any(m == "PUT" and "/posts/" in u for m, u in calls) + assert not any(m == "PUT" and "/t/-/" in u for m, u in calls) print(" PASS: update_topic_skipped_when_metadata_matches")