diff --git a/.claude/scripts/check-file-moves.sh b/.claude/scripts/check-file-moves.sh new file mode 100755 index 00000000..2fb3ca02 --- /dev/null +++ b/.claude/scripts/check-file-moves.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# PostToolUse hook on Bash. When the command is `git mv` or `git rm`, finds the source +# path(s) and greps the repo for refs to them. Prints findings as additionalContext so +# the model sees the worklist in the same session the move happened. +# +# Purpose: catch doc/comment/Dockerfile drift in the same session a file gets moved or +# deleted — same compounding loop as check-claude-md-refs.sh but for renames/deletions +# instead of CLAUDE.md edits. +# +# Why git mv / git rm specifically (not plain mv / rm): plain mv/rm is used constantly +# for temp files, build artifacts, and ephemeral work. Restricting to git-tracked +# operations dramatically cuts false-positive noise. The cost is missing the case where +# someone uses plain `mv` on a tracked file — that's the gap CodeRabbit's +# path_instruction for file rename/delete picks up at review time. +# +# See CLAUDE.md "File-move discipline" for the canonical rule. + +set -uo pipefail + +REPO_ROOT="/Users/joshuadell/NovaCraft" + +command=$(jq -r '.tool_input.command // ""' 2>/dev/null) + +# Only fire on git mv / git rm. Plain mv/rm is too noisy. +case "$command" in + *"git mv "*) ;; + *"git rm "*) ;; + *) exit 0 ;; +esac + +old_paths=() + +# git mv — capture the FIRST positional arg (the source). +if [[ "$command" =~ git[[:space:]]+mv[[:space:]]+([^[:space:]]+) ]]; then + src="${BASH_REMATCH[1]}" + case "$src" in + -*) ;; # skip if it looked like a flag + *) old_paths+=("$src") ;; + esac +fi + +# git rm [flags] ... — capture all non-flag tokens after `git rm`. +if [[ "$command" =~ git[[:space:]]+rm[[:space:]] ]]; then + args=$(echo "$command" | sed -E 's/^.*git[[:space:]]+rm[[:space:]]+//' | tr -s ' ') + for token in $args; do + case "$token" in + -*) continue ;; + "") continue ;; + *) old_paths+=("$token") ;; + esac + done +fi + +# Nothing to do. +if [ ${#old_paths[@]} -eq 0 ]; then + exit 0 +fi + +# For each old path, grep for refs and collect findings. --fixed-strings so paths +# containing dots / slashes match literally instead of as regex. +findings="" +for old_path in "${old_paths[@]}"; do + case "$old_path" in + ""|"*"|"."|"./"|".."|"./*") continue ;; + esac + + matches=$(grep -rln --fixed-strings "$old_path" \ + --include='*.md' --include='*.cs' --include='*.props' --include='*.csproj' \ + --include='*.yml' --include='*.yaml' --include='*.sh' \ + --include='Dockerfile*' \ + "$REPO_ROOT" 2>/dev/null \ + | head -30 \ + || true) + if [ -n "$matches" ]; then + findings+=$(printf "Refs to '%s' still present in:\n%s\n\n" "$old_path" "$matches") + fi +done + +if [ -z "$findings" ]; then + exit 0 +fi + +msg=$(printf 'File-move/delete detected. Refs to the OLD path may need updating before commit:\n\n%s\nUpdate the paraphrases in the same PR. See CLAUDE.md "File-move discipline".' "$findings") +jq -n --arg m "$msg" '{hookSpecificOutput: {hookEventName: "PostToolUse", additionalContext: $m}}' diff --git a/.claude/settings.json b/.claude/settings.json index 40c01b0b..2610c106 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -61,6 +61,15 @@ "command": "/Users/joshuadell/NovaCraft/.claude/scripts/check-claude-md-refs.sh" } ] + }, + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "/Users/joshuadell/NovaCraft/.claude/scripts/check-file-moves.sh" + } + ] } ] } diff --git a/.coderabbit.yaml b/.coderabbit.yaml index d4aba0c0..e7002906 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -447,6 +447,81 @@ reviews: in bash run blocks, missing `permissions:` blocks, missing `concurrency:` groups, secrets in plaintext, hardcoded tokens. + - path: "NextAurora.AppHost/AppHost.cs" + instructions: | + TOPOLOGY DOC PAIRING (CLAUDE.md "Doc-and-diagram discipline"). AppHost.cs + is the canonical source of the system topology — service count, transport + wiring, DB choices, external deps. When this file changes in a PR (new + service registration, new `WithReference`, new `AddAzure*`, new + `WaitFor`, removed resource), the topology depiction artifacts MUST be + updated in the same PR, OR the PR description names a tracking issue + for the deferred update. + + Required pairing: + - `docs/architecture.md` (the prose system map + communication patterns) + - `docs/nextaurora-architecture.svg` (rendered diagram github.com inlines) + - `docs/nextaurora-architecture.excalidraw` (editable source) + + Reviewers look at the .svg on the PR page to understand "what does this + system look like now?" — if the .svg doesn't show the new service or + transport, every subsequent PR review reasons against a stale picture. + Same for architecture.md: it's the doc people grep when onboarding or + debugging a comms issue. Flag this as a finding when AppHost.cs changes + without paired updates. Acceptable PR-description waivers: "topology + change is internal/transient (smoke-test scaffold) and will be reverted + in PR #N" or "docs/diagram updated in tracking issue #N, this PR ships + the code only because [reason]." + + - path: "NextAurora.ServiceDefaults/Extensions.cs" + instructions: | + SERVICE-REQUEST-FLOW DOC PAIRING (CLAUDE.md "Doc-and-diagram discipline"). + Extensions.cs configures the middleware pipeline ordering (auth → + correlation-id → authorization), JWT validation, and the + AddDefaultAuthentication / MapDefaultEndpoints contracts every service + composes with. When this changes (middleware order, JWT params, default + endpoint behavior), the service-request-flow depiction artifacts MUST + be updated in the same PR or named in a deferred issue: + + Required pairing: + - `docs/architecture.md` Observability section (middleware-order rules) + - `docs/service-request-flow.svg` + `docs/service-request-flow.excalidraw` + + Same reasoning as the AppHost.cs rule above — reviewers reason against + the .svg picture of the request lifecycle. Flag missing pairs. + + - path: "**" + instructions: | + FILE-MOVE DISCIPLINE (CLAUDE.md "File-move discipline"). When this PR's + diff includes a file deletion or rename, grep the repo for refs to the + OLD path and flag any matches as findings. Look in: every `*.md` + (README, docs/, .claude/, .github/), inline comments in `*.cs` + (especially comments ending with literal `See CLAUDE.md.`), `Dockerfile*` + COPY/ADD instructions, `*.csproj` ProjectReference Include="...", + `.github/workflows/*.yml` `run:` blocks, `.coderabbit.yaml` + path_instructions, `.claude/scripts/*.sh` literal-path strings, + and `.claude/commands/*.md` slash-command bodies. + + Failing to update these is exactly the drift class that produced the + broken-Dockerfile + broken-CLAUDE.md-link + stale-demo-doc fallout from + the simplicity refactor (PR #31 collapsed CatalogService to a single + project; refs to the pre-collapse 4-project layout sat silent in + `Dockerfile.catalog`, `docs/demo-deployment*.md`, and + `docs/performance-and-data-correctness.md` for months — only caught + when the doc-currency sweep happened in PR #112). Even a single missed + ref is a finding. + + Mechanical enforcement already covers part of this: the + `.github/workflows/ci.yml` "Broken-link audit" step blocks the merge + when relative markdown links to local source files don't resolve. + That catches the broken-link case but not the "ref still points at a + file that happens to exist for a different reason" case — which is + where this CodeRabbit pass earns its keep. + + Allowlist: `.claude/audits/INDEX.md` legitimately links to gitignored + per-article files (copyright reason — see + `.claude/commands/article-audit.md` step 5 "Copyright note"). The CI + guard excludes it; do NOT flag links from INDEX.md as drift. + # Chat: allow @coderabbitai mentions to ask follow-up questions on a PR. chat: auto_reply: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75c945ce..c61bf993 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -144,6 +144,81 @@ jobs: # step 5 "Copyright note" — the contract is "INDEX ships, per-article files don't." # On a contributor's machine the links resolve; on the CI runner they don't, by design. + # Broken-COPY audit: Dockerfile* COPY/ADD lines that reference source paths which + # don't exist in the build context (the repo root). This is the gap the markdown + # audit above can't close — `Dockerfile.catalog` rotted silently for months after + # PR #31 (VSA collapse) because the broken paths only failed at deploy time, not + # at build/test time. The Fly demo at https://catalog-api-demo.fly.dev kept running + # pre-collapse code; any redeploy would have failed on the first COPY of a + # nonexistent csproj. Same shape as the markdown audit: parse, check existence, fail. + # Handles the common COPY forms: `COPY src dst`, `COPY src1 src2 dst` (multi-src), + # and `COPY --from=stage src dst` (cross-stage; skips the src check since src is in + # another build stage, not the repo). Skips wildcards (`COPY src/*.json .`) since + # those resolve at build time, not against the repo tree. See CLAUDE.md "File-move + # discipline" for the canonical rule. + - name: Broken-COPY audit — Dockerfile source paths + run: | + fail=0 + while IFS= read -r dockerfile; do + # Read each COPY/ADD line, strip the destination (last token), check each source. + while IFS= read -r line; do + # Skip `COPY --from=...` cross-stage copies (src is from a build stage, not the repo). + case "$line" in *"--from="*) continue ;; esac + # Strip the instruction keyword and any flags. + args=$(echo "$line" | sed -E 's/^[[:space:]]*(COPY|ADD)[[:space:]]+//; s/--[a-zA-Z0-9=._/-]+[[:space:]]+//g') + # Last token is the destination; everything before is source(s). + srcs=$(echo "$args" | awk 'NF>1 {for(i=1;i/dev/null || true) + while IFS= read -r svg; do + excalidraw="${svg%.svg}.excalidraw" + if [ ! -f "$excalidraw" ]; then + echo "::error file=$svg::rendered .svg without .excalidraw source → expected $excalidraw" + fail=1 + fi + done < <(find docs -maxdepth 2 -type f -name '*.svg' 2>/dev/null || true) + exit "$fail" + # Testcontainers-based integration tests, in their own job: they need Docker (the # ubuntu-latest runner ships it at the standard /var/run/docker.sock, so Testcontainers # auto-detects — no DOCKER_HOST override, unlike macOS Docker Desktop locally). Kept diff --git a/CLAUDE.md b/CLAUDE.md index 12480b15..5712433f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -170,7 +170,19 @@ The bar isn't "document every bug fix." It's: **if the failure mode would surpri **When tightening or changing a CLAUDE.md rule**, grep the repo for files that paraphrase it (inline comments in `.cs`/`.props`/`.csproj`, supporting docs, README sections) and update each so they stay aligned. CLAUDE.md is canonical; everywhere else summarizes. Convention: any inline comment that summarizes a CLAUDE.md rule ends with `See CLAUDE.md.` so it's findable via `grep -rn "See CLAUDE.md"`. A PostToolUse hook surfaces candidate files automatically when CLAUDE.md is edited (see `.claude/settings.json`). -This rule is for everyone working in this repo (humans, AI assistants, future-you). Don't wait to be asked. +**File-move discipline — when deleting or renaming a file**, grep the repo for refs to the *old* path and update them in the same PR. Look in: `*.md` (docs, READMEs, INDEX), inline comments in `*.cs`, `Dockerfile*` COPY/ADD lines, `*.csproj` ProjectReference Include, `.github/workflows/*.yml` `run:` blocks, `.coderabbit.yaml` path_instructions. **Same compounding-loop principle as the CLAUDE.md rule above**, just for file structure instead of rule text. Failing to do this is the same drift class as the simplicity refactor fallout — `Dockerfile.catalog` referenced `CatalogService/CatalogService.Api/CatalogService.Api.csproj` for months after PR #31 collapsed CatalogService to a single project, broken silently until someone tried to redeploy. Enforcement is layered: (a) the `check-file-moves.sh` PostToolUse hook prints stale-ref candidates after `git mv` / `git rm`, (b) the CI broken-link audit in `.github/workflows/ci.yml` catches stale markdown links at PR-merge gate, (c) the `.coderabbit.yaml` "**" path_instruction flags missed refs at review time, (d) this rule documents the discipline. + +**Doc-and-diagram discipline — docs and diagrams are the review surface, not byproducts.** When reviewers (human or CodeRabbit) want to *observe* what the system does today, they read `docs/architecture.md` and look at `docs/nextaurora-architecture.svg`. If those are stale, every review reasons against a fiction — and drift becomes invisible until someone hits the discrepancy in production. So when code or config changes affect what a doc or diagram *depicts*, the doc/diagram updates in the same PR — or the PR description names the deferred follow-up issue. Concrete pairings (extend as new categories appear): + +- **Topology changes** (new service / new transport / new DB / new external dep): `NextAurora.AppHost/AppHost.cs` ↔ `docs/architecture.md` ↔ `docs/nextaurora-architecture.{svg,excalidraw}` +- **Communication-pattern changes** (new gRPC client, new ASB subscription, new endpoint family): same trio plus `docs/messaging-transport-selection.md` +- **EF / caching / outbox / migration mechanism changes**: `docs/performance-and-data-correctness.md` + `docs/ef-core.md` + `docs/cqrs-data-access.md` + sibling diagram (e.g. `docs/efcore-query-write.{svg,excalidraw}`, `docs/transactional-outbox.{svg,excalidraw}`, `docs/hybridcache-flow.{svg,excalidraw}`) +- **Loop / process changes** (new hook, new skill, new CI step, new agent): `docs/dev-loop.md` + `.github/AI_WORKFLOW.md` + `docs/dev-loop.{svg,excalidraw}` +- **Service-request-flow changes** (middleware order, auth flow, correlation propagation): `NextAurora.ServiceDefaults/Extensions.cs` ↔ `docs/architecture.md` Observability section ↔ `docs/service-request-flow.{svg,excalidraw}` + +Diagrams are always paired: every `docs/*.excalidraw` ships with a sibling `docs/*.svg`. The `.excalidraw` is the authoritative editable source; the `.svg` is what github.com renders inline (so reviewers see it on the PR page). Editing one without re-rendering the other breaks the pairing — the source no longer matches what reviewers actually see. The render pipeline lives in `.claude/scripts/rebuild-diagrams.sh` (Playwright-driven; reads `.excalidraw`, writes `.svg` + `.png`). Enforcement is layered: (a) the CI `Diagram-pair audit` step fails the build if any `.excalidraw` lacks its `.svg` (or vice versa), (b) the `.coderabbit.yaml` path_instructions for topology-touching files flag missing doc/diagram pairs at review time, (c) this rule documents the discipline. The "is the rendered SVG still in sync with the source?" deeper check isn't mechanically enforced yet (would require the Playwright render in CI); for now, when you edit a `.excalidraw`, run `.claude/scripts/rebuild-diagrams.sh` before committing. + +All three rules are for everyone working in this repo (humans, AI assistants, future-you). Don't wait to be asked. ## Continuous Rule Encoding (the compounding loop)