diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index ae437f2..ac6f347 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
     {
       "name": "autodev",
       "description": "Autonomous development workflow skills for coding agents",
-      "version": "6.3.1",
+      "version": "6.4.0",
       "source": "./",
       "author": {
         "name": "Jon Langevin",
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index d22c713..cb699c9 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "autodev",
   "description": "Autonomous development workflow skills for coding agents: design, review, planning, execution, monitoring, and retrospectives",
-  "version": "6.3.1",
+  "version": "6.4.0",
   "author": {
     "name": "Jon Langevin",
     "email": "jon@gocodealone.com"
diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json
index 3ec3935..9a0d664 100644
--- a/.cursor-plugin/plugin.json
+++ b/.cursor-plugin/plugin.json
@@ -2,7 +2,7 @@
   "name": "autodev",
   "displayName": "Autonomous Dev Kit",
   "description": "Autonomous development workflow skills for coding agents",
-  "version": "6.3.1",
+  "version": "6.4.0",
   "author": {
     "name": "Jon Langevin",
     "email": "jon@gocodealone.com"
diff --git a/.github/workflows/skill-content-check.yml b/.github/workflows/skill-content-check.yml
index 4dc79e4..e7fb0d9 100644
--- a/.github/workflows/skill-content-check.yml
+++ b/.github/workflows/skill-content-check.yml
@@ -6,12 +6,16 @@ on:
       - 'skills/**'
       - 'agents/**'
       - 'tests/skill-content-grep.sh'
+      - 'tests/pipeline-evidence-doc-sync.sh'
+      - 'tests/skill-cross-refs.sh'
       - '.github/workflows/skill-content-check.yml'
   pull_request:
     paths:
       - 'skills/**'
       - 'agents/**'
       - 'tests/skill-content-grep.sh'
+      - 'tests/pipeline-evidence-doc-sync.sh'
+      - 'tests/skill-cross-refs.sh'
       - '.github/workflows/skill-content-check.yml'
   workflow_dispatch:
 
@@ -25,3 +29,7 @@ jobs:
       - uses: actions/checkout@v4
       - name: Check skill content for host-specific tokens
         run: bash tests/skill-content-grep.sh
+      - name: Pipeline evidence + doc-sync contracts
+        run: bash tests/pipeline-evidence-doc-sync.sh
+      - name: Skill cross-references resolve
+        run: bash tests/skill-cross-refs.sh
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design-review.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design-review.md
new file mode 100644
index 0000000..49fa524
--- /dev/null
+++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design-review.md
@@ -0,0 +1,68 @@
+# Pipeline Evidence + Doc-Sync Hardening — Adversarial Review
+
+**Phase:** design
+**Artifact:** `docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md`
+**Status:** PASS @ cycle 3 (cycle 1: FAIL → revised, see Resolution column; cycles 2–3 recorded below)
+
+## Findings
+
+| id | sev | class | loc | issue | resolution |
+|---|---|---|---|---|---|
+| D1 | Critical | Repo-precedent / Existence | D1 §Design | Two committed review files already exist (`2026-05-31-session-owned-lock-claims-design-review.md`/`-plan-review.md`) under convention `<stem>-design-review.md`/`<stem>-plan-review.md`. The invented `-adversarial-<phase>.md` name diverges; #69's "never written" premise overstated — practice exists ad-hoc but isn't skill-mandated, has no stable IDs, no guaranteed path. | Adopted existing `<stem>-design-review.md`/`<stem>-plan-review.md` naming. Reframed #69 as "systematize the ad-hoc practice + add stable IDs + mandate path so retro can glob reliably." |
+| D2 | Critical | Missing failure mode | D2 §Design | Retro fix updated Step 5 process text but left the output-format template (retro SKILL.md:99 "Pull from `tests/skill-activation-audit.sh`") + the `**Reads:**` bullet pointing at the kit-local script — every future retro re-embeds the broken instruction. | D2 scope now explicitly updates retro SKILL.md:99 (format template) + the `**Reads:**` bullet + Step 5 together. |
+| D3 | Important | Circular / dogfood framing | §Multi-Component Validation | Skill edits don't land until their task runs; during THIS feature's own pipeline the report is manually emulated, not skill-written. "First real artifacts of the new behavior" misleads. | Added explicit note: D1 behavior is manually emulated for this feature's own design/plan reviews until the skill-edit task lands; implementing agents must not assume the skill auto-commits before that task. |
+| D4 | Important | Assumptions (A1) | D2 §Design | Retro reads phase from `args` of `skill` entries; the reviewer **subagent** is dispatched via Agent tool → `ev:"agent"` record has no `sk`/`args`/phase. Conflation risk. | Clarified: retro keys off `ev:"skill"` entries (the lead's `Skill` invocation carries `args:"--phase=…"`); the Agent-dispatched reviewer is a separate sub-record the retro ignores for phase. |
+| D5 | Important | YAGNI | D1 §Stable IDs | `Resolution:` field as a per-revision-cycle mutable field adds maintenance with no consumer (retro Step 2 scores from downstream evidence, never reads it). | Reframed: `Resolution` is OPTIONAL, filled ONCE at end-state (commit SHA / `accepted — reason` / `false-positive`), and D2 now wires retro Step 2 to read it as a hint (falls back to downstream evidence) — giving it a real consumer at low maintenance. |
+| D6 | Important | Trap / self-pass | D3 §Step 1e | Step 1e is pure judgment, no script, no exit-code, no halt path like Step 1d → can silently self-pass under autonomy (the exact "trap" the user flagged). | Narrowed trigger to "diff commits a design doc, README/reference doc, or example artifact" (rare/cheap) + require a visible one-line `Doc-reconciliation:` note in the PR body (concrete accountability token, no scanner — honors the user's LIGHT choice). |
+| D7 | Minor | Repo-precedent | D1 | Existing 2 review files use no finding IDs (old `\| sev \| class \| loc \|` table); post-v6.4.0 corpus will be mixed-format. | Retro degrades gracefully: reads new ID format and pre-v6.4.0 reports (no IDs) alike. |
+| D8 | Minor | Failure mode | D1 | Concurrent review writes (lead + manual) → last-write-wins on the report file. | Noted overwrite is safe only under sequential execution; no lock needed at this scale. |
+| D9 | Minor | Precedent overlap | D4 §Design | New plan-phase "naming-convention match" row sits adjacent to existing `Config-validation schema rules` row → reader may conflate. | D4 row text now states it's distinct (this = human naming-convention consistency; that = tool-enforced schema invariants). |
+| D10 | Minor | Infra | D1 | `tests/skill-cross-refs.sh` must resolve any new step references; should be an explicit plan task. | Plan will run `skill-cross-refs.sh` + `skill-content-grep.sh` as a verification task before PR. |
+
+## Bug-Class Scan Transcript
+
+| Class | Result | Note |
+|---|---|---|
+| Project-guidance conflicts | Clean | No `docs/design-guidance.md`; design acknowledges + inherits the user's "not too long/onerous" constraint. |
+| Assumptions under attack | Finding (D4) | A1 live-confirmed; phase-disambiguation clarified for skill-vs-agent records. |
+| Repo-precedent conflicts | Finding (D1, D7) | Existing `-design-review.md`/`-plan-review.md` convention adopted. |
+| Artifact-class precedent | Finding (D1) | 2 prior committed review files surveyed; naming adopted. |
+| YAGNI violations | Finding (D5) | `Resolution` reframed optional/end-state with a wired consumer. |
+| Missing failure modes | Finding (D2, D8) | Retro format-template fix added; concurrent-write noted. |
+| Security/privacy | Clean | Report holds design findings only; jsonl args truncated 80 chars; no PII. |
+| Infrastructure impact | Clean (D10 minor) | No runtime impact; CI skill-checks added to plan. |
+| Multi-component validation | Finding (D3) | Dogfood asymmetry flagged + D1↔D2 path-contract kept literally identical. |
+| Rollback story | Clean | Revert-merge + re-tag; graceful-degrade covers report absence. |
+| Simpler alternative | Clean | Heuristic doc-scanner explicitly rejected per user LIGHT choice. |
+| User-intent drift | Finding (D6) | Step 1e tightened to avoid no-op gate; honors "no traps". |
+| Existence / runtime-validity | Finding (D1, D2) | Existing report files + retro:99 template confirmed by `ls`/`sed`. |
+
+## Options the author may not have considered
+1. **Adopt existing naming convention** — taken (D1).
+2. **Drop `Resolution` entirely** — partially taken: kept but reframed optional/end-state with a wired retro consumer, because the user explicitly wants reviews logged to ease retros across compaction; finding-IDs + an optional resolution hint serve that without per-cycle churn.
+3. **Give Step 1e an output token** — taken (D6): visible PR-body `Doc-reconciliation:` line instead of a scanner.
+
+**Verdict reasoning:** Two Criticals (false "never written" premise + naming divergence; incomplete retro fix leaving the broken template line) plus four Importants are all addressed in the revised design without adding a skill or a scanner. The revision adopts the repo's own convention, completes the retro fix, de-risks the Step 1e trap with a visible token, and reframes the only YAGNI surface (`Resolution`) to have a consumer. Re-run after revision to confirm convergence.
+
+## Cycle 2 (re-run) — all cycle-1 resolved; revision introduced new issues, now fixed
+
+| id | sev | class | issue | resolution |
+|---|---|---|---|---|
+| N1 | Critical | Multi-component / Assumptions | Stem-derivation rule ambiguous for plan files (`<slug>.md` has no `-design` tail to strip) → an agent could derive a wrong `-plan-review.md` path, silently breaking the load-bearing D1↔D2 path contract. | Replaced prose with a deterministic one-rule: drop `.md`; design→`+-review.md`, plan→`+-plan-review.md`, with both a design and a plan worked example. Both D1 and D2 state the identical rule. |
+| N2 | Important | Missing failure mode | Step 1e added to skill body but NOT to `finishing-a-development-branch`'s Autonomous Mode numbered list (the real control flow) → never fires in autonomous runs (deeper self-pass than D6). | D3 now names **two** edit sites: the `### Step 1e` body + a new bullet in the Autonomous Mode list after the Step 1d item. |
+| N3 | Important | Trap | `Doc-reconciliation:` PR-body token claimed retro-visible, but no retro step consumed it → aspirational, soft self-pass. | D2 wires retro Step 5 (Missed activations) to record `finishing Step 1e` fired iff the token is present when the diff touched docs — real consumer, reuses existing table. |
+| N4 | Minor | Existence | `**Reads:**` is two lines; demoting "the bullet" could remove the correct jsonl line. | D2 scalpel note: keep line 159 (jsonl), demote only the `skill-activation-audit.sh` line. |
+| N5 | Minor | YAGNI | Step 1e trigger "README/reference doc" broader than motivating issues. | Trigger reworded to "describes the feature's behavior"; docs with no `docs/plans/` counterpart trivially pass — cheap no-op. |
+
+**Cycle-2 verdict:** all 10 cycle-1 findings verified resolved in design text; the 1 Critical + 2 Important + 2 Minor introduced by the revision are now fixed (deterministic stem rule, dual edit-site for Step 1e, wired token consumer, scalpel Reads edit, trigger reword). No skill added, no scanner, no net bloat beyond ~+20 lines to adversarial-design-review and ~+14 to finishing. Cycle 3 re-run to confirm convergence.
+
+## Cycle 3 (convergence) — PASS
+
+Zero Critical, zero Important. Cycle-2 N1/N2/N3 verified genuinely resolved in design text (deterministic stem rule stated identically in D1 & D2 with both worked examples; Step 1e dual edit-site explicit; token wired to retro Step 5). Converged.
+
+Remaining Minors (→ plan-time clarifications, not design blockers):
+- **M1:** retro Step 5 must check the diff touched docs/examples *before* recording a Step-1e row, else a no-docs PR could get a spurious `unverified` row. Design already gates on "when the diff touched docs/examples"; plan makes it a hard precondition.
+- **M2:** spell out in the plan that an old (no-ID) report → retro falls back to downstream evidence for scoring.
+- **M3 (fixed in design):** clarified "overwrite" = one file per phase, may append `## Cycle N` sections (history survives).
+
+**Final design verdict: PASS @ cycle 3.** Proceed to writing-plans.
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md
new file mode 100644
index 0000000..846c48f
--- /dev/null
+++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md
@@ -0,0 +1,257 @@
+# Pipeline Evidence + Doc-Sync Hardening — Design
+
+**Date:** 2026-06-03
+**Issues:** #69, #70, #71, #72 (all GoCodeAlone/autonomous-dev-kit)
+**Target release:** v6.4.0
+**Author:** autonomous pipeline (brainstorming)
+
+## Problem
+
+Four issues, two themes, one root: the pipeline emits design/plan/review artifacts but the
+*connective tissue* between them is weak or fictional.
+
+**Theme A — retro evidence is broken:**
+- **#69:** `post-merge-retrospective` reads "adversarial-review reports committed in `docs/plans/`"
+  (SKILL.md:22, :33, :156). But `adversarial-design-review` does not **mandate** committing the
+  report — step 7 says "Write the report" and the Dispatch subagent returns text, with no
+  instruction to persist+commit it to a known path. *Nuance (adversarial review D1):* the practice
+  exists **ad-hoc** — exactly two committed review files exist today
+  (`docs/plans/2026-05-31-session-owned-lock-claims-design-review.md` + `-plan-review.md`) under
+  the convention `<stem>-design-review.md` / `<stem>-plan-review.md`. But because the skill never
+  mandates it, it happens for some features and not others, has **no stable finding IDs**, and the
+  retro cannot rely on the file existing. Result: most retros reconstruct findings from revision
+  notes/PR threads — worse under long/compacted context (transcript lost). The fix **systematizes
+  the existing ad-hoc practice** (mandate the commit, adopt the existing name, add stable IDs), it
+  does not invent a new artifact.
+- **#70:** retro tells the agent to run `tests/skill-activation-audit.sh` *"(this repo)"* — a
+  **kit-dev-only** script absent in consumer repos → "Missed skill activations" table is "script
+  does not exist" every time. Meanwhile the `record-activity` PostToolUse hook (shipped:
+  `hooks/hooks.json:53`, `${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd record-activity`) **already**
+  appends every Skill activation to `<cwd>/.claude/autodev-state/in-progress.jsonl` in **any**
+  repo. The retro just isn't pointed at it.
+
+**Theme B — committed docs drift from reality:**
+- **#71:** in split-PR features, PR-1 commits docs/examples describing the **full** feature
+  (endpoints/helpers/metrics that ship in later PRs). `alignment-check`/`scope-lock` trace
+  task/PR manifest, not committed **doc content** → forward-references slip to human review.
+- **#72:** during in-scope execution, identifiers get convention-conforming refinements (config
+  key snake→camel, example snippet changes). Not a disproved assumption (so scope-lock's backport
+  path never triggers), not a manifest change → nothing reconciles the design doc with built code.
+  Design ships stale-on-arrival; reviewers burn cycles on doc-vs-code drift.
+
+## Goals / Non-goals
+
+**G1:** adversarial-design-review commits a durable, scannable findings report with stable
+  finding IDs (#69).
+**G2:** retro scores findings from that committed report, and scores activations from the
+  `in-progress.jsonl` the hook already writes — degrading gracefully, not pointing at a kit-local
+  script (#70).
+**G3:** a single pre-PR doc-reconciliation gate catches both forward-references (#71) and
+  identifier drift (#72) in committed docs/examples.
+**G4:** plan-phase adversarial review gains one checklist row: plan identifiers/examples match
+  implemented identifiers + repo naming convention (#72, catch-before-code).
+
+**Non-goals (YAGNI):**
+- No heuristic doc-content scanner that diffs every identifier in prose against the manifest
+  (issue #71's *primary* rec). Rejected: false-positive-prone, unbounded, the "onerous/trap"
+  class the user explicitly warned against. We take #71's own lighter fallback (explicit labeling
+  + identifier match).
+- No new skill, no new **standalone** script (heuristic scanner / activation-append helper). A
+  small grep-assertion **regression test** that guards these contracts and runs inside the existing
+  `skill-content-check.yml` CI is in-scope — the design's Multi-Component Validation requires CI
+  enforcement, so the test is the enforcement harness, not new product surface. No per-gate manual
+  activation-append (the hook covers Skill-invoked gates; manual appends would be redundant bloat).
+- No retro restructure beyond the two evidence sources.
+
+## Design
+
+### D1 — Committed adversarial-review report (#69, G1)
+
+`adversarial-design-review` step 7 + Dispatch + Report-format change:
+- After producing the report, **write it to the repo's existing convention path** and **commit it
+  alongside** the artifact. **Deterministic derivation (one rule, no ambiguity — adversarial review
+  cycle-2 N1):** take the artifact filename, drop the `.md`, then:
+  - **design phase:** append `-review.md` → e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`
+    (matches existing `2026-05-31-session-owned-lock-claims-design-review.md`).
+  - **plan phase:** append `-plan-review.md` → e.g. `2026-06-03-pipeline-evidence-doc-sync.md`
+    (plan has no `-design` tail) → `2026-06-03-pipeline-evidence-doc-sync-plan-review.md`
+    (matches existing `2026-05-31-session-owned-lock-claims-plan-review.md`).
+
+  The retro (D2) derives the **same** path by the same rule — this is the load-bearing D1↔D2
+  contract, so both skills state the rule identically. *(Adopting the existing name, not a new
+  `-adversarial-*` one.)* The Dispatch subagent produces the report text; the **lead** writes+commits
+  it (the subagent has no git authority — matches the existing Dispatch pattern).
+- **Stable finding IDs:** design-phase findings `D1, D2, …`; plan-phase `P1, P2, …`. Each finding
+  row carries its ID as the first column. This is the durable anchor the retro correlates against.
+- **Optional `Resolution` column**, filled **once at end-state** (not mutated every revision
+  cycle): a commit SHA, `accepted — <reason>`, or `false-positive`; left blank/`pending` if
+  unresolved. D2 wires retro Step 2 to read it as a *hint* (falling back to downstream evidence
+  when blank), so the field has a real consumer at near-zero maintenance.
+- Idempotent: re-running the review on a revised artifact updates **the same single report file
+  per phase** (not a new file per cycle) — the file holds the latest cumulative state; multi-cycle
+  runs may append a `## Cycle N` section so finding→resolution history survives for the retro.
+  Safe under sequential execution (the default); no lock needed at this scale.
+- **Back-compat:** pre-v6.4.0 review files (no finding IDs, older table shape) remain valid; the
+  retro reads both. **Dogfood caveat:** during *this* feature's own pipeline the skill text hasn't
+  changed yet, so the lead emulates D1 by hand (writing+committing each phase's review file under
+  the convention) until the task that edits the skill lands — implementing agents must not assume
+  the skill auto-writes the file before that task.
+
+### D2 — Retro reads committed report + activation jsonl (#70, G2)
+
+`post-merge-retrospective`:
+- Step 2 (score findings): derive the report path by the **same deterministic rule as D1** (drop
+  `.md`; design → `+-review.md`, plan → `+-plan-review.md`) and read the committed
+  `…-design-review.md` / `…-plan-review.md` report(s). Use each finding's stable ID; read its
+  optional `Resolution` column as a scoring hint, falling back to downstream evidence (code-review
+  threads, CI) when blank. If the report is absent (ad-hoc PR or pre-mandate branch — most
+  pre-v6.4.0 features have no committed review), state "no committed review report; reconstructed
+  from revision history" — the *current* behavior becomes the explicit fallback, not the default.
+- Step 5 (score activations): **primary source = `.claude/autodev-state/in-progress.jsonl`**
+  (written by `record-activity` in any repo). Read phase from the `args` field of **`ev:"skill"`**
+  entries (the lead's `Skill` invocation carries `args:"--phase=design|plan …"`); the
+  Agent-dispatched reviewer subagent is a separate `ev:"agent"` record without a phase and is
+  ignored for phase attribution. If the jsonl is absent → emit "activation log unavailable" rows,
+  **never** "script does not exist".
+- **Three edit sites, same change (adversarial review D2):** Step 5 process text **and** the
+  output-format template (`## Missed skill activations`, SKILL.md:99, currently "Pull from
+  `tests/skill-activation-audit.sh`") **and** the `**Reads:**` integration bullet must all demote
+  the kit-local script to "(kit-dev convenience; absent in consumer repos)". Fixing only Step 5
+  would leave the broken instruction re-embedded in every future retro's format section.
+  **Scalpel precision (cycle-2 minor):** under `**Reads:**`, the `.claude/autodev-state/in-progress.jsonl`
+  line is correct and stays; demote only the adjacent `tests/skill-activation-audit.sh` line.
+- **Wire the Step-1e accountability token (cycle-2 N3):** so the `Doc-reconciliation:` PR-body line
+  is not unconsumed prose, retro Step 5 (Missed skill activations) gains one row — when the merged
+  PR's diff touched docs/examples, record `finishing Step 1e` as fired iff a `Doc-reconciliation:`
+  line is present in the PR body, else `unverified`. This reuses the existing missed-activation
+  table (no new retro section) and gives the token a real consumer, making "the retro can see it"
+  true rather than aspirational. (The token's *primary* role remains human + pr-monitoring
+  accountability; the retro row is the durable backstop.)
+
+### D3 — Pre-PR doc-reconciliation gate (#71 + #72a, G3)
+
+`finishing-a-development-branch` new **Step 1e: Doc-Reconciliation Check** (after 1d Scope
+Completeness, before Step 2). **Trigger (narrowed, adversarial review D6):** fires only when the
+PR's diff commits a **design doc, README/reference doc, or example artifact** — skip entirely for
+code-only / test-only diffs, so it's rare and cheap. The agent verifies, for those committed
+docs/examples:
+- **(a) Scope (forward-ref, #71):** every behavior/identifier described is either in *this PR's*
+  manifest scope, OR explicitly labeled `Planned (PR #N)` / `Planned — later PR`. Unlabeled
+  forward references = finding → label them or move the prose to the later PR.
+- **(b) Identifier drift (#72):** concrete identifiers in the design doc / examples — config keys,
+  flags, env vars, command invocations, DDL/code snippets, format strings — match the identifiers
+  the code on this branch actually uses (and the repo's naming convention). Mismatch = finding →
+  reconcile the doc to the built code.
+- Checklist gate (agent reads the diff + greps identifiers), **not** an automated scanner (honors
+  the user's LIGHT choice for #71/#72). On a finding in autonomous mode: fix the doc in-branch
+  before PR (in-scope doc edit, no manifest change). Distinct from scope-lock's
+  assumption-backport (disproved assumptions) — this is routine accuracy reconciliation.
+- **Accountability token (anti-trap, adversarial review D6):** the agent MUST emit a one-line
+  `Doc-reconciliation: clean` or `Doc-reconciliation: N item(s) fixed — <summary>` into the PR
+  body. This converts a judgment step that could silently self-pass into a visible record
+  pr-monitoring, the human reviewer, and the retro (via D2's new missed-activation row) can see —
+  without a script.
+- **Second edit site — Autonomous Mode list (cycle-2 N2):** Step 1e added to the skill body alone
+  would never fire in autonomous runs, because `finishing-a-development-branch`'s Autonomous Mode
+  section (its numbered list, currently naming Step 1d at item 2) is the actual control flow. D3
+  therefore edits **two** places: the `### Step 1e` body section **and** a new bullet in the
+  Autonomous Mode list — "Run Step 1e (Doc-Reconciliation Check) — conditional on the diff
+  containing a design/reference doc or example artifact" — inserted after the Step 1d item, before
+  PR creation.
+- **Trigger precision (cycle-2 minor):** "design doc, reference/standards doc, or example
+  artifact that describes the feature's behavior". A doc with no corresponding design/plan in
+  `docs/plans/` (e.g. a standalone library README) has nothing to cross-check and trivially passes
+  `clean` — cheap no-op, not a false negative.
+
+### D4 — Plan-phase naming-convention checklist row (#72b, G4)
+
+`adversarial-design-review` plan-phase bug-class checklist gains one row:
+**Identifier / naming-convention match** — "config keys, flags, env vars, and command/code
+examples in the plan match the repo's established naming convention and the identifiers the code
+will actually use (grep the repo for the convention; a plan showing `snake_case` keys where the
+codebase uses `camelCase` = finding). **Distinct from `Config-validation schema rules`** (which
+checks tool-enforced schema invariants); this row checks human naming-convention consistency."
+Catches the drift in D3(b) **before a line of code is written**, cheaper than reconciling after.
+
+## Global Design Guidance
+
+No `docs/design-guidance.md` in this repo (checked). The kit's durable guidance lives in the
+skills themselves. Relevant inherited principles: skills must stay tight (user constraint:
+"not too long or onerous"); no circular logic / phantom dependencies (#69 *is* one — fixing it
+reduces circularity); reuse existing machinery over adding new (hooks, report format, audit
+script all pre-exist).
+
+## Security Review
+
+Low surface. All changes are skill-markdown instruction edits + one committed-report file path.
+- The committed adversarial report lives in `docs/plans/` (already-committed-artifact territory);
+  no secrets — it summarizes design findings. Reviewer must not paste secrets into findings (same
+  discipline as existing design docs).
+- Reading `.claude/autodev-state/in-progress.jsonl`: local file, no network, no PII beyond skill
+  names + truncated args (hook already truncates args to 80 chars). No new exposure.
+- No auth/authz, no external calls, no new dependencies.
+
+## Infrastructure Impact
+
+None at runtime. No build/deploy/k8s/migration changes. The only "infra" touchpoint: v6.4.0
+release bumps the 3 version manifests (`.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`,
+`.cursor-plugin/plugin.json`) → `release-tag.yml` auto-tags on push to main. Standard kit release
+path, unchanged.
+
+## Multi-Component Validation
+
+The cross-component boundary here is **skill → hook → state-file → retro**:
+- D2 depends on `record-activity` writing `in-progress.jsonl`. **Verified live**: this session's
+  own `brainstorming` invocation produced `{"ts":"2026-06-03T16:31:07Z","ev":"skill",
+  "sk":"autodev:brainstorming"}` in the kit repo's state file, and the hook is plugin-level
+  (`hooks/hooks.json:53`) so it fires in consumer repos too.
+- D1↔D2 contract: the report path written by adversarial-design-review (D1) is the exact path the
+  retro reads (D2). Plan must keep these literally identical (one source constant in prose).
+- `tests/skill-cross-refs.sh` and `tests/skill-content-grep.sh` are the kit's own CI gates over
+  skill markdown — all skill edits must keep cross-references resolvable and host-tokens inside
+  `<host:>` blocks. The plan includes running both before PR.
+- **Dogfood (with caveat, adversarial review D3):** this feature runs through the pipeline, so the
+  *practice* of committing the review report is exercised on its own design+plan reviews. But the
+  skill text edits don't take effect until their task lands — so for this feature the lead
+  **manually** writes+commits each `…-design-review.md` / `…-plan-review.md` (already done for the
+  design phase) rather than the skill doing it automatically. The skill-automated path is first
+  exercised by the *next* feature after v6.4.0.
+- **CI skill gates:** `tests/skill-cross-refs.sh` + `tests/skill-content-grep.sh` (the kit's own
+  markdown gates) run as a plan verification task before PR, so new step/path references resolve
+  and host-tokens stay inside `<host:>` blocks.
+
+## Assumptions
+
+- **A1:** `record-activity` fires in consumer repos (plugin-level PostToolUse hook). *Evidence:*
+  `hooks/hooks.json:53` + live entry this session. **Load-bearing for D2.**
+- **A2:** Skill-invoked gates are what the retro needs to score; gates invoked as non-Skill
+  sub-steps (rare) not appearing in the jsonl is acceptable (graceful-degrade covers it). Phase
+  attribution comes only from `ev:"skill"` entries' `args` (the lead's `Skill` call); the
+  Agent-dispatched reviewer subagent's `ev:"agent"` record has no phase and is ignored for it.
+- **A3:** The adversarial Dispatch subagent can return report text the lead commits; the lead
+  (not the subagent) owns the git write. *Matches existing Dispatch pattern.*
+- **A4:** Writing one report file per phase per feature (updated in place across revision cycles;
+  see D1) is acceptable repo noise — same order as the design/plan docs already committed.
+- **A5:** A checklist-style Step 1e (human/agent judgment over the diff) catches the doc drift
+  classes without a scanner. *If false* (agent skips it), the human reviewer remains the backstop —
+  same as today, so no regression.
+
+## Rollback
+
+Change class: skill-content + plugin version bump (release-affecting). Rollback = revert the merge
+commit + re-tag prior version. No data/migration/runtime state to unwind. The committed-report
+path is additive; reverting simply stops writing it (retro's graceful-degrade handles its absence).
+The plan carries per-task rollback notes for the version-bump task.
+
+## Self-challenge (top doubts surfaced)
+
+1. **Is D1 adding bloat to an already-335-line skill?** Net +~18 lines to adversarial-design-review,
+   but it makes an existing *fictional* contract real and removes the retro's reconstruction burden.
+   The report *format* already exists — we add a path + IDs + a Resolution field, not a new section.
+2. **Could D3's Step 1e become a rubber-stamp the agent skips?** Possibly — it's judgment, not a
+   script. Mitigation: it's gated in autonomous mode (like 1d) and scoped to *only fire when docs/
+   examples are in the diff*, so it's cheap and skippable-only-when-irrelevant. The plan-phase row
+   (D4) is the earlier, cheaper catch; 1e is the safety net.
+3. **Does pointing retro at `in-progress.jsonl` over-trust a best-effort hook?** The hook is
+   best-effort (jq-absent / no-stdin → no-op). D2 degrades gracefully on absence, so worst case is
+   "activation log unavailable" — strictly better than today's "script does not exist".
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync-plan-review.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-plan-review.md
new file mode 100644
index 0000000..e055653
--- /dev/null
+++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-plan-review.md
@@ -0,0 +1,50 @@
+# Pipeline Evidence + Doc-Sync Hardening — Plan-Phase Adversarial Review
+
+**Phase:** plan
+**Artifact:** `docs/plans/2026-06-03-pipeline-evidence-doc-sync.md`
+**Status:** PASS (zero Critical; both Important resolved before execution)
+
+## Findings
+
+| id | sev | class | loc | issue | resolution |
+|---|---|---|---|---|---|
+| P1 | Important | Verification-class / test design | Task 1 | Two assertions were pre-green at test-creation (`has "$ADR" "commit"` matched ambient "committed" prose; `has "$RETRO" "in-progress.jsonl"` matched the path already present at retro:52/159) → weak RED state. | Fixed in plan: assert the specific new mandate `"Write AND commit the report"` (#69) and the **primary**-source promotion `primary source.*in-progress\.jsonl` (#70). Both are RED before Tasks 2/4, GREEN after. |
+| P2 | Important | CI wiring | Task 6 | `tests/skill-cross-refs.sh` was run locally only; plan didn't add it to CI though the workflow file is already being edited. | Fixed in plan: Task 6 now adds `skill-cross-refs.sh` as a CI step + path filter, with a guard against importing any unrelated pre-existing failure. Verified green on the base tree (`EXIT=0`) so wiring is safe. |
+| P3 | Minor | Test template attribution | Task 1 | Prose said "mirroring `skill-content-grep.sh`" but the code's `pass()/fail()`+counter idiom matches `hook-contracts.sh`. | Fixed: prose now references `hook-contracts.sh`. |
+| P4 | Minor | Integration proof (D1↔D2) | Task 1 | No assertion guarded the load-bearing D1↔D2 path identity against future drift. | Fixed: added test assertion `same deterministic rule\|-plan-review\.md` against the retro. |
+| P5 | Minor | Decomposition | Tasks 2&3 | Two commits to the same skill file. | Accepted: TDD slice-verification discipline; sequential (no collision). No change. |
+| P6 | Minor | Format ripple / bloat | Task 2 | Converting the three Findings sections to a merged table would ripple into PASS/FAIL semantics + Dispatch output blocks. | Fixed (simpler than recommended): keep the three `**Findings (sev):**` sections unchanged, add only an ID prefix + optional inline `Resolution` — zero ripple, less change. |
+
+## Bug-Class Scan Transcript
+
+| Class | Result | Note |
+|---|---|---|
+| Project-guidance conflicts | Clean | No guidance file; net add ~+20 ADR / ~+14 retro / ~+30 finishing — within the user's "not onerous" tolerance. |
+| Assumptions under attack | Clean | A1 (hook fires in consumer repos) live-confirmed; A3 (lead commits subagent text) matches Dispatch pattern. |
+| Repo-precedent conflicts | Clean | Existing `<stem>-design-review.md`/`-plan-review.md` naming adopted; test idiom aligned to `hook-contracts.sh` (P3). |
+| Artifact-class precedent | Clean | 2 prior committed review files surveyed; back-compat for old no-ID format. |
+| YAGNI violations | Clean | No new skills/scripts/scanner; `Resolution` optional with a wired consumer. |
+| Missing failure modes | Clean | Absent jsonl → "activation log unavailable"; absent report → "reconstructed from revision history". |
+| Security / privacy | Clean | Report = design findings; jsonl args truncated; no PII/external calls. |
+| Infrastructure impact | Clean | Version bump → existing `release-tag.yml` auto-tag path. |
+| Multi-component validation | Clean | D1↔D2 path contract now test-guarded (P4); hook verified live. |
+| Rollback story | Clean | Task 7 rollback note + whole-PR rollback section; additive change. |
+| Simpler alternative | Clean | Scanner rejected per LIGHT choice; token (not script) for Step 1e. |
+| User-intent drift | Clean | Exactly the 4 approved issues at LIGHT scope; no creep. |
+| Existence / runtime-validity | Clean | All line refs verified by the reviewer (retro:99/156/159-160, finishing autonomous list:23, awk anchor `### Step 1: Verify Tests`:76); `bump-version.sh`/`version-check.sh`/`skill-content-grep.sh`/`skill-cross-refs.sh` all exist + match invocation syntax. |
+| Over/under-decomposition | Clean | 7 tasks for 3 skill edits + test + bump — appropriate; each has a class-matched verify. |
+| Verification-class mismatch | Resolved (P1/P2) | Test assertions tightened; CI cross-refs wired. |
+| Auth/authz chain | Clean | No auth surfaces. |
+| Hidden serial dependencies | Clean | Tasks 2&3 same file but sequential w/ commits between. |
+| Missing rollback wiring | Clean | Markdown-only; revert + re-tag is the correct class. |
+| Missing integration proof | Resolved (P4) | D1↔D2 path identity now asserted. |
+| Infra verification mismatch | Clean | No infra; self-contained bump. |
+| Plugin-loader runtime layout | Clean | N/A (markdown only). |
+| Config-validation schema rules | Clean | N/A (no wfctl config). |
+
+## Options the author may not have considered
+1. Tighten the `commit` assertion to the verbatim mandate — **taken** (P1).
+2. Wire `skill-cross-refs.sh` into CI while the YAML is open — **taken** (P2).
+3. Collapse Tasks 2+3 into one commit — **declined**, TDD slice discipline retained (P5).
+
+**Verdict reasoning:** PASS. Architecture, sequencing, scope sound; the failing test does not enter CI until Task 6 (by which point Tasks 2–5 made it green), so no mid-PR red. Both Important findings were test-quality (weak RED + a free CI-wiring win), resolved in the plan before execution exactly per the reviewer's recommendations; the four Minors are addressed or accepted with reason. No new skill, no scanner, no net bloat. Proceed to alignment-check.
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md
new file mode 100644
index 0000000..9bfd14f
--- /dev/null
+++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md
@@ -0,0 +1,311 @@
+# Pipeline Evidence + Doc-Sync Hardening Implementation Plan
+
+> **For the implementing agent:** REQUIRED SUB-SKILL: Use autodev:executing-plans to implement this plan task-by-task.
+
+**Goal:** Fix autodev issues #69/#70/#71/#72 by systematizing the committed adversarial-review report, pointing the retro at the activation log the hook already writes, and adding one pre-PR doc-reconciliation gate + one plan-phase naming-convention check — with zero new skills/scripts and no heuristic scanner.
+
+**Architecture:** Pure skill-markdown edits to 3 skills (`adversarial-design-review`, `post-merge-retrospective`, `finishing-a-development-branch`), guarded by one new grep-assertion regression test wired into the existing `skill-content-check.yml` CI, plus the standard 3-manifest v6.4.0 version bump. Reuses the existing `record-activity` PostToolUse hook (writes `.claude/autodev-state/in-progress.jsonl` in any repo) and the existing `<stem>-design-review.md`/`<stem>-plan-review.md` report convention.
+
+**Tech Stack:** Bash (tests + hooks), Markdown (skills), GitHub Actions (CI), the kit's `scripts/bump-version.sh` + `tests/version-check.sh`.
+
+**Base branch:** main
+
+---
+
+## Scope Manifest
+
+**PR Count:** 1
+**Tasks:** 7
+**Estimated Lines of Change:** ~300 (skill markdown + 1 test + version bump)
+
+**Out of scope:**
+- Heuristic doc-content scanner that diffs every prose identifier against the manifest (#71 primary rec — rejected per user LIGHT choice; would be false-positive-prone bloat).
+- New skills, new standalone scripts, or per-gate manual activation-append calls (the `record-activity` hook already covers Skill-invoked gates).
+- Migrating the 2 pre-existing `2026-05-31-session-owned-lock-claims-*-review.md` files to the new finding-ID format (back-compat: retro reads both old + new shapes).
+- Retro restructure beyond the two evidence sources (committed report + jsonl) and the one Step-1e missed-activation row.
+- Changes to `tests/skill-activation-audit.sh` itself (it stays as a kit-dev convenience; only its *references in the retro* are demoted).
+
+**PR Grouping:**
+
+| PR # | Title | Tasks | Branch |
+|------|-------|-------|--------|
+| 1 | Pipeline evidence + doc-sync hardening (#69 #70 #71 #72) → v6.4.0 | Task 1, Task 2, Task 3, Task 4, Task 5, Task 6, Task 7 | feat/pipeline-evidence-doc-sync |
+
+**Status:** Locked 2026-06-03T17:10:43Z
+
+---
+
+### Task 1: Failing regression test for all four contracts
+
+**Change class:** Hook/trigger-adjacent (grep-assertion test). Verification: the test itself (RED now, GREEN after Tasks 2–5).
+
+**Files:**
+- Create: `tests/pipeline-evidence-doc-sync.sh`
+
+**Step 1: Write the failing test.** Create a bash test (mirroring the `pass()/fail()` + counter style of `tests/hook-contracts.sh`, non-zero exit on any fail) with these assertions against the repo's **skill** files (greps target `skills/…`, never `docs/plans/…`, so the plan's own design docs can't false-match). The assertions are written to be genuinely RED before Tasks 2–5 and GREEN after (plan-review P1: avoid substring matches that pass against pre-existing prose like "committed"):
+
+```bash
+#!/usr/bin/env bash
+# tests/pipeline-evidence-doc-sync.sh
+# Regression guard for issues #69/#70/#71/#72 (v6.4.0). Asserts the skill
+# contracts these issues fixed remain present, so they cannot silently regress.
+set -uo pipefail
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+ADR="$ROOT/skills/adversarial-design-review/SKILL.md"
+RETRO="$ROOT/skills/post-merge-retrospective/SKILL.md"
+FIN="$ROOT/skills/finishing-a-development-branch/SKILL.md"
+fail=0
+pass(){ printf 'PASS: %s\n' "$1"; }
+bad(){ printf 'FAIL: %s\n' "$1" >&2; fail=$((fail+1)); }
+has(){ grep -qiF "$2" "$1"; }       # literal substring
+hasE(){ grep -qiE "$2" "$1"; }      # regex
+
+# --- #69 (D1): adversarial-design-review mandates committing the report ---
+hasE "$ADR" '(-design-review\.md|-plan-review\.md)' \
+  && pass "#69 ADR cites the <stem>-design-review.md/-plan-review.md convention" \
+  || bad  "#69 ADR missing committed-report convention path"
+# P1: assert the SPECIFIC new mandate wording, not the ambient word "commit"
+has "$ADR" "Write AND commit the report" \
+  && pass "#69 ADR mandates writing+committing the report" \
+  || bad  "#69 ADR does not mandate writing+committing the report"
+hasE "$ADR" 'stable finding ID|stable .*ID' \
+  && pass "#69 ADR defines stable finding IDs" \
+  || bad  "#69 ADR missing stable finding IDs"
+# P4: guard the load-bearing D1<->D2 path contract — retro must cite the same derivation
+hasE "$RETRO" 'same deterministic rule|-plan-review\.md' \
+  && pass "#69/#70 retro derives the report path by the same rule (D1<->D2 contract)" \
+  || bad  "#69/#70 retro missing the shared path-derivation rule"
+
+# --- #70 (D2): retro reads the jsonl as PRIMARY; script demoted, NOT a hard dep ---
+# P1: assert the jsonl is the PRIMARY source (only true after Task 4), not merely mentioned
+hasE "$RETRO" 'primary source.*in-progress\.jsonl|in-progress\.jsonl.*primary' \
+  && pass "#70 retro makes in-progress.jsonl the primary activation source" \
+  || bad  "#70 retro does not promote in-progress.jsonl to primary"
+# The format template must NOT instruct 'Pull from tests/skill-activation-audit.sh'
+grep -qiE 'Pull from .*skill-activation-audit\.sh' "$RETRO" \
+  && bad  "#70 retro STILL instructs 'Pull from tests/skill-activation-audit.sh' (line ~99 not demoted)" \
+  || pass "#70 retro format template no longer hard-depends on the kit-local script"
+has "$RETRO" "kit-dev" \
+  && pass "#70 retro marks the audit script kit-dev-only" \
+  || bad  "#70 retro does not demote the audit script to kit-dev-only"
+
+# --- #71/#72 (D3): finishing has Step 1e in BOTH body and autonomous list ---
+hasE "$FIN" 'Step 1e' \
+  && pass "#71/#72 finishing has Step 1e body" \
+  || bad  "#71/#72 finishing missing Step 1e body"
+has "$FIN" "Doc-reconciliation" \
+  && pass "#71/#72 finishing emits Doc-reconciliation token" \
+  || bad  "#71/#72 finishing missing Doc-reconciliation accountability token"
+# Step 1e must be referenced in the Autonomous Mode numbered list region (top of file, before '### Step 1:')
+auto_region="$(awk '/^## Autonomous Mode/{f=1} /^### Step 1: Verify Tests/{f=0} f' "$FIN")"
+printf '%s' "$auto_region" | grep -qiE 'Step 1e' \
+  && pass "#71/#72 Step 1e wired into Autonomous Mode list" \
+  || bad  "#71/#72 Step 1e NOT in Autonomous Mode list (would never fire autonomously)"
+
+# --- #72 (D4): plan-phase naming-convention checklist row ---
+hasE "$ADR" 'naming.convention match|Identifier / naming' \
+  && pass "#72 ADR plan-phase has Identifier/naming-convention row" \
+  || bad  "#72 ADR plan-phase missing naming-convention row"
+
+echo ""; echo "Results: $fail failure(s)"; [ "$fail" -eq 0 ]
+```
+
+**Step 2: Run, verify it FAILS.** Run: `bash tests/pipeline-evidence-doc-sync.sh`
+Expected: multiple `FAIL:` lines (skills not yet edited), final `Results: N failure(s)`, exit 1.
+
+**Step 3: Commit the failing test.**
+```bash
+chmod +x tests/pipeline-evidence-doc-sync.sh
+git add tests/pipeline-evidence-doc-sync.sh
+git commit -m "test: regression guard for pipeline evidence + doc-sync (#69 #70 #71 #72) [red]"
+```
+
+---
+
+### Task 2: D1 — adversarial-design-review mandates a committed findings report
+
+**Change class:** Documentation/skill-content. Verification: Task-1 test #69 assertions pass + `skill-content-grep.sh` + `skill-cross-refs.sh` clean.
+
+**Files:**
+- Modify: `skills/adversarial-design-review/SKILL.md` (Process step 7; Report format header; "Dispatching the reviewer agent" output instruction; Integration "Writes" — add if absent)
+
+**Step 1:** In **Process step 7** ("Write the report"), replace the inline-only instruction with the mandate to persist+commit, stating the **deterministic path rule** verbatim:
+> 7. **Write AND commit the report.** Derive the path from the artifact filename: drop `.md`, then for `--phase=design` append `-review.md` (e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`); for `--phase=plan` append `-plan-review.md` (e.g. `2026-06-03-…-doc-sync.md` → `2026-06-03-…-doc-sync-plan-review.md`). This matches the existing `docs/plans/2026-05-31-session-owned-lock-claims-design-review.md` convention. The **lead** writes the report text the reviewer produced to that path and commits it alongside the artifact (the subagent has no git authority). Re-runs update the same single per-phase file (append a `## Cycle N` section across cycles); safe under sequential execution.
+
+**Step 2:** In the **Report format**, keep the existing three `**Findings (Critical|Important|Minor):**` sections **unchanged in structure** (so the PASS/FAIL semantics and Dispatch "Required output" blocks that key off "Critical findings"/"Important findings" keep working verbatim — plan-review P6: no table conversion, no ripple). Add only: each finding bullet is **prefixed with a stable finding ID** and may carry an optional inline resolution. Update the format example lines to:
+> **Findings (Critical):**
+> - `D1` [class] [section/line]: <description>. Recommendation: <concrete fix>. _Resolution: <optional — filled once at end-state: commit SHA / `accepted — reason` / `false-positive`; omit if open>._
+>
+> Add a one-line note under the format: "Design-phase finding IDs are `D1, D2, …`; plan-phase `P1, P2, …`. IDs are the durable anchor `post-merge-retrospective` correlates against; the optional `Resolution` is a scoring hint (retro falls back to downstream evidence when omitted)." The literal phrase **"stable finding ID"** must appear (the Task-1 test asserts it).
+
+(Keep the `Bug-class scan transcript`, `Options`, and `Verdict reasoning` sections, and the PASS/FAIL semantics section, unchanged.)
+
+**Step 3:** In **"Dispatching the reviewer agent"** output instructions, add one line: the reviewer returns the report text; **the lead commits it to the derived path** (so the subagent isn't asked to do git).
+
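+**Step 4:** Add to **Integration** a `**Writes:**` line: `docs/plans/<artifact-stem>-review.md` for the design phase (for a `…-design.md` artifact this yields `…-design-review.md`) / `docs/plans/<artifact-stem>-plan-review.md` for the plan phase (committed report) — the same paths the Step 1 rule derives.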
+
+**Step 5: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh`
+Expected: the three `#69` assertions now `PASS:` (overall still failing until later tasks).
+
+**Step 6: Commit.**
+```bash
+git add skills/adversarial-design-review/SKILL.md
+git commit -m "feat(adversarial-review): mandate committed findings report w/ stable IDs (#69)"
+```
+
+---
+
+### Task 3: D4 — plan-phase Identifier/naming-convention checklist row
+
+**Change class:** Documentation/skill-content. Verification: Task-1 test `#72` ADR assertion passes + content-grep clean.
+
+**Files:**
+- Modify: `skills/adversarial-design-review/SKILL.md` ("Bug-class checklist — plan phase" table)
+
+**Step 1:** Add one row to the plan-phase table (after `Config-validation schema rules`):
+> \| **Identifier / naming-convention match** \| Config keys, flags, env vars, and command/code examples in the plan match the repo's established naming convention and the identifiers the code will actually use (grep the repo for the convention; a plan showing `snake_case` keys where the codebase uses `camelCase` = finding). **Distinct from `Config-validation schema rules`**, which checks tool-enforced schema invariants — this row checks human naming-convention consistency. Catches design-vs-code drift before code is written. \|
+
+**Step 2: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh`
+Expected: `#72 ADR plan-phase has Identifier/naming-convention row` → `PASS:`.
+
+**Step 3: Commit.**
+```bash
+git add skills/adversarial-design-review/SKILL.md
+git commit -m "feat(adversarial-review): plan-phase naming-convention checklist row (#72)"
+```
+
+---
+
+### Task 4: D2 — retro reads committed report + activation jsonl (3 edit sites + token consumer)
+
+**Change class:** Documentation/skill-content. Verification: Task-1 test `#70` assertions pass + content-grep clean. **This is the highest-care task — three edit sites + scalpel precision.**
+
+**Files:**
+- Modify: `skills/post-merge-retrospective/SKILL.md` (Step 2; Step 5; the `## Missed skill activations` format template ~line 99; the `**Reads:**` integration bullets ~line 159–160; add the Step-1e missed-activation row)
+
+**Step 1 — Step 2 (score findings):** state that the report path is derived by the **same deterministic rule as D1** (drop `.md`; design→`+-review.md`, plan→`+-plan-review.md`); read each finding's stable ID; read the optional inline `Resolution` field (or the `resolution` table column, where a report uses one) as a scoring hint, **falling back to downstream evidence (code-review threads, CI) when it is blank/omitted or when the report is an old no-ID format**. If the report is absent → "no committed review report; reconstructed from revision history" (the explicit fallback; note most pre-v6.4.0 features have none).
+
+**Step 2 — Step 5 (score activations):** make `.claude/autodev-state/in-progress.jsonl` the **primary** source (written by `record-activity` in any repo); read phase from the `args` field of **`ev:"skill"`** entries (the Agent-dispatched reviewer's `ev:"agent"` record has no phase — ignore it for phase). Demote `tests/skill-activation-audit.sh` to "(kit-dev convenience; absent in consumer repos)". If the jsonl is absent → "activation log unavailable" rows, **never** "script does not exist".
+
+**Step 3 — format template (`## Missed skill activations`, ~line 99):** change `Pull from \`tests/skill-activation-audit.sh\`.` to read from `.claude/autodev-state/in-progress.jsonl` (the audit script noted as kit-dev-only). Add one row to that table's example: `| finishing Step 1e (doc-reconciliation) | yes/unverified | only when the diff touched docs/examples |`.
+
+**Step 4 — Step-1e token consumer (D2/N3):** add a sentence to Step 5 (or the Missed-activations section): "When the merged PR's diff touched docs/examples, record `finishing Step 1e` as fired iff a `Doc-reconciliation:` line is present in the PR body, else `unverified`. If the diff touched no docs/examples, record no row (Step 1e legitimately did not fire)." *(precondition resolves cycle-3 M1.)*
+
+**Step 5 — Reads bullets (~line 159–160, SCALPEL):** keep the `.claude/autodev-state/in-progress.jsonl (if present)` line as-is; on the `tests/skill-activation-audit.sh (this repo)` line, change to `tests/skill-activation-audit.sh (kit-dev convenience; absent in consumer repos)`. Also update the line-156 `docs/plans/ (design, plan, adversarial-review reports)` to note reports are now committed by `adversarial-design-review` per the deterministic path.
+
+**Step 6: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh`
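+Expected: all three `#70` assertions `PASS:` (incl. the negative assertion that "Pull from …skill-activation-audit.sh" is gone), and the `#69/#70` path-contract assertion (plan-review P4, which greps the retro) is `PASS:` as well.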
+
+**Step 7: Commit.**
+```bash
+git add skills/post-merge-retrospective/SKILL.md
+git commit -m "feat(retro): read committed report + activation jsonl; demote kit-local script (#70)"
+```
+
+---
+
+### Task 5: D3 — finishing-a-development-branch Step 1e (doc-reconciliation gate)
+
+**Change class:** Documentation/skill-content. Verification: Task-1 test `#71/#72` finishing assertions pass + content-grep clean. **Two edit sites: the Step body AND the Autonomous Mode list.**
+
+**Files:**
+- Modify: `skills/finishing-a-development-branch/SKILL.md` (Autonomous Mode numbered list ~line 23; new `### Step 1e` after the Step 1d section ~line 157; the `Continue to Step 1d`/`continue to Step 2` transition pointers)
+
+**Step 1 — Autonomous Mode list (~line 23):** after the existing "Run Step 1d (Scope Completeness Check)" item, insert: "Run Step 1e (Doc-Reconciliation Check) — conditional on the diff containing a design/reference doc or example artifact." (Renumber the following list items.)
+
+**Step 2 — new `### Step 1e: Doc-Reconciliation Check`** (after the Step 1d section, before `### Step 2`):
+> **Trigger:** the PR's diff commits a design doc, reference/standards doc, or example artifact that describes the feature's behavior. Skip for code-only / test-only diffs. (A doc with no corresponding `docs/plans/` design/plan trivially passes `clean`.)
+>
+> For each such committed doc/example, verify:
+> - **(a) Scope (forward-ref, #71):** every behavior/identifier it describes is in *this PR's* manifest scope, OR explicitly labeled `Planned (PR #N)` / `Planned — later PR`. Unlabeled forward references = finding → label them or move the prose to the later PR.
+> - **(b) Identifier drift (#72):** concrete identifiers (config keys, flags, env vars, command invocations, DDL/code snippets, format strings) match the identifiers the code on this branch actually uses + the repo's naming convention. Mismatch = finding → reconcile the doc to the built code.
+>
+> This is a checklist gate (read the diff, grep identifiers), **not** an automated scanner. On a finding in autonomous mode, fix the doc in-branch before PR (in-scope doc edit, no manifest change). Distinct from `scope-lock`'s assumption-backport (which is for *disproved assumptions*) — this is routine accuracy reconciliation.
+>
+> **Accountability token:** emit one line into the PR body — `Doc-reconciliation: clean` or `Doc-reconciliation: N item(s) fixed — <summary>` — so pr-monitoring, the human reviewer, and `post-merge-retrospective` (Step 5 missed-activation row) can confirm the gate ran without a script.
+
+**Step 3 — transition pointers:** ensure the Step 1d section ends pointing to Step 1e, and Step 1e ends pointing to Step 2 (Determine Base Branch). Update the line-157 "Do not proceed past Step 1d …" wording only if it implies 1d is the last sub-step.
+
+**Step 4: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh`
+Expected: all `#71/#72` finishing assertions `PASS:` (incl. the Autonomous-Mode-region check).
+
+**Step 5: Commit.**
+```bash
+git add skills/finishing-a-development-branch/SKILL.md
+git commit -m "feat(finishing): Step 1e doc-reconciliation gate, body + autonomous list (#71 #72)"
+```
+
+---
+
+### Task 6: Wire the regression test into CI + full local verification
+
+**Change class:** Hook/trigger (CI) + verification. Verification: the new test GREEN; `skill-content-grep.sh` clean; `skill-cross-refs.sh` clean; YAML valid.
+
+**Files:**
+- Modify: `.github/workflows/skill-content-check.yml` (add a step running the new test + add it to the `paths` filters)
+
+**Step 1:** In `skill-content-check.yml`, add `tests/pipeline-evidence-doc-sync.sh` **and** `tests/skill-cross-refs.sh` to both `push.paths` and `pull_request.paths`, and add two steps after the existing content-grep step (plan-review P2: `skill-cross-refs.sh` already exists but was local-only — wire it into CI for free while the workflow is open):
+```yaml
+      - name: Pipeline evidence + doc-sync contracts
+        run: bash tests/pipeline-evidence-doc-sync.sh
+      - name: Skill cross-references resolve
+        run: bash tests/skill-cross-refs.sh
+```
+*(If `skill-cross-refs.sh` surfaces a pre-existing unresolved reference unrelated to this PR, do not expand scope to fix unrelated skills — instead keep it local-only for this PR and note the pre-existing failure in the PR body. Only wire it into CI if it passes clean on the current tree.)*
+
+**Step 2: Run the FULL local gate** (all must be green now that Tasks 2–5 landed):
+```bash
+bash tests/pipeline-evidence-doc-sync.sh   # Expected: Results: 0 failure(s), exit 0
+bash tests/skill-content-grep.sh           # Expected: exit 0 (no host-token leaks in edited skills)
+bash tests/skill-cross-refs.sh             # Expected: exit 0 (new step/path references resolve)
+```
+Expected: all three exit 0. *(Resolves design D10.)* If `skill-content-grep.sh` flags a host-token in any edited skill, move that token inside a `<host: …>` block.
+
+**Step 3: Commit.**
+```bash
+git add .github/workflows/skill-content-check.yml
+git commit -m "ci: run pipeline-evidence-doc-sync contract test on skill changes"
+```
+
+---
+
+### Task 7: Version bump → v6.4.0
+
+**Change class:** Version pin (runtime-affecting — release). Verification: `tests/version-check.sh` green (3 manifests agree). **Rollback: revert the merge commit + re-tag the prior version (v6.3.1); no data/migration to unwind.**
+
+**Files:**
+- Modify (via script): `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `.cursor-plugin/plugin.json`
+
+**Step 1:** Run the bump script:
+```bash
+bash scripts/bump-version.sh 6.4.0
+```
+
+**Step 2: Verify the 3 manifests agree.** Run: `bash tests/version-check.sh`
+Expected: exit 0 (all three manifests report `6.4.0`).
+
+**Step 3: Confirm no stray version mismatch.** Run: `grep -rn '"version"' .claude-plugin/plugin.json .claude-plugin/marketplace.json .cursor-plugin/plugin.json`
+Expected: each shows `6.4.0`.
+
+**Step 4: Commit.**
+```bash
+git add .claude-plugin/plugin.json .claude-plugin/marketplace.json .cursor-plugin/plugin.json
+git commit -m "chore(release): bump to v6.4.0 (#69 #70 #71 #72)"
+```
+
+*(Pushing this commit to `main` triggers `release-tag.yml`, which tags `v6.4.0` after `version-check.sh` passes. The GH Release is created manually post-merge per the kit's convention.)*
+
+---
+
+## Verification Summary (whole-PR)
+
+Before PR creation, all green:
+- `bash tests/pipeline-evidence-doc-sync.sh` → `Results: 0 failure(s)`
+- `bash tests/skill-content-grep.sh` → exit 0
+- `bash tests/skill-cross-refs.sh` → exit 0
+- `bash tests/version-check.sh` → exit 0
+- Step 1e self-check on THIS PR: it commits design/plan/review docs → emit `Doc-reconciliation: …` in the PR body (dogfood the new gate).
+
+## Rollback (whole-PR)
+
+All edits are skill-markdown + one test + a version bump. Rollback = `git revert` the squash-merge commit + re-tag `v6.3.1` as latest. The committed-report path is additive (reverting just stops writing it; the retro's graceful-degrade covers absence). No runtime state, migrations, or external resources involved.
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md.scope-lock b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md.scope-lock
new file mode 100644
index 0000000..8dbd142
--- /dev/null
+++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md.scope-lock
@@ -0,0 +1 @@
+6cb6ebd9f6eba987c645f9e707ee85606c10c8649c93f5f84760352ae3c685b2
diff --git a/skills/adversarial-design-review/SKILL.md b/skills/adversarial-design-review/SKILL.md
index 1f6ddc9..98c196e 100644
--- a/skills/adversarial-design-review/SKILL.md
+++ b/skills/adversarial-design-review/SKILL.md
@@ -114,6 +114,7 @@ inherits the design's blast radius) and adds:
 | **Infrastructure verification mismatch** | For infrastructure-affecting changes, does the plan verify render/plan/apply/dry-run, secret wiring, migration order, rollback, and post-deploy health as appropriate? If not, flag it. |
 | **Plugin-loader runtime layout** | Plans that spawn or load an external plugin process must build the binary in a layout the host's discovery code accepts. For wfctl: `$WFCTL_PLUGIN_DIR/<plugin-name>/<plugin-name>` + sibling `plugin.json`. Plans that `go build -o /tmp/single-binary` without the subdir + manifest sidecar will fail at runtime. |
 | **Config-validation schema rules** | Plans that create new config files validated by a schema or CLI tool must satisfy that tool's invariants (e.g., for wfctl: `checkEntryPoints` requires ≥1 entry-point module like `http.server`/`scheduler.modular`/`messaging.broker`, OR a trigger/route/subscription/job/pipeline). Plans omitting required entry-point modules pass `bash -n` but fail schema validation at CI. |
+| **Identifier / naming-convention match** | Config keys, flags, env vars, and command/code examples in the plan match the repo's established naming convention and the identifiers the code will actually use (grep the repo for the convention; a plan showing `snake_case` keys where the codebase uses `camelCase` = finding). **Distinct from `Config-validation schema rules`**, which checks tool-enforced schema invariants — this row checks human naming-convention consistency. Catches design-vs-code drift before code is written. |
 
 ## Process
 
@@ -137,9 +138,7 @@ inherits the design's blast radius) and adds:
 6. **Surface options, not just objections.** For findings, propose a
    concrete fix or alternative. "This design assumes X" → "Alternative: state
    X explicitly, and add a fallback if X is false at runtime."
-7. **Write the report.** Format below. Commit verdict: PASS / FAIL.
-   Use `autodev:condensed-pipeline-writing` for report density unless the
-   user asked for prose.
+7. **Write AND commit the report.** Derive the path from the artifact filename: drop `.md`, then for `--phase=design` append `-review.md` (e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`); for `--phase=plan` append `-plan-review.md` (e.g. `2026-06-03-…-doc-sync.md` → `2026-06-03-…-doc-sync-plan-review.md`). This matches the existing `docs/plans/2026-05-31-session-owned-lock-claims-design-review.md` convention. The **lead** writes the report text the reviewer produced to that path and commits it alongside the artifact (the subagent has no git authority). Re-runs update the same single per-phase file (append a `## Cycle N` section across cycles); safe under sequential execution. Commit to a verdict: PASS / FAIL. Use `autodev:condensed-pipeline-writing` for report density unless the user asked for prose.
 
 ## Report format
 
@@ -151,13 +150,15 @@ inherits the design's blast radius) and adds:
 **Status:** PASS | FAIL
 
 **Findings (Critical):**
-- [class] [section/line]: <description>. Recommendation: <concrete fix>.
+- `D1` [class] [section/line]: <description>. Recommendation: <concrete fix>. _Resolution: <optional — filled once at end-state: commit SHA / `accepted — reason` / `false-positive`; omit if open>._
 
 **Findings (Important):**
-- [class] [section/line]: <description>. Recommendation: <concrete fix>.
+- `D2` [class] [section/line]: <description>. Recommendation: <concrete fix>. _Resolution: <optional>._
 
 **Findings (Minor):**
-- [class] [section/line]: <description>. Recommendation: <concrete fix>.
+- `D3` [class] [section/line]: <description>. Recommendation: <concrete fix>. _Resolution: <optional>._
+
+Design-phase finding IDs are `D1, D2, …`; plan-phase `P1, P2, …`, numbered **sequentially across all findings regardless of severity** (`D1` is the first finding overall, not the first Critical). IDs are the durable anchor `post-merge-retrospective` correlates against; the optional `Resolution` is a scoring hint (retro falls back to downstream evidence when omitted). Each finding has a **stable finding ID** as its first token.
 
 **Bug-class scan transcript:**
 | Class | Result | Note |
@@ -273,6 +274,8 @@ Agent tool (general-purpose, model: balanced):
     Important finding either has a fix recommendation accepted by the
     author or is escalated as an open question. Otherwise FAIL.
 ````
+
+The reviewer returns the report text. **The lead commits it to the derived path** (drop `.md`, append `-review.md` for design or `-plan-review.md` for plan) — the subagent has no git authority.
 </host>
 
 <host: generic-subagent-capable>
@@ -293,6 +296,9 @@ mindset, ≥3 findings or full transcript, no reflexive approval.
   `alignment-check`.
 - Manual — user invokes against any artifact in `docs/plans/`.
 
+**Writes:**
+- `docs/plans/<artifact-stem>-review.md` (design phase; the stem already ends in `-design`, yielding `…-design-review.md`) / `docs/plans/<artifact-stem>-plan-review.md` (plan phase) — committed report.
+
 **Calls:**
 - `brainstorming` — on FAIL during `--phase=design`, for revision.
 - `writing-plans` — on FAIL during `--phase=plan`, for revision.
diff --git a/skills/finishing-a-development-branch/SKILL.md b/skills/finishing-a-development-branch/SKILL.md
index e055713..d97789f 100644
--- a/skills/finishing-a-development-branch/SKILL.md
+++ b/skills/finishing-a-development-branch/SKILL.md
@@ -21,8 +21,9 @@ When running in the autonomous pipeline (invoked from subagent-driven-developmen
 
 1. **Verify tests pass** — same as manual mode, abort if failing
 2. **Run Step 1d (Scope Completeness Check)** — see below. This is a mandatory gate in autonomous mode. The agent MUST NOT silently collapse N planned PRs into 1, nor declare success on a partial scope. If Step 1d surfaces a failure, the autonomous pipeline halts and asks the user.
-3. **Skip option presentation** — go directly to PR creation
-4. **For every PR row in the manifest's PR Grouping table, create one PR.** The manifest is the contract. If the table has 3 rows, the autonomous run produces 3 PRs, each pointing at the branch named in the row. Do NOT collapse rows — collapsing is the exact failure mode `skills/scope-lock/SKILL.md` defends against. Per-PR steps:
+3. **Run Step 1e (Doc-Reconciliation Check)** — conditional on the diff containing a design/reference doc or example artifact. See below.
+4. **Skip option presentation** — go directly to PR creation
+5. **For every PR row in the manifest's PR Grouping table, create one PR.** The manifest is the contract. If the table has 3 rows, the autonomous run produces 3 PRs, each pointing at the branch named in the row. Do NOT collapse rows — collapsing is the exact failure mode `skills/scope-lock/SKILL.md` defends against. Per-PR steps:
    ```bash
    feature_branch="<feature-branch>"
    feature_name="<feature-name>"
@@ -63,8 +64,8 @@ When running in the autonomous pipeline (invoked from subagent-driven-developmen
    EOF
    )"
    ```
-5. **Invoke pr-monitoring** — spawn a background monitor for all PRs created in this session; prefer a single agent covering all PRs to avoid GitHub API rate limits, but one agent per PR is acceptable if the PRs are on unrelated codebases or a previous shared monitor was rate-limited
-6. **Report PR URLs** — output every PR link for the user (one per row in the manifest's PR Grouping table)
+6. **Invoke pr-monitoring** — spawn a background monitor for all PRs created in this session; prefer a single agent covering all PRs to avoid GitHub API rate limits, but one agent per PR is acceptable if the PRs are on unrelated codebases or a previous shared monitor was rate-limited
+7. **Report PR URLs** — output every PR link for the user (one per row in the manifest's PR Grouping table)
 
 **Do NOT:**
 - Present the 4-option menu in autonomous mode
@@ -154,7 +155,21 @@ Action:
 - **PR count mismatch (autonomous mode):** if the manifest expects N PRs but the branch layout produced fewer, the agent must split the branch via `git rebase --onto` per the manifest's grouping table — NOT collapse the manifest. Collapsing N planned PRs into 1 is exactly the failure mode `scope-lock` exists to prevent.
 - **Locked-hash mismatch:** the manifest has been edited after the lock. Surface the diff and stop. The user must either revert the edit or go through the amendment path (`recording-decisions` + re-run alignment-check).
 
-Do not proceed past Step 1d on any failure without explicit user direction. There is no "demo mode" — see the anti-patterns in `skills/scope-lock/SKILL.md`.
+Do not proceed past Step 1d on any failure without explicit user direction. There is no "demo mode" — see the anti-patterns in `skills/scope-lock/SKILL.md`. Continue to Step 1e.
+
+### Step 1e: Doc-Reconciliation Check
+
+**Trigger:** the PR's diff commits a design doc, reference/standards doc, or example artifact that describes the feature's behavior. Skip for code-only / test-only diffs. (A doc with no corresponding `docs/plans/` design/plan trivially passes `clean`.)
+
+For each such committed doc/example, verify:
+- **(a) Scope (forward-ref, #71):** every behavior/identifier it describes is in *this PR's* manifest scope, OR explicitly labeled `Planned (PR #N)` / `Planned — later PR`. Unlabeled forward references = finding → label them or move the prose to the later PR.
+- **(b) Identifier drift (#72):** concrete identifiers (config keys, flags, env vars, command invocations, DDL/code snippets, format strings) match the identifiers the code on this branch actually uses and follow the repo's naming convention. Mismatch = finding → reconcile the doc to the built code.
+
+This is a checklist gate (read the diff, grep identifiers), **not** an automated scanner. On a finding in autonomous mode, fix the doc in-branch before PR (in-scope doc edit, no manifest change). Distinct from `scope-lock`'s assumption-backport (which is for *disproved assumptions*) — this is routine accuracy reconciliation.
+
+**Accountability token:** emit one line into the PR body — `Doc-reconciliation: clean` or `Doc-reconciliation: N item(s) fixed — <summary>` — so pr-monitoring, the human reviewer, and `post-merge-retrospective` (Step 5 missed-activation row) can confirm the gate ran without a script.
+
+Continue to Step 2.
 
 ### Step 2: Determine Base Branch
 
diff --git a/skills/post-merge-retrospective/SKILL.md b/skills/post-merge-retrospective/SKILL.md
index 7162671..39964c8 100644
--- a/skills/post-merge-retrospective/SKILL.md
+++ b/skills/post-merge-retrospective/SKILL.md
@@ -36,6 +36,7 @@ If the PR was opened ad-hoc (no design / plan in `docs/plans/`), this skill exit
    - Any ADRs cited from the design or plan
 
 2. **Score each adversarial-review finding.**
+   Derive the report path by the **same deterministic rule** that `adversarial-design-review` uses when it writes the report: take the artifact filename, drop `.md`, then design → append `-review.md`, plan → append `-plan-review.md` (e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`; `2026-06-03-…-doc-sync.md` → `2026-06-03-…-doc-sync-plan-review.md`). Read the committed `…-design-review.md` / `…-plan-review.md` report(s). For each finding, use its stable ID; read the optional `Resolution` field as a scoring hint, **falling back to downstream evidence (code-review threads, CI) when blank or when the report is an old no-ID format**. If the report is absent → "no committed review report; reconstructed from revision history" (most pre-v6.4.0 features have none).
    For every finding raised in either phase's adversarial-review report, classify it as one of:
    - **Prescient** — the finding called out something that turned out to matter (showed up as a code-review comment, CI failure, follow-up bug fix, or revert).
    - **Resolved upfront** — the finding was addressed during plan revision and prevented an issue downstream (no code-review comment / CI failure traces back to it).
@@ -49,9 +50,11 @@ If the PR was opened ad-hoc (no design / plan in `docs/plans/`), this skill exit
    For each unique CI failure on the branch, ask: was this caught by `verification-before-completion` / `runtime-launch-validation` / something else, or did it slip past every local gate? Slips are gate misses too.
 
 5. **Score skill activations.**
-   Read `.claude/autodev-state/in-progress.jsonl` (if present in the repo's `.claude/` directory) and verify the expected pipeline ran. The canonical chain documented in `skills/using-autodev/SKILL.md` is:
+   **Primary source: `.claude/autodev-state/in-progress.jsonl`** (written by the `record-activity` PostToolUse hook in any repo — not kit-dev-only). Read phase from the `args` field of `ev:"skill"` entries (the lead's `Skill` invocation carries `args:"--phase=design|plan …"`); the Agent-dispatched reviewer subagent is a separate `ev:"agent"` record without a phase and is ignored for phase attribution. If the jsonl is absent → emit "activation log unavailable" rows, never "script does not exist". `tests/skill-activation-audit.sh` (kit-dev convenience; absent in consumer repos) may be used to cross-check in the kit repo itself — it reports each skill once, so cross-check phase counts against the jsonl's `args=--phase=<design|plan>` entries when both phases are required.
+   Verify the expected pipeline ran. The canonical chain documented in `skills/using-autodev/SKILL.md` is:
    `brainstorming → adversarial-design-review (design) → writing-plans → adversarial-design-review (plan) → alignment-check → subagent-driven-development → finishing-a-development-branch → pr-monitoring → post-merge-retrospective`.
-   For each gate that was *expected* to fire and didn't, that's a missed-activation. Use `tests/skill-activation-audit.sh` (this repo) to confirm what fired — note that the audit script reports each skill once even when invoked twice (e.g., adversarial-design-review for both phases), so cross-check phase counts against the JSONL `args=--phase=<design|plan>` entries when both phases are required.
+   For each gate that was *expected* to fire and didn't, that's a missed-activation.
+   When the merged PR's diff touched docs/examples, record `finishing Step 1e` as fired iff a `Doc-reconciliation:` line is present in the PR body, else `unverified`. If the diff touched no docs/examples, record no row (Step 1e legitimately did not fire).
 
 6. **Backfeed project design guidance.**
    Invoke `autodev:project-design-guidance`. If the merged work reveals a
@@ -96,13 +99,14 @@ If there are zero gate misses, write: "No gate misses this PR. All downstream is
 
 ## Missed skill activations
 
-Pipeline gates expected to fire (per `using-autodev`): list any that didn't. Pull from `tests/skill-activation-audit.sh`.
+Pipeline gates expected to fire (per `using-autodev`): list any that didn't. Read from `.claude/autodev-state/in-progress.jsonl` (`ev:"skill"` entries; `tests/skill-activation-audit.sh` is a kit-dev convenience only, absent in consumer repos).
 
 | Gate | Fired? | Notes |
 |---|---|---|
 | brainstorming | yes | |
 | adversarial-design-review (design) | yes | |
 | adversarial-design-review (plan) | no | <why — e.g., manual override; deferred to alignment-check> |
+| finishing Step 1e (doc-reconciliation) | yes/unverified | only when the diff touched docs/examples |
 | ... | ... | |
 
 ## What worked
@@ -153,11 +157,11 @@ The retro is intentionally short. Long retros don't get read. The format above f
   guidance change.
 
 **Reads:**
-- `docs/plans/` (design, plan, adversarial-review reports)
+- `docs/plans/` (design, plan, adversarial-review reports — reports now committed by `adversarial-design-review` per the deterministic path rule)
 - `decisions/` (ADRs cited from the design / plan)
 - `gh pr view`, `gh pr review-comments`, `gh run list`
 - `.claude/autodev-state/in-progress.jsonl` (if present)
-- `tests/skill-activation-audit.sh` (this repo)
+- `tests/skill-activation-audit.sh` (kit-dev convenience; absent in consumer repos)
 - `docs/design-guidance.md` or equivalent project guidance, if present
 
 **Writes:**
diff --git a/tests/pipeline-evidence-doc-sync.sh b/tests/pipeline-evidence-doc-sync.sh
new file mode 100755
index 0000000..35cdd04
--- /dev/null
+++ b/tests/pipeline-evidence-doc-sync.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# tests/pipeline-evidence-doc-sync.sh -- regression guard for issues #69/#70/#71/#72 (v6.4.0).
+# Asserts the skill contracts these issues fixed remain present, so they cannot silently regress.
+set -uo pipefail   # deliberately no -e: every check must run; failures are tallied in $fail
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+ADR="$ROOT/skills/adversarial-design-review/SKILL.md"
+RETRO="$ROOT/skills/post-merge-retrospective/SKILL.md"
+FIN="$ROOT/skills/finishing-a-development-branch/SKILL.md"
+fail=0
+pass(){ printf 'PASS: %s\n' "$1"; }                        # report a passing check on stdout
+bad(){ printf 'FAIL: %s\n' "$1" >&2; fail=$((fail+1)); }   # report on stderr and count it; returns 0
+has(){ grep -qiF -- "$2" "$1"; }    # has FILE TEXT: case-insensitive literal substring
+hasE(){ grep -qiE -- "$2" "$1"; }   # hasE FILE ERE: case-insensitive extended regex
+for f in "$ADR" "$RETRO" "$FIN"; do [ -r "$f" ] || bad "cannot read skill file: $f"; done   # a grep error would let the negated #70 check pass
+
+# --- #69 (D1): adversarial-design-review mandates committing the report ---
+hasE "$ADR" '(-design-review\.md|-plan-review\.md)' \
+  && pass "#69 ADR cites the <stem>-design-review.md/-plan-review.md convention" \
+  || bad  "#69 ADR missing committed-report convention path"
+# P1: assert the SPECIFIC wording (mandate, "stable finding ID"), not ambient words like "commit" or "stable ... id"
+has "$ADR" "Write AND commit the report" \
+  && pass "#69 ADR mandates writing+committing the report" \
+  || bad  "#69 ADR does not mandate writing+committing the report"
+has "$ADR" "stable finding ID" \
+  && pass "#69 ADR defines stable finding IDs" \
+  || bad  "#69 ADR missing stable finding IDs"
+# P4/M1: guard the load-bearing D1<->D2 path contract -- retro must cite the SAME derivation. Assert only the
+# specific phrase; a broad '-plan-review.md' branch is ambient vocabulary that could false-pass on a path mention.
+has "$RETRO" "same deterministic rule" \
+  && pass "#69/#70 retro derives the report path by the same rule (D1<->D2 contract)" \
+  || bad  "#69/#70 retro missing the shared path-derivation rule"
+
+# --- #70 (D2): retro reads the jsonl as PRIMARY; script demoted, NOT a hard dep ---
+# P1: assert the jsonl is the PRIMARY source (only true after Task 4), not merely mentioned
+hasE "$RETRO" 'primary source.*in-progress\.jsonl|in-progress\.jsonl.*primary' \
+  && pass "#70 retro makes in-progress.jsonl the primary activation source" \
+  || bad  "#70 retro does not promote in-progress.jsonl to primary"
+# Negated check: the format template must NOT instruct 'Pull from tests/skill-activation-audit.sh'
+hasE "$RETRO" 'Pull from .*skill-activation-audit\.sh' \
+  && bad  "#70 retro STILL instructs 'Pull from tests/skill-activation-audit.sh' (line ~99 not demoted)" \
+  || pass "#70 retro format template no longer hard-depends on the kit-local script"
+has "$RETRO" "kit-dev" \
+  && pass "#70 retro marks the audit script kit-dev-only" \
+  || bad  "#70 retro does not demote the audit script to kit-dev-only"
+
+# --- #71/#72 (D3): finishing has Step 1e in BOTH body (its '### Step 1e' heading) and autonomous list ---
+hasE "$FIN" '^### Step 1e' \
+  && pass "#71/#72 finishing has Step 1e body" \
+  || bad  "#71/#72 finishing missing Step 1e body"
+has "$FIN" "Doc-reconciliation" \
+  && pass "#71/#72 finishing emits Doc-reconciliation token" \
+  || bad  "#71/#72 finishing missing Doc-reconciliation accountability token"
+# Step 1e must be referenced in the Autonomous Mode numbered list region (top of file, before '### Step 1:').
+# Here-string instead of `printf | grep -q`: under pipefail an early grep -q exit can SIGPIPE the writer (status 141).
+auto_region="$(awk '/^## Autonomous Mode/{f=1} /^### Step 1: Verify Tests/{f=0} f' "$FIN")"
+grep -qiE 'Step 1e' <<<"$auto_region" \
+  && pass "#71/#72 Step 1e wired into Autonomous Mode list" \
+  || bad  "#71/#72 Step 1e NOT in Autonomous Mode list (would never fire autonomously)"
+
+# --- #72 (D4): plan-phase naming-convention checklist row ---
+hasE "$ADR" 'naming.convention match|Identifier / naming' \
+  && pass "#72 ADR plan-phase has Identifier/naming-convention row" \
+  || bad  "#72 ADR plan-phase missing naming-convention row"
+
+echo ""; echo "Results: $fail failure(s)"; [ "$fail" -eq 0 ]