diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index ae437f2..ac6f347 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ { "name": "autodev", "description": "Autonomous development workflow skills for coding agents", - "version": "6.3.1", + "version": "6.4.0", "source": "./", "author": { "name": "Jon Langevin", diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index d22c713..cb699c9 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "autodev", "description": "Autonomous development workflow skills for coding agents: design, review, planning, execution, monitoring, and retrospectives", - "version": "6.3.1", + "version": "6.4.0", "author": { "name": "Jon Langevin", "email": "jon@gocodealone.com" diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json index 3ec3935..9a0d664 100644 --- a/.cursor-plugin/plugin.json +++ b/.cursor-plugin/plugin.json @@ -2,7 +2,7 @@ "name": "autodev", "displayName": "Autonomous Dev Kit", "description": "Autonomous development workflow skills for coding agents", - "version": "6.3.1", + "version": "6.4.0", "author": { "name": "Jon Langevin", "email": "jon@gocodealone.com" diff --git a/.github/workflows/skill-content-check.yml b/.github/workflows/skill-content-check.yml index 4dc79e4..e7fb0d9 100644 --- a/.github/workflows/skill-content-check.yml +++ b/.github/workflows/skill-content-check.yml @@ -6,12 +6,16 @@ on: - 'skills/**' - 'agents/**' - 'tests/skill-content-grep.sh' + - 'tests/pipeline-evidence-doc-sync.sh' + - 'tests/skill-cross-refs.sh' - '.github/workflows/skill-content-check.yml' pull_request: paths: - 'skills/**' - 'agents/**' - 'tests/skill-content-grep.sh' + - 'tests/pipeline-evidence-doc-sync.sh' + - 'tests/skill-cross-refs.sh' - '.github/workflows/skill-content-check.yml' workflow_dispatch: @@ -25,3 +29,7 @@ jobs: - uses: actions/checkout@v4 - name: Check skill content 
for host-specific tokens run: bash tests/skill-content-grep.sh + - name: Pipeline evidence + doc-sync contracts + run: bash tests/pipeline-evidence-doc-sync.sh + - name: Skill cross-references resolve + run: bash tests/skill-cross-refs.sh diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design-review.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design-review.md new file mode 100644 index 0000000..49fa524 --- /dev/null +++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design-review.md @@ -0,0 +1,68 @@ +# Pipeline Evidence + Doc-Sync Hardening — Adversarial Review + +**Phase:** design +**Artifact:** `docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md` +**Status:** PASS @ cycle 3 (cycle 1: FAIL → revised, see Resolution column; cycles 2–3 recorded below) + +## Findings + +| id | sev | class | loc | issue | resolution | +|---|---|---|---|---|---| +| D1 | Critical | Repo-precedent / Existence | D1 §Design | Two committed review files already exist (`2026-05-31-session-owned-lock-claims-design-review.md`/`-plan-review.md`) under convention `-design-review.md`/`-plan-review.md`. The invented `-adversarial-*.md` name diverges; #69's "never written" premise overstated — practice exists ad-hoc but isn't skill-mandated, has no stable IDs, no guaranteed path. | Adopted existing `-design-review.md`/`-plan-review.md` naming. Reframed #69 as "systematize the ad-hoc practice + add stable IDs + mandate path so retro can glob reliably." | +| D2 | Critical | Missing failure mode | D2 §Design | Retro fix updated Step 5 process text but left the output-format template (retro SKILL.md:99 "Pull from `tests/skill-activation-audit.sh`") + the `**Reads:**` bullet pointing at the kit-local script — every future retro re-embeds the broken instruction. | D2 scope now explicitly updates retro SKILL.md:99 (format template) + the `**Reads:**` bullet + Step 5 together. 
| +| D3 | Important | Circular / dogfood framing | §Multi-Component Validation | Skill edits don't land until their task runs; during THIS feature's own pipeline the report is manually emulated, not skill-written. "First real artifacts of the new behavior" misleads. | Added explicit note: D1 behavior is manually emulated for this feature's own design/plan reviews until the skill-edit task lands; implementing agents must not assume the skill auto-commits before that task. | +| D4 | Important | Assumptions (A1) | D2 §Design | Retro reads phase from `args` of `skill` entries; the reviewer **subagent** is dispatched via Agent tool → `ev:"agent"` record has no `sk`/`args`/phase. Conflation risk. | Clarified: retro keys off `ev:"skill"` entries (the lead's `Skill` invocation carries `args:"--phase=…"`); the Agent-dispatched reviewer is a separate sub-record the retro ignores for phase. | +| D5 | Important | YAGNI | D1 §Stable IDs | `Resolution:` field as a per-revision-cycle mutable field adds maintenance with no consumer (retro Step 2 scores from downstream evidence, never reads it). | Reframed: `Resolution` is OPTIONAL, filled ONCE at end-state (commit SHA / `accepted — reason` / `false-positive`), and D2 now wires retro Step 2 to read it as a hint (falls back to downstream evidence) — giving it a real consumer at low maintenance. | +| D6 | Important | Trap / self-pass | D3 §Step 1e | Step 1e is pure judgment, no script, no exit-code, no halt path like Step 1d → can silently self-pass under autonomy (the exact "trap" the user flagged). | Narrowed trigger to "diff commits a design doc, README/reference doc, or example artifact" (rare/cheap) + require a visible one-line `Doc-reconciliation:` note in the PR body (concrete accountability token, no scanner — honors the user's LIGHT choice). | +| D7 | Minor | Repo-precedent | D1 | Existing 2 review files use no finding IDs (old `\| sev \| class \| loc \|` table); post-v6.4.0 corpus will be mixed-format. 
| Retro degrades gracefully: reads new ID format and pre-v6.4.0 reports (no IDs) alike. | +| D8 | Minor | Failure mode | D1 | Concurrent review writes (lead + manual) → last-write-wins on the report file. | Noted overwrite is safe only under sequential execution; no lock needed at this scale. | +| D9 | Minor | Precedent overlap | D4 §Design | New plan-phase "naming-convention match" row sits adjacent to existing `Config-validation schema rules` row → reader may conflate. | D4 row text now states it's distinct (this = human naming-convention consistency; that = tool-enforced schema invariants). | +| D10 | Minor | Infra | D1 | `tests/skill-cross-refs.sh` must resolve any new step references; should be an explicit plan task. | Plan will run `skill-cross-refs.sh` + `skill-content-grep.sh` as a verification task before PR. | + +## Bug-Class Scan Transcript + +| Class | Result | Note | +|---|---|---| +| Project-guidance conflicts | Clean | No `docs/design-guidance.md`; design acknowledges + inherits the user's "not too long/onerous" constraint. | +| Assumptions under attack | Finding (D4) | A1 live-confirmed; phase-disambiguation clarified for skill-vs-agent records. | +| Repo-precedent conflicts | Finding (D1, D7) | Existing `-design-review.md`/`-plan-review.md` convention adopted. | +| Artifact-class precedent | Finding (D1) | 2 prior committed review files surveyed; naming adopted. | +| YAGNI violations | Finding (D5) | `Resolution` reframed optional/end-state with a wired consumer. | +| Missing failure modes | Finding (D2, D8) | Retro format-template fix added; concurrent-write noted. | +| Security/privacy | Clean | Report holds design findings only; jsonl args truncated 80 chars; no PII. | +| Infrastructure impact | Clean (D10 minor) | No runtime impact; CI skill-checks added to plan. | +| Multi-component validation | Finding (D3) | Dogfood asymmetry flagged + D1↔D2 path-contract kept literally identical. 
| +| Rollback story | Clean | Revert-merge + re-tag; graceful-degrade covers report absence. | +| Simpler alternative | Clean | Heuristic doc-scanner explicitly rejected per user LIGHT choice. | +| User-intent drift | Finding (D6) | Step 1e tightened to avoid no-op gate; honors "no traps". | +| Existence / runtime-validity | Finding (D1, D2) | Existing report files + retro:99 template confirmed by `ls`/`sed`. | + +## Options the author may not have considered +1. **Adopt existing naming convention** — taken (D1). +2. **Drop `Resolution` entirely** — partially taken: kept but reframed optional/end-state with a wired retro consumer, because the user explicitly wants reviews logged to ease retros across compaction; finding-IDs + an optional resolution hint serve that without per-cycle churn. +3. **Give Step 1e an output token** — taken (D6): visible PR-body `Doc-reconciliation:` line instead of a scanner. + +**Verdict reasoning:** Two Criticals (false "never written" premise + naming divergence; incomplete retro fix leaving the broken template line) plus four Importants are all addressed in the revised design without adding a skill or a scanner. The revision adopts the repo's own convention, completes the retro fix, de-risks the Step 1e trap with a visible token, and reframes the only YAGNI surface (`Resolution`) to have a consumer. Re-run after revision to confirm convergence. + +## Cycle 2 (re-run) — all cycle-1 resolved; revision introduced new issues, now fixed + +| id | sev | class | issue | resolution | +|---|---|---|---|---| +| N1 | Critical | Multi-component / Assumptions | Stem-derivation rule ambiguous for plan files (`.md` has no `-design` tail to strip) → an agent could derive a wrong `-plan-review.md` path, silently breaking the load-bearing D1↔D2 path contract. | Replaced prose with a deterministic one-rule: drop `.md`; design→`+-review.md`, plan→`+-plan-review.md`, with both a design and a plan worked example. Both D1 and D2 state the identical rule. 
| +| N2 | Important | Missing failure mode | Step 1e added to skill body but NOT to `finishing-a-development-branch`'s Autonomous Mode numbered list (the real control flow) → never fires in autonomous runs (deeper self-pass than D6). | D3 now names **two** edit sites: the `### Step 1e` body + a new bullet in the Autonomous Mode list after the Step 1d item. | +| N3 | Important | Trap | `Doc-reconciliation:` PR-body token claimed retro-visible, but no retro step consumed it → aspirational, soft self-pass. | D2 wires retro Step 5 (Missed activations) to record `finishing Step 1e` fired iff the token is present when the diff touched docs — real consumer, reuses existing table. | +| N4 | Minor | Existence | `**Reads:**` is two lines; demoting "the bullet" could remove the correct jsonl line. | D2 scalpel note: keep line 159 (jsonl), demote only the `skill-activation-audit.sh` line. | +| N5 | Minor | YAGNI | Step 1e trigger "README/reference doc" broader than motivating issues. | Trigger reworded to "describes the feature's behavior"; docs with no `docs/plans/` counterpart trivially pass — cheap no-op. | + +**Cycle-2 verdict:** all 10 cycle-1 findings verified resolved in design text; the 1 Critical + 2 Important + 2 Minor introduced by the revision are now fixed (deterministic stem rule, dual edit-site for Step 1e, wired token consumer, scalpel Reads edit, trigger reword). No skill added, no scanner, no net bloat beyond ~+20 lines to adversarial-design-review and ~+14 to finishing. Cycle 3 re-run to confirm convergence. + +## Cycle 3 (convergence) — PASS + +Zero Critical, zero Important. Cycle-2 N1/N2/N3 verified genuinely resolved in design text (deterministic stem rule stated identically in D1 & D2 with both worked examples; Step 1e dual edit-site explicit; token wired to retro Step 5). Converged. 
+ +Remaining Minors (→ plan-time clarifications, not design blockers): +- **M1:** retro Step 5 must check the diff touched docs/examples *before* recording a Step-1e row, else a no-docs PR could get a spurious `unverified` row. Design already gates on "when the diff touched docs/examples"; plan makes it a hard precondition. +- **M2:** spell out in the plan that an old (no-ID) report → retro falls back to downstream evidence for scoring. +- **M3 (fixed in design):** clarified "overwrite" = one file per phase, may append `## Cycle N` sections (history survives). + +**Final design verdict: PASS @ cycle 3.** Proceed to writing-plans. diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md new file mode 100644 index 0000000..846c48f --- /dev/null +++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-design.md @@ -0,0 +1,257 @@ +# Pipeline Evidence + Doc-Sync Hardening — Design + +**Date:** 2026-06-03 +**Issues:** #69, #70, #71, #72 (all GoCodeAlone/autonomous-dev-kit) +**Target release:** v6.4.0 +**Author:** autonomous pipeline (brainstorming) + +## Problem + +Four issues, two themes, one root: the pipeline emits design/plan/review artifacts but the +*connective tissue* between them is weak or fictional. + +**Theme A — retro evidence is broken:** +- **#69:** `post-merge-retrospective` reads "adversarial-review reports committed in `docs/plans/`" + (SKILL.md:22, :33, :156). But `adversarial-design-review` does not **mandate** committing the + report — step 7 says "Write the report" and the Dispatch subagent returns text, with no + instruction to persist+commit it to a known path. *Nuance (adversarial review D1):* the practice + exists **ad-hoc** — exactly two committed review files exist today + (`docs/plans/2026-05-31-session-owned-lock-claims-design-review.md` + `-plan-review.md`) under + the convention `-design-review.md` / `-plan-review.md`. 
But because the skill never + mandates it, it happens for some features and not others, has **no stable finding IDs**, and the + retro cannot rely on the file existing. Result: most retros reconstruct findings from revision + notes/PR threads — worse under long/compacted context (transcript lost). The fix **systematizes + the existing ad-hoc practice** (mandate the commit, adopt the existing name, add stable IDs); it + does not invent a new artifact. +- **#70:** retro tells the agent to run `tests/skill-activation-audit.sh` *"(this repo)"* — a + **kit-dev-only** script absent in consumer repos → "Missed skill activations" table is "script + does not exist" every time. Meanwhile the `record-activity` PostToolUse hook (shipped: + `hooks/hooks.json:53`, `${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd record-activity`) **already** + appends every Skill activation to `.claude/autodev-state/in-progress.jsonl` (relative to the repo root) in **any** + repo. The retro just isn't pointed at it. + +**Theme B — committed docs drift from reality:** +- **#71:** in split-PR features, PR-1 commits docs/examples describing the **full** feature + (endpoints/helpers/metrics that ship in later PRs). `alignment-check`/`scope-lock` trace + task/PR manifest, not committed **doc content** → forward-references slip to human review. +- **#72:** during in-scope execution, identifiers get convention-conforming refinements (config + key snake→camel, example snippet changes). Not a disproved assumption (so scope-lock's backport + path never triggers), not a manifest change → nothing reconciles the design doc with built code. + Design ships stale-on-arrival; reviewers burn cycles on doc-vs-code drift. + +## Goals / Non-goals + +**G1:** adversarial-design-review commits a durable, scannable findings report with stable + finding IDs (#69). 
+**G2:** retro scores findings from that committed report, and scores activations from the + `in-progress.jsonl` the hook already writes — degrading gracefully, not pointing at a kit-local + script (#70). +**G3:** a single pre-PR doc-reconciliation gate catches both forward-references (#71) and + identifier drift (#72) in committed docs/examples. +**G4:** plan-phase adversarial review gains one checklist row: plan identifiers/examples match + implemented identifiers + repo naming convention (#72, catch-before-code). + +**Non-goals (YAGNI):** +- No heuristic doc-content scanner that diffs every identifier in prose against the manifest + (issue #71's *primary* rec). Rejected: false-positive-prone, unbounded, the "onerous/trap" + class the user explicitly warned against. We take #71's own lighter fallback (explicit labeling + + identifier match). +- No new skill, no new **standalone** script (heuristic scanner / activation-append helper). A + small grep-assertion **regression test** that guards these contracts and runs inside the existing + `skill-content-check.yml` CI is in-scope — the design's Multi-Component Validation requires CI + enforcement, so the test is the enforcement harness, not new product surface. No per-gate manual + activation-append (the hook covers Skill-invoked gates; manual appends would be redundant bloat). +- No retro restructure beyond the two evidence sources. + +## Design + +### D1 — Committed adversarial-review report (#69, G1) + +`adversarial-design-review` step 7 + Dispatch + Report-format change: +- After producing the report, **write it to the repo's existing convention path** and **commit it + alongside** the artifact. **Deterministic derivation (one rule, no ambiguity — adversarial review + cycle-2 N1):** take the artifact filename, drop the `.md`, then: + - **design phase:** append `-review.md` → e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md` + (matches existing `2026-05-31-session-owned-lock-claims-design-review.md`). 
+ - **plan phase:** append `-plan-review.md` → e.g. `2026-06-03-pipeline-evidence-doc-sync.md` + (plan has no `-design` tail) → `2026-06-03-pipeline-evidence-doc-sync-plan-review.md` + (matches existing `2026-05-31-session-owned-lock-claims-plan-review.md`). + + The retro (D2) derives the **same** path by the same rule — this is the load-bearing D1↔D2 + contract, so both skills state the rule identically. *(Adopting the existing name, not a new + `-adversarial-*` one.)* The Dispatch subagent produces the report text; the **lead** writes+commits + it (the subagent has no git authority — matches the existing Dispatch pattern). +- **Stable finding IDs:** design-phase findings `D1, D2, …`; plan-phase `P1, P2, …`. Each finding + row carries its ID as the first column. This is the durable anchor the retro correlates against. +- **Optional `Resolution` column**, filled **once at end-state** (not mutated every revision + cycle): a commit SHA, `accepted — <reason>`, or `false-positive`; left blank/`pending` if + unresolved. D2 wires retro Step 2 to read it as a *hint* (falling back to downstream evidence + when blank), so the field has a real consumer at near-zero maintenance. +- Idempotent: re-running the review on a revised artifact updates **the same single report file + per phase** (not a new file per cycle) — the file holds the latest cumulative state; multi-cycle + runs may append a `## Cycle N` section so finding→resolution history survives for the retro. + Safe under sequential execution (the default); no lock needed at this scale. +- **Back-compat:** pre-v6.4.0 review files (no finding IDs, older table shape) remain valid; the + retro reads both. **Dogfood caveat:** during *this* feature's own pipeline the skill text hasn't + changed yet, so the lead emulates D1 by hand (writing+committing each phase's review file under + the convention) until the task that edits the skill lands — implementing agents must not assume + the skill auto-writes the file before that task. 
+ +### D2 — Retro reads committed report + activation jsonl (#70, G2) + +`post-merge-retrospective`: +- Step 2 (score findings): derive the report path by the **same deterministic rule as D1** (drop + `.md`; design → `+-review.md`, plan → `+-plan-review.md`) and read the committed + `…-design-review.md` / `…-plan-review.md` report(s). Use each finding's stable ID; read its + optional `Resolution` column as a scoring hint, falling back to downstream evidence (code-review + threads, CI) when blank. If the report is absent (ad-hoc PR or pre-mandate branch — most + pre-v6.4.0 features have no committed review), state "no committed review report; reconstructed + from revision history" — the *current* behavior becomes the explicit fallback, not the default. +- Step 5 (score activations): **primary source = `.claude/autodev-state/in-progress.jsonl`** + (written by `record-activity` in any repo). Read phase from the `args` field of **`ev:"skill"`** + entries (the lead's `Skill` invocation carries `args:"--phase=design|plan …"`); the + Agent-dispatched reviewer subagent is a separate `ev:"agent"` record without a phase and is + ignored for phase attribution. If the jsonl is absent → emit "activation log unavailable" rows, + **never** "script does not exist". +- **Three edit sites, same change (adversarial review D2):** Step 5 process text **and** the + output-format template (`## Missed skill activations`, SKILL.md:99, currently "Pull from + `tests/skill-activation-audit.sh`") **and** the `**Reads:**` integration bullet must all demote + the kit-local script to "(kit-dev convenience; absent in consumer repos)". Fixing only Step 5 + would leave the broken instruction re-embedded in every future retro's format section. + **Scalpel precision (cycle-2 minor):** under `**Reads:**`, the `.claude/autodev-state/in-progress.jsonl` + line is correct and stays; demote only the adjacent `tests/skill-activation-audit.sh` line. 
+- **Wire the Step-1e accountability token (cycle-2 N3):** so the `Doc-reconciliation:` PR-body line + is not unconsumed prose, retro Step 5 (Missed skill activations) gains one row — when the merged + PR's diff touched docs/examples, record `finishing Step 1e` as fired iff a `Doc-reconciliation:` + line is present in the PR body, else `unverified`. This reuses the existing missed-activation + table (no new retro section) and gives the token a real consumer, making "the retro can see it" + true rather than aspirational. (The token's *primary* role remains human + pr-monitoring + accountability; the retro row is the durable backstop.) + +### D3 — Pre-PR doc-reconciliation gate (#71 + #72a, G3) + +`finishing-a-development-branch` new **Step 1e: Doc-Reconciliation Check** (after 1d Scope +Completeness, before Step 2). **Trigger (narrowed, adversarial review D6):** fires only when the +PR's diff commits a **design doc, README/reference doc, or example artifact** — skip entirely for +code-only / test-only diffs, so it's rare and cheap. The agent verifies, for those committed +docs/examples: +- **(a) Scope (forward-ref, #71):** every behavior/identifier described is either in *this PR's* + manifest scope, OR explicitly labeled `Planned (PR #N)` / `Planned — later PR`. Unlabeled + forward references = finding → label them or move the prose to the later PR. +- **(b) Identifier drift (#72):** concrete identifiers in the design doc / examples — config keys, + flags, env vars, command invocations, DDL/code snippets, format strings — match the identifiers + the code on this branch actually uses (and the repo's naming convention). Mismatch = finding → + reconcile the doc to the built code. +- Checklist gate (agent reads the diff + greps identifiers), **not** an automated scanner (honors + the user's LIGHT choice for #71/#72). On a finding in autonomous mode: fix the doc in-branch + before PR (in-scope doc edit, no manifest change). 
Distinct from scope-lock's + assumption-backport (disproved assumptions) — this is routine accuracy reconciliation. +- **Accountability token (anti-trap, adversarial review D6):** the agent MUST emit a one-line + `Doc-reconciliation: clean` or `Doc-reconciliation: N item(s) fixed — <summary>` into the PR + body. This converts a judgment step that could silently self-pass into a visible record that + pr-monitoring, the human reviewer, and the retro (via D2's new missed-activation row) can see — + without a script. +- **Second edit site — Autonomous Mode list (cycle-2 N2):** Step 1e added to the skill body alone + would never fire in autonomous runs, because `finishing-a-development-branch`'s Autonomous Mode + section (its numbered list, currently naming Step 1d at item 2) is the actual control flow. D3 + therefore edits **two** places: the `### Step 1e` body section **and** a new bullet in the + Autonomous Mode list — "Run Step 1e (Doc-Reconciliation Check) — conditional on the diff + containing a design/reference doc or example artifact" — inserted after the Step 1d item, before + PR creation. +- **Trigger precision (cycle-2 minor):** "design doc, reference/standards doc, or example + artifact that describes the feature's behavior". A doc with no corresponding design/plan in + `docs/plans/` (e.g. a standalone library README) has nothing to cross-check and trivially passes + `clean` — cheap no-op, not a false negative. + +### D4 — Plan-phase naming-convention checklist row (#72b, G4) + +`adversarial-design-review` plan-phase bug-class checklist gains one row: +**Identifier / naming-convention match** — "config keys, flags, env vars, and command/code +examples in the plan match the repo's established naming convention and the identifiers the code +will actually use (grep the repo for the convention; a plan showing `snake_case` keys where the +codebase uses `camelCase` = finding). 
**Distinct from `Config-validation schema rules`** (which +checks tool-enforced schema invariants); this row checks human naming-convention consistency." +Catches the drift in D3(b) **before a line of code is written**, cheaper than reconciling after. + +## Global Design Guidance + +No `docs/design-guidance.md` in this repo (checked). The kit's durable guidance lives in the +skills themselves. Relevant inherited principles: skills must stay tight (user constraint: +"not too long or onerous"); no circular logic / phantom dependencies (#69 *is* one — fixing it +reduces circularity); reuse existing machinery over adding new (hooks, report format, audit +script all pre-exist). + +## Security Review + +Low surface. All changes are skill-markdown instruction edits + one committed-report file path. +- The committed adversarial report lives in `docs/plans/` (already-committed-artifact territory); + no secrets — it summarizes design findings. Reviewer must not paste secrets into findings (same + discipline as existing design docs). +- Reading `.claude/autodev-state/in-progress.jsonl`: local file, no network, no PII beyond skill + names + truncated args (hook already truncates args to 80 chars). No new exposure. +- No auth/authz, no external calls, no new dependencies. + +## Infrastructure Impact + +None at runtime. No build/deploy/k8s/migration changes. The only "infra" touchpoint: v6.4.0 +release bumps the 3 version manifests (`.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, +`.cursor-plugin/plugin.json`) → `release-tag.yml` auto-tags on push to main. Standard kit release +path, unchanged. + +## Multi-Component Validation + +The cross-component boundary here is **skill → hook → state-file → retro**: +- D2 depends on `record-activity` writing `in-progress.jsonl`. 
**Verified live**: this session's + own `brainstorming` invocation produced `{"ts":"2026-06-03T16:31:07Z","ev":"skill", + "sk":"autodev:brainstorming"}` in the kit repo's state file, and the hook is plugin-level + (`hooks/hooks.json:53`) so it fires in consumer repos too. +- D1↔D2 contract: the report path written by adversarial-design-review (D1) is the exact path the + retro reads (D2). Plan must keep these literally identical (one source constant in prose). +- `tests/skill-cross-refs.sh` and `tests/skill-content-grep.sh` are the kit's own CI gates over + skill markdown — all skill edits must keep cross-references resolvable and host-tokens inside + `` blocks. The plan includes running both before PR. +- **Dogfood (with caveat, adversarial review D3):** this feature runs through the pipeline, so the + *practice* of committing the review report is exercised on its own design+plan reviews. But the + skill text edits don't take effect until their task lands — so for this feature the lead + **manually** writes+commits each `…-design-review.md` / `…-plan-review.md` (already done for the + design phase) rather than the skill doing it automatically. The skill-automated path is first + exercised by the *next* feature after v6.4.0. +- **CI skill gates:** `tests/skill-cross-refs.sh` + `tests/skill-content-grep.sh` (the kit's own + markdown gates) run as a plan verification task before PR, so new step/path references resolve + and host-tokens stay inside `` blocks. + +## Assumptions + +- **A1:** `record-activity` fires in consumer repos (plugin-level PostToolUse hook). *Evidence:* + `hooks/hooks.json:53` + live entry this session. **Load-bearing for D2.** +- **A2:** Skill-invoked gates are what the retro needs to score; gates invoked as non-Skill + sub-steps (rare) not appearing in the jsonl is acceptable (graceful-degrade covers it). 
Phase + attribution comes only from `ev:"skill"` entries' `args` (the lead's `Skill` call); the + Agent-dispatched reviewer subagent's `ev:"agent"` record has no phase and is ignored for it. +- **A3:** The adversarial Dispatch subagent can return report text the lead commits; the lead + (not the subagent) owns the git write. *Matches existing Dispatch pattern.* +- **A4:** Writing one report file per phase per feature (overwritten across revision cycles) is + acceptable repo noise — same order as the design/plan docs already committed. +- **A5:** A checklist-style Step 1e (human/agent judgment over the diff) catches the doc drift + classes without a scanner. *If false* (agent skips it), the human reviewer remains the backstop — + same as today, so no regression. + +## Rollback + +Change class: skill-content + plugin version bump (release-affecting). Rollback = revert the merge +commit + re-tag prior version. No data/migration/runtime state to unwind. The committed-report +path is additive; reverting simply stops writing it (retro's graceful-degrade handles its absence). +Per-task rollback notes in the plan for the version-bump task. + +## Self-challenge (top doubts surfaced) + +1. **Is D1 adding bloat to an already-335-line skill?** Net +~18 lines to adversarial-design-review, + but it makes an existing *fictional* contract real and removes the retro's reconstruction burden. + The report *format* already exists — we add a path + IDs + a Resolution field, not a new section. +2. **Could D3's Step 1e become a rubber-stamp the agent skips?** Possibly — it's judgment, not a + script. Mitigation: it's gated in autonomous mode (like 1d) and scoped to *only fire when docs/ + examples are in the diff*, so it's cheap and skippable-only-when-irrelevant. The plan-phase row + (D4) is the earlier, cheaper catch; 1e is the safety net. +3. **Does pointing retro at `in-progress.jsonl` over-trust a best-effort hook?** The hook is + best-effort (jq-absent / no-stdin → no-op). 
D2 degrades gracefully on absence, so worst case is + "activation log unavailable" — strictly better than today's "script does not exist". diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync-plan-review.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-plan-review.md new file mode 100644 index 0000000..e055653 --- /dev/null +++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync-plan-review.md @@ -0,0 +1,50 @@ +# Pipeline Evidence + Doc-Sync Hardening — Plan-Phase Adversarial Review + +**Phase:** plan +**Artifact:** `docs/plans/2026-06-03-pipeline-evidence-doc-sync.md` +**Status:** PASS (zero Critical; both Important resolved before execution) + +## Findings + +| id | sev | class | loc | issue | resolution | +|---|---|---|---|---|---| +| P1 | Important | Verification-class / test design | Task 1 | Two assertions were pre-green at test-creation (`has "$ADR" "commit"` matched ambient "committed" prose; `has "$RETRO" "in-progress.jsonl"` matched the path already present at retro:52/159) → weak RED state. | Fixed in plan: assert the specific new mandate `"Write AND commit the report"` (#69) and the **primary**-source promotion `primary source.*in-progress\.jsonl` (#70). Both are RED before Tasks 2/4, GREEN after. | +| P2 | Important | CI wiring | Task 6 | `tests/skill-cross-refs.sh` was run locally only; plan didn't add it to CI though the workflow file is already being edited. | Fixed in plan: Task 6 now adds `skill-cross-refs.sh` as a CI step + path filter, with a guard against importing any unrelated pre-existing failure. Verified green on the base tree (`EXIT=0`) so wiring is safe. | +| P3 | Minor | Test template attribution | Task 1 | Prose said "mirroring `skill-content-grep.sh`" but the code's `pass()/fail()`+counter idiom matches `hook-contracts.sh`. | Fixed: prose now references `hook-contracts.sh`. | +| P4 | Minor | Integration proof (D1↔D2) | Task 1 | No assertion guarded the load-bearing D1↔D2 path identity against future drift. 
| Fixed: added test assertion `same deterministic rule\|-plan-review\.md` against the retro. | +| P5 | Minor | Decomposition | Tasks 2&3 | Two commits to the same skill file. | Accepted: TDD slice-verification discipline; sequential (no collision). No change. | +| P6 | Minor | Format ripple / bloat | Task 2 | Converting the three Findings sections to a merged table would ripple into PASS/FAIL semantics + Dispatch output blocks. | Fixed (simpler than recommended): keep the three `**Findings (sev):**` sections unchanged, add only an ID prefix + optional inline `Resolution` — zero ripple, less change. | + +## Bug-Class Scan Transcript + +| Class | Result | Note | +|---|---|---| +| Project-guidance conflicts | Clean | No guidance file; net add ~+20 ADR / ~+14 retro / ~+30 finishing — within the user's "not onerous" tolerance. | +| Assumptions under attack | Clean | A1 (hook fires in consumer repos) live-confirmed; A3 (lead commits subagent text) matches Dispatch pattern. | +| Repo-precedent conflicts | Clean | Existing `-design-review.md`/`-plan-review.md` naming adopted; test idiom aligned to `hook-contracts.sh` (P3). | +| Artifact-class precedent | Clean | 2 prior committed review files surveyed; back-compat for old no-ID format. | +| YAGNI violations | Clean | No new skills/scripts/scanner; `Resolution` optional with a wired consumer. | +| Missing failure modes | Clean | Absent jsonl → "activation log unavailable"; absent report → "reconstructed from revision history". | +| Security / privacy | Clean | Report = design findings; jsonl args truncated; no PII/external calls. | +| Infrastructure impact | Clean | Version bump → existing `release-tag.yml` auto-tag path. | +| Multi-component validation | Clean | D1↔D2 path contract now test-guarded (P4); hook verified live. | +| Rollback story | Clean | Task 7 rollback note + whole-PR rollback section; additive change. | +| Simpler alternative | Clean | Scanner rejected per LIGHT choice; token (not script) for Step 1e. 
| +| User-intent drift | Clean | Exactly the 4 approved issues at LIGHT scope; no creep. | +| Existence / runtime-validity | Clean | All line refs verified by the reviewer (retro:99/156/159-160, finishing autonomous list:23, awk anchor `### Step 1: Verify Tests`:76); `bump-version.sh`/`version-check.sh`/`skill-content-grep.sh`/`skill-cross-refs.sh` all exist + match invocation syntax. | +| Over/under-decomposition | Clean | 7 tasks for 3 skill edits + test + bump — appropriate; each has a class-matched verify. | +| Verification-class mismatch | Resolved (P1/P2) | Test assertions tightened; CI cross-refs wired. | +| Auth/authz chain | Clean | No auth surfaces. | +| Hidden serial dependencies | Clean | Tasks 2&3 same file but sequential w/ commits between. | +| Missing rollback wiring | Clean | Markdown-only; revert + re-tag is the correct class. | +| Missing integration proof | Resolved (P4) | D1↔D2 path identity now asserted. | +| Infra verification mismatch | Clean | No infra; self-contained bump. | +| Plugin-loader runtime layout | Clean | N/A (markdown only). | +| Config-validation schema rules | Clean | N/A (no wfctl config). | + +## Options the author may not have considered +1. Tighten the `commit` assertion to the verbatim mandate — **taken** (P1). +2. Wire `skill-cross-refs.sh` into CI while the YAML is open — **taken** (P2). +3. Collapse Tasks 2+3 into one commit — **declined**, TDD slice discipline retained (P5). + +**Verdict reasoning:** PASS. Architecture, sequencing, scope sound; the failing test does not enter CI until Task 6 (by which point Tasks 2–5 made it green), so no mid-PR red. Both Important findings were test-quality (weak RED + a free CI-wiring win), resolved in the plan before execution exactly per the reviewer's recommendations; the four Minors are addressed or accepted with reason. No new skill, no scanner, no net bloat. Proceed to alignment-check. 
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md new file mode 100644 index 0000000..9bfd14f --- /dev/null +++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md @@ -0,0 +1,311 @@ +# Pipeline Evidence + Doc-Sync Hardening Implementation Plan + +> **For the implementing agent:** REQUIRED SUB-SKILL: Use autodev:executing-plans to implement this plan task-by-task. + +**Goal:** Fix autodev issues #69/#70/#71/#72 by systematizing the committed adversarial-review report, pointing the retro at the activation log the hook already writes, and adding one pre-PR doc-reconciliation gate + one plan-phase naming-convention check — with zero new skills/scripts and no heuristic scanner. + +**Architecture:** Pure skill-markdown edits to 3 skills (`adversarial-design-review`, `post-merge-retrospective`, `finishing-a-development-branch`), guarded by one new grep-assertion regression test wired into the existing `skill-content-check.yml` CI, plus the standard 3-manifest v6.4.0 version bump. Reuses the existing `record-activity` PostToolUse hook (writes `.claude/autodev-state/in-progress.jsonl` in any repo) and the existing `-design-review.md`/`-plan-review.md` report convention. + +**Tech Stack:** Bash (tests + hooks), Markdown (skills), GitHub Actions (CI), the kit's `scripts/bump-version.sh` + `tests/version-check.sh`. + +**Base branch:** main + +--- + +## Scope Manifest + +**PR Count:** 1 +**Tasks:** 7 +**Estimated Lines of Change:** ~300 (skill markdown + 1 test + version bump) + +**Out of scope:** +- Heuristic doc-content scanner that diffs every prose identifier against the manifest (#71 primary rec — rejected per user LIGHT choice; would be false-positive-prone bloat). +- New skills, new standalone scripts, or per-gate manual activation-append calls (the `record-activity` hook already covers Skill-invoked gates). 
+- Migrating the 2 pre-existing `2026-05-31-session-owned-lock-claims-*-review.md` files to the new finding-ID format (back-compat: retro reads both old + new shapes). +- Retro restructure beyond the two evidence sources (committed report + jsonl) and the one Step-1e missed-activation row. +- Changes to `tests/skill-activation-audit.sh` itself (it stays as a kit-dev convenience; only its *references in the retro* are demoted). + +**PR Grouping:** + +| PR # | Title | Tasks | Branch | +|------|-------|-------|--------| +| 1 | Pipeline evidence + doc-sync hardening (#69 #70 #71 #72) → v6.4.0 | Task 1, Task 2, Task 3, Task 4, Task 5, Task 6, Task 7 | feat/pipeline-evidence-doc-sync | + +**Status:** Locked 2026-06-03T17:10:43Z + +--- + +### Task 1: Failing regression test for all four contracts + +**Change class:** Hook/trigger-adjacent (grep-assertion test). Verification: the test itself (RED now, GREEN after Tasks 2–5). + +**Files:** +- Create: `tests/pipeline-evidence-doc-sync.sh` + +**Step 1: Write the failing test.** Create a bash test (mirroring the `pass()/fail()` + counter style of `tests/hook-contracts.sh`, non-zero exit on any fail) with these assertions against the repo's **skill** files (greps target `skills/…`, never `docs/plans/…`, so the plan's own design docs can't false-match). The assertions are written to be genuinely RED before Tasks 2–5 and GREEN after (plan-review P1: avoid substring matches that pass against pre-existing prose like "committed"): + +```bash +#!/usr/bin/env bash +# tests/pipeline-evidence-doc-sync.sh +# Regression guard for issues #69/#70/#71/#72 (v6.4.0). Asserts the skill +# contracts these issues fixed remain present, so they cannot silently regress. +set -uo pipefail +ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +ADR="$ROOT/skills/adversarial-design-review/SKILL.md" +RETRO="$ROOT/skills/post-merge-retrospective/SKILL.md" +FIN="$ROOT/skills/finishing-a-development-branch/SKILL.md" +fail=0 +pass(){ printf 'PASS: %s\n' "$1"; } +bad(){ printf 'FAIL: %s\n' "$1" >&2; fail=$((fail+1)); } +has(){ grep -qiF "$2" "$1"; } # literal substring +hasE(){ grep -qiE "$2" "$1"; } # regex + +# --- #69 (D1): adversarial-design-review mandates committing the report --- +hasE "$ADR" '(-design-review\.md|-plan-review\.md)' \ + && pass "#69 ADR cites the -design-review.md/-plan-review.md convention" \ + || bad "#69 ADR missing committed-report convention path" +# P1: assert the SPECIFIC new mandate wording, not the ambient word "commit" +has "$ADR" "Write AND commit the report" \ + && pass "#69 ADR mandates writing+committing the report" \ + || bad "#69 ADR does not mandate writing+committing the report" +hasE "$ADR" 'stable finding ID|stable .*ID' \ + && pass "#69 ADR defines stable finding IDs" \ + || bad "#69 ADR missing stable finding IDs" +# P4: guard the load-bearing D1<->D2 path contract — retro must cite the same derivation +hasE "$RETRO" 'same deterministic rule|-plan-review\.md' \ + && pass "#69/#70 retro derives the report path by the same rule (D1<->D2 contract)" \ + || bad "#69/#70 retro missing the shared path-derivation rule" + +# --- #70 (D2): retro reads the jsonl as PRIMARY; script demoted, NOT a hard dep --- +# P1: assert the jsonl is the PRIMARY source (only true after Task 4), not merely mentioned +hasE "$RETRO" 'primary source.*in-progress\.jsonl|in-progress\.jsonl.*primary' \ + && pass "#70 retro makes in-progress.jsonl the primary activation source" \ + || bad "#70 retro does not promote in-progress.jsonl to primary" +# The format template must NOT instruct 'Pull from tests/skill-activation-audit.sh' +grep -qiE 'Pull from .*skill-activation-audit\.sh' "$RETRO" \ + && bad "#70 retro STILL instructs 'Pull from tests/skill-activation-audit.sh' (line ~99 not demoted)" 
\ + || pass "#70 retro format template no longer hard-depends on the kit-local script" +has "$RETRO" "kit-dev" \ + && pass "#70 retro marks the audit script kit-dev-only" \ + || bad "#70 retro does not demote the audit script to kit-dev-only" + +# --- #71/#72 (D3): finishing has Step 1e in BOTH body and autonomous list --- +hasE "$FIN" 'Step 1e' \ + && pass "#71/#72 finishing has Step 1e body" \ + || bad "#71/#72 finishing missing Step 1e body" +has "$FIN" "Doc-reconciliation" \ + && pass "#71/#72 finishing emits Doc-reconciliation token" \ + || bad "#71/#72 finishing missing Doc-reconciliation accountability token" +# Step 1e must be referenced in the Autonomous Mode numbered list region (top of file, before '### Step 1:') +auto_region="$(awk '/^## Autonomous Mode/{f=1} /^### Step 1: Verify Tests/{f=0} f' "$FIN")" +printf '%s' "$auto_region" | grep -qiE 'Step 1e' \ + && pass "#71/#72 Step 1e wired into Autonomous Mode list" \ + || bad "#71/#72 Step 1e NOT in Autonomous Mode list (would never fire autonomously)" + +# --- #72 (D4): plan-phase naming-convention checklist row --- +hasE "$ADR" 'naming.convention match|Identifier / naming' \ + && pass "#72 ADR plan-phase has Identifier/naming-convention row" \ + || bad "#72 ADR plan-phase missing naming-convention row" + +echo ""; echo "Results: $fail failure(s)"; [ "$fail" -eq 0 ] +``` + +**Step 2: Run, verify it FAILS.** Run: `bash tests/pipeline-evidence-doc-sync.sh` +Expected: multiple `FAIL:` lines (skills not yet edited), final `Results: N failure(s)`, exit 1. + +**Step 3: Commit the failing test.** +```bash +chmod +x tests/pipeline-evidence-doc-sync.sh +git add tests/pipeline-evidence-doc-sync.sh +git commit -m "test: regression guard for pipeline evidence + doc-sync (#69 #70 #71 #72) [red]" +``` + +--- + +### Task 2: D1 — adversarial-design-review mandates a committed findings report + +**Change class:** Documentation/skill-content. 
Verification: Task-1 test #69 assertions pass + `skill-content-grep.sh` + `skill-cross-refs.sh` clean. + +**Files:** +- Modify: `skills/adversarial-design-review/SKILL.md` (Process step 7; Report format header; "Dispatching the reviewer agent" output instruction; Integration "Writes" — add if absent) + +**Step 1:** In **Process step 7** ("Write the report"), replace the inline-only instruction with the mandate to persist+commit, stating the **deterministic path rule** verbatim: +> 7. **Write AND commit the report.** Derive the path from the artifact filename: drop `.md`, then for `--phase=design` append `-review.md` (e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`); for `--phase=plan` append `-plan-review.md` (e.g. `2026-06-03-…-doc-sync.md` → `2026-06-03-…-doc-sync-plan-review.md`). This matches the existing `docs/plans/2026-05-31-session-owned-lock-claims-design-review.md` convention. The **lead** writes the report text the reviewer produced to that path and commits it alongside the artifact (the subagent has no git authority). Re-runs update the same single per-phase file (append a `## Cycle N` section across cycles); safe under sequential execution. + +**Step 2:** In the **Report format**, keep the existing three `**Findings (Critical|Important|Minor):**` sections **unchanged in structure** (so the PASS/FAIL semantics and Dispatch "Required output" blocks that key off "Critical findings"/"Important findings" keep working verbatim — plan-review P6: no table conversion, no ripple). Add only: each finding bullet is **prefixed with a stable finding ID** and may carry an optional inline resolution. Update the format example lines to: +> **Findings (Critical):** +> - `D1` [class] [section/line]: . Recommendation: . _Resolution: ._ +> +> Add a one-line note under the format: "Design-phase finding IDs are `D1, D2, …`; plan-phase `P1, P2, …`. 
IDs are the durable anchor `post-merge-retrospective` correlates against; the optional `Resolution` is a scoring hint (retro falls back to downstream evidence when omitted)." The literal phrase **"stable finding ID"** must appear (the Task-1 test asserts it). + +(Keep the `Bug-class scan transcript`, `Options`, and `Verdict reasoning` sections, and the PASS/FAIL semantics section, unchanged.) + +**Step 3:** In **"Dispatching the reviewer agent"** output instructions, add one line: the reviewer returns the report text; **the lead commits it to the derived path** (so the subagent isn't asked to do git). + +**Step 4:** Add to **Integration** a `**Writes:**` line: `docs/plans/-design-review.md` / `-plan-review.md` (committed report). + +**Step 5: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh` +Expected: the three `#69` assertions now `PASS:` (overall still failing until later tasks). + +**Step 6: Commit.** +```bash +git add skills/adversarial-design-review/SKILL.md +git commit -m "feat(adversarial-review): mandate committed findings report w/ stable IDs (#69)" +``` + +--- + +### Task 3: D4 — plan-phase Identifier/naming-convention checklist row + +**Change class:** Documentation/skill-content. Verification: Task-1 test `#72` ADR assertion passes + content-grep clean. + +**Files:** +- Modify: `skills/adversarial-design-review/SKILL.md` ("Bug-class checklist — plan phase" table) + +**Step 1:** Add one row to the plan-phase table (after `Config-validation schema rules`): +> \| **Identifier / naming-convention match** \| Config keys, flags, env vars, and command/code examples in the plan match the repo's established naming convention and the identifiers the code will actually use (grep the repo for the convention; a plan showing `snake_case` keys where the codebase uses `camelCase` = finding). **Distinct from `Config-validation schema rules`**, which checks tool-enforced schema invariants — this row checks human naming-convention consistency. 
Catches design-vs-code drift before code is written. \| + +**Step 2: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh` +Expected: `#72 ADR plan-phase has Identifier/naming-convention row` → `PASS:`. + +**Step 3: Commit.** +```bash +git add skills/adversarial-design-review/SKILL.md +git commit -m "feat(adversarial-review): plan-phase naming-convention checklist row (#72)" +``` + +--- + +### Task 4: D2 — retro reads committed report + activation jsonl (3 edit sites + token consumer) + +**Change class:** Documentation/skill-content. Verification: Task-1 test `#70` assertions pass + content-grep clean. **This is the highest-care task — three edit sites + scalpel precision.** + +**Files:** +- Modify: `skills/post-merge-retrospective/SKILL.md` (Step 2; Step 5; the `## Missed skill activations` format template ~line 99; the `**Reads:**` integration bullets ~line 159–160; add the Step-1e missed-activation row) + +**Step 1 — Step 2 (score findings):** state that the report path is derived by the **same deterministic rule as D1** (drop `.md`; design→`+-review.md`, plan→`+-plan-review.md`); read each finding's stable ID; read the optional `resolution` column as a scoring hint, **falling back to downstream evidence (code-review threads, CI) when blank or when the report is an old no-ID format**. If the report is absent → "no committed review report; reconstructed from revision history" (the explicit fallback; note most pre-v6.4.0 features have none). + +**Step 2 — Step 5 (score activations):** make `.claude/autodev-state/in-progress.jsonl` the **primary** source (written by `record-activity` in any repo); read phase from the `args` field of **`ev:"skill"`** entries (the Agent-dispatched reviewer's `ev:"agent"` record has no phase — ignore it for phase). Demote `tests/skill-activation-audit.sh` to "(kit-dev convenience; absent in consumer repos)". If the jsonl is absent → "activation log unavailable" rows, **never** "script does not exist". 
+ +**Step 3 — format template (`## Missed skill activations`, ~line 99):** change `Pull from \`tests/skill-activation-audit.sh\`.` to read from `.claude/autodev-state/in-progress.jsonl` (the audit script noted as kit-dev-only). Add one row to that table's example: `| finishing Step 1e (doc-reconciliation) | yes/unverified | only when the diff touched docs/examples |`. + +**Step 4 — Step-1e token consumer (D2/N3):** add a sentence to Step 5 (or the Missed-activations section): "When the merged PR's diff touched docs/examples, record `finishing Step 1e` as fired iff a `Doc-reconciliation:` line is present in the PR body, else `unverified`. If the diff touched no docs/examples, record no row (Step 1e legitimately did not fire)." *(precondition resolves cycle-3 M1.)* + +**Step 5 — Reads bullets (~line 159–160, SCALPEL):** keep the `.claude/autodev-state/in-progress.jsonl (if present)` line as-is; on the `tests/skill-activation-audit.sh (this repo)` line, change to `tests/skill-activation-audit.sh (kit-dev convenience; absent in consumer repos)`. Also update the line-156 `docs/plans/ (design, plan, adversarial-review reports)` to note reports are now committed by `adversarial-design-review` per the deterministic path. + +**Step 6: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh` +Expected: all three `#70` assertions `PASS:` (incl. the negative assertion that "Pull from …skill-activation-audit.sh" is gone). + +**Step 7: Commit.** +```bash +git add skills/post-merge-retrospective/SKILL.md +git commit -m "feat(retro): read committed report + activation jsonl; demote kit-local script (#70)" +``` + +--- + +### Task 5: D3 — finishing-a-development-branch Step 1e (doc-reconciliation gate) + +**Change class:** Documentation/skill-content. Verification: Task-1 test `#71/#72` finishing assertions pass + content-grep clean. 
**Two edit sites: the Step body AND the Autonomous Mode list.** + +**Files:** +- Modify: `skills/finishing-a-development-branch/SKILL.md` (Autonomous Mode numbered list ~line 23; new `### Step 1e` after the Step 1d section ~line 157; the `Continue to Step 1d`/`continue to Step 2` transition pointers) + +**Step 1 — Autonomous Mode list (~line 23):** after the existing "Run Step 1d (Scope Completeness Check)" item, insert: "Run Step 1e (Doc-Reconciliation Check) — conditional on the diff containing a design/reference doc or example artifact." (Renumber the following list items.) + +**Step 2 — new `### Step 1e: Doc-Reconciliation Check`** (after the Step 1d section, before `### Step 2`): +> **Trigger:** the PR's diff commits a design doc, reference/standards doc, or example artifact that describes the feature's behavior. Skip for code-only / test-only diffs. (A doc with no corresponding `docs/plans/` design/plan trivially passes `clean`.) +> +> For each such committed doc/example, verify: +> - **(a) Scope (forward-ref, #71):** every behavior/identifier it describes is in *this PR's* manifest scope, OR explicitly labeled `Planned (PR #N)` / `Planned — later PR`. Unlabeled forward references = finding → label them or move the prose to the later PR. +> - **(b) Identifier drift (#72):** concrete identifiers (config keys, flags, env vars, command invocations, DDL/code snippets, format strings) match the identifiers the code on this branch actually uses + the repo's naming convention. Mismatch = finding → reconcile the doc to the built code. +> +> This is a checklist gate (read the diff, grep identifiers), **not** an automated scanner. On a finding in autonomous mode, fix the doc in-branch before PR (in-scope doc edit, no manifest change). Distinct from `scope-lock`'s assumption-backport (which is for *disproved assumptions*) — this is routine accuracy reconciliation. 
+> +> **Accountability token:** emit one line into the PR body — `Doc-reconciliation: clean` or `Doc-reconciliation: N item(s) fixed — ` — so pr-monitoring, the human reviewer, and `post-merge-retrospective` (Step 5 missed-activation row) can confirm the gate ran without a script. + +**Step 3 — transition pointers:** ensure the Step 1d section ends pointing to Step 1e, and Step 1e ends pointing to Step 2 (Determine Base Branch). Update the line-157 "Do not proceed past Step 1d …" wording only if it implies 1d is the last sub-step. + +**Step 4: Run the test slice.** Run: `bash tests/pipeline-evidence-doc-sync.sh` +Expected: all `#71/#72` finishing assertions `PASS:` (incl. the Autonomous-Mode-region check). + +**Step 5: Commit.** +```bash +git add skills/finishing-a-development-branch/SKILL.md +git commit -m "feat(finishing): Step 1e doc-reconciliation gate, body + autonomous list (#71 #72)" +``` + +--- + +### Task 6: Wire the regression test into CI + full local verification + +**Change class:** Hook/trigger (CI) + verification. Verification: the new test GREEN; `skill-content-grep.sh` clean; `skill-cross-refs.sh` clean; YAML valid. 
+ +**Files:** +- Modify: `.github/workflows/skill-content-check.yml` (add a step running the new test + add it to the `paths` filters) + +**Step 1:** In `skill-content-check.yml`, add `tests/pipeline-evidence-doc-sync.sh` **and** `tests/skill-cross-refs.sh` to both `push.paths` and `pull_request.paths`, and add two steps after the existing content-grep step (plan-review P2: `skill-cross-refs.sh` already exists but was local-only — wire it into CI for free while the workflow is open): +```yaml + - name: Pipeline evidence + doc-sync contracts + run: bash tests/pipeline-evidence-doc-sync.sh + - name: Skill cross-references resolve + run: bash tests/skill-cross-refs.sh +``` +*(If `skill-cross-refs.sh` surfaces a pre-existing unresolved reference unrelated to this PR, do not expand scope to fix unrelated skills — instead keep it local-only for this PR and note the pre-existing failure in the PR body. Only wire it into CI if it passes clean on the current tree.)* + +**Step 2: Run the FULL local gate** (all must be green now that Tasks 2–5 landed): +```bash +bash tests/pipeline-evidence-doc-sync.sh # Expected: Results: 0 failure(s), exit 0 +bash tests/skill-content-grep.sh # Expected: exit 0 (no host-token leaks in edited skills) +bash tests/skill-cross-refs.sh # Expected: exit 0 (new step/path references resolve) +``` +Expected: all three exit 0. *(Resolves design D10.)* If `skill-content-grep.sh` flags a host-token in any edited skill, move that token inside a `` block. + +**Step 3: Commit.** +```bash +git add .github/workflows/skill-content-check.yml +git commit -m "ci: run pipeline-evidence-doc-sync contract test on skill changes" +``` + +--- + +### Task 7: Version bump → v6.4.0 + +**Change class:** Version pin (runtime-affecting — release). Verification: `tests/version-check.sh` green (3 manifests agree). 
**Rollback: revert the merge commit + re-tag the prior version (v6.3.1); no data/migration to unwind.** + +**Files:** +- Modify (via script): `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `.cursor-plugin/plugin.json` + +**Step 1:** Run the bump script: +```bash +bash scripts/bump-version.sh 6.4.0 +``` + +**Step 2: Verify the 3 manifests agree.** Run: `bash tests/version-check.sh` +Expected: exit 0 (all three manifests report `6.4.0`). + +**Step 3: Confirm no stray version mismatch.** Run: `grep -rn '"version"' .claude-plugin/plugin.json .claude-plugin/marketplace.json .cursor-plugin/plugin.json` +Expected: each shows `6.4.0`. + +**Step 4: Commit.** +```bash +git add .claude-plugin/plugin.json .claude-plugin/marketplace.json .cursor-plugin/plugin.json +git commit -m "chore(release): bump to v6.4.0 (#69 #70 #71 #72)" +``` + +*(Pushing this commit to `main` triggers `release-tag.yml`, which tags `v6.4.0` after `version-check.sh` passes. The GH Release is created manually post-merge per the kit's convention.)* + +--- + +## Verification Summary (whole-PR) + +Before PR creation, all green: +- `bash tests/pipeline-evidence-doc-sync.sh` → `Results: 0 failure(s)` +- `bash tests/skill-content-grep.sh` → exit 0 +- `bash tests/skill-cross-refs.sh` → exit 0 +- `bash tests/version-check.sh` → exit 0 +- Step 1e self-check on THIS PR: it commits design/plan/review docs → emit `Doc-reconciliation: …` in the PR body (dogfood the new gate). + +## Rollback (whole-PR) + +All edits are skill-markdown + one test + a version bump. Rollback = `git revert` the squash-merge commit + re-tag `v6.3.1` as latest. The committed-report path is additive (reverting just stops writing it; the retro's graceful-degrade covers absence). No runtime state, migrations, or external resources involved. 
diff --git a/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md.scope-lock b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md.scope-lock new file mode 100644 index 0000000..8dbd142 --- /dev/null +++ b/docs/plans/2026-06-03-pipeline-evidence-doc-sync.md.scope-lock @@ -0,0 +1 @@ +6cb6ebd9f6eba987c645f9e707ee85606c10c8649c93f5f84760352ae3c685b2 diff --git a/skills/adversarial-design-review/SKILL.md b/skills/adversarial-design-review/SKILL.md index 1f6ddc9..98c196e 100644 --- a/skills/adversarial-design-review/SKILL.md +++ b/skills/adversarial-design-review/SKILL.md @@ -114,6 +114,7 @@ inherits the design's blast radius) and adds: | **Infrastructure verification mismatch** | For infrastructure-affecting changes, does the plan verify render/plan/apply/dry-run, secret wiring, migration order, rollback, and post-deploy health as appropriate? If not, flag it. | | **Plugin-loader runtime layout** | Plans that spawn or load an external plugin process must build the binary in a layout the host's discovery code accepts. For wfctl: `$WFCTL_PLUGIN_DIR//` + sibling `plugin.json`. Plans that `go build -o /tmp/single-binary` without the subdir + manifest sidecar will fail at runtime. | | **Config-validation schema rules** | Plans that create new config files validated by a schema or CLI tool must satisfy that tool's invariants (e.g., for wfctl: `checkEntryPoints` requires ≥1 entry-point module like `http.server`/`scheduler.modular`/`messaging.broker`, OR a trigger/route/subscription/job/pipeline). Plans omitting required entry-point modules pass `bash -n` but fail schema validation at CI. | +| **Identifier / naming-convention match** | Config keys, flags, env vars, and command/code examples in the plan match the repo's established naming convention and the identifiers the code will actually use (grep the repo for the convention; a plan showing `snake_case` keys where the codebase uses `camelCase` = finding). 
**Distinct from `Config-validation schema rules`**, which checks tool-enforced schema invariants — this row checks human naming-convention consistency. Catches design-vs-code drift before code is written. | ## Process @@ -137,9 +138,7 @@ inherits the design's blast radius) and adds: 6. **Surface options, not just objections.** For findings, propose a concrete fix or alternative. "This design assumes X" → "Alternative: state X explicitly, and add a fallback if X is false at runtime." -7. **Write the report.** Format below. Commit verdict: PASS / FAIL. - Use `autodev:condensed-pipeline-writing` for report density unless the - user asked for prose. +7. **Write AND commit the report.** Derive the path from the artifact filename: drop `.md`, then for `--phase=design` append `-review.md` (e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`); for `--phase=plan` append `-plan-review.md` (e.g. `2026-06-03-…-doc-sync.md` → `2026-06-03-…-doc-sync-plan-review.md`). This matches the existing `docs/plans/2026-05-31-session-owned-lock-claims-design-review.md` convention. The **lead** writes the report text the reviewer produced to that path and commits it alongside the artifact (the subagent has no git authority). Re-runs update the same single per-phase file (append a `## Cycle N` section across cycles); safe under sequential execution. Use the report format below and state an explicit verdict in it: PASS or FAIL. Use `autodev:condensed-pipeline-writing` for report density unless the user asked for prose. ## Report format @@ -151,13 +150,15 @@ inherits the design's blast radius) and adds: **Status:** PASS | FAIL **Findings (Critical):** -- [class] [section/line]: . Recommendation: . +- `D1` [class] [section/line]: . Recommendation: . _Resolution: ._ **Findings (Important):** -- [class] [section/line]: . Recommendation: . +- `D2` [class] [section/line]: . Recommendation: . _Resolution: ._ **Findings (Minor):** -- [class] [section/line]: . Recommendation: . +- `D3` [class] [section/line]: . Recommendation: . 
_Resolution: ._ + +Design-phase finding IDs are `D1, D2, …`; plan-phase `P1, P2, …`, numbered **sequentially across all findings regardless of severity** (`D1` is the first finding overall, not the first Critical). IDs are the durable anchor `post-merge-retrospective` correlates against; the optional `Resolution` is a scoring hint (retro falls back to downstream evidence when omitted). Each finding has a **stable finding ID** as its first token. **Bug-class scan transcript:** | Class | Result | Note | @@ -273,6 +274,8 @@ Agent tool (general-purpose, model: balanced): Important finding either has a fix recommendation accepted by the author or is escalated as an open question. Otherwise FAIL. ```` + +The reviewer returns the report text. **The lead commits it to the derived path** (drop `.md`, append `-review.md` for design or `-plan-review.md` for plan) — the subagent has no git authority. @@ -293,6 +296,9 @@ mindset, ≥3 findings or full transcript, no reflexive approval. `alignment-check`. - Manual — user invokes against any artifact in `docs/plans/`. +**Writes:** +- `docs/plans/-design-review.md` (design phase) / `docs/plans/-plan-review.md` (plan phase) — committed report. + **Calls:** - `brainstorming` — on FAIL during `--phase=design`, for revision. - `writing-plans` — on FAIL during `--phase=plan`, for revision. diff --git a/skills/finishing-a-development-branch/SKILL.md b/skills/finishing-a-development-branch/SKILL.md index e055713..d97789f 100644 --- a/skills/finishing-a-development-branch/SKILL.md +++ b/skills/finishing-a-development-branch/SKILL.md @@ -21,8 +21,9 @@ When running in the autonomous pipeline (invoked from subagent-driven-developmen 1. **Verify tests pass** — same as manual mode, abort if failing 2. **Run Step 1d (Scope Completeness Check)** — see below. This is a mandatory gate in autonomous mode. The agent MUST NOT silently collapse N planned PRs into 1, nor declare success on a partial scope. 
If Step 1d surfaces a failure, the autonomous pipeline halts and asks the user. -3. **Skip option presentation** — go directly to PR creation -4. **For every PR row in the manifest's PR Grouping table, create one PR.** The manifest is the contract. If the table has 3 rows, the autonomous run produces 3 PRs, each pointing at the branch named in the row. Do NOT collapse rows — collapsing is the exact failure mode `skills/scope-lock/SKILL.md` defends against. Per-PR steps: +3. **Run Step 1e (Doc-Reconciliation Check)** — conditional on the diff containing a design/reference doc or example artifact. See below. +4. **Skip option presentation** — go directly to PR creation +5. **For every PR row in the manifest's PR Grouping table, create one PR.** The manifest is the contract. If the table has 3 rows, the autonomous run produces 3 PRs, each pointing at the branch named in the row. Do NOT collapse rows — collapsing is the exact failure mode `skills/scope-lock/SKILL.md` defends against. Per-PR steps: ```bash feature_branch="" feature_name="" @@ -63,8 +64,8 @@ When running in the autonomous pipeline (invoked from subagent-driven-developmen EOF )" ``` -5. **Invoke pr-monitoring** — spawn a background monitor for all PRs created in this session; prefer a single agent covering all PRs to avoid GitHub API rate limits, but one agent per PR is acceptable if the PRs are on unrelated codebases or a previous shared monitor was rate-limited -6. **Report PR URLs** — output every PR link for the user (one per row in the manifest's PR Grouping table) +6. **Invoke pr-monitoring** — spawn a background monitor for all PRs created in this session; prefer a single agent covering all PRs to avoid GitHub API rate limits, but one agent per PR is acceptable if the PRs are on unrelated codebases or a previous shared monitor was rate-limited +7. 
**Report PR URLs** — output every PR link for the user (one per row in the manifest's PR Grouping table) **Do NOT:** - Present the 4-option menu in autonomous mode @@ -154,7 +155,21 @@ Action: - **PR count mismatch (autonomous mode):** if the manifest expects N PRs but the branch layout produced fewer, the agent must split the branch via `git rebase --onto` per the manifest's grouping table — NOT collapse the manifest. Collapsing N planned PRs into 1 is exactly the failure mode `scope-lock` exists to prevent. - **Locked-hash mismatch:** the manifest has been edited after the lock. Surface the diff and stop. The user must either revert the edit or go through the amendment path (`recording-decisions` + re-run alignment-check). -Do not proceed past Step 1d on any failure without explicit user direction. There is no "demo mode" — see the anti-patterns in `skills/scope-lock/SKILL.md`. +Do not proceed past Step 1d on any failure without explicit user direction. There is no "demo mode" — see the anti-patterns in `skills/scope-lock/SKILL.md`. Continue to Step 1e. + +### Step 1e: Doc-Reconciliation Check + +**Trigger:** the PR's diff commits a design doc, reference/standards doc, or example artifact that describes the feature's behavior. Skip for code-only / test-only diffs. (A doc with no corresponding `docs/plans/` design/plan trivially passes `clean`.) + +For each such committed doc/example, verify: +- **(a) Scope (forward-ref, #71):** every behavior/identifier it describes is in *this PR's* manifest scope, OR explicitly labeled `Planned (PR #N)` / `Planned — later PR`. Unlabeled forward references = finding → label them or move the prose to the later PR. +- **(b) Identifier drift (#72):** concrete identifiers (config keys, flags, env vars, command invocations, DDL/code snippets, format strings) match the identifiers the code on this branch actually uses + the repo's naming convention. Mismatch = finding → reconcile the doc to the built code. 
+ +This is a checklist gate (read the diff, grep identifiers), **not** an automated scanner. On a finding in autonomous mode, fix the doc in-branch before PR (in-scope doc edit, no manifest change). Distinct from `scope-lock`'s assumption-backport (which is for *disproved assumptions*) — this is routine accuracy reconciliation. + +**Accountability token:** emit one line into the PR body — `Doc-reconciliation: clean` or `Doc-reconciliation: N item(s) fixed — <summary>` — so pr-monitoring, the human reviewer, and `post-merge-retrospective` (Step 5 missed-activation row) can confirm the gate ran without a script. + +Continue to Step 2. ### Step 2: Determine Base Branch diff --git a/skills/post-merge-retrospective/SKILL.md b/skills/post-merge-retrospective/SKILL.md index 7162671..39964c8 100644 --- a/skills/post-merge-retrospective/SKILL.md +++ b/skills/post-merge-retrospective/SKILL.md @@ -36,6 +36,7 @@ If the PR was opened ad-hoc (no design / plan in `docs/plans/`), this skill exit - Any ADRs cited from the design or plan 2. **Score each adversarial-review finding.** + Derive the report path by the **same deterministic rule `adversarial-design-review` uses to commit the report**: take the artifact filename, drop `.md`, then design → append `-review.md`, plan → append `-plan-review.md` (e.g. `…-doc-sync-design.md` → `…-doc-sync-design-review.md`; `2026-06-03-…-doc-sync.md` → `2026-06-03-…-doc-sync-plan-review.md`). Read the committed `…-design-review.md` / `…-plan-review.md` report(s). For each finding, use its stable ID; read the optional `Resolution` column as a scoring hint, **falling back to downstream evidence (code-review threads, CI) when blank or when the report is an old no-ID format**. If the report is absent → "no committed review report; reconstructed from revision history" (most pre-v6.4.0 features have none). 
For every finding raised in either phase's adversarial-review report, classify it as one of: - **Prescient** — the finding called out something that turned out to matter (showed up as a code-review comment, CI failure, follow-up bug fix, or revert). - **Resolved upfront** — the finding was addressed during plan revision and prevented an issue downstream (no code-review comment / CI failure traces back to it). @@ -49,9 +50,11 @@ If the PR was opened ad-hoc (no design / plan in `docs/plans/`), this skill exit For each unique CI failure on the branch, ask: was this caught by `verification-before-completion` / `runtime-launch-validation` / something else, or did it slip past every local gate? Slips are gate misses too. 5. **Score skill activations.** - Read `.claude/autodev-state/in-progress.jsonl` (if present in the repo's `.claude/` directory) and verify the expected pipeline ran. The canonical chain documented in `skills/using-autodev/SKILL.md` is: + **Primary source: `.claude/autodev-state/in-progress.jsonl`** (written by the `record-activity` PostToolUse hook in any repo — not kit-dev-only). Read phase from the `args` field of `ev:"skill"` entries (the lead's `Skill` invocation carries `args:"--phase=design|plan …"`); the Agent-dispatched reviewer subagent is a separate `ev:"agent"` record without a phase and is ignored for phase attribution. If the jsonl is absent → emit "activation log unavailable" rows, never "script does not exist". `tests/skill-activation-audit.sh` (kit-dev convenience; absent in consumer repos) may be used to cross-check in the kit repo itself — it reports each skill once, so cross-check phase counts against the jsonl's `args=--phase=` entries when both phases are required. + Verify the expected pipeline ran. 
The canonical chain documented in `skills/using-autodev/SKILL.md` is: `brainstorming → adversarial-design-review (design) → writing-plans → adversarial-design-review (plan) → alignment-check → subagent-driven-development → finishing-a-development-branch → pr-monitoring → post-merge-retrospective`. - For each gate that was *expected* to fire and didn't, that's a missed-activation. Use `tests/skill-activation-audit.sh` (this repo) to confirm what fired — note that the audit script reports each skill once even when invoked twice (e.g., adversarial-design-review for both phases), so cross-check phase counts against the JSONL `args=--phase=` entries when both phases are required. + For each gate that was *expected* to fire and didn't, that's a missed-activation. + When the merged PR's diff touched docs/examples, record `finishing Step 1e` as fired iff a `Doc-reconciliation:` line is present in the PR body, else `unverified`. If the diff touched no docs/examples, record no row (Step 1e legitimately did not fire). 6. **Backfeed project design guidance.** Invoke `autodev:project-design-guidance`. If the merged work reveals a @@ -96,13 +99,14 @@ If there are zero gate misses, write: "No gate misses this PR. All downstream is ## Missed skill activations -Pipeline gates expected to fire (per `using-autodev`): list any that didn't. Pull from `tests/skill-activation-audit.sh`. +Pipeline gates expected to fire (per `using-autodev`): list any that didn't. Read from `.claude/autodev-state/in-progress.jsonl` (`ev:"skill"` entries; `tests/skill-activation-audit.sh` is a kit-dev convenience only, absent in consumer repos). | Gate | Fired? | Notes | |---|---|---| | brainstorming | yes | | | adversarial-design-review (design) | yes | | | adversarial-design-review (plan) | no | | +| finishing Step 1e (doc-reconciliation) | yes/unverified | only when the diff touched docs/examples | | ... | ... | | ## What worked @@ -153,11 +157,11 @@ The retro is intentionally short. 
Long retros don't get read. The format above f guidance change. **Reads:** -- `docs/plans/` (design, plan, adversarial-review reports) +- `docs/plans/` (design, plan, adversarial-review reports — reports now committed by `adversarial-design-review` per the deterministic path rule) - `decisions/` (ADRs cited from the design / plan) - `gh pr view`, `gh pr review-comments`, `gh run list` - `.claude/autodev-state/in-progress.jsonl` (if present) -- `tests/skill-activation-audit.sh` (this repo) +- `tests/skill-activation-audit.sh` (kit-dev convenience; absent in consumer repos) - `docs/design-guidance.md` or equivalent project guidance, if present **Writes:**
diff --git a/tests/pipeline-evidence-doc-sync.sh b/tests/pipeline-evidence-doc-sync.sh
new file mode 100755
index 0000000..35cdd04
--- /dev/null
+++ b/tests/pipeline-evidence-doc-sync.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# tests/pipeline-evidence-doc-sync.sh
+# Regression guard for issues #69/#70/#71/#72 (v6.4.0). Asserts the skill
+# contracts these issues fixed remain present, so they cannot silently regress.
+set -uo pipefail  # no -e: each check below is a `&& pass || bad` chain that handles its own failure
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"  # absolute path of the parent of this script's directory
+ADR="$ROOT/skills/adversarial-design-review/SKILL.md"
+RETRO="$ROOT/skills/post-merge-retrospective/SKILL.md"
+FIN="$ROOT/skills/finishing-a-development-branch/SKILL.md"
+fail=0  # count of failed assertions; the script's last command tests it to set the exit status
+pass(){ printf 'PASS: %s\n' "$1"; }  # pass MSG: report a passing check on stdout
+bad(){ printf 'FAIL: %s\n' "$1" >&2; fail=$((fail+1)); }  # bad MSG: report on stderr and bump the failure count
+has(){ grep -qiF "$2" "$1"; } # has FILE TEXT: case-insensitive literal substring
+hasE(){ grep -qiE "$2" "$1"; } # hasE FILE REGEX: case-insensitive extended regex
+
+# --- #69 (D1): adversarial-design-review mandates committing the report ---
+hasE "$ADR" '(-design-review\.md|-plan-review\.md)' \
+  && pass "#69 ADR cites the -design-review.md/-plan-review.md convention" \
+  || bad "#69 ADR missing committed-report convention path"
+# P1: assert the SPECIFIC new mandate wording, not the ambient word "commit"
+has "$ADR" "Write AND commit the report" \
+  && pass "#69 ADR mandates writing+committing the report" \
+  || bad "#69 ADR does not mandate writing+committing the report"
+hasE "$ADR" 'stable (finding )?ID' \
+  && pass "#69 ADR defines stable finding IDs" \
+  || bad "#69 ADR missing stable finding IDs"  # no '.*' in the regex: under -i, 'stable .*ID' also matched e.g. "stable ... provided"
+# P4/M1: guard the load-bearing D1<->D2 path contract -- retro must cite the SAME derivation.
+# Assert the specific load-bearing phrase only (dropping the broad '-plan-review.md' OR branch,
+# which is ambient vocabulary that could false-pass on an incidental path mention).
+has "$RETRO" "same deterministic rule" \ + && pass "#69/#70 retro derives the report path by the same rule (D1<->D2 contract)" \ + || bad "#69/#70 retro missing the shared path-derivation rule" + +# --- #70 (D2): retro reads the jsonl as PRIMARY; script demoted, NOT a hard dep --- +# P1: assert the jsonl is the PRIMARY source (only true after Task 4), not merely mentioned +hasE "$RETRO" 'primary source.*in-progress\.jsonl|in-progress\.jsonl.*primary' \ + && pass "#70 retro makes in-progress.jsonl the primary activation source" \ + || bad "#70 retro does not promote in-progress.jsonl to primary" +# The format template must NOT instruct 'Pull from tests/skill-activation-audit.sh' +grep -qiE 'Pull from .*skill-activation-audit\.sh' "$RETRO" \ + && bad "#70 retro STILL instructs 'Pull from tests/skill-activation-audit.sh' (line ~99 not demoted)" \ + || pass "#70 retro format template no longer hard-depends on the kit-local script" +has "$RETRO" "kit-dev" \ + && pass "#70 retro marks the audit script kit-dev-only" \ + || bad "#70 retro does not demote the audit script to kit-dev-only" + +# --- #71/#72 (D3): finishing has Step 1e in BOTH body and autonomous list --- +hasE "$FIN" 'Step 1e' \ + && pass "#71/#72 finishing has Step 1e body" \ + || bad "#71/#72 finishing missing Step 1e body" +has "$FIN" "Doc-reconciliation" \ + && pass "#71/#72 finishing emits Doc-reconciliation token" \ + || bad "#71/#72 finishing missing Doc-reconciliation accountability token" +# Step 1e must be referenced in the Autonomous Mode numbered list region (top of file, before '### Step 1:') +auto_region="$(awk '/^## Autonomous Mode/{f=1} /^### Step 1: Verify Tests/{f=0} f' "$FIN")" +printf '%s' "$auto_region" | grep -qiE 'Step 1e' \ + && pass "#71/#72 Step 1e wired into Autonomous Mode list" \ + || bad "#71/#72 Step 1e NOT in Autonomous Mode list (would never fire autonomously)" + +# --- #72 (D4): plan-phase naming-convention checklist row --- +hasE "$ADR" 'naming.convention match|Identifier / 
naming' \ + && pass "#72 ADR plan-phase has Identifier/naming-convention row" \ + || bad "#72 ADR plan-phase missing naming-convention row" + +echo ""; echo "Results: $fail failure(s)"; [ "$fail" -eq 0 ]