diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index f4dafd1..91db5b7 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
     {
       "name": "autodev",
       "description": "Autonomous development workflow skills for coding agents",
-      "version": "6.2.2",
+      "version": "6.3.0",
       "source": "./",
       "author": {
         "name": "Jon Langevin",
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index 9728fa1..940fae4 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "autodev",
   "description": "Autonomous development workflow skills for coding agents: design, review, planning, execution, monitoring, and retrospectives",
-  "version": "6.2.2",
+  "version": "6.3.0",
   "author": {
     "name": "Jon Langevin",
     "email": "jon@gocodealone.com"
diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json
index e77ff1f..47f5913 100644
--- a/.cursor-plugin/plugin.json
+++ b/.cursor-plugin/plugin.json
@@ -2,7 +2,7 @@
   "name": "autodev",
   "displayName": "Autonomous Dev Kit",
   "description": "Autonomous development workflow skills for coding agents",
-  "version": "6.2.2",
+  "version": "6.3.0",
   "author": {
     "name": "Jon Langevin",
     "email": "jon@gocodealone.com"
diff --git a/.github/workflows/hooks-check.yml b/.github/workflows/hooks-check.yml
new file mode 100644
index 0000000..cee7beb
--- /dev/null
+++ b/.github/workflows/hooks-check.yml
@@ -0,0 +1,29 @@
+name: Hooks Check
+on:
+  push:
+    paths:
+      - 'hooks/**'
+      - 'tests/hook-contracts.sh'
+      - 'tests/hook-stdout-discipline.sh'
+      - '.github/workflows/hooks-check.yml'
+  pull_request:
+    paths:
+      - 'hooks/**'
+      - 'tests/hook-contracts.sh'
+      - 'tests/hook-stdout-discipline.sh'
+      - '.github/workflows/hooks-check.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  hooks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install jq
+        run: sudo apt-get update && sudo apt-get install -y jq
+      - name: Hook stdout discipline tests
+        run: bash tests/hook-stdout-discipline.sh
+      - name: Hook contract tests
+        run: bash tests/hook-contracts.sh
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index d6a71d7..a08af16 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,5 +1,45 @@
 # Autonomous Dev Kit Release Notes
 
+## v6.3.0 — 2026-06-01
+
+Pipeline-hardening release closing five recurring gate-miss / context-waste issues
+observed across autonomous runs and Codex compaction.
+
+- **`adversarial-design-review` — auth/authz chain-composition bug-class (#59):** a new
+  plan-phase row that walks the design's auth/authz chain component-by-component against
+  the plan's wiring and flags any gate enforced by a *client-asserted* value
+  (`evidence.granted_permissions`, a header) instead of server-side against an
+  authenticated principal.
+- **`pr-monitoring` — sanctioned bash poll-loop (#60):** documents the host-scoped
+  CI-wait pattern. Under Claude Code, a bounded `run_in_background` bash sleep-loop that
+  blocks to completion and re-invokes the lead once on settle (the prior background-Agent
+  monitor early-exited ~6× per run); Codex/Cursor use a self-poll-on-wakeup fallback.
+- **`subagent-driven-development` / `team-conventions` — completion trust-boundary (#58,
+  ADR 0003):** a flipped `Implement: N` is a claim, not evidence — the lead must run
+  `verification-before-completion` before trusting it. A deterministic hook-block is
+  infeasible (the pre-tool payload lacks the task subject + caller identity), so
+  correctness rests on lead verification, not on who flipped the checkbox.
+- **`run-hook.cmd` — stdout JSON discipline (#41):** the wrapper now captures each hook's
+  stdout and emits only valid-JSON-or-empty to the host's hook parser, recovering a block
+  decision even when a locale/diagnostic warning precedes it (previously such noise could
+  invalidate the hook's JSON). Diagnostics are routed to stderr; jq-absent hosts pass
+  through unchanged. New `tests/hook-stdout-discipline.sh`.
+- **`pretool-pr-review-reminder` — once-per-session (#61):** the gh-version/Copilot
+  reviewer reminder now emits once per session (deduped via a `.claude/autodev-state`
+  marker, quote-strip-matched so a quoted `--body` mentioning `gh pr create` no longer
+  trips it) and is reset by `pre-compact-snapshot` so it re-emits once after a compaction.
+- **`adversarial-design-review` — artifact-class precedent (#63):** a new design-phase row
+  that surveys how the codebase already implements an *artifact class* (where a scenario
+  stands up a server, where a fixture lives — `ls scenarios/*/cmd/server/main.go`), not
+  just the *mechanism*; grep for sibling instances and follow the established shape or
+  justify divergence.
+- **`session-start` — Linux time-dedup fix (#64):** the SessionStart hook tried BSD
+  `stat -f %m` before GNU `stat -c %Y`; on Linux `stat -f` succeeds-but-wrong (fs info),
+  so the time-based dedup never suppressed re-fires. Now GNU-first with a numeric guard —
+  fixing re-fire spam for all Linux autodev users.
+- **CI:** new `hooks-check.yml` runs the hook contract + stdout-discipline tests on any
+  `hooks/`/test change, so these fixes are regression-gated.
+
 ## v6.2.2 — 2026-05-31
 
 New **Existence / runtime-validity** bug-class in `adversarial-design-review`
diff --git a/agents/team-conventions.md b/agents/team-conventions.md
index 86c4ef0..dac0a5e 100644
--- a/agents/team-conventions.md
+++ b/agents/team-conventions.md
@@ -25,6 +25,9 @@ them.
 - Request code review per `skills/requesting-code-review/SKILL.md`
   using the adversarial-framing brief.
 - Address review per `skills/receiving-code-review/SKILL.md`.
+- **Never self-complete an `Implement: N` task.** Do not flip your own Implement task
+  to `completed` and do not clear its `blockedBy` to back-door completion. DM the
+  spec-reviewer when ready; the code-reviewer is the sole flipper of Implement tasks.
 
 ## Spec reviewer
 
@@ -46,6 +49,18 @@ them.
   `skills/requesting-code-review/SKILL.md` until verdict is SHIP-IT
   (or REVERT-AND-REWRITE after max rounds).
 - Reflexive approval is forbidden.
+- **You are the sole role that flips an `Implement: N` task to `completed`**, and only
+  after quality review passes. Flip BOTH the "Review quality:" task and the
+  corresponding "Implement:" task.
+
+## Lead / orchestrator
+
+- **A `completed` Implement-N is a claim, not evidence.** Whoever flipped it, run
+  `skills/verification-before-completion/SKILL.md` (clean build + tests + CI green)
+  before accepting the task as done or invoking
+  `skills/finishing-a-development-branch/SKILL.md`. Correctness rests on this gate, not
+  on who flipped the checkbox. See
+  `decisions/0003-implement-n-completion-trust-boundary.md`.
 
 ## Modes
 
diff --git a/decisions/0003-implement-n-completion-trust-boundary.md b/decisions/0003-implement-n-completion-trust-boundary.md
new file mode 100644
index 0000000..0f2c4dd
--- /dev/null
+++ b/decisions/0003-implement-n-completion-trust-boundary.md
@@ -0,0 +1,66 @@
+# 0003. Implement-N completion is a lead-verified trust boundary, not a hook-blocked invariant
+
+**Status:** Accepted
+**Date:** 2026-06-01
+**Decision-makers:** autodev maintainers, autonomous pipeline
+**Related:** issue #58; `skills/subagent-driven-development/SKILL.md`; `agents/team-conventions.md`; docs/plans/2026-06-01-pipeline-hardening-4issues-design.md
+
+## Context
+
+Across infra-admin v1 + v1.1 autonomous runs, `Implement: N` tasks were flipped to
+`completed` by implementers or via a blockedBy-clear *before* the code-reviewer's
+quality gate, violating the team-conventions contract ("only code-reviewer flips
+Implement-N to completed"). In v1.1 this masked a non-compiling tree (uncommitted
+helper) and a CI-failing hash regression — both reported "done"; only the lead's
+independent `verification-before-completion` pass caught them.
+
+Issue #58 asked for **plugin/harness enforcement**: reject
+`TaskUpdate(status=completed)` on `Implement: *` tasks unless `owner ==
+code-reviewer`.
+
+We investigated whether a deterministic plugin hook can enforce this. It cannot:
+
+- The PreToolUse payload for a `TaskUpdate` call carries the *tool input*
+  (`taskId`, `status`, `owner`) but **not** the task's current `subject`
+  ("Implement: N") nor the identity of the calling subagent. The task store is
+  harness state a bash hook cannot read (there is no `TaskList` available to a
+  hook).
+- Therefore a hook cannot reliably answer the two questions the block requires —
+  "is this taskId an Implement task?" and "is the caller the code-reviewer?". The
+  only deterministic option, "block all `status=completed`", would break every
+  legitimate completion (spec-review, quality-review, and the orchestrator's own).
+- The closest feasible hook approach — a `SubagentStop` hook reading
+  `transcript_path` to map `taskId → "Implement: N"` (via `TaskCreate` records) and
+  flag a matching `TaskUpdate(status=completed)` — was also rejected: it fires
+  *after* completion (warn, not block), sees only subagent completions (not a
+  lead-level `TaskUpdate`), depends on the unstable transcript JSONL shape, and is
+  racy across subagents. Fragile transcript-format coupling for marginal,
+  after-the-fact value.
+
+## Decision
+
+We reject the infeasible hard-block and instead **shift the trust boundary**: a
+flipped `Implement: N` is a *claim*, not *evidence*, and is **not trusted as done
+until the lead runs `autodev:verification-before-completion`** (build + test from a
+clean tree, CI green) before treating the task as complete or proceeding to
+`finishing-a-development-branch`.
+
+We encode this in `skills/subagent-driven-development/SKILL.md` (the
+"Completion is not trusted until lead-verified" rule) and restate the
+implementer/code-reviewer conventions in `agents/team-conventions.md`. We do **not**
+add an advisory `TaskUpdate` hook: it cannot block, it cannot identify Implement
+tasks, and it would fire on every completion (noise) for no enforcement gain.
+
+## Consequences
+
+The harm #58 names — a premature "done" masking a broken tree — is addressed at the
+point it actually bit: the lead's verification gate, which already caught both v1.1
+regressions. The convention ("code-reviewer is the sole flipper") remains as
+team discipline but is no longer load-bearing for correctness; correctness rests on
+lead verification, which does not depend on who flipped the checkbox.
+
+The limitation is documented so the infeasible hard-block is not re-proposed. If a
+future harness exposes the task subject **and** the calling subagent identity in
+the PreToolUse payload, the deterministic block becomes feasible and this ADR
+should be revisited (the convention is then mechanically enforceable). Until then,
+"green checkbox ≠ verified" is the operative rule.
diff --git a/decisions/0004-v6.3.0-scope-amendment-63-64.md b/decisions/0004-v6.3.0-scope-amendment-63-64.md
new file mode 100644
index 0000000..7698580
--- /dev/null
+++ b/decisions/0004-v6.3.0-scope-amendment-63-64.md
@@ -0,0 +1,48 @@
+# 0004. v6.3.0 scope amendment — fold in #63 (artifact-class precedent) and #64 (session-start Linux stat)
+
+**Status:** Accepted
+**Date:** 2026-06-01
+**Decision-makers:** autodev maintainers, autonomous pipeline, user (explicit approval)
+**Related:** issues #63, #64; docs/plans/2026-06-01-pipeline-hardening-4issues.md (Tasks 8, 9); ADR 0003
+
+## Context
+
+The v6.3.0 pipeline-hardening plan was locked with 7 tasks (#41/#58/#59/#60/#61 + CI
+gate + version bump). During execution two new items arose:
+
+- **#64 (bug, execution-discovered):** Task 6's new `hooks-check.yml` ran
+  `tests/hook-contracts.sh` on ubuntu for the first time and surfaced a **pre-existing**
+  Linux bug in `hooks/session-start` — BSD `stat -f %m` is tried before GNU `stat -c %Y`,
+  and on Linux `stat -f` succeeds-but-wrong (prints fs info), breaking the time-dedup. The
+  fix is a prerequisite for Task 6's deliverable (a green CI gate that runs
+  `hook-contracts.sh`).
+- **#63 (enhancement, newly filed):** add a design-phase `adversarial-design-review`
+  check that surveys how the codebase implements the *artifact class* (where a scenario
+  stands up a server, where a fixture lives), not just the *mechanism*. Same skill the
+  release already edits for #59.
+
+The user explicitly approved addressing both ("if you found a new issue, go ahead and
+address it"; "pick up the new filed issues and address them as well").
+
+## Decision
+
+Amend the locked v6.3.0 manifest from 7 to 9 tasks (single PR unchanged): add Task 8
+(#64 session-start `stat` ordering + numeric guard + re-enable `hook-contracts.sh` in CI)
+and Task 9 (#63 `Artifact-class precedent` design-phase bug-class row). Both are small,
+additive, host-neutral, and thematically identical to the locked hardening scope. The
+original `Locked` stamp is preserved here for audit; the plan re-stamps to `Amended` and
+re-runs `alignment-check`.
+
+We fold them into v6.3.0 rather than deferring to a follow-on release because: #64 is
+required for Task 6's CI gate to be green and real; #63 edits the same skill as #59; and
+the v6.3.0 PR is still open, so one coherent release covers all the hardening the user
+asked for.
+
+## Consequences
+
+v6.3.0 closes seven issues (#41/#58/#59/#60/#61/#63/#64). The `hooks-check.yml` CI gate
+now runs both hook test suites green on Linux (session-start time-dedup works on Linux
+for the first time — a real bug fix for all Linux autodev users, beyond the test gate).
+The amendment is recorded so the scope expansion is durable, not silent. Future
+scope-expansions during execution should follow the same path: surface → user approval →
+ADR → manifest amend → re-align → re-lock.
diff --git a/docs/plans/2026-06-01-pipeline-hardening-4issues-design.md b/docs/plans/2026-06-01-pipeline-hardening-4issues-design.md
new file mode 100644
index 0000000..e211073
--- /dev/null
+++ b/docs/plans/2026-06-01-pipeline-hardening-4issues-design.md
@@ -0,0 +1,320 @@
+# Autodev Pipeline Hardening (v6.3.0) — 5 Recurring Gate-Miss / Context-Waste Fixes — Design
+
+**Status:** Approved (autonomous — user pre-authorized full-pipeline execution; trigger v6.3.0 release after merge)
+**Date:** 2026-06-01
+**Issues:** #41, #58, #59, #60, #61 (GoCodeAlone/autonomous-dev-kit)
+**ADR:** decisions/0003-implement-n-completion-trust-boundary.md (#58)
+**Adversarial review:** cycle 1 = FAIL (1C/3I/3m, all resolved); #61 added (user-reported same session). cycle 2 = FAIL (1 new Important: pre-compact-snapshot early-exit would skip the #61 marker-clear → fixed: clear placed unconditionally before the early-exit). cycle 3 = PASS (converged; applied the empty-session_key guard).
+
+## Problem
+
+Four open issues, all recurring gate-misses observed across infra-admin v1/v1.1
+autonomous runs + Codex compaction:
+
+- **#59** — plan-phase reviews can pass a design that says "behind X auth filter"/
+  "RBAC enforced" while the plan wires a *weaker*, shape-matching gate (v1.1:
+  client-supplied `evidence.granted_permissions` made write-tier RBAC theater;
+  caught only on plan-review cycle 2).
+- **#60** — a `run_in_background` pr-monitoring **Agent** told to poll-loop-with-
+  sleep instead returns after ~1 cycle and re-completes repeatedly without the
+  conclusive report (early-exited ~6× in v1.1). The lead fell back to self-polling
+  `gh pr checks`, which worked but lost fire-on-event semantics.
+- **#41** — the PreCompact hook (via Codex) is reported as emitting invalid hook
+  JSON ("hook returned invalid PreCompact hook JSON output"), interrupting
+  compaction-time state handling. Root cause class: locale warnings / diagnostics
+  leaking onto the hook's stdout before/around its JSON.
+- **#58** — `Implement: N` tasks get flipped to `completed` by implementers or via
+  blockedBy-clear *before* the code-reviewer's quality gate, violating the
+  team-conventions contract. In v1.1 this masked a non-compiling tree + a
+  CI-failing hash regression both reported "done"; only the lead's
+  verification-before-completion caught them.
+- **#61** — the `pretool-pr-review-reminder` hook injects its ~12-line gh-version
+  + Copilot-reviewer `<IMPORTANT>` block on **every** `gh pr create` (observed 7×
+  in one session) — wasted context for advice the agent already has.
+
+## Goals
+
+One coherent v6.3.0 release hardening the autonomous pipeline against these four.
+
+1. **#59** — add a plan-phase bug-class that walks the design's auth/authz chain
+   component-by-component vs the plan's wiring; flag any gate that is
+   client-asserted rather than server-enforced against an authenticated principal.
+2. **#60** — make the **bash poll-loop** the sanctioned pr-monitoring wait pattern
+   (a `run_in_background` Bash sleep-loop that *blocks to completion* and
+   re-invokes the lead once on settle), documenting the subagent early-exit
+   failure mode. Keep the subagent monitor as a documented fallback.
+3. **#41** — harden the **`run-hook.cmd` wrapper** (the single choke-point all
+   hooks run through) to (a) export a deterministic locale with portable fallback
+   and (b) enforce **stdout JSON discipline**: only valid-JSON-or-empty reaches
+   the host's hook parser; non-JSON noise is routed to stderr. Plus a regression
+   test for noisy/locale-warning output.
+4. **#58** — fix the *real* harm via a **trust boundary**, since a hard hook-block
+   is infeasible (see Design): a flipped `Implement: N` is NOT trusted-done until
+   the lead runs `verification-before-completion` (build + test from a clean
+   tree). Strengthen `subagent-driven-development` + `team-conventions` + ADR.
+5. **#61** — emit the pr-review reminder **once per session** (deduped via a
+   `.claude/autodev-state` marker like `session-start`), reset by the PreCompact
+   hook so it re-emits once after a compaction.
+
+## Non-Goals
+
+- No deterministic hook that *blocks* `TaskUpdate(status=completed)` on `Implement:*`
+  — infeasible (see #58 design). No fragile heuristic that pretends to.
+- No change to the hook event registrations in `hooks.json` (the wrapper change is
+  transparent to all of them).
+- No new pr-monitoring tool; reuse the existing `Bash run_in_background` primitive.
+- No change to GHA/other-platform behavior; these are skill-doc + hook + version
+  changes within the plugin.
+
+## Global Design Guidance
+
+`Guidance: none at docs/design-guidance.md; canon = README §Cross-LLM,
+docs/plans/2026-04-25-cross-llm-portability-design.md, ADRs 0001/0002.`
+
+| guidance | response |
+|---|---|
+| Host-neutral / cross-LLM first | The wrapper fix (#41) is the host-neutral choke-point. #60 is explicitly **host-scoped**: bash poll-loop under `<host: claude-code>`, self-poll fallback documented for `<host: codex,cursor>` (no false cross-LLM claim). #58 trust-boundary + #59 bug-class + #61 dedup-marker are host-neutral prose/state. #61 degrades to emit-every-time on identity-less hosts. |
+| Checklist is the floor | #59 row joins the mandatory-scan plan-phase set. |
+| Don't claim enforcement you can't deliver | #58 explicitly rejects the infeasible hard-block and documents why (ADR) — directly applies the Existence/runtime-validity discipline (#55). |
+| Strict stdout discipline for hooks | #41 centralizes it in the wrapper, not per-hook. |
+
+## Design
+
+### #59 — Auth/authz chain-composition bug-class (plan phase)
+
+Add one row to the **plan-phase** checklist of `skills/adversarial-design-review/
+SKILL.md`. Place it **near the top** of the plan-phase table (right after
+`Verification-class mismatch`), not after the plugin-layout/config-schema rows —
+auth/authz is a security-class finding and ordering signals priority (m2):
+
+```
+| **Auth/authz chain composition** | When the design names an auth/authz chain ("behind the X auth filter", "RBAC-enforced", "admin-only"), walk that chain component-by-component against the plan's actual wiring. For each gate, verify it is enforced **server-side against an authenticated principal**, not shape-matched by a client-asserted value. Flag any gate where the plan's check reads from request/client-supplied input (`evidence.granted_permissions`, a header, a body field) instead of an authenticated subject (`authz.Enforce(authenticatedSubject, …)`). A plan that wires a weaker gate than the design's chain implies = finding. |
+```
+
+(Plan-phase, not design-phase: chain *composition* is a plan-wiring concern — the
+design states the intent; the plan is where a weaker gate slips in.)
+
+### #60 — Bash poll-loop as the sanctioned pr-monitoring pattern
+
+In `skills/pr-monitoring/SKILL.md`, add a **"Waiting for CI: the sanctioned
+pattern"** section near the top of the process. The recommendation is **host-scoped**
+(the bash poll-loop is a Claude-Code Bash-`run_in_background` construct; Codex/Cursor
+don't expose it identically):
+
+- **`<host: claude-code>` — Recommended default:** a `Bash` tool call with
+  `run_in_background: true` running a bounded sleep-loop that polls `gh pr checks`
+  until no check is `pending` (or a failure), then prints a settle line and exits.
+  The harness re-invokes the lead once on exit (≈0 tokens while sleeping). On
+  settle the lead reads the result and admin-merges on `failures=0`. **Concrete
+  cap (m3):** bound the loop (e.g. `for i in $(seq 1 120)` × `sleep 30` = 60 min)
+  so it can never spin forever; on cap, print a timeout line and exit for the lead
+  to restart.
+- **Why:** a `run_in_background` **Agent** instructed to sleep-loop tends to return
+  after ~1 cycle and re-complete repeatedly (the agent loop is not a blocking
+  sleep) — observed early-exiting ~6× (#60). The bash sleep-loop genuinely blocks
+  to completion.
+- **`<host: codex, cursor>`:** these hosts keep the existing `<host: codex,
+  opencode, cursor>` guidance (host's equivalent poll mechanism). Where no
+  blocking-background-bash exists, the sanctioned fallback is **self-poll on each
+  lead wakeup** — the lead runs `gh pr checks` once per turn and re-checks on the
+  next turn — which loses fire-on-event but is reliable. Document this explicitly
+  so the Codex path isn't left undefined.
+- **Fallback (all hosts):** the existing background-Agent monitor stays documented
+  for multi-PR review-comment handling needing active fix-and-push; note its
+  early-exit failure mode.
+
+### #41 — Wrapper-level stdout-JSON discipline (+ keep existing locale logic)
+
+Harden `hooks/run-hook.cmd` (Unix portion — the choke-point every hook runs
+through). The **primary** fix is stdout discipline; the existing locale handling
+is already correct and stays.
+
+1. **Locale (UNCHANGED):** the wrapper already falls back `C.UTF-8 → C` (covering
+   `LC_ALL`/`LC_CTYPE`/`LANG`) when `C.UTF-8` is inherited but not installed —
+   keep it as-is. We do **not** extend it to unset/empty locales (no evidence the
+   Codex failure is an unset locale, and forcing `C` on an otherwise-working
+   UTF-8 default could corrupt multibyte hook output). The real #41 cause is
+   diagnostics leaking onto stdout, addressed below — the locale fallback already
+   prevents the *bash* locale warning; stdout discipline catches whatever else
+   (perl/git/jq diagnostics, shell tracing) leaks.
+
+2. **Stdout JSON discipline (the fix):** run the hook with stdout captured (stderr
+   passes through untouched; stdin still flows from host → hook). Then, when `jq`
+   is available:
+   - **empty** stdout → emit nothing.
+   - stdout is **valid JSON as a whole** → emit it verbatim.
+   - otherwise → **extract** the hook's JSON rather than suppress it: take the last
+     line beginning with `{` (`printf '%s\n' "$out" | grep -E '^\{' | tail -1`);
+     if that line is valid JSON (`jq -e .`), emit **that** on stdout and route the
+     rest (the leading warning/diagnostic prefix) to **stderr**. If no JSON line is
+     found, route the whole capture to stderr and emit nothing.
+   - This is the C1 correction: a `decision:block` JSON preceded by a locale/perl
+     warning (`warning\n{"decision":"block"}`) is **not** valid-as-a-whole, so the
+     extraction path delivers the block instead of dropping it.
+   - Preserve the hook's exit code. When `jq` is **absent**, pass stdout through
+     unchanged (don't break hooks on minimal hosts).
+   - **Trailing-newline note (I3):** capturing via `out=$(…)` strips trailing
+     newlines; re-emitting via `printf '%s\n' "$out"` restores exactly one. JSON
+     parsers accept one trailing newline — a regression case asserts this.
+
+3. **Regression test** (`tests/hook-stdout-discipline.sh`, runs the real
+   `run-hook.cmd` against fixture hooks): (a) warning-then-`{"decision":"block"}`
+   → wrapper emits **only** the block JSON on stdout, warning on stderr, exit code
+   preserved; (b) only-noise → stdout empty, noise on stderr; (c) clean valid JSON
+   → unchanged, exactly one trailing newline; (d) jq-absent (PATH stub) →
+   pass-through verbatim. Also run `tests/hook-contracts.sh` for no real-hook
+   regression.
+
+### #58 — Implement-N completion trust boundary (hard-block is infeasible)
+
+**Investigation (the key finding):** a deterministic plugin hook that *blocks*
+`TaskUpdate(status=completed)` on `Implement:*` tasks unless `owner ==
+code-reviewer` is **not feasible**:
+- The PreToolUse payload for a `TaskUpdate` call carries the *tool input*
+  (`taskId`, `status`, `owner`) but **not the task's current subject** ("Implement:
+  N") nor the calling subagent's identity. The task store is harness state the
+  hook cannot read (no `TaskList` from a bash hook).
+- So the hook cannot reliably determine "is taskId an Implement task?" or "is the
+  caller the code-reviewer?" — the two facts the block needs. A heuristic (e.g.
+  block all `completed`) would break legitimate completions.
+- **Closest feasible hook approach, also rejected (m1):** a `SubagentStop` hook
+  *does* receive `transcript_path` (used by `subagent-scope-guard` /
+  `completion-claim-guard`), so it could parse the transcript for `TaskCreate`
+  calls to map `taskId → "Implement: N"` subject and flag a matching
+  `TaskUpdate(status=completed)`. Rejected as too unreliable: it only sees
+  *subagent* completions (not a lead-level `TaskUpdate`), the transcript JSONL
+  shape is not a stable API surface, and event ordering across subagents is racy.
+  It would also only *warn after the fact* (SubagentStop fires post-completion),
+  not block. So it does not deliver the asked-for enforcement and adds a fragile
+  transcript-format dependency for marginal value.
+
+**Feasible fix — shift the trust boundary (ADR 0003):** the harm in v1.1 was not
+*who flipped the bit* but that a flipped `Implement: N` was **trusted as done**
+while the tree didn't compile / CI failed. So:
+- `skills/subagent-driven-development/SKILL.md`: add an explicit **"Completion is
+  not trusted until lead-verified"** rule — regardless of who/what flips an
+  `Implement: N` to `completed`, the lead MUST run
+  `autodev:verification-before-completion` (build + test from a clean tree, CI
+  green) before treating that task as truly done or proceeding to
+  `finishing-a-development-branch`. A green checkbox is a *claim*, not *evidence*.
+- `agents/team-conventions.md`: restate the code-reviewer-sole-flipper convention
+  AND add the implementer rule "never self-complete an `Implement: N`; DM the
+  reviewer instead" + the lead's trust-boundary gate.
+- ADR 0003 records: rejected the infeasible hard-block; chose the verification
+  trust-boundary; documented the hook-payload limitation so it isn't re-proposed.
+
+### #61 — pr-review reminder: once per session, reset on compaction
+
+`hooks/pretool-pr-review-reminder` currently emits its `<IMPORTANT>` block on every
+`gh pr create`. Add session-dedup mirroring `hooks/session-start`'s
+`session-start-seen` precedent:
+
+- Derive a session key from `transcript_path` basename — the same value
+  `pre-compact-snapshot` already computes as `session_key` for session-locks
+  attribution (note: this is *not* `session-start`'s primary dedup key, which is
+  `session_id:source_kind`; basename is a sufficient, stable per-session key for a
+  file marker). Marker file: `${cwd}/.claude/autodev-state/pr-reminder-seen`
+  containing the session key(s) already reminded.
+- On a matched `gh pr create`: if the current session key is already in the marker
+  → `exit 0` silently (no emit). Else → emit the reminder once and append the
+  session key to the marker.
+- **Reset on compaction (I-NEW-1 — placement is load-bearing):**
+  `hooks/pre-compact-snapshot` removes the current session's key from
+  `pr-reminder-seen` so the next `gh pr create` after a compaction re-emits once.
+  This clear MUST be placed **unconditionally near the top of pre-compact-snapshot
+  — immediately after `session_key`/`cwd_dir` are computed (≈ line 30) and BEFORE
+  the `[ -z "$state_section" ] && exit 0` no-locked-plans early-exit**. Co-locating
+  it with the emit block (after the early-exit) would silently skip the clear in
+  the common no-locked-plan case, leaving the reminder permanently silenced after
+  the first PR. The regression test asserts the clear fires even when there are no
+  locked plans. Guard the clear with `[ -n "$session_key" ]` — when
+  `transcript_path` is absent (`session_key=""`), skip the clear (no blanket file
+  wipe), matching the reminder's emit-every-time degradation on identity-less hosts.
+- Degrade gracefully: no `transcript_path` (hookless/identity-less host) → fall
+  back to current behavior (emit every time) rather than suppress, so a host that
+  can't dedup still gets the advice.
+- **Match precision (minor, in-scope):** tighten the trigger so it matches an
+  actual `gh pr create` invocation, not the literal substring inside a quoted
+  argument (e.g. an issue/PR *body* that mentions `gh pr create`) — anchor on a
+  word boundary / command position. (Observed: this design's own tracking-issue
+  body tripped the hook.)
+- **Regression test:** two `gh pr create` payloads with the same session key →
+  emitted once; a `pre-compact-snapshot` run between them **with no locked plans
+  present** (the early-exit case) → still clears the marker so the reminder is
+  emitted again (post-compact reset); no `transcript_path` → emits each time.
+
+- **#41** is security-adjacent: a hook that leaks non-JSON to stdout can corrupt
+  the host's view of a *block* decision (e.g. a scope-guard `{"decision":"block"}`
+  preceded by a locale warning could be ignored → a guarded action proceeds).
+  Centralizing JSON discipline in the wrapper makes every block decision reliably
+  delivered. No secrets touched; no network; the wrapper reads only the hook's own
+  stdout.
+- **#59** *is* an auth/authz review-quality improvement — it strengthens detection
+  of client-asserted-permission theater in plans.
+- **#58/#60** are process/doc; no runtime security surface.
+
+## Amendment (2026-06-01, user-approved — see decisions/0004)
+
+Two items folded into v6.3.0 during execution, both user-approved (manifest 7 → 9 tasks,
+PR count unchanged):
+- **#64** — `hooks/session-start` time-dedup is broken on Linux (BSD `stat -f %m` tried
+  before GNU `stat -c %Y`; `stat -f` succeeds-but-wrong on Linux). Surfaced by Task 6's
+  `hooks-check.yml` running `hook-contracts.sh` on ubuntu for the first time. Fixed
+  (GNU-first + numeric guard) and `hook-contracts.sh` re-enabled in the CI gate.
+- **#63** — new design-phase `Artifact-class precedent` bug-class: survey how the codebase
+  implements the *artifact class* (where a scenario stands up a server, where a fixture
+  lives), not just the *mechanism*; grep for sibling instances and follow the established
+  shape or justify divergence.
+
+## Infrastructure Impact
+
+None at runtime. Plugin skill/hook/doc changes + a version bump. Release path is
+the existing `release-tag.yml` (push to main touching `.claude-plugin/plugin.json`
+→ version-check → tag → marketplace dispatch).
+
+## Multi-Component Validation
+
+- **#41 wrapper × hooks:** `tests/hook-stdout-discipline.sh` runs the **real**
+  `run-hook.cmd` against fixture hook scripts (noisy + clean + jq-absent) and
+  asserts stdout/stderr/exit-code — the real wrapper↔hook boundary, not a mock.
+  Also run the existing `tests/hook-contracts.sh` to confirm no regression to the
+  real hooks (session-start, scope-guard, completion-claim-guard, pre-compact).
+- **#61 reminder dedup:** the regression test runs the real
+  `pretool-pr-review-reminder` against two same-session `gh pr create` payloads
+  (emit once) + a `pre-compact-snapshot` between them (re-emit once) + a
+  no-`transcript_path` payload (emit each time) — the real hook↔state boundary.
+- **#59/#60/#58:** `tests/skill-content-grep.sh` (host-neutral lint) +
+  `tests/skill-cross-refs.sh` pass on the edited skills; plan-phase reviewers will
+  enumerate the new class because the plan-phase checklist is embedded in their
+  dispatch prompt.
+
+## Assumptions
+
+| id | assumption | challenge | fallback |
+|---|---|---|---|
+| A1 | All autodev hooks emit JSON-or-nothing on stdout (Claude-Code/Codex protocol) | A hook might emit intentional plain-text stdout | Verified: every registered hook emits `{…}` JSON or nothing. If a future hook needs plain-text stdout it must opt out — but none do today. |
+| A2 | The PreToolUse payload lacks task subject + caller identity | Harness could add it later | Verified against the documented hook payload; if it ever exposes both, the hard-block becomes feasible and ADR 0003 should be revisited. |
+| A3 | A `run_in_background` Bash sleep-loop genuinely blocks + re-invokes the lead once on exit (Claude Code) | Codex/Cursor lack this primitive | Directly observed working across this session's many CI waits ([[feedback_ci_wait_use_bash_poll_loop]]); scoped to `<host: claude-code>`, with the self-poll fallback documented for other hosts. |
+| A6 | #41 last-JSON-line extraction recovers a block decision behind a warning | A hook could emit multi-line pretty-printed JSON (not single-line) | autodev hooks all emit single-line `jq -n` JSON (one object, one line); the `grep '^{' | tail -1` extraction matches. If a future hook pretty-prints, the whole-blob `jq .` path still handles the no-warning case; the test covers the warning-prefixed single-line case. |
+| A4 | The wrapper can capture stdout without breaking hooks that read stdin | Hooks read the harness JSON from stdin, not the wrapper | The wrapper only redirects the hook's *stdout*; stdin still flows from the host to the hook unchanged. |
+| A5 | v6.3.0 is the right bump + the tag is free | Could collide (the #804 lesson) | Verified `git ls-remote --tags` shows v6.3.0 free; current 6.2.2 → 6.3.0 minor. |
+
+## Rollback
+
+- Revert the PR(s). #59/#60/#58 are additive prose; #41 reverts the wrapper to its
+  prior conditional-locale form (no migration). The version bump reverts with the
+  same commit; do not push the v6.3.0 tag if reverted pre-merge.
+- **#41 runtime note:** the wrapper is on the hook hot-path. If the captured-stdout
+  change regresses a hook, rollback = revert `run-hook.cmd` to the `exec bash` form
+  + re-run `tests/hook-contracts.sh`. The regression test gates this pre-merge.
+
+## Self-Challenge
+
+- **Simplest alternative:** fix the PreCompact hook script alone (#41) instead of
+  the wrapper. Rejected — the issue explicitly wants the wrapper-level fix so
+  *every* hook gets stdout discipline, not just one.
+- **Most fragile assumption:** A1 (hooks emit JSON-or-nothing). Mitigated by the
+  jq-absent pass-through + the fact that all current hooks comply.
+- **YAGNI sweep:** no advisory `TaskUpdate` hook for #58 (it can't block, fires on
+  every completion → noise); no new monitor tool for #60; no per-hook locale edits
+  (#41 is wrapper-only). All rejected as surface the issues didn't ask for.
diff --git a/docs/plans/2026-06-01-pipeline-hardening-4issues.md b/docs/plans/2026-06-01-pipeline-hardening-4issues.md
new file mode 100644
index 0000000..878be0d
--- /dev/null
+++ b/docs/plans/2026-06-01-pipeline-hardening-4issues.md
@@ -0,0 +1,535 @@
+# Autodev Pipeline Hardening (v6.3.0) Implementation Plan
+
+> **For the implementing agent:** REQUIRED SUB-SKILL: Use autodev:executing-plans to implement this plan task-by-task.
+
+**Goal:** Ship 5 recurring-gate-miss / context-waste fixes (#41/#58/#59/#60/#61) as one v6.3.0 release.
+
+**Architecture:** Skill-doc additions (#59 bug-class, #60 pr-monitoring pattern, #58 trust-boundary) + two hook changes through the choke-points (#41 `run-hook.cmd` stdout-JSON discipline; #61 `pretool-pr-review-reminder` session-dedup + `pre-compact-snapshot` marker-clear) + a CI workflow that runs the hook regression tests + a version bump that triggers `release-tag.yml`.
+
+**Tech Stack:** Markdown skill files; bash hooks + `jq`; bash test harness (`tests/hook-contracts.sh`); GitHub Actions.
+
+**Base branch:** main
+
+---
+
+## Scope Manifest
+
+**PR Count:** 1
+**Tasks:** 9
+**Estimated Lines of Change:** ~340 (informational; not enforced)
+
+**Out of scope:**
+- A deterministic hook that *blocks* `TaskUpdate(status=completed)` on `Implement:*` (infeasible — ADR 0003).
+- Any advisory `TaskUpdate` hook for #58 (rejected — noisy, can't identify Implement tasks).
+- Extending the wrapper locale logic to unset/empty locales (#41 keeps existing locale handling).
+- Any new pr-monitoring tool / Codex-specific background primitive (#60 documents host-scoped patterns only).
+- Changing hook event registrations in `hooks.json` (the wrapper change is transparent; only the new `hooks-check.yml` workflow is added).
+
+**PR Grouping:**
+
+| PR # | Title | Tasks | Branch |
+|------|-------|-------|--------|
+| 1 | feat: v6.3.0 pipeline hardening (#41/#58/#59/#60/#61/#63/#64) | Task 1, Task 2, Task 3, Task 4, Task 5, Task 6, Task 7, Task 8, Task 9 | feat/pipeline-hardening-4issues-v6.3.0 |
+
+**Status:** Amended 2026-06-01T06:00:00Z (user-approved scope expansion to #63 + #64; see Amendment note + decisions/0004)
+
+---
+
+## Project Design Guidance
+
+`Guidance: none at docs/design-guidance.md; canon = README §Cross-LLM + ADRs 0001/0002/0003.` Mapping:
+- Host-neutral → #41 wrapper + #59/#58/#61 are host-neutral; #60 explicitly host-scoped (verified by `tests/skill-content-grep.sh` host-token lint).
+- Don't claim enforcement you can't deliver → #58 (ADR 0003) + #41 (a test that runs in CI, Task 6).
+- Checklist is the floor → #59 joins the mandatory plan-phase scan.
+
+---
+
+### Task 1: #59 — auth/authz chain-composition bug-class (plan-phase)
+
+**Files:**
+- Modify: `skills/adversarial-design-review/SKILL.md` (plan-phase table, insert right after the `Verification-class mismatch` row)
+
+**Step 1: Verify the insertion anchor**
+Run: `grep -n "Verification-class mismatch" skills/adversarial-design-review/SKILL.md`
+Expected: one match in the plan-phase table (lines ~104-113). The new row goes immediately after it.
+
+**Step 2: Insert the row** (exact, after the `Verification-class mismatch` row):
+```
+| **Auth/authz chain composition** | When the design names an auth/authz chain ("behind the X auth filter", "RBAC-enforced", "admin-only"), walk that chain component-by-component against the plan's actual wiring. For each gate, verify it is enforced **server-side against an authenticated principal**, not shape-matched by a client-asserted value. Flag any gate where the plan's check reads from request/client-supplied input (`evidence.granted_permissions`, a header, a body field) instead of an authenticated subject (`authz.Enforce(authenticatedSubject, …)`). A plan that wires a weaker gate than the design's chain implies = finding. |
+```
+
+**Step 3: Verify placement + lint**
+Run: `awk '/Bug-class checklist — plan phase/{p=1} p && /Auth\/authz chain composition/{print NR": ok"}' skills/adversarial-design-review/SKILL.md`
+Expected: one line (row is in the plan-phase section).
+Run: `bash tests/skill-content-grep.sh 2>&1 | tail -1`
+Expected: `PASS: …`.
+
+**Step 4: Commit**
+```bash
+git add skills/adversarial-design-review/SKILL.md
+git commit -m "feat(adversarial-design-review): add auth/authz chain-composition plan-phase bug-class (#59)"
+```
+Rollback: revert commit (additive prose row).
+
+---
+
+### Task 2: #60 — pr-monitoring bash poll-loop sanctioned pattern
+
+**Files:**
+- Modify: `skills/pr-monitoring/SKILL.md` (add a "Waiting for CI: the sanctioned pattern" subsection near the top of the process, after the overview)
+
+**Step 1: Add the section** (host-scoped, per design §#60). Insert after the "## When to Use" / before "## The Process" (or near the top of the process):
+
+```markdown
+## Waiting for CI: the sanctioned pattern
+
+<host: claude-code>
+**Recommended default — a bash poll-loop, not a long-lived monitor Agent.** Issue a
+`Bash` tool call with `run_in_background: true` running a **bounded** sleep-loop that
+polls `gh pr checks <pr>` until no check is `pending` (or a failure), prints a settle
+line, and exits. The harness re-invokes the lead **once** on exit (≈0 tokens while it
+sleeps); the lead then reads the result and admin-merges on `failures=0`.
+
+Bound the loop so it can never spin forever, e.g.:
+```bash
+for i in $(seq 1 120); do            # 120 × 30s = 60 min cap
+  raw=$(gh pr checks <pr> --json bucket 2>/dev/null)
+  fail=$(echo "$raw" | jq '[.[]|select(.bucket=="fail")]|length')
+  pend=$(echo "$raw" | jq '[.[]|select(.bucket=="pending")]|length')
+  { [ "$fail" != 0 ] && [ -n "$fail" ]; } && { echo "FAILURES=$fail"; break; }
+  { [ "$pend" = 0 ] || [ -z "$pend" ]; } && { echo "SETTLED"; break; }
+  sleep 30
+done
+```
+On the 120-iteration cap, print a timeout line and exit for the lead to restart.
+
+**Why not a background Agent:** a `run_in_background` **Agent** told to sleep-loop
+tends to return after ~1 cycle and re-complete repeatedly (the agent loop is not a
+blocking sleep) — observed early-exiting ~6× in one run. The bash sleep-loop genuinely
+blocks to completion.
+</host>
+
+<host: codex, cursor, opencode>
+Use your host's equivalent poll mechanism. Where no blocking-background-bash exists,
+the sanctioned fallback is **self-poll on each lead wakeup**: run `gh pr checks <pr>`
+once per turn and re-check next turn. This loses fire-on-event but is reliable; do not
+dispatch a background Agent expecting it to block on a sleep-loop.
+</host>
+
+**Fallback (all hosts):** the background-Agent monitor below remains documented for
+multi-PR review-comment handling needing active fix-and-push; note its early-exit
+failure mode and prefer the poll-loop / self-poll for pure CI-wait.
+```
+
+**Step 2: Verify lint + cross-refs**
+Run: `bash tests/skill-content-grep.sh 2>&1 | tail -1` → `PASS`
+Run: `bash tests/skill-cross-refs.sh 2>&1 | tail -1` → `PASS: all cross-skill references resolve.`
+
+**Step 3: Commit**
+```bash
+git add skills/pr-monitoring/SKILL.md
+git commit -m "docs(pr-monitoring): sanction the bash poll-loop CI-wait pattern, host-scoped (#60)"
+```
+Rollback: revert commit.
+
+---
+
+### Task 3: #58 — Implement-N completion trust boundary
+
+**Files:**
+- Modify: `skills/subagent-driven-development/SKILL.md` (add a "Completion is not trusted until lead-verified" rule)
+- Modify: `agents/team-conventions.md` (restate the implementer/code-reviewer rules + the lead trust-boundary)
+- (ADR `decisions/0003-implement-n-completion-trust-boundary.md` already committed with the design.)
+
+**Step 1: Add the trust-boundary rule to subagent-driven-development** — near the "Red Flags"/completion section, add:
+```markdown
+### Completion is not trusted until lead-verified
+
+A green `Implement: N` checkbox is a **claim, not evidence**. Regardless of who or
+what flips an `Implement: N` task to `completed` (implementer, a blockedBy-clear, or
+the code-reviewer), the lead MUST run `autodev:verification-before-completion` —
+build + test from a clean tree, CI green — before treating that task as truly done or
+invoking `finishing-a-development-branch`. The team-conventions "only code-reviewer
+flips Implement-N" rule remains team discipline, but correctness rests on lead
+verification, which does not depend on who flipped the bit. (A deterministic hook that
+blocks the flip is infeasible — the PreToolUse payload lacks the task subject and
+caller identity; see decisions/0003-implement-n-completion-trust-boundary.md.)
+```
+
+**Step 2: Add the rules to team-conventions.md** — under the Implementer + a Code-reviewer/Lead section:
+- Implementer: "Never self-complete an `Implement: N` task. DM the spec-reviewer when ready; the code-reviewer is the sole flipper."
+- Lead: "Treat any `completed` Implement-N as a claim; run `verification-before-completion` (clean build + tests + CI green) before accepting it or finishing the branch."
+
+**Step 3: Verify lint + cross-refs**
+Run: `bash tests/skill-content-grep.sh 2>&1 | tail -1` → `PASS`
+Run: `bash tests/skill-cross-refs.sh 2>&1 | tail -1` → `PASS`
+
+**Step 4: Commit**
+```bash
+git add skills/subagent-driven-development/SKILL.md agents/team-conventions.md
+git commit -m "docs(subagent-driven-development): completion trust-boundary for Implement-N (#58, ADR 0003)"
+```
+Rollback: revert commit.
+
+---
+
+### Task 4: #41 — run-hook.cmd stdout-JSON discipline + test
+
+**Files:**
+- Modify: `hooks/run-hook.cmd` (Unix portion — replace the final `exec bash …` with a capture-then-discipline block; keep the locale logic unchanged)
+- Create: `tests/hook-stdout-discipline.sh`
+
+**Step 1: Write the failing test** (`tests/hook-stdout-discipline.sh`) — runs the REAL `run-hook.cmd` against fixture hooks:
+```bash
+#!/usr/bin/env bash
+# tests/hook-stdout-discipline.sh — verify run-hook.cmd enforces stdout JSON discipline.
+set -uo pipefail
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+WRAPPER="$REPO_ROOT/hooks/run-hook.cmd"
+failures=0
+pass() { printf 'PASS: %s\n' "$*"; }
+fail() { printf 'FAIL: %s\n' "$*" >&2; failures=$((failures+1)); }
+command -v jq >/dev/null 2>&1 || { echo "SKIP: jq required"; exit 0; }
+
+tmp="$(mktemp -d)"
+# Cleanup trap set BEFORE any fixture is copied into hooks/ (rm -f on absent files is safe).
+trap 'rm -f "$REPO_ROOT"/hooks/fix-warn-then-json "$REPO_ROOT"/hooks/fix-noise "$REPO_ROOT"/hooks/fix-clean; rm -rf "$tmp"' EXIT
+mkfix() { printf '%s\n' "$1" > "$tmp/$2"; chmod +x "$tmp/$2"; }
+
+# Fixture A: locale warning to stderr-or-stdout then a block JSON on stdout.
+mkfix '#!/usr/bin/env bash
+echo "perl: warning: Setting locale failed."   # leaks to stdout
+printf "%s\n" "{\"decision\":\"block\",\"reason\":\"x\"}"' fix-warn-then-json
+# Fixture B: only noise, no JSON.
+mkfix '#!/usr/bin/env bash
+echo "just a diagnostic line"' fix-noise
+# Fixture C: clean single-line JSON.
+mkfix '#!/usr/bin/env bash
+printf "%s\n" "{\"hookSpecificOutput\":{\"hookEventName\":\"X\"}}"' fix-clean
+
+run() { local out err rc; out="$("$WRAPPER" "$1" 2>"$tmp/err")"; rc=$?; printf '%s' "$out" > "$tmp/out"; err="$(cat "$tmp/err")"; OUT="$out"; ERR="$err"; RC=$rc; }
+
+# Point the wrapper at the tmp fixtures by copying them next to run-hook.cmd is
+# heavy; instead invoke the wrapper with a fixture in hooks/ via a temp symlink dir.
+# Simplest: call the wrapper with HOOK_DIR override is not supported, so copy fixtures
+# into hooks/ under a unique prefix and clean up.
+for f in fix-warn-then-json fix-noise fix-clean; do cp "$tmp/$f" "$REPO_ROOT/hooks/$f"; done
+
+# (a) warning + block JSON → stdout is ONLY the block JSON; warning ON stderr (m1).
+run fix-warn-then-json
+if printf '%s' "$OUT" | jq -e '.decision=="block"' >/dev/null 2>&1 \
+   && ! printf '%s' "$OUT" | grep -q 'perl: warning' \
+   && printf '%s' "$ERR" | grep -q 'perl: warning'; then
+  pass "(a) block JSON on stdout, warning routed to stderr"
+else fail "(a) expected block JSON on stdout + warning on stderr, got OUT=[$OUT] ERR=[$ERR]"; fi
+
+# (b) only noise → stdout empty, noise on stderr.
+run fix-noise
+{ [ -z "$OUT" ] && printf '%s' "$ERR" | grep -q 'diagnostic'; } \
+  && pass "(b) noise suppressed from stdout, routed to stderr" || fail "(b) expected empty stdout, got: $OUT"
+
+# (c) clean JSON → unchanged + valid.
+run fix-clean
+printf '%s' "$OUT" | jq -e '.hookSpecificOutput.hookEventName=="X"' >/dev/null 2>&1 \
+  && pass "(c) clean JSON passthrough" || fail "(c) clean JSON broke, got: $OUT"
+
+# (d) jq-absent (I2) → wrapper passes stdout through VERBATIM (warning + JSON both present).
+# Stub PATH so the wrapper's `command -v jq` fails; assert via grep (no jq needed here).
+nojq="$tmp/nojq"; mkdir -p "$nojq"
+OUTD="$(PATH="$nojq" "$WRAPPER" fix-warn-then-json 2>/dev/null)"
+{ printf '%s' "$OUTD" | grep -q 'perl: warning' && printf '%s' "$OUTD" | grep -q '"decision":"block"'; } \
+  && pass "(d) jq-absent → verbatim passthrough (no discipline applied)" \
+  || fail "(d) expected verbatim passthrough with jq absent, got: $OUTD"
+
+echo ""; echo "Results: $failures failure(s)"; [ "$failures" -eq 0 ]
+```
+
+> Implementation note: fixtures are copied into `hooks/` (the wrapper resolves
+> scripts relative to its own dir) and cleaned up in the trap (set BEFORE the copy so
+> a mid-test failure still cleans up). The top `command -v jq` guard SKIPs only when
+> the **test machine** has no jq (can't run a/b/c); on CI (jq present) all four cases
+> run — case (d) stubs PATH so the **wrapper** sees no jq and asserts verbatim
+> passthrough via grep (no jq needed for that assertion).
+
+**Step 2: Run → FAIL** (wrapper still `exec bash`, doesn't strip the warning):
+Run: `bash tests/hook-stdout-discipline.sh 2>&1 | tail -6`
+Expected: `(a)` FAILs (stdout contains the perl warning + JSON).
+
+**Step 3: Implement the wrapper discipline** — replace the final `exec bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"` line of `hooks/run-hook.cmd` with:
+```bash
+# Run the hook with stdout captured (stderr + stdin pass through untouched).
+# Enforce stdout JSON discipline: only valid-JSON-or-empty reaches the host's hook
+# parser; diagnostics that leak onto stdout (locale/perl/git warnings) are routed to
+# stderr. A block decision preceded by a warning is recovered, not dropped (#41).
+if command -v jq >/dev/null 2>&1; then
+  hook_out="$(bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@")"
+  hook_rc=$?
+  if [ -z "$hook_out" ]; then
+    : # nothing to emit
+  elif printf '%s' "$hook_out" | jq -e . >/dev/null 2>&1; then
+    printf '%s\n' "$hook_out"                       # valid JSON as a whole
+  else
+    json_line="$(printf '%s\n' "$hook_out" | grep -E '^\{' | tail -1)"
+    if [ -n "$json_line" ] && printf '%s' "$json_line" | jq -e . >/dev/null 2>&1; then
+      printf '%s\n' "$hook_out" | grep -vF "$json_line" >&2   # diagnostics → stderr
+      printf '%s\n' "$json_line"                              # recovered JSON → stdout
+    else
+      printf '%s\n' "$hook_out" >&2                           # all noise → stderr
+    fi
+  fi
+  exit "$hook_rc"
+else
+  exec bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"     # jq absent: pass through unchanged
+fi
+```
+
+**Step 4: Run → PASS**
+Run: `bash tests/hook-stdout-discipline.sh 2>&1 | tail -6`
+Expected: `Results: 0 failure(s)` (all of a/b/c pass).
+Run: `bash tests/hook-contracts.sh 2>&1 | tail -3`
+Expected: existing hook contracts still pass (no regression to real hooks through the wrapper).
+
+**Step 5: Commit**
+```bash
+git add hooks/run-hook.cmd tests/hook-stdout-discipline.sh
+git commit -m "fix(run-hook.cmd): enforce stdout JSON discipline, recover block decisions behind warnings (#41)"
+```
+Rollback: revert `run-hook.cmd` to the `exec bash` form + re-run `tests/hook-contracts.sh`. Hook-path change — verified by the regression test (Task 6 CI-gates it).
+
+---
+
+### Task 5: #61 — pr-review reminder dedup + pre-compact clear
+
+**Files:**
+- Modify: `hooks/pretool-pr-review-reminder` (session-dedup + tighter match)
+- Modify: `hooks/pre-compact-snapshot` (clear the marker, unconditionally, before the early-exit)
+- Modify: `tests/hook-contracts.sh` (dedup + post-compact-reset cases)
+
+**Step 1: Add the dedup + post-compact tests to `tests/hook-contracts.sh`** (reuse `run_hook` + a tmp cwd):
+```bash
+test_pr_reminder_dedup() {
+  local tmp; tmp="$(mktemp -d)"
+  trap 'rm -rf "$tmp"' RETURN                       # I3: repo-convention cleanup
+  local sess='/x/transcripts/sess-abc.jsonl'
+  local payload='{"tool_name":"Bash","tool_input":{"command":"gh pr create --title t --body b"},"cwd":"'"$tmp"'","transcript_path":"'"$sess"'"}'
+  # first call emits
+  local out1; out1="$(run_hook pretool-pr-review-reminder "$payload")"
+  printf '%s' "$out1" | jq -e '.hookSpecificOutput.additionalContext' >/dev/null 2>&1 \
+    || fail "pr-reminder: first call should emit"
+  # second call (same session) is silent
+  local out2; out2="$(run_hook pretool-pr-review-reminder "$payload")"
+  [ -z "$out2" ] && pass "pr-reminder: deduped within session" \
+    || fail "pr-reminder: second call should be silent, got: $out2"
+  # a PreCompact run with NO locked plans must still clear the marker
+  run_hook pre-compact-snapshot '{"cwd":"'"$tmp"'","transcript_path":"'"$sess"'"}' >/dev/null 2>&1 || true
+  local out3; out3="$(run_hook pretool-pr-review-reminder "$payload")"
+  printf '%s' "$out3" | jq -e '.hookSpecificOutput.additionalContext' >/dev/null 2>&1 \
+    && pass "pr-reminder: re-emits after PreCompact reset" \
+    || fail "pr-reminder: should re-emit after compaction, got: $out3"
+  # I1: a quoted body that merely mentions 'gh pr create' must NOT emit
+  local fp='{"tool_name":"Bash","tool_input":{"command":"gh issue create --title t --body \"see gh pr create docs\""},"cwd":"'"$tmp"'","transcript_path":"'"$sess"'"}'
+  local outfp; outfp="$(run_hook pretool-pr-review-reminder "$fp")"
+  [ -z "$outfp" ] && pass "pr-reminder: not tripped by 'gh pr create' inside a quoted body" \
+    || fail "pr-reminder: false-positive on quoted body, got: $outfp"
+  # degrade gracefully: no transcript_path → emits every time (no marker write)
+  local tmp2; tmp2="$(mktemp -d)"
+  local np='{"tool_name":"Bash","tool_input":{"command":"gh pr create --title t"},"cwd":"'"$tmp2"'"}'
+  local na; na="$(run_hook pretool-pr-review-reminder "$np")"
+  local nb; nb="$(run_hook pretool-pr-review-reminder "$np")"
+  { printf '%s' "$na" | jq -e '.hookSpecificOutput' >/dev/null 2>&1 && printf '%s' "$nb" | jq -e '.hookSpecificOutput' >/dev/null 2>&1; } \
+    && pass "pr-reminder: no transcript_path → emits every time" \
+    || fail "pr-reminder: should emit each time without transcript_path"
+  rm -rf "$tmp2"
+}
+# (call test_pr_reminder_dedup in the main run sequence)
+```
+
+**Step 2: Run → FAIL** (reminder has no dedup yet):
+Run: `bash tests/hook-contracts.sh 2>&1 | grep -i "pr-reminder" | head`
+Expected: the "deduped within session" case FAILs (second call still emits).
+
+**Step 3: Implement dedup in `pretool-pr-review-reminder`** — after computing `cmd` and confirming `gh pr create`, before emitting:
+- **Tighten the match via quote-stripping (I1 — mirror `pre-tool-scope-guard`'s precedent), NOT a boundary regex.** A boundary regex still matches `gh pr create` inside a quoted `--body`. Strip quoted segments first, then match on the stripped command:
+  ```bash
+  cmd_unquoted=$(printf '%s' "$cmd" | sed "s/\"[^\"]*\"//g; s/'[^']*'//g")  # double-first, matching pre-tool-scope-guard
+  printf '%s' "$cmd_unquoted" | grep -q 'gh pr create' || exit 0
+  ```
+  So `gh issue create --body "… gh pr create …"` → stripped → `gh issue create --body ` → no match → no emit; a real `gh pr create --title t --body b` → stripped → `gh pr create --title t --body ` → matches.
+- Compute `session_key`: `transcript_path=$(… jq -r .transcript_path …); session_key=""; [ -n "$transcript_path" ] && session_key=$(basename "$transcript_path")`.
+- Marker: `marker="${cwd_dir}/.claude/autodev-state/pr-reminder-seen"`.
+- If `[ -n "$session_key" ]` and `grep -qxF "$session_key" "$marker" 2>/dev/null` → `exit 0` (already reminded). Else emit, and if `[ -n "$session_key" ]` append: `mkdir -p "$(dirname "$marker")"; printf '%s\n' "$session_key" >> "$marker"`.
+- No `session_key` (no transcript) → emit every time (current behavior).
+
+**Step 4: Implement the clear in `pre-compact-snapshot`** — UNCONDITIONALLY, immediately after `session_key` is computed (≈ line 30) and BEFORE the `[ -z "$state_section" ] && exit 0` early-exit:
+```bash
+# #61: clear this session's pr-review-reminder marker so the reminder re-emits once
+# after compaction (the post-compaction context lost the earlier reminder). Must run
+# before the no-locked-plans early-exit. Guard on a non-empty session key.
+if [ -n "${session_key:-}" ]; then
+  reminder_marker="${cwd_dir}/.claude/autodev-state/pr-reminder-seen"
+  if [ -f "$reminder_marker" ]; then
+    grep -vxF "$session_key" "$reminder_marker" > "${reminder_marker}.tmp" 2>/dev/null \
+      && mv "${reminder_marker}.tmp" "$reminder_marker" || rm -f "${reminder_marker}.tmp"
+  fi
+fi
+```
+
+**Step 5: Run → PASS**
+Run: `bash tests/hook-contracts.sh 2>&1 | tail -4`
+Expected: all pass, including `pr-reminder: deduped within session` + `pr-reminder: re-emits after PreCompact reset`.
+
+**Step 6: Commit**
+```bash
+git add hooks/pretool-pr-review-reminder hooks/pre-compact-snapshot tests/hook-contracts.sh
+git commit -m "fix(hooks): pr-review reminder once-per-session + PreCompact reset (#61)"
+```
+Rollback: revert commit (hooks restore prior emit-every-time behavior).
+
+---
+
+### Task 6: CI-gate the hook regression tests
+
+**Files:**
+- Create: `.github/workflows/hooks-check.yml`
+
+**Rationale:** #41 is a hook-reliability fix; its regression test must actually run in CI (a test that never runs is theater — the existence/runtime-validity discipline). No workflow currently runs the hook tests.
+
+**Step 1: Add the workflow**
+```yaml
+name: Hooks Check
+on:
+  push:
+    paths:
+      - 'hooks/**'
+      - 'tests/hook-contracts.sh'
+      - 'tests/hook-stdout-discipline.sh'
+      - '.github/workflows/hooks-check.yml'
+  pull_request:
+    paths:
+      - 'hooks/**'
+      - 'tests/hook-contracts.sh'
+      - 'tests/hook-stdout-discipline.sh'
+jobs:
+  hooks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install jq
+        run: sudo apt-get update && sudo apt-get install -y jq
+      - name: Hook contract tests
+        run: bash tests/hook-contracts.sh
+      - name: Hook stdout discipline tests
+        run: bash tests/hook-stdout-discipline.sh
+```
+
+**Step 2: Verify both test scripts currently pass locally (the gate is real)**
+Run: `bash tests/hook-contracts.sh 2>&1 | tail -2 && bash tests/hook-stdout-discipline.sh 2>&1 | tail -2`
+Expected: both report 0 failures (after Tasks 4+5).
+
+**Step 3: Lint the workflow YAML**
+Run: `python3 -c "import yaml; yaml.safe_load(open('.github/workflows/hooks-check.yml'))" && echo "YAML OK"`
+Expected: `YAML OK`.
+
+**Step 4: Commit**
+```bash
+git add .github/workflows/hooks-check.yml
+git commit -m "ci: run hook contract + stdout-discipline tests on hooks/tests changes (#41/#61)"
+```
+Rollback: revert commit (removes the workflow).
+
+---
+
+### Task 7: Version bump → v6.3.0 + RELEASE-NOTES
+
+**Files:**
+- Modify: `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `.cursor-plugin/plugin.json` (via `scripts/bump-version.sh`)
+- Modify: `RELEASE-NOTES.md`
+
+**Step 1: Confirm start version + the tag is free**
+Run: `grep -m1 '"version"' .claude-plugin/plugin.json; git ls-remote --tags origin refs/tags/v6.3.0`
+Expected: `6.2.2`; `git ls-remote` prints nothing (v6.3.0 free).
+
+**Step 2: Bump**
+Run: `scripts/bump-version.sh 6.3.0`
+Expected: `Bumping version: 6.2.2 → 6.3.0` across the 3 manifests.
+
+**Step 3: Consistency gate (the exact check `release-tag.yml` runs)**
+Run: `bash tests/version-check.sh`
+Expected: `OK: All version files agree on version 6.3.0`.
+
+**Step 4: Add RELEASE-NOTES entry** — a `## v6.3.0 — 2026-06-01` section at the top (below the title), summarizing the 5 fixes (#41/#58/#59/#60/#61).
+
+**Step 5: Commit**
+```bash
+git add .claude-plugin/plugin.json .claude-plugin/marketplace.json .cursor-plugin/plugin.json RELEASE-NOTES.md
+git commit -m "chore: bump version to 6.3.0 (#41/#58/#59/#60/#61)"
+```
+Rollback: `scripts/bump-version.sh 6.3.0 6.2.2` + revert; pre-merge revert prevents the v6.3.0 tag (release fires only on merge-to-main touching plugin.json).
+
+> After merge: `release-tag.yml` auto-tags v6.3.0 + dispatches the marketplace. Create the GH Release from the RELEASE-NOTES v6.3.0 section (tag-only is the workflow's behavior; v6.2.0/6.2.2 precedent).
+
+---
+
+### Task 8: #64 — session-start time-dedup Linux portability (amendment)
+
+**Discovered at execution:** Task 6's new `hooks-check.yml` ran `hook-contracts.sh` on
+ubuntu for the first time and surfaced a **pre-existing** Linux bug:
+`hooks/session-start` computes the last-emit mtime with BSD `stat -f %m` **before** GNU
+`stat -c %Y`. On Linux `stat -f` means "file system status", succeeds (exit 0), and
+prints fs info instead of the mtime → `[ "$last" -gt 0 ]` errors and the time-dedup
+never suppresses re-fires. User-approved amendment (#64).
+
+**Files:**
+- Modify: `hooks/session-start` (the `stat` ordering + numeric guard)
+- Modify: `.github/workflows/hooks-check.yml` (re-enable `hook-contracts.sh` once fixed)
+
+**Step 1: Fix the `stat` ordering** — GNU first, then BSD, then guard numeric:
+```bash
+last=$(stat -c %Y "$LAST_EMIT_FILE" 2>/dev/null || stat -f %m "$LAST_EMIT_FILE" 2>/dev/null || echo 0)
+case "$last" in (*[!0-9]*|'') last=0 ;; esac
+```
+**Step 2: Re-enable `hook-contracts.sh` in `hooks-check.yml`** (the CI gate is now real on Linux).
+**Step 3: Verify** — `bash tests/hook-contracts.sh` → `All hook contract tests passed.`
+(macOS: `stat -c %Y` fails → falls to BSD `stat -f %m`; Linux: `stat -c %Y` works.) CI on
+ubuntu confirms the time-dedup case now passes.
+**Step 4: Commit** — `fix(session-start): GNU stat -c %Y before BSD -f %m … (#64)`.
+Rollback: revert commit.
+
+### Task 9: #63 — Artifact-class precedent design-phase bug-class (amendment)
+
+**User-approved amendment (#63).** A design can pass mechanism-correctness review yet put
+an artifact in the wrong *place/shape* (v1.1: a scenario test fixture inside the
+production engine repo, when sibling scenarios own a `cmd/server/main.go`).
+
+**Files:**
+- Modify: `skills/adversarial-design-review/SKILL.md` (design-phase table, after `Repo-precedent conflicts`)
+
+**Step 1: Insert the row** (after `Repo-precedent conflicts`):
+```
+| **Artifact-class precedent** | Survey how the codebase already implements this *artifact class* — not just the *mechanism*. … Grep for sibling instances (`ls scenarios/*/cmd/server/main.go`, sibling plugins, migrations, CLI commands, fixtures) and confirm the design follows the established shape — or explicitly justifies divergence. … Run the decisive `ls`/`grep` for the artifact class, not just for the mechanism. |
+```
+**Step 2: Verify** — awk confirms the row is in the **design-phase** section; `bash tests/skill-content-grep.sh` PASS.
+**Step 3: Commit** — `feat(adversarial-design-review): add Artifact-class precedent design-phase bug-class (#63)`.
+Rollback: revert commit (additive prose row).
+
+## Verification summary (change-class mapping)
+
+| Task | Change class | Verification | Expected |
+|---|---|---|---|
+| 1 | Documentation (skill) | skill-content-grep + awk placement | PASS; row in plan-phase section |
+| 2 | Documentation (skill) | skill-content-grep + skill-cross-refs | PASS |
+| 3 | Documentation (skill+agent) | skill-content-grep + skill-cross-refs | PASS |
+| 4 | Hook (wrapper) | `tests/hook-stdout-discipline.sh` + `tests/hook-contracts.sh` | 0 failures; block JSON recovered behind warning |
+| 5 | Hook (reminder+precompact) | `tests/hook-contracts.sh` dedup+reset cases | 0 failures; deduped + re-emits post-compact |
+| 6 | CI workflow | YAML lint + both hook test scripts pass locally | YAML OK; 0 failures |
+| 7 | Version pin | `tests/version-check.sh` | all 3 manifests agree on 6.3.0 |
+
+## Multi-Component / Integration proof
+
+The real boundaries: (a) wrapper↔hook — Task 4's `hook-stdout-discipline.sh` runs the
+**real** `run-hook.cmd` against fixture hooks (warning+JSON, noise, clean) and asserts
+stdout/stderr; `hook-contracts.sh` runs the real registered hooks through the wrapper
+(no regression). (b) reminder↔state↔pre-compact — Task 5's tests run the **real**
+`pretool-pr-review-reminder` + `pre-compact-snapshot` against a shared tmp
+`.claude/autodev-state`, asserting dedup + post-compact reset. (c) Task 6 CI-gates both
+so the hook fixes can't silently regress.
diff --git a/docs/plans/2026-06-01-pipeline-hardening-4issues.md.scope-lock b/docs/plans/2026-06-01-pipeline-hardening-4issues.md.scope-lock
new file mode 100644
index 0000000..f9c2454
--- /dev/null
+++ b/docs/plans/2026-06-01-pipeline-hardening-4issues.md.scope-lock
@@ -0,0 +1 @@
+815ea55f53f1673b36497497a3141abf8c0fc4d2cb5291303ade1d123046498a
diff --git a/hooks/pre-compact-snapshot b/hooks/pre-compact-snapshot
index a5284e2..5bc19ef 100755
--- a/hooks/pre-compact-snapshot
+++ b/hooks/pre-compact-snapshot
@@ -29,6 +29,26 @@ transcript_path=$(printf '%s' "$hook_input" | jq -r '.transcript_path // empty'
 session_key=""
 [ -n "$transcript_path" ] && session_key=$(basename "$transcript_path")
 
+# ── #61: clear this session's pr-review-reminder marker ───────────────────────
+# So the reminder re-emits once after compaction (the post-compaction context lost the
+# earlier reminder). MUST run before the no-locked-plans early-exit below. Guard on a
+# non-empty session key (no blanket file wipe on identity-less hosts).
+if [ -n "${session_key:-}" ]; then
+    reminder_marker="${cwd_dir}/.claude/autodev-state/pr-reminder-seen"
+    if [ -f "$reminder_marker" ]; then
+        # grep -v exits 1 when no lines remain (only our key) — tolerate it under set -e.
+        remaining="$(grep -vxF "$session_key" "$reminder_marker" 2>/dev/null || true)"
+        if [ -n "$remaining" ]; then
+            # Atomic: write other sessions' keys to a temp file then rename.
+            printf '%s\n' "$remaining" > "${reminder_marker}.tmp" 2>/dev/null \
+                && mv "${reminder_marker}.tmp" "$reminder_marker" \
+                || rm -f "${reminder_marker}.tmp"
+        else
+            rm -f "$reminder_marker"                          # only our key → remove marker
+        fi
+    fi
+fi
+
 # ── Find locked plans and collect their state ─────────────────────────────────
 plans_dir="${cwd_dir}/docs/plans"
 state_section=""
diff --git a/hooks/pretool-pr-review-reminder b/hooks/pretool-pr-review-reminder
index a74bbfa..500e8c8 100755
--- a/hooks/pretool-pr-review-reminder
+++ b/hooks/pretool-pr-review-reminder
@@ -26,8 +26,23 @@ tool_name=$(printf '%s' "$hook_input" | jq -r '.tool_name // empty' 2>/dev/null
 cmd=$(printf '%s' "$hook_input" | jq -r '.tool_input.command // empty' 2>/dev/null || true)
 [ -z "$cmd" ] && exit 0
 
-# Only act on PR creation commands.
-printf '%s' "$cmd" | grep -q 'gh pr create' || exit 0
+# Only act on real PR-creation commands. Strip quoted segments first so a quoted
+# --body that merely mentions "gh pr create" does not trip the hook (#61; mirrors
+# pre-tool-scope-guard's quote-strip precedent).
+cmd_unquoted=$(printf '%s' "$cmd" | sed "s/\"[^\"]*\"//g; s/'[^']*'//g")
+printf '%s' "$cmd_unquoted" | grep -q 'gh pr create' || exit 0
+
+# Session dedup (#61): emit once per session; reset by pre-compact-snapshot on
+# compaction. Keyed by transcript basename; degrade to emit-every-time when absent.
+cwd_dir=$(printf '%s' "$hook_input" | jq -r '.cwd // empty' 2>/dev/null || true)
+[ -z "$cwd_dir" ] && cwd_dir="${PWD}"
+transcript_path=$(printf '%s' "$hook_input" | jq -r '.transcript_path // empty' 2>/dev/null || true)
+session_key=""
+[ -n "$transcript_path" ] && session_key=$(basename "$transcript_path")
+marker="${cwd_dir}/.claude/autodev-state/pr-reminder-seen"
+if [ -n "$session_key" ] && grep -qxF "$session_key" "$marker" 2>/dev/null; then
+    exit 0   # already reminded this session
+fi
 
 reminder=$(cat <<'REMINDER'
 <IMPORTANT>
@@ -52,4 +67,10 @@ emit_additional_context() {
 
 emit_additional_context "PreToolUse" "$reminder"
 
+# Record this session so the reminder doesn't repeat (only when we have a session key).
+if [ -n "$session_key" ]; then
+    mkdir -p "$(dirname "$marker")" 2>/dev/null || true
+    printf '%s\n' "$session_key" >> "$marker" 2>/dev/null || true
+fi
+
 exit 0
diff --git a/hooks/run-hook.cmd b/hooks/run-hook.cmd
index 45955ea..73f5872 100755
--- a/hooks/run-hook.cmd
+++ b/hooks/run-hook.cmd
@@ -55,4 +55,27 @@ if { [ "${LC_ALL:-}" = "C.UTF-8" ] || [ "${LC_CTYPE:-}" = "C.UTF-8" ] || [ "${LA
   [ "${LANG:-}" = "C.UTF-8" ] && export LANG=C
 fi
 
-exec bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
+# Run the hook with stdout captured (stderr + stdin pass through untouched).
+# Enforce stdout JSON discipline: only valid-JSON-or-empty reaches the host's hook
+# parser; diagnostics that leak onto stdout (locale/perl/git warnings) are routed to
+# stderr. A block decision preceded by a warning is recovered, not dropped (#41).
+if command -v jq >/dev/null 2>&1; then
+  hook_out="$(bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@")"
+  hook_rc=$?
+  if [ -z "$hook_out" ]; then
+    : # nothing to emit
+  elif printf '%s' "$hook_out" | jq -e . >/dev/null 2>&1; then
+    printf '%s\n' "$hook_out"                       # valid JSON as a whole
+  else
+    json_line="$(printf '%s\n' "$hook_out" | grep -E '^\{' | tail -1)"
+    if [ -n "$json_line" ] && printf '%s' "$json_line" | jq -e . >/dev/null 2>&1; then
+      printf '%s\n' "$hook_out" | grep -vxF "$json_line" >&2  # diagnostics -> stderr (full-line)
+      printf '%s\n' "$json_line"                              # recovered JSON -> stdout
+    else
+      printf '%s\n' "$hook_out" >&2                           # all noise -> stderr
+    fi
+  fi
+  exit "$hook_rc"
+else
+  exec bash "${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"     # jq absent: pass through unchanged
+fi
diff --git a/hooks/session-start b/hooks/session-start
index 43b5100..98369e3 100755
--- a/hooks/session-start
+++ b/hooks/session-start
@@ -81,9 +81,13 @@ LAST_EMIT_FILE="${STATE_DIR}/session-start-last-emit"
 DEDUP_WINDOW_SEC=5
 if [ -f "$LAST_EMIT_FILE" ]; then
     now=$(date +%s 2>/dev/null || echo 0)
-    last=$(stat -f %m "$LAST_EMIT_FILE" 2>/dev/null \
-        || stat -c %Y "$LAST_EMIT_FILE" 2>/dev/null \
+    # GNU stat (-c %Y) first, then BSD (-f %m). BSD-first is wrong on Linux: `stat -f`
+    # means "file system status" there, succeeds (exit 0), and prints fs info instead
+    # of the mtime — breaking the numeric comparison and the dedup (#64).
+    last=$(stat -c %Y "$LAST_EMIT_FILE" 2>/dev/null \
+        || stat -f %m "$LAST_EMIT_FILE" 2>/dev/null \
         || echo 0)
+    case "$last" in (*[!0-9]*|'') last=0 ;; esac   # guard: only a clean integer
     if [ "$now" -gt 0 ] && [ "$last" -gt 0 ] && [ $((now - last)) -lt "$DEDUP_WINDOW_SEC" ]; then
         exit 0
     fi
diff --git a/skills/adversarial-design-review/SKILL.md b/skills/adversarial-design-review/SKILL.md
index cdbbfc8..1f6ddc9 100644
--- a/skills/adversarial-design-review/SKILL.md
+++ b/skills/adversarial-design-review/SKILL.md
@@ -87,6 +87,7 @@ checklist is the floor, not the ceiling. Additional findings are welcome.
 | **Project-guidance conflicts** | Read `docs/design-guidance.md` or equivalent project guidance. Does the artifact violate product direction, architecture constraints, UX/domain principles, quality/security/ops requirements, or non-goals? Cite both the guidance path/section and the conflicting design/plan section. If guidance is missing and the design does not show Q&A capture, flag it. |
 | **Assumptions under attack** | Load-bearing claims, stated or unstated. "We assume the upstream API is idempotent." "We assume single-tenant." "We assume the user has admin." Challenge each assumption, name how it could be false, and flag any where a false assumption collapses the design. |
 | **Repo-precedent conflicts** | Does this design fight existing patterns, skills, or conventions in this repo? Cite the conflicting `path/file.md:line`. If the design proposes a new pattern that contradicts an established one, the design must justify the divergence. |
+| **Artifact-class precedent** | Survey how the codebase already implements this *artifact class* — not just the *mechanism*. The Repo-precedent class above checks the HOW (e.g. how a plugin registers a module type); this checks the WHERE/shape (e.g. where a scenario stands up a server, where a test fixture lives, where a migration goes). Grep for sibling instances of the same kind of artifact (`ls scenarios/*/cmd/server/main.go`, sibling plugins, migrations, CLI commands, fixtures) and confirm the design follows the established shape — or explicitly justifies divergence. A design that puts a test fixture in the production engine repo when sibling scenarios own their `cmd/server/main.go` = finding. Run the decisive `ls`/`grep` for the artifact class, not just for the mechanism. |
 | **YAGNI violations** | Features in the design that aren't justified by stated requirements. Configuration knobs nobody asked for. Generality nobody needs. Future-proofing for cases that may never arrive. |
 | **Missing failure modes** | What fails first under partial failure, network partition, restart-mid-operation, malformed input, adversarial input, the dependency being down? If the design doesn't address it, flag it. |
 | **Security / privacy at architecture level** | Auth/authz boundaries, secret flow, least privilege, blast radius on compromise, PII exposure, log leakage, dependency/trust boundary, abuse case, CSRF/SSRF/auth-confused-deputy at the design level (not at the code level — that's `requesting-code-review`'s job). |
@@ -106,6 +107,7 @@ inherits the design's blast radius) and adds:
 |---|---|
 | **Over-decomposition / under-decomposition** | Does task granularity match `writing-plans`'s 2–5-minute target per step? A 40-step plan for a CSV export is suspect. A 2-step plan for a schema migration is suspect. Flag both directions. |
 | **Verification-class mismatch** | For each task, does its verification step match its change class per the table in `skills/writing-plans/SKILL.md` ("Verification per change class")? A schema migration verified by unit tests = finding. An API endpoint with no curl invocation = finding. |
+| **Auth/authz chain composition** | When the design names an auth/authz chain ("behind the X auth filter", "RBAC-enforced", "admin-only"), walk that chain component-by-component against the plan's actual wiring. For each gate, verify it is enforced **server-side against an authenticated principal**, not shape-matched by a client-asserted value. Flag any gate where the plan's check reads from request/client-supplied input (`evidence.granted_permissions`, a header, a body field) instead of an authenticated subject (`authz.Enforce(authenticatedSubject, …)`). A plan that wires a weaker gate than the design's chain implies = finding. |
 | **Hidden serial dependencies** | Tasks the plan claims are independent but actually share state (same file, same DB row, same config key). If executed in parallel, they'll collide. Flag any such pair. |
 | **Missing rollback wiring** | The design specifies a rollback story (per the design-phase class above). Is it actually implemented in the plan as a task or step? Or is it a paragraph nobody is going to write code for? |
 | **Missing integration proof** | For multi-component changes, does the plan include an end-to-end or integration verification that exercises the real boundary? If it only tests each component with mocks, flag it unless the design explicitly justified that as sufficient. |
diff --git a/skills/pr-monitoring/SKILL.md b/skills/pr-monitoring/SKILL.md
index a395cd4..626a1ee 100644
--- a/skills/pr-monitoring/SKILL.md
+++ b/skills/pr-monitoring/SKILL.md
@@ -21,6 +21,50 @@ Invoked automatically by `finishing-a-development-branch` in autonomous mode aft
 
 When multiple PRs were created in the same session, prefer launching **one monitor agent that covers all PRs** to reduce GitHub API request volume. You may launch one agent per PR instead if the PRs are on unrelated codebases, if you anticipate heavy parallel CI load, or if a previous shared monitor was rate-limited. Default to the single-agent approach and only deviate with a reason.
 
+## Waiting for CI: the sanctioned pattern
+
+For a pure CI-wait (poll until checks settle, then act), prefer a blocking poll-loop
+over a long-lived monitor Agent. The pattern is host-scoped.
+
+<host: claude-code>
+**Recommended default — a bash poll-loop, not a long-lived monitor Agent.** Issue a
+`Bash` tool call with `run_in_background: true` running a **bounded** sleep-loop that
+polls `gh pr checks <pr>` until no check is `pending` (or a failure), prints a settle
+line, and exits. The harness re-invokes the lead **once** on exit (≈0 tokens while it
+sleeps); the lead then reads the result and admin-merges on `failures=0`.
+
+Bound the loop so it can never spin forever, e.g.:
+
+```bash
+for i in $(seq 1 120); do            # 120 × 30s = 60 min cap
+  raw=$(gh pr checks <pr> --json bucket 2>/dev/null)
+  fail=$(echo "$raw" | jq '[.[]|select(.bucket=="fail")]|length')
+  pend=$(echo "$raw" | jq '[.[]|select(.bucket=="pending")]|length')
+  { [ "$fail" != 0 ] && [ -n "$fail" ]; } && { echo "FAILURES=$fail"; break; }
+  { [ "$pend" = 0 ] || [ -z "$pend" ]; } && { echo "SETTLED"; break; }
+  sleep 30
+done
+```
+
+On the 120-iteration cap, print a timeout line and exit for the lead to restart.
+
+**Why not a background Agent for this:** a `run_in_background` **Agent** told to
+sleep-loop tends to return after ~1 cycle and re-complete repeatedly (the agent loop is
+not a blocking sleep) — observed early-exiting ~6× in one run. The bash sleep-loop
+genuinely blocks to completion.
+</host>
+
+<host: codex, cursor, opencode>
+Use your host's equivalent poll mechanism. Where no blocking-background-bash exists, the
+sanctioned fallback is **self-poll on each lead wakeup**: run `gh pr checks <pr>` once
+per turn and re-check on the next turn. This loses fire-on-event but is reliable; do not
+dispatch a background Agent expecting it to block on a sleep-loop.
+</host>
+
+**Fallback (all hosts):** the background-Agent monitor in "The Process" below remains the
+tool for multi-PR review-comment handling that needs active fix-and-push; note its
+early-exit failure mode and prefer the poll-loop / self-poll for a pure CI-wait.
+
 ## The Process
 
 Run a `balanced`-tier agent that monitors PRs in a loop for up to **60 minutes**, polling every **10 minutes**, until all CI checks pass and no unresolved reviews remain.
diff --git a/skills/subagent-driven-development/SKILL.md b/skills/subagent-driven-development/SKILL.md
index 807005d..dbe97d5 100644
--- a/skills/subagent-driven-development/SKILL.md
+++ b/skills/subagent-driven-development/SKILL.md
@@ -296,6 +296,26 @@ Codex subagents do not share a task list. Use these conventions instead:
 
 ---
 
+## Completion is not trusted until lead-verified
+
+A green `Implement: N` checkbox is a **claim, not evidence**. Regardless of who or what
+flips an `Implement: N` task to `completed` (an implementer, a `blockedBy`-clear, or the
+code-reviewer), the lead MUST run `autodev:verification-before-completion` — build + test
+from a clean tree, CI green — before treating that task as truly done or invoking
+`finishing-a-development-branch`. Recurring failure mode (infra-admin v1.1): a
+non-compiling tree and a CI-failing regression were both marked "done"; only the lead's
+verification caught them.
+
+The team-conventions rule "only the code-reviewer flips `Implement: N` to completed"
+remains team discipline, but **correctness rests on lead verification, which does not
+depend on who flipped the bit**. A deterministic hook that *blocks* the flip is
+infeasible — the pre-tool payload a hook sees carries the task id and status but not the
+task's subject or the calling sub-agent's identity, so a hook cannot tell an `Implement`
+task from any other or identify the caller. See
+`decisions/0003-implement-n-completion-trust-boundary.md`.
+
+---
+
 ## Red Flags
 
 **Never:**
diff --git a/tests/hook-contracts.sh b/tests/hook-contracts.sh
index 3023e5e..6defdf3 100755
--- a/tests/hook-contracts.sh
+++ b/tests/hook-contracts.sh
@@ -419,6 +419,38 @@ test_pretool_pr_review_json() {
   assert_hook_context_json "pretool-pr-review-reminder" "PreToolUse" "$output"
 }
 
+test_pr_reminder_dedup() {
+  local tmp; tmp="$(mktemp -d)"
+  trap 'rm -rf "$tmp"' RETURN
+  local sess='/x/transcripts/sess-abc.jsonl'
+  local payload='{"tool_name":"Bash","tool_input":{"command":"gh pr create --title t --body b"},"cwd":"'"$tmp"'","transcript_path":"'"$sess"'"}'
+  local out1; out1="$(run_hook pretool-pr-review-reminder "$payload")"
+  printf '%s' "$out1" | jq -e '.hookSpecificOutput.additionalContext' >/dev/null 2>&1 \
+    && pass "pr-reminder: first call emits" || fail "pr-reminder: first call should emit"
+  local out2; out2="$(run_hook pretool-pr-review-reminder "$payload")"
+  [ -z "$out2" ] && pass "pr-reminder: deduped within session" \
+    || fail "pr-reminder: second call should be silent, got: $out2"
+  # PreCompact with NO locked plans must still clear the marker.
+  run_hook pre-compact-snapshot '{"cwd":"'"$tmp"'","transcript_path":"'"$sess"'"}' >/dev/null 2>&1 || true
+  local out3; out3="$(run_hook pretool-pr-review-reminder "$payload")"
+  printf '%s' "$out3" | jq -e '.hookSpecificOutput.additionalContext' >/dev/null 2>&1 \
+    && pass "pr-reminder: re-emits after PreCompact reset" \
+    || fail "pr-reminder: should re-emit after compaction, got: $out3"
+  # A quoted body that merely mentions 'gh pr create' must NOT emit.
+  local fp='{"tool_name":"Bash","tool_input":{"command":"gh issue create --title t --body \"see gh pr create docs\""},"cwd":"'"$tmp"'","transcript_path":"'"$sess"'"}'
+  local outfp; outfp="$(run_hook pretool-pr-review-reminder "$fp")"
+  [ -z "$outfp" ] && pass "pr-reminder: not tripped by 'gh pr create' inside a quoted body" \
+    || fail "pr-reminder: false-positive on quoted body, got: $outfp"
+  # Degrade gracefully: no transcript_path -> emits every time.
+  local tmp2; tmp2="$(mktemp -d)"
+  local np='{"tool_name":"Bash","tool_input":{"command":"gh pr create --title t"},"cwd":"'"$tmp2"'"}'
+  local na nb; na="$(run_hook pretool-pr-review-reminder "$np")"; nb="$(run_hook pretool-pr-review-reminder "$np")"
+  { printf '%s' "$na" | jq -e '.hookSpecificOutput' >/dev/null 2>&1 && printf '%s' "$nb" | jq -e '.hookSpecificOutput' >/dev/null 2>&1; } \
+    && pass "pr-reminder: no transcript_path -> emits every time" \
+    || fail "pr-reminder: should emit each time without transcript_path"
+  rm -rf "$tmp2"
+}
+
 test_posttool_pr_created_json() {
   local output
   output="$(run_hook posttool-pr-created '{"tool_name":"Bash","tool_input":{"command":"gh pr create --title test --body test"},"tool_response":"https://github.com/owner/repo/pull/123","cwd":"'"$REPO_ROOT"'"}')"
@@ -1778,6 +1810,7 @@ test_prompt_strict_ignores_single_workspace_lock_when_session_has_no_lock
 test_prompt_strict_uses_session_locked_plan_only
 test_prompt_strict_ignores_prose_mention_of_locked_status
 test_pretool_pr_review_json
+test_pr_reminder_dedup
 test_posttool_pr_created_json
 test_pre_compact_snapshot_json
 test_wrapper_suppresses_pre_compact_locale_noise
diff --git a/tests/hook-stdout-discipline.sh b/tests/hook-stdout-discipline.sh
new file mode 100755
index 0000000..196859b
--- /dev/null
+++ b/tests/hook-stdout-discipline.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# tests/hook-stdout-discipline.sh — verify run-hook.cmd enforces stdout JSON discipline.
+set -uo pipefail
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+WRAPPER="$REPO_ROOT/hooks/run-hook.cmd"
+failures=0
+pass() { printf 'PASS: %s\n' "$*"; }
+fail() { printf 'FAIL: %s\n' "$*" >&2; failures=$((failures+1)); }
+command -v jq >/dev/null 2>&1 || { echo "SKIP: jq required for a/b/c"; exit 0; }
+
+tmp="$(mktemp -d)"
+# Cleanup trap set BEFORE any fixture is copied into hooks/ (rm -f on absent files is safe).
+trap 'rm -f "$REPO_ROOT"/hooks/fix-warn-then-json "$REPO_ROOT"/hooks/fix-noise "$REPO_ROOT"/hooks/fix-clean; rm -rf "$tmp"' EXIT
+mkfix() { printf '%s\n' "$1" > "$tmp/$2"; chmod +x "$tmp/$2"; }
+
+# Fixture A: a warning leaks to stdout, then a block JSON on stdout.
+mkfix '#!/usr/bin/env bash
+echo "perl: warning: Setting locale failed."
+printf "%s\n" "{\"decision\":\"block\",\"reason\":\"x\"}"' fix-warn-then-json
+# Fixture B: only noise, no JSON.
+mkfix '#!/usr/bin/env bash
+echo "just a diagnostic line"' fix-noise
+# Fixture C: clean single-line JSON.
+mkfix '#!/usr/bin/env bash
+printf "%s\n" "{\"hookSpecificOutput\":{\"hookEventName\":\"X\"}}"' fix-clean
+
+for f in fix-warn-then-json fix-noise fix-clean; do cp "$tmp/$f" "$REPO_ROOT/hooks/$f"; done
+
+run() { OUT="$("$WRAPPER" "$1" 2>"$tmp/err")"; RC=$?; ERR="$(cat "$tmp/err")"; }
+
+# (a) warning + block JSON → stdout ONLY the block JSON; warning ON stderr.
+run fix-warn-then-json
+if printf '%s' "$OUT" | jq -e '.decision=="block"' >/dev/null 2>&1 \
+   && ! printf '%s' "$OUT" | grep -q 'perl: warning' \
+   && printf '%s' "$ERR" | grep -q 'perl: warning'; then
+  pass "(a) block JSON on stdout, warning routed to stderr"
+else fail "(a) expected block JSON on stdout + warning on stderr, got OUT=[$OUT] ERR=[$ERR]"; fi
+
+# (b) only noise → stdout empty, noise on stderr.
+run fix-noise
+{ [ -z "$OUT" ] && printf '%s' "$ERR" | grep -q 'diagnostic'; } \
+  && pass "(b) noise suppressed from stdout, routed to stderr" || fail "(b) expected empty stdout, got: $OUT"
+
+# (c) clean JSON → unchanged + valid.
+run fix-clean
+printf '%s' "$OUT" | jq -e '.hookSpecificOutput.hookEventName=="X"' >/dev/null 2>&1 \
+  && pass "(c) clean JSON passthrough" || fail "(c) clean JSON broke, got: $OUT"
+
+# (d) jq-absent → wrapper passes stdout through VERBATIM (warning + JSON both present).
+# Stub PATH so the wrapper's `command -v jq` fails, but `bash` is still resolvable
+# (the wrapper's jq-absent branch `exec bash`s the fixture). assert via grep (no jq).
+nojq="$tmp/nojq"; mkdir -p "$nojq"
+# Symlink the externals the wrapper's jq-absent path needs (bash for `exec bash`,
+# dirname for SCRIPT_DIR) but NOT jq, so `command -v jq` fails.
+for bin in bash dirname; do ln -sf "$(command -v "$bin")" "$nojq/$bin"; done
+OUTD="$(PATH="$nojq" "$WRAPPER" fix-warn-then-json 2>/dev/null)"
+{ printf '%s' "$OUTD" | grep -q 'perl: warning' && printf '%s' "$OUTD" | grep -q '"decision":"block"'; } \
+  && pass "(d) jq-absent → verbatim passthrough (no discipline applied)" \
+  || fail "(d) expected verbatim passthrough with jq absent, got: $OUTD"
+
+echo ""; echo "Results: $failures failure(s)"; [ "$failures" -eq 0 ]