From a1c996461ad246d99f20781ec3dbab7d47c4e879 Mon Sep 17 00:00:00 2001
From: Wild Wind <wwind123@gmail.com>
Date: Sun, 7 Jun 2026 19:19:29 -0700
Subject: [PATCH 1/2] feat: add Claude Code native skill mode for agent-loop
 orchestration (#216)

Adds a Claude Code skill that lets users run the multi-agent review loop
inside an interactive Claude Code session without calling `claude -p` for
Claude turns. External agents (Codex, Gemini) are still invoked as subprocesses.

New files:
- helpers/validate_response.py: validates structured protocol responses
  using the existing library entry points (_validate_plan_review_response,
  _validate_review_response, _validate_coder_followup_response, etc.)
- helpers/state_manager.py: manages local session state and resume via
  _resume_plan_round/_resume_pr_round; accepts --reviewers and --head-sha/--pr
  as required by the existing resume APIs
- helpers/run_external.py: runs codex/gemini CLIs; --dry-run writes a
  canned approved plan_review stub for testing
- helpers/gh_ops.py: wraps gh CLI for issue/PR comment operations
- helpers/demo_loop.py: standalone dry-run demo (no live Claude/GitHub calls)
- SKILL.md: step-by-step skill orchestration instructions for Claude
- docs/skill_mode.md: architecture table, billing guidance, limitations
- tests/test_skill_helpers.py: unit tests for all helper CLIs
- tests/test_skill_loop.py: subprocess integration test for demo_loop.py

Fixes #216

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 SKILL.md                     | 196 ++++++++++++++++++++++++
 docs/skill_mode.md           | 137 +++++++++++++++++
 helpers/__init__.py          |   1 +
 helpers/demo_loop.py         | 178 ++++++++++++++++++++++
 helpers/gh_ops.py            | 143 ++++++++++++++++++
 helpers/run_external.py      | 140 ++++++++++++++++++
 helpers/state_manager.py     | 278 +++++++++++++++++++++++++++++++++++
 helpers/validate_response.py | 151 +++++++++++++++++++
 tests/test_skill_helpers.py  | 222 ++++++++++++++++++++++++++++
 tests/test_skill_loop.py     |  64 ++++++++
 10 files changed, 1510 insertions(+)
 create mode 100644 SKILL.md
 create mode 100644 docs/skill_mode.md
 create mode 100644 helpers/__init__.py
 create mode 100644 helpers/demo_loop.py
 create mode 100644 helpers/gh_ops.py
 create mode 100644 helpers/run_external.py
 create mode 100644 helpers/state_manager.py
 create mode 100644 helpers/validate_response.py
 create mode 100644 tests/test_skill_helpers.py
 create mode 100644 tests/test_skill_loop.py

diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..60cac4c
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,196 @@
+# Agent Loop Skill — Claude Code Native Mode
+
+This skill lets you run the `coding-review-agent-loop` orchestration directly inside
+an interactive Claude Code session, without calling `claude -p` for Claude turns.
+
+Claude (you, the host) performs coder/plan turns using your active session context.
+External agents (Codex, Gemini) are invoked via their local CLIs as subprocesses.
+GitHub operations go through `gh`.
+
+## Prerequisites
+
+- `gh` authenticated and configured.
+- `codex` CLI installed (for Codex reviewer turns).
+- `gemini` CLI installed (for Gemini reviewer turns).
+- The `coding-review-agent-loop` package importable from `src/` (run from repo root).
+
+## How to start a plan loop for an issue
+
+Provide the following information:
+
+1. **Repository**: `OWNER/REPO`
+2. **Issue number**: e.g. `123`
+3. **Reviewers**: e.g. `codex`, `gemini`, or both
+
+Then follow the steps below.
+
+---
+
+## Orchestration steps
+
+### Step 1 — Check for an existing session
+
+```bash
+python -m helpers.state_manager build-resume \
+  --issue ISSUE --repo OWNER/REPO \
+  --reviewers codex gemini \
+  --flow plan
+```
+
+If `round_number` > 1 or `completed_reviewer_names` is non-empty, a prior round
+was found and you can skip already-completed reviewer turns.
+
+### Step 2 — Write the plan (Claude host turn)
+
+Write the implementation plan to a temp file, e.g.:
+
+```
+/tmp/agent-loop-skill/{session-id}/plan-{uuid}.md
+```
+
+The file must end with:
+
+```
+<!-- AGENT_PLAN_STATE: approved -->
+-- Anthropic Claude
+```
+
+### Step 3 — Validate the plan
+
+```bash
+python -m helpers.validate_response \
+  --file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  --kind plan_state
+```
+
+### Step 4 — Save as pending comment
+
+```bash
+python -m helpers.state_manager write-pending-comment \
+  --issue ISSUE --repo OWNER/REPO \
+  --body /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md
+```
+
+### Step 5 — Post the plan comment
+
+```bash
+python -m helpers.gh_ops post-issue-comment \
+  --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  --repo OWNER/REPO
+```
+
+```bash
+python -m helpers.state_manager clear-pending-comment \
+  --issue ISSUE --repo OWNER/REPO
+```
+
+### Step 6 — Run each reviewer
+
+For each reviewer (e.g. Codex):
+
+```bash
+python -m helpers.run_external \
+  --agent codex \
+  --prompt-file /tmp/agent-loop-skill/{session-id}/reviewer-prompt.md \
+  --output /tmp/agent-loop-skill/{session-id}/codex-review.md \
+  --workdir /path/to/codex/checkout
+```
+
+Validate the reviewer response:
+
+```bash
+python -m helpers.validate_response \
+  --file /tmp/agent-loop-skill/{session-id}/codex-review.md \
+  --kind plan_review \
+  --context-file /tmp/agent-loop-skill/{session-id}/context.json
+```
+
+The `context.json` must contain:
+
+```json
+{
+  "reviewer": "Codex",
+  "prior_items": [...],
+  "current_round_items": [...]
+}
+```
+
+Post the reviewer comment:
+
+```bash
+python -m helpers.gh_ops post-issue-comment \
+  --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/codex-review.md \
+  --repo OWNER/REPO
+```
+
+### Step 7 — Update session state
+
+```bash
+python -m helpers.state_manager write-session \
+  --issue ISSUE --repo OWNER/REPO \
+  --fields '{"last_completed_step": "post_review", "round_number": 1}'
+```
+
+### Step 8 — Decision
+
+- If all reviewers approved: the plan is approved and the plan loop is complete.
+- If any reviewer blocked: perform a new plan revision and loop back to Step 2.
+- If clarification is needed: post an `<!-- AGENT_CLARIFY -->` comment and stop.
+
+---
+
+## PR review mode
+
+Use `--flow pr` with `build-resume` and pass `--pr PR_NUMBER` (or `--head-sha SHA`)
+to operate in PR-review mode. The other steps are the same, except that validation
+uses `--kind pr_review` and comments are posted with `gh_ops post-pr-comment --pr`.
+
+---
+
+## Billing and terms note
+
+This skill runs Claude turns inside your active interactive Claude Code session.
+Whether that counts as interactive or programmatic usage depends on Anthropic's
+current terms and product behavior at the time you run it.
+Do not use this skill to proxy one user's session to other users, to build
+unattended 24/7 automation, or in any way that violates Anthropic's usage policies.
+
+---
+
+## Session state location
+
+Session state is stored in:
+
+```
+~/.local/state/coding-review-agent-loop/skill-sessions/{owner-repo}/{issue}.json
+```
+
+If `XDG_STATE_HOME` is set, it replaces `~/.local/state`. Either way the location is outside git checkouts, so it never dirties any working tree.
+
+---
+
+## Limitations
+
+- If Claude Code's session ends mid-loop, resume from the last posted GitHub comment
+  by re-running Step 1 with `build-resume`.
+- Long-running Codex/Gemini subprocess progress is not streamed; check the logs under
+  `<system temp dir>/coding-review-agent-loop/skill-logs/` (usually `/tmp`) if a reviewer hangs.
+- The structured protocol (AGENT_LOOP_META markers, structured JSON responses)
+  must match the versions expected by the existing library in `src/`.
+
+---
+
+## Demo
+
+Run a minimal dry-run demo (no live GitHub or agent calls):
+
+```bash
+python -m helpers.demo_loop --issue 123 --repo demo/repo
+```
+
+Expected output includes:
+```
+validation passed: plan_state
+validation passed: plan_review
+demo_loop: all steps completed successfully
+```
diff --git a/docs/skill_mode.md b/docs/skill_mode.md
new file mode 100644
index 0000000..5928db7
--- /dev/null
+++ b/docs/skill_mode.md
@@ -0,0 +1,137 @@
+# Claude Code Native Skill Mode
+
+## Overview
+
+`coding-review-agent-loop` includes a Claude Code skill that lets you run the
+multi-agent review loop directly inside an interactive Claude Code session
+instead of through `claude -p` subprocesses.
+
+| Aspect | Headless CLI mode | Skill mode |
+|--------|-------------------|------------|
+| Claude turns | `claude -p` subprocess (Agent SDK credits) | Active Claude Code session |
+| Codex turns | `codex exec` subprocess | Same `codex exec` subprocess |
+| Gemini turns | `gemini` subprocess | Same `gemini` subprocess |
+| GitHub ops | Python `gh` wrapper | Same `gh` wrapper |
+| Session resume | AGENT_LOOP_META in GitHub comments | Same markers + local session JSON |
+| Best for | Headless CI / unattended automation | Interactive development sessions |
+
+## Architecture
+
+```
+Claude Code (interactive session)
+│
+├── helpers/validate_response.py   ← validates structured protocol responses
+├── helpers/state_manager.py       ← session state + GitHub comment resume
+├── helpers/run_external.py        ← invokes codex/gemini CLIs
+├── helpers/gh_ops.py              ← GitHub issue/PR comment operations
+└── helpers/demo_loop.py           ← standalone dry-run demo
+```
+
+Claude performs coder/plan turns by writing files directly (using its Write
+tool or by producing structured JSON in its response).  External reviewers
+(Codex, Gemini) are still invoked as subprocesses via `run_external.py`.
+
+## Structured protocol compatibility
+
+The skill helpers reuse the same library entry points used by the headless CLI:
+
+- `_validate_plan_review_response` / `_validate_review_response` (unresolved_items)
+- `_resume_plan_round` / `_resume_pr_round` (round_state)
+- `parse_plan_state` / `validate_structured_plan_revision` (protocol)
+
+GitHub comment metadata markers (`AGENT_LOOP_META`) written by the skill are
+identical to those written by the headless CLI, so mixed-mode operation (start
+headless, resume in skill, or vice versa) is supported.
+
+## Session state
+
+Local session state is stored at:
+
+```
+~/.local/state/coding-review-agent-loop/skill-sessions/{owner-repo}/{issue}.json
+```
+
+This path is outside any git checkout so it never dirties a working tree.
+Fields written by `state_manager write-session`:
+
+| Field | Description |
+|-------|-------------|
+| `last_completed_step` | Most recently completed orchestration step |
+| `session_id` | Current skill session UUID prefix |
+| `round_number` | Current plan/PR round number |
+| `pending_comment_body` | Path to a comment body not yet posted |
+
+The `pending_comment_body` field provides crash recovery: if the session ends
+after writing the comment file but before posting it, the next `build-resume`
+call includes the path so Claude can re-post it.
+
+## Resume from existing round
+
+`state_manager build-resume` reads GitHub issue comments, extracts all
+`AGENT_LOOP_META` base64 blobs, calls `_resume_plan_round(comments,
+configured_reviewers=...)` or `_resume_pr_round(comments, head_sha=...,
+configured_reviewers=...)`, and outputs a JSON descriptor:
+
+```json
+{
+  "round_number": 2,
+  "prior_items": [...],
+  "compact_prior_summaries": [...],
+  "completed_reviewer_names": ["Codex"],
+  "pending_comment_body": null,
+  "current_plan": "..."
+}
+```
+
+The skill then skips already-completed reviewer turns and resumes from where
+the last session ended.
+
+**Important**: `--reviewers` must exactly match the configured reviewer list for
+the current invocation.  For PR-flow sessions, `--head-sha` or `--pr` is also
+required so `_resume_pr_round` can compare the current PR head SHA.
+
+## Billing and terms
+
+Running Claude turns inside an interactive Claude Code session may count
+differently toward billing than `claude -p` / Agent SDK invocations.  Whether
+this constitutes "interactive" or "programmatic" use depends on Anthropic's
+current terms and product behavior at the time of use.
+
+**Non-goals / constraints**:
+- Do not use this skill to proxy one user's session to other users.
+- Do not build unattended 24/7 automation that relies on pretending to be
+  interactive use.
+- Do not market this as free Claude access or billing bypass.
+- The existing headless `agent-loop` CLI path is unchanged and unaffected.
+
+## Install / setup for open-source users
+
+1. Clone the repository and install in development mode:
+   ```
+   pip install -e ".[dev]"
+   ```
+2. Copy or symlink `helpers/` and `SKILL.md` into your working directory
+   (or run from the repo root).
+3. Authenticate `gh` and install `codex` / `gemini` CLIs as needed.
+4. Run the demo to verify the install:
+   ```
+   python -m helpers.demo_loop --issue 123 --repo demo/repo
+   ```
+
+## Known limitations
+
+- Reviewer subprocess progress (Codex, Gemini) is not streamed to Claude's
+  terminal while the subprocess runs.  Check logs in
+  `/tmp/coding-review-agent-loop/skill-logs/`.
+- If the Claude Code session ends mid-loop, the next session must call
+  `build-resume` to reconstruct the round state from GitHub comments.
+- The structured protocol versions must match; update both the library and
+  the skill helpers together when the protocol evolves.
+- Future Antigravity CLI migration (#215) may require updates to
+  `run_external.py` when the `gemini` CLI name or interface changes.
+
+## Related
+
+- `SKILL.md` — step-by-step skill orchestration instructions for Claude.
+- Issue #216 — original exploration proposal.
+- Issue #215 — Antigravity CLI migration for Gemini CLI consumer users.
diff --git a/helpers/__init__.py b/helpers/__init__.py
new file mode 100644
index 0000000..e594a3c
--- /dev/null
+++ b/helpers/__init__.py
@@ -0,0 +1 @@
+# helpers package — Claude Code skill support scripts
diff --git a/helpers/demo_loop.py b/helpers/demo_loop.py
new file mode 100644
index 0000000..5d94114
--- /dev/null
+++ b/helpers/demo_loop.py
@@ -0,0 +1,178 @@
+"""
+Minimal standalone demo of the Claude Code skill loop.
+
+Demonstrates:
+  1. Claude (host) writes a stub plan.
+  2. validate_response validates it as plan_state.
+  3. Codex (dry-run) writes a canned approved plan_review stub.
+  4. validate_response validates it as plan_review.
+  5. gh_ops post-issue-comment --dry-run records the review.
+  6. state_manager write-session records last_completed_step=post_review.
+
+Usage:
+  python -m helpers.demo_loop --issue 123 [--dry-run] [--repo OWNER/REPO]
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import subprocess
+import sys
+import tempfile
+import uuid
+from pathlib import Path
+
+# Make src importable when run from the repo root
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+_HELPERS = Path(__file__).parent
+
+_HOST_STUB_PLAN = """\
+## Plan
+
+1. Create helpers/validate_response.py
+2. Create helpers/state_manager.py
+3. Create helpers/run_external.py
+4. Create helpers/gh_ops.py
+5. Create helpers/demo_loop.py
+6. Create SKILL.md
+7. Add tests
+
+<!-- AGENT_PLAN_STATE: approved -->
+-- Anthropic Claude (skill demo stub)
+"""
+
+
+def _run(cmd: list[str], *, check: bool = True) -> subprocess.CompletedProcess[str]:
+    """Run *cmd* and capture its text output; exit 1 on failure when *check* is set."""
+    proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
+    if proc.returncode == 0 or not check:
+        return proc
+    print(f"demo_loop: command failed: {' '.join(cmd)}", proc.stderr, sep="\n", file=sys.stderr)
+    sys.exit(1)
+
+
+def _py(module_args: list[str]) -> list[str]:  # argv running helpers.<module_args[0]> via `python -m`
+    return [sys.executable, "-m", f"helpers.{module_args[0]}", *module_args[1:]]
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Minimal skill loop demo.")
+    parser.add_argument("--issue", type=int, default=123)
+    parser.add_argument("--repo", default="demo/repo")
+    parser.add_argument("--dry-run", action="store_true", default=True,
+                        help="Always dry-run for demo (default: True).")
+    args = parser.parse_args()
+    # NOTE: --dry-run is store_true with default=True, so it cannot be turned off; it is not read below.
+    session_id = uuid.uuid4().hex[:8]
+    tmpdir = Path(tempfile.mkdtemp(prefix=f"skill-demo-{session_id}-"))
+    print(f"demo_loop: session {session_id}, tmpdir {tmpdir}")
+    # NOTE: tmpdir comes from mkdtemp() and is not removed by this function.
+    # Step 1: stand in for the Claude host turn by writing the canned plan into tmpdir.
+    plan_file = tmpdir / "plan.md"
+    plan_file.write_text(_HOST_STUB_PLAN, encoding="utf-8")
+    print(f"demo_loop: wrote host plan stub to {plan_file}")
+
+    # Step 2: validate the plan with the validate_response helper and check its stdout marker.
+    result = _run(_py(["validate_response", "--file", str(plan_file), "--kind", "plan_state"]))
+    print(result.stdout.strip())
+    assert "validation passed: plan_state" in result.stdout, result.stdout
+
+    # Step 3: run_external is called with --dry-run and --agent codex to produce the review file.
+    reviewer_output = tmpdir / "codex_review.md"
+    _run(
+        _py(
+            [
+                "run_external",
+                "--agent",
+                "codex",
+                "--prompt-file",
+                str(plan_file),
+                "--output",
+                str(reviewer_output),
+                "--workdir",
+                str(tmpdir),
+                "--dry-run",
+            ]
+        )
+    )
+    print(f"demo_loop: Codex dry-run output written to {reviewer_output}")
+
+    # Step 4: validate the review as plan_review against a context with no prior or current items.
+    context_file = tmpdir / "context.json"
+    context_file.write_text(
+        json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}),
+        encoding="utf-8",
+    )
+    result = _run(
+        _py(
+            [
+                "validate_response",
+                "--file",
+                str(reviewer_output),
+                "--kind",
+                "plan_review",
+                "--context-file",
+                str(context_file),
+            ]
+        )
+    )
+    print(result.stdout.strip())
+    assert "validation passed: plan_review" in result.stdout, result.stdout
+
+    # Step 5: gh_ops is always passed --dry-run here, so the demo never posts to GitHub.
+    _run(
+        _py(
+            [
+                "gh_ops",
+                "post-issue-comment",
+                "--issue",
+                str(args.issue),
+                "--file",
+                str(reviewer_output),
+                "--repo",
+                args.repo,
+                "--dry-run",
+            ]
+        )
+    )
+
+    # Step 6: record last_completed_step and session_id through state_manager write-session.
+    _run(
+        _py(
+            [
+                "state_manager",
+                "write-session",
+                "--issue",
+                str(args.issue),
+                "--repo",
+                args.repo,
+                "--fields",
+                json.dumps({"last_completed_step": "post_review", "session_id": session_id}),
+            ]
+        )
+    )
+
+    # Read the state back through read-session to confirm the write round-trips.
+    result = _run(
+        _py(
+            [
+                "state_manager",
+                "read-session",
+                "--issue",
+                str(args.issue),
+                "--repo",
+                args.repo,
+            ]
+        )
+    )
+    session_data = json.loads(result.stdout)
+    assert session_data.get("last_completed_step") == "post_review", session_data
+
+    print("demo_loop: all steps completed successfully")
+    print(f"session state: {json.dumps(session_data, indent=2)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/helpers/gh_ops.py b/helpers/gh_ops.py
new file mode 100644
index 0000000..e377f92
--- /dev/null
+++ b/helpers/gh_ops.py
@@ -0,0 +1,143 @@
+"""
+GitHub CLI wrapper for skill orchestration.
+
+Subcommands (the global --gh-cmd option, default "gh", goes before the subcommand):
+
+  fetch-issue   --issue N --repo REPO
+  post-issue-comment --issue N --file PATH --repo REPO [--dry-run]
+  fetch-pr      --pr N --repo REPO
+  post-pr-comment --pr N --file PATH --repo REPO [--dry-run]
+"""
+
+from __future__ import annotations
+
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+
+def _gh(*args_list: str, gh_cmd: str = "gh") -> str:
+    """Run ``gh_cmd`` with *args_list* and return its stdout; exit 1 on any failure."""
+    try:
+        result = subprocess.run([gh_cmd, *args_list], capture_output=True, text=True, check=False)
+    except OSError as exc:  # e.g. the gh binary is missing or not executable
+        print(f"gh_ops: cannot run {gh_cmd}: {exc}", file=sys.stderr)
+        sys.exit(1)
+    if result.returncode != 0:
+        print(f"gh_ops: gh error: {result.stderr.strip()}", file=sys.stderr)
+        sys.exit(1)
+    return result.stdout
+
+
+def cmd_fetch_issue(args: argparse.Namespace) -> None:
+    """Print ``gh issue view`` JSON for ``args.issue`` in ``args.repo`` to stdout.
+
+    The requested fields are number, title, body, comments and state; the
+    text returned by ``gh`` is echoed as-is.
+    """
+    json_fields = "number,title,body,comments,state"
+    view_argv = ["issue", "view", str(args.issue), "--repo", args.repo]
+    view_argv += ["--json", json_fields]
+    issue_json = _gh(*view_argv, gh_cmd=args.gh_cmd)
+    # end="" keeps print() from appending a newline to gh's output.
+    print(issue_json, end="")
+
+
+def cmd_post_issue_comment(args: argparse.Namespace) -> None:
+    """Post the contents of ``args.file`` as a comment on issue ``args.issue``.
+
+    Exits with status 1 when the file cannot be read.  With ``args.dry_run``
+    set, nothing is sent to GitHub: a notice and the first 400 characters of
+    the body are printed instead.
+    """
+    try:
+        body = Path(args.file).read_text(encoding="utf-8")
+    except OSError as exc:
+        print(f"gh_ops: cannot read comment file: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    if not args.dry_run:
+        comment_argv = ["issue", "comment", str(args.issue), "--repo", args.repo, "--body", body]
+        _gh(*comment_argv, gh_cmd=args.gh_cmd)
+        print(f"comment posted to {args.repo}#{args.issue}")
+        return
+    # Dry run: show what would have been posted.
+    print(f"[dry-run] would post issue comment to {args.repo}#{args.issue}:")
+    print(body[:400])
+
+
+def cmd_fetch_pr(args: argparse.Namespace) -> None:
+    """Print ``gh pr view`` JSON for ``args.pr`` in ``args.repo`` to stdout.
+
+    The requested fields are number, title, body, headRefOid, state and
+    comments; the text returned by ``gh`` is echoed as-is.
+    """
+    json_fields = "number,title,body,headRefOid,state,comments"
+    view_argv = ["pr", "view", str(args.pr), "--repo", args.repo]
+    view_argv += ["--json", json_fields]
+    pr_json = _gh(*view_argv, gh_cmd=args.gh_cmd)
+    # end="" keeps print() from appending a newline to gh's output.
+    print(pr_json, end="")
+
+
+def cmd_post_pr_comment(args: argparse.Namespace) -> None:
+    """Post the contents of ``args.file`` as a comment on pull request ``args.pr``.
+
+    Exits with status 1 when the file cannot be read.  With ``args.dry_run``
+    set, nothing is sent to GitHub: a notice and the first 400 characters of
+    the body are printed instead.
+    """
+    try:
+        body = Path(args.file).read_text(encoding="utf-8")
+    except OSError as exc:
+        print(f"gh_ops: cannot read comment file: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    if not args.dry_run:
+        comment_argv = ["pr", "comment", str(args.pr), "--repo", args.repo, "--body", body]
+        _gh(*comment_argv, gh_cmd=args.gh_cmd)
+        print(f"comment posted to {args.repo}#PR{args.pr}")
+        return
+    # Dry run: show what would have been posted.
+    print(f"[dry-run] would post PR comment to {args.repo}#{args.pr}:")
+    print(body[:400])
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="GitHub CLI wrapper for skill orchestration.")
+    parser.add_argument("--gh-cmd", default="gh")
+    subparsers = parser.add_subparsers(dest="subcommand", required=True)
+    # --gh-cmd belongs to the top-level parser, so it has to appear before the subcommand name.
+    p_fi = subparsers.add_parser("fetch-issue")
+    p_fi.add_argument("--issue", type=int, required=True)
+    p_fi.add_argument("--repo", required=True)
+
+    p_pic = subparsers.add_parser("post-issue-comment")
+    p_pic.add_argument("--issue", type=int, required=True)
+    p_pic.add_argument("--file", required=True)
+    p_pic.add_argument("--repo", required=True)
+    p_pic.add_argument("--dry-run", action="store_true")
+
+    p_fp = subparsers.add_parser("fetch-pr")
+    p_fp.add_argument("--pr", type=int, required=True)
+    p_fp.add_argument("--repo", required=True)
+
+    p_ppc = subparsers.add_parser("post-pr-comment")
+    p_ppc.add_argument("--pr", type=int, required=True)
+    p_ppc.add_argument("--file", required=True)
+    p_ppc.add_argument("--repo", required=True)
+    p_ppc.add_argument("--dry-run", action="store_true")
+    # required=True above means parse_args() only returns with one of these four subcommands.
+    args = parser.parse_args()
+    dispatch = {
+        "fetch-issue": cmd_fetch_issue,
+        "post-issue-comment": cmd_post_issue_comment,
+        "fetch-pr": cmd_fetch_pr,
+        "post-pr-comment": cmd_post_pr_comment,
+    }
+    dispatch[args.subcommand](args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/helpers/run_external.py b/helpers/run_external.py
new file mode 100644
index 0000000..dced704
--- /dev/null
+++ b/helpers/run_external.py
@@ -0,0 +1,140 @@
+"""
+Run an external agent (Codex or Gemini) for one review turn.
+
+In --dry-run mode, writes a canned approved plan_review stub to --output and exits 0.
+In live mode, invokes the agent CLI and writes the response to --output.
+
+Usage:
+  python -m helpers.run_external \\
+    --agent codex|gemini \\
+    --prompt-file PATH \\
+    --output PATH \\
+    --workdir PATH \\
+    [--cmd PATH] \\
+    [--dry-run]
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from coding_review_agent_loop.runner import Runner
+
+_CANNED_PLAN_REVIEW = json.dumps(
+    {
+        "schema_version": 1,
+        "kind": "plan_review",
+        "state": "approved",
+        "summary": "Dry-run stub: plan looks good.",
+        "blocking_plan_issues": [],
+        "same_plan_followups": [],
+        "future_followups": [],
+        "prior_plan_item_dispositions": [],
+    },
+    indent=2,
+)
+
+_CANNED_PLAN_REVIEW_FOOTER = (
+    "\n<!-- AGENT_PLAN_STATE: approved -->\n-- Codex (dry-run stub)\n"
+)
+
+
+def _build_dry_run_response() -> str:  # canned review JSON + footer (always signed "Codex")
+    return _CANNED_PLAN_REVIEW + _CANNED_PLAN_REVIEW_FOOTER
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Run one external agent turn.")
+    parser.add_argument("--agent", required=True, choices=["codex", "gemini"])
+    parser.add_argument("--prompt-file", required=True, help="Path to prompt text file.")
+    parser.add_argument("--output", required=True, help="Path to write the agent response.")
+    parser.add_argument("--workdir", required=True, help="Working directory for the agent.")
+    parser.add_argument("--cmd", default=None, help="Agent CLI command (overrides default).")
+    parser.add_argument("--dry-run", action="store_true", help="Write a canned stub and exit.")
+    args = parser.parse_args()
+    # The output directory is created before the dry-run check, so both paths can write to it.
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if args.dry_run:
+        output_path.write_text(_build_dry_run_response(), encoding="utf-8")
+        print(f"dry-run: wrote canned plan_review stub to {output_path}")
+        return
+
+    try:
+        prompt = Path(args.prompt_file).read_text(encoding="utf-8")
+    except OSError as exc:
+        print(f"run_external: cannot read prompt file: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    workdir = Path(args.workdir)
+    # NOTE: Runner is imported at module level, so even --dry-run needs the package importable.
+    # Import backends lazily to avoid heavy import in dry-run path
+    from coding_review_agent_loop.agents.codex import CodexBackend
+    from coding_review_agent_loop.agents.gemini import GeminiBackend
+    from coding_review_agent_loop.config import AgentLoopConfig
+
+    agent_name = args.agent
+    default_cmds = {"codex": "codex", "gemini": "gemini"}
+    cmd = args.cmd or default_cmds[agent_name]
+
+    # Build a minimal config sufficient for backend.run(); all three agent dirs point at --workdir.
+    import tempfile
+    log_dir = Path(tempfile.gettempdir()) / "coding-review-agent-loop" / "skill-logs"
+    log_dir.mkdir(parents=True, exist_ok=True)
+
+    config = AgentLoopConfig(
+        repo="skill/run",
+        claude_dir=workdir,
+        codex_dir=workdir,
+        gemini_dir=workdir,
+        coder="claude",
+        reviewer=(agent_name,),  # type: ignore[arg-type]
+        base="main",
+        max_rounds=1,
+        auto_merge=False,
+        dry_run=False,
+        allow_shared_dir=True,
+        claude_cmd="claude",
+        codex_cmd=cmd if agent_name == "codex" else "codex",
+        gemini_cmd=cmd if agent_name == "gemini" else "gemini",
+        gh_cmd="gh",
+        claude_args=(),
+        codex_args=(),
+        gemini_args=(),
+        test_command=None,
+        pre_review_tests=False,
+        ci_check_name="",
+        ci_timeout_seconds=300,
+        ci_poll_interval_seconds=30,
+        quiet=False,
+        log_dir=log_dir,
+        progress_interval_seconds=30,
+        agent_max_retries=0,
+        agent_retry_backoff_seconds=(30,),
+        agent_memory=False,
+        refresh_agent_memory=False,
+        agent_memory_dir=log_dir,
+        refresh_test_profile=False,
+        auto_agent_dirs=(agent_name,),  # type: ignore[arg-type]
+    )
+    # Any exception raised by the backend is reported on stderr and turned into exit status 1.
+    runner = Runner(dry_run=False)
+    backend = CodexBackend() if agent_name == "codex" else GeminiBackend()
+    try:
+        result = backend.run(runner, config, prompt)
+    except Exception as exc:  # noqa: BLE001
+        print(f"run_external: agent invocation failed: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    output_path.write_text(result.text, encoding="utf-8")
+    print(f"agent result written to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/helpers/state_manager.py b/helpers/state_manager.py
new file mode 100644
index 0000000..e9f96e1
--- /dev/null
+++ b/helpers/state_manager.py
@@ -0,0 +1,278 @@
+"""
+Manage Claude Code skill session state and build resume descriptors.
+
+Subcommands:
+
+  build-resume
+    --issue N --repo REPO --reviewers REVIEWER [REVIEWER ...]
+    [--flow plan|pr] [--head-sha SHA | --pr PR_NUMBER]
+
+    Reads GitHub issue comments, extracts AGENT_LOOP_META records, and calls
+    _resume_plan_round (default) or _resume_pr_round (--flow pr, using --head-sha
+    or the head commit of --pr).  Outputs a JSON resume descriptor to stdout.
+
+  write-session
+    --issue N --repo REPO --fields JSON
+
+    Writes (or merges) session state to
+    ~/.local/state/coding-review-agent-loop/skill-sessions/{repo-slug}/{issue}.json
+
+  read-session
+    --issue N --repo REPO
+
+    Reads session state JSON to stdout.
+
+  write-pending-comment
+    --issue N --repo REPO --body PATH
+
+    Writes a pending comment body path to session state.
+
+  clear-pending-comment
+    --issue N --repo REPO
+
+    Clears the pending_comment_body field from session state.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from coding_review_agent_loop.agents.base import AgentName
+from coding_review_agent_loop.errors import AgentLoopError
+from coding_review_agent_loop.round_state import (
+    _resume_plan_round,
+    _resume_pr_round,
+    _serialize_unresolved_item,
+)
+
+
+def _session_path(repo: str, issue: int) -> Path:
+    """Return the session-state JSON path for *repo* / *issue* under ``$XDG_STATE_HOME``."""
+    default_home = Path.home() / ".local" / "state"
+    root = Path(os.environ.get("XDG_STATE_HOME", default_home))
+    slug = repo.replace("/", "-").replace(":", "-")
+    return root / "coding-review-agent-loop" / "skill-sessions" / slug / f"{issue}.json"
+
+
+def _load_session(path: Path) -> dict[str, object]:
+    """Return the session object stored at *path*; ``{}`` when absent, unreadable or not a dict."""
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):  # ValueError covers JSONDecodeError and UnicodeDecodeError
+        return {}
+    return data if isinstance(data, dict) else {}
+
+
+def _save_session(path: Path, data: dict[str, object]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)  # create the parent directory on first use
+    path.write_text(json.dumps(data, indent=2, sort_keys=True), encoding="utf-8")  # overwrites; keys sorted
+
+
+@dataclass
+class _FakeComment:
+    """Minimal object satisfying the comment.body duck-type expected by round_state helpers."""
+    body: str  # comment text taken from the ``body`` field of the GitHub API response
+
+
+def _fetch_issue_comments(repo: str, issue: int, gh_cmd: str = "gh") -> list[_FakeComment]:
+    """Return every non-empty comment on *issue* as a ``_FakeComment``, one per GitHub comment.
+
+    Each body is emitted by jq as a single-line JSON string (``@json``) so that
+    multi-line comment bodies are not split apart when stdout is read line by line.
+    Exits the process with status 1 when the ``gh api`` call fails.
+    """
+    result = subprocess.run(
+        [gh_cmd, "api", f"repos/{repo}/issues/{issue}/comments", "--paginate",
+         "--jq", ".[] | .body | @json"],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    if result.returncode != 0:
+        print(
+            f"state_manager: gh api failed: {result.stderr.strip()}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    bodies = [json.loads(line) for line in result.stdout.splitlines() if line.strip()]
+    return [_FakeComment(body=b) for b in bodies if b]
+
+
+def _fetch_pr_head_sha(repo: str, pr_number: int, gh_cmd: str = "gh") -> str:
+    """Return the head commit SHA of pull request *pr_number* in *repo*.
+
+    Runs ``gh pr view --json headRefOid`` and returns its stripped stdout.
+    Exits the process with status 1 when the ``gh`` call fails.
+    """
+    command = [
+        gh_cmd, "pr", "view", str(pr_number),
+        "--repo", repo,
+        "--json", "headRefOid",
+        "--jq", ".headRefOid",
+    ]
+    proc = subprocess.run(
+        command,
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    if proc.returncode == 0:
+        return proc.stdout.strip()
+    print(
+        f"state_manager: could not fetch PR head SHA: {proc.stderr.strip()}",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+
+def cmd_build_resume(args: argparse.Namespace) -> None:
+    """Print a JSON resume descriptor rebuilt from the issue's comment history.
+
+    The descriptor starts as a fresh round 1 and is overwritten with the state
+    returned by ``_resume_plan_round`` / ``_resume_pr_round`` when either finds a
+    round to resume.  ``pending_comment_body`` always comes from the local session.
+
+    Exits with status 1 on ``AgentLoopError``, or when ``--flow pr`` is given
+    neither ``--head-sha`` nor ``--pr`` (rather than calling ``_resume_pr_round``
+    with ``head_sha=None``).
+    """
+    repo: str = args.repo
+    issue: int = args.issue
+    reviewers: list[AgentName] = args.reviewers
+    flow: str = args.flow
+    gh_cmd: str = getattr(args, "gh_cmd", "gh")
+
+    head_sha: str | None = None
+    if flow != "plan":
+        # Resolve the PR head before spending a network round-trip on the comments.
+        head_sha = getattr(args, "head_sha", None)
+        pr_number: int | None = getattr(args, "pr", None)
+        if not head_sha:
+            if pr_number is None:
+                print("state_manager: --flow pr requires --head-sha or --pr", file=sys.stderr)
+                sys.exit(1)
+            head_sha = _fetch_pr_head_sha(repo, pr_number, gh_cmd=gh_cmd)
+
+    comments = _fetch_issue_comments(repo, issue, gh_cmd=gh_cmd)
+    session = _load_session(_session_path(repo, issue))
+    descriptor: dict[str, object] = {
+        "round_number": 1,
+        "prior_items": [],
+        "compact_prior_summaries": [],
+        "completed_reviewer_names": [],
+        "pending_comment_body": session.get("pending_comment_body"),
+    }
+
+    try:
+        if flow == "plan":
+            found = _resume_plan_round(comments, configured_reviewers=reviewers)
+            plan_text, resumed = found if found is not None else (None, None)
+        else:
+            plan_text = None
+            resumed = _resume_pr_round(comments, head_sha=head_sha, configured_reviewers=reviewers)
+        if resumed is not None:
+            descriptor["round_number"] = resumed.round_number
+            descriptor["prior_items"] = [_serialize_unresolved_item(i) for i in resumed.prior_items]
+            descriptor["compact_prior_summaries"] = list(resumed.compact_prior_summaries)
+            descriptor["completed_reviewer_names"] = [r.metadata.agent for r in resumed.completed_reviews]
+            if flow == "plan":
+                descriptor["current_plan"] = plan_text
+    except AgentLoopError as exc:
+        print(f"state_manager: resume error: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    print(json.dumps(descriptor, indent=2))
+
+
+def cmd_write_session(args: argparse.Namespace) -> None:
+    """Merge the ``--fields`` JSON object into the stored session; exit 1 if it is not an object."""
+    try:
+        updates = json.loads(args.fields)
+        if not isinstance(updates, dict):
+            raise ValueError("top-level value must be a JSON object")
+    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
+        sys.exit(f"state_manager: invalid --fields JSON: {exc}")  # message to stderr, status 1
+    path = _session_path(args.repo, args.issue)
+    _save_session(path, {**_load_session(path), **updates})
+    print(f"session written: {path}")
+
+
+def cmd_read_session(args: argparse.Namespace) -> None:
+    """Print the stored session state for ``--repo`` / ``--issue`` as indented JSON."""
+    session = _load_session(_session_path(args.repo, args.issue))
+    print(json.dumps(session, indent=2))
+
+
+def cmd_write_pending_comment(args: argparse.Namespace) -> None:
+    """Store ``--body`` (as a string) under ``pending_comment_body`` in the session."""
+    path = _session_path(args.repo, args.issue)
+    session = {**_load_session(path), "pending_comment_body": str(args.body)}
+    _save_session(path, session)
+    print(f"pending comment path written: {path}")
+
+
+def cmd_clear_pending_comment(args: argparse.Namespace) -> None:
+    """Rewrite the session without ``pending_comment_body``; the key may already be absent."""
+    path = _session_path(args.repo, args.issue)
+    kept = {k: v for k, v in _load_session(path).items() if k != "pending_comment_body"}
+    _save_session(path, kept)
+    print(f"pending comment cleared: {path}")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Manage skill session state.")
+    parser.add_argument("--gh-cmd", default="gh")  # top-level option: goes before the subcommand name
+    subparsers = parser.add_subparsers(dest="subcommand", required=True)
+
+    # build-resume: print a resume descriptor built from the issue's GitHub comments
+    p_resume = subparsers.add_parser("build-resume", help="Build a resume descriptor from GitHub comments.")
+    p_resume.add_argument("--issue", type=int, required=True)
+    p_resume.add_argument("--repo", required=True)
+    p_resume.add_argument("--reviewers", nargs="+", required=True)
+    p_resume.add_argument("--flow", choices=["plan", "pr"], default="plan")
+    p_resume.add_argument("--head-sha", default=None)
+    p_resume.add_argument("--pr", type=int, default=None)
+
+    # write-session: merge a JSON object of fields into the session file
+    p_write = subparsers.add_parser("write-session", help="Write session state fields.")
+    p_write.add_argument("--issue", type=int, required=True)
+    p_write.add_argument("--repo", required=True)
+    p_write.add_argument("--fields", required=True, help="JSON object of fields to merge.")
+
+    # read-session: print the session file as JSON
+    p_read = subparsers.add_parser("read-session", help="Read session state.")
+    p_read.add_argument("--issue", type=int, required=True)
+    p_read.add_argument("--repo", required=True)
+
+    # write-pending-comment: store a comment body path in the session
+    p_pending = subparsers.add_parser("write-pending-comment", help="Record a pending comment body path.")
+    p_pending.add_argument("--issue", type=int, required=True)
+    p_pending.add_argument("--repo", required=True)
+    p_pending.add_argument("--body", required=True)
+
+    # clear-pending-comment: remove that stored path from the session
+    p_clear = subparsers.add_parser("clear-pending-comment", help="Clear the pending comment body path.")
+    p_clear.add_argument("--issue", type=int, required=True)
+    p_clear.add_argument("--repo", required=True)
+
+    args = parser.parse_args()
+    dispatch = {  # subcommand name -> handler; keys mirror the add_parser() names above
+        "build-resume": cmd_build_resume,
+        "write-session": cmd_write_session,
+        "read-session": cmd_read_session,
+        "write-pending-comment": cmd_write_pending_comment,
+        "clear-pending-comment": cmd_clear_pending_comment,
+    }
+    dispatch[args.subcommand](args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/helpers/validate_response.py b/helpers/validate_response.py
new file mode 100644
index 0000000..ea3ddb4
--- /dev/null
+++ b/helpers/validate_response.py
@@ -0,0 +1,151 @@
+"""
+Validate a structured agent response file against the existing protocol library.
+
+Usage:
+  python -m helpers.validate_response \\
+    --file PATH --kind KIND [--context-file PATH]
+
+Kinds:
+  plan_state       -- coder plan post; validates AGENT_PLAN_STATE marker
+  plan_review      -- reviewer plan review structured JSON
+  pr_review        -- reviewer PR review structured JSON
+  coder_followup   -- coder follow-up structured JSON
+  plan_revision    -- coder plan revision structured JSON
+
+The optional --context-file is a JSON file with schema:
+  {
+    "reviewer":            str,
+    "prior_items":         [...serialized UnresolvedReviewItem...],
+    "current_round_items": [...serialized UnresolvedReviewItem...],
+    "human_requirements":  [{"id": str, "text": str, "scope": str}, ...]  (only "body"/"text", "source_type", "author", "created_at", "url" are read)
+  }
+
+Exit 0 on success; exit 1 with diagnostic on failure.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+# Make src importable when run from the repo root as `python -m helpers.validate_response`
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from coding_review_agent_loop.errors import AgentLoopError
+from coding_review_agent_loop.protocol import parse_plan_state
+from coding_review_agent_loop.unresolved_items import (
+    _validate_plan_review_response,
+    _validate_review_response,
+    _validate_coder_followup_response,
+)
+from coding_review_agent_loop.protocol import validate_structured_plan_revision
+from coding_review_agent_loop.round_state import _deserialize_unresolved_item
+from coding_review_agent_loop.github import HumanReviewRequirement
+
+
+def _load_context(path: str | None) -> dict[str, object]:
+    """Load the optional context JSON object; ``{}`` without a path, exit 1 if unreadable or not an object."""
+    try:
+        ctx = {} if path is None else json.loads(Path(path).read_text(encoding="utf-8"))
+        if not isinstance(ctx, dict):
+            raise ValueError("top-level value must be a JSON object")
+    except (OSError, ValueError) as exc:  # ValueError covers json.JSONDecodeError
+        sys.exit(f"validate_response: could not read context file {path}: {exc}")  # stderr, status 1
+    return ctx
+
+
+def _deserialize_unresolved_items(raw: list[object]) -> list[object]:
+    """Deserialize every entry of *raw* with ``_deserialize_unresolved_item``, keeping order."""
+    return [
+        _deserialize_unresolved_item(entry) for entry in raw
+    ]
+
+
+def _deserialize_human_requirements(raw: list[object]) -> list[HumanReviewRequirement]:
+    """Build ``HumanReviewRequirement`` objects; skip non-dict entries, map null/empty fields to ``None``."""
+    def _opt(entry: dict[str, object], key: str) -> str | None:
+        return None if entry.get(key) in (None, "") else str(entry[key])
+
+    return [
+        HumanReviewRequirement(
+            source_type=str(entry.get("source_type") or "issue"),
+            author=_opt(entry, "author"),
+            created_at=_opt(entry, "created_at"),
+            url=_opt(entry, "url"),
+            body=str(entry.get("body") or entry.get("text") or ""),
+        )
+        for entry in raw if isinstance(entry, dict)
+    ]
+
+
+def main() -> None:
+    """Parse CLI arguments, validate ``--file`` as ``--kind``, and exit 1 with a diagnostic on failure."""
+    parser = argparse.ArgumentParser(description="Validate an agent response file.")
+    parser.add_argument("--file", required=True, help="Path to the response file.")
+    parser.add_argument(
+        "--kind",
+        required=True,
+        choices=["plan_state", "plan_review", "pr_review", "coder_followup", "plan_revision"],
+        help="Kind of response to validate.",
+    )
+    parser.add_argument(
+        "--context-file",
+        default=None,
+        help="Optional JSON file with reviewer identity and prior item context.",
+    )
+    args = parser.parse_args()
+
+    try:
+        text = Path(args.file).read_text(encoding="utf-8")
+    except OSError as exc:
+        print(f"validate_response: cannot read {args.file}: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    ctx = _load_context(args.context_file)
+    reviewer = str(ctx.get("reviewer", "Codex"))
+    kind = args.kind
+    try:
+        # Inside the try: a malformed context entry must yield a diagnostic, not a traceback.
+        prior_items = _deserialize_unresolved_items(list(ctx.get("prior_items") or []))
+        current_round_items = _deserialize_unresolved_items(list(ctx.get("current_round_items") or []))
+        human_requirements = _deserialize_human_requirements(list(ctx.get("human_requirements") or []))
+        if kind == "plan_state":
+            parse_plan_state(text)
+        elif kind == "plan_review":
+            _validate_plan_review_response(
+                text,
+                reviewer=reviewer,
+                unresolved_items=prior_items,
+                current_round_items=current_round_items,
+            )
+        elif kind == "pr_review":
+            _validate_review_response(
+                text,
+                reviewer=reviewer,
+                unresolved_items=prior_items,
+                current_round_items=current_round_items,
+            )
+        elif kind == "coder_followup":
+            _validate_coder_followup_response(
+                text,
+                unresolved_items=prior_items,
+                human_requirements=human_requirements or None,
+            )
+        elif kind == "plan_revision":
+            result = validate_structured_plan_revision(text)
+            if result is None:
+                raise AgentLoopError("Response did not parse as a structured plan_revision.")
+    except AgentLoopError as exc:
+        print(f"validation failed: {kind}: {exc}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as exc:  # noqa: BLE001
+        print(f"validation error: {kind}: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"validation passed: {kind}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_skill_helpers.py b/tests/test_skill_helpers.py
new file mode 100644
index 0000000..1c835e0
--- /dev/null
+++ b/tests/test_skill_helpers.py
@@ -0,0 +1,222 @@
+"""Unit tests for the Claude Code skill helper CLIs."""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+HELPERS = Path(__file__).parent.parent / "helpers"
+
+
+def _run(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
+    """Run ``python -m <args>`` from the repo root; raise AssertionError on failure when *check*."""
+    proc = subprocess.run(
+        [sys.executable, "-m", *args],
+        capture_output=True, text=True, check=False,
+        cwd=Path(__file__).parent.parent,
+    )
+
+    if proc.returncode == 0 or not check:
+        return proc
+    raise AssertionError(
+        f"Command {args!r} failed (exit {proc.returncode}):\n"
+        f"stdout: {proc.stdout}\n"
+        f"stderr: {proc.stderr}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# helpers/validate_response.py
+# ---------------------------------------------------------------------------
+
+_VALID_PLAN_STATE = """\
+## Plan
+
+1. Step one
+
+<!-- AGENT_PLAN_STATE: approved -->
+-- Anthropic Claude
+"""
+
+_INVALID_PLAN_STATE = "This has no marker at all."
+
+_VALID_PLAN_REVIEW = json.dumps(
+    {
+        "schema_version": 1,
+        "kind": "plan_review",
+        "state": "approved",
+        "summary": "Plan looks good.",
+        "blocking_plan_issues": [],
+        "same_plan_followups": [],
+        "future_followups": [],
+        "prior_plan_item_dispositions": [],
+    }
+) + "\n<!-- AGENT_PLAN_STATE: approved -->\n-- Codex\n"
+
+
+def _write_tmp(content: str, suffix: str = ".md") -> str:
+    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as f:
+        f.write(content)  # NOTE(review): delete=False and no cleanup here, so each call leaves a file behind
+        return f.name
+
+
+class TestValidateResponse:
+    def test_valid_plan_state_accepted(self) -> None:
+        path = _write_tmp(_VALID_PLAN_STATE)
+        result = _run("helpers.validate_response", "--file", path, "--kind", "plan_state")
+        assert "validation passed: plan_state" in result.stdout
+
+    def test_missing_plan_state_marker_rejected(self) -> None:
+        path = _write_tmp(_INVALID_PLAN_STATE)
+        result = _run("helpers.validate_response", "--file", path, "--kind", "plan_state", check=False)
+        assert result.returncode != 0
+        assert "validation failed: plan_state" in result.stderr
+
+    def test_valid_plan_review_accepted(self) -> None:
+        path = _write_tmp(_VALID_PLAN_REVIEW)
+        ctx_path = _write_tmp(
+            json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}),
+            suffix=".json",
+        )
+        result = _run(
+            "helpers.validate_response",
+            "--file",
+            path,
+            "--kind",
+            "plan_review",
+            "--context-file",
+            ctx_path,
+        )
+        assert "validation passed: plan_review" in result.stdout
+
+    def test_plan_review_with_unknown_prior_item_rejected(self) -> None:
+        # A review that disposes unknown prior item IDs must be rejected.
+        review = json.dumps(
+            {
+                "schema_version": 1,
+                "kind": "plan_review",
+                "state": "blocking",
+                "summary": "Blocking.",
+                "blocking_plan_issues": ["Something bad."],
+                "same_plan_followups": [],
+                "future_followups": [],
+                "prior_plan_item_dispositions": [
+                    {"item_id": "item-999", "disposition": "resolved"}
+                ],
+            }
+        ) + "\n<!-- AGENT_PLAN_STATE: blocking -->\n-- Codex\n"
+        path = _write_tmp(review)
+        # Empty prior items — item-999 is unknown
+        ctx_path = _write_tmp(
+            json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}),
+            suffix=".json",
+        )
+        result = _run(
+            "helpers.validate_response",
+            "--file",
+            path,
+            "--kind",
+            "plan_review",
+            "--context-file",
+            ctx_path,
+            check=False,
+        )
+        assert result.returncode != 0
+        assert "validation" in result.stderr, result.stderr  # validator diagnostic, not an import/usage failure
+
+
+# ---------------------------------------------------------------------------
+# helpers/state_manager.py  (session round-trip, no live gh required)
+# ---------------------------------------------------------------------------
+
+class TestStateManager:
+    """Session round-trip tests; every test gets its own throwaway XDG_STATE_HOME."""
+
+    @pytest.fixture(autouse=True)
+    def _isolated_state_home(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+        # The helper subprocesses inherit os.environ, so this keeps them out of the
+        # developer's real ~/.local/state and stops state leaking between tests.
+        monkeypatch.setenv("XDG_STATE_HOME", str(tmp_path))
+
+    def _session_path(self, repo: str, issue: int) -> Path:
+        import os
+        slug = repo.replace("/", "-").replace(":", "-")
+        state_home = Path(os.environ.get("XDG_STATE_HOME", Path.home() / ".local" / "state"))
+        return state_home / "coding-review-agent-loop" / "skill-sessions" / slug / f"{issue}.json"
+
+    def test_write_and_read_session(self) -> None:
+        repo = "test/skill-repo"
+        issue = 9999
+        fields = {"last_completed_step": "post_review", "session_id": "abc123"}
+        _run(
+            "helpers.state_manager", "write-session",
+            "--issue", str(issue),
+            "--repo", repo,
+            "--fields", json.dumps(fields),
+        )
+        # The file must land under the redirected state home, not the real one.
+        assert self._session_path(repo, issue).exists()
+        result = _run("helpers.state_manager", "read-session", "--issue", str(issue), "--repo", repo)
+        data = json.loads(result.stdout)
+        assert data["last_completed_step"] == "post_review"
+        assert data["session_id"] == "abc123"
+
+    def test_write_and_clear_pending_comment(self) -> None:
+        repo = "test/skill-repo"
+        issue = 9999
+        body_path = "/tmp/pending-comment-body.md"
+        _run(
+            "helpers.state_manager", "write-pending-comment",
+            "--issue", str(issue),
+            "--repo", repo,
+            "--body", body_path,
+        )
+        result = _run("helpers.state_manager", "read-session", "--issue", str(issue), "--repo", repo)
+        data = json.loads(result.stdout)
+        assert data.get("pending_comment_body") == body_path
+
+        # Clearing must drop the key from the stored session entirely.
+        _run(
+            "helpers.state_manager", "clear-pending-comment",
+            "--issue", str(issue),
+            "--repo", repo,
+        )
+        result = _run("helpers.state_manager", "read-session", "--issue", str(issue), "--repo", repo)
+        data = json.loads(result.stdout)
+        assert "pending_comment_body" not in data
+
+
+# ---------------------------------------------------------------------------
+# helpers/run_external.py  (dry-run only)
+# ---------------------------------------------------------------------------
+
+class TestRunExternal:
+    def test_dry_run_exits_zero_and_writes_valid_stub(self, tmp_path: Path) -> None:
+        """``--dry-run`` must exit 0 and write an approved plan_review stub to ``--output``."""
+        # tmp_path replaces leaked delete=False temp files and the hard-coded /tmp workdir.
+        prompt_path = tmp_path / "prompt.md"
+        prompt_path.write_text("Prompt text.", encoding="utf-8")
+        output_path = tmp_path / "output.md"
+
+        result = _run(
+            "helpers.run_external",
+            "--agent",
+            "codex",
+            "--prompt-file",
+            str(prompt_path),
+            "--output",
+            str(output_path),
+            "--workdir",
+            str(tmp_path),
+            "--dry-run",
+        )
+        assert result.returncode == 0
+        content = output_path.read_text(encoding="utf-8")
+        # The dry-run stub must contain a valid plan_review JSON and AGENT_PLAN_STATE marker
+        assert "AGENT_PLAN_STATE: approved" in content
+        assert '"state": "approved"' in content
diff --git a/tests/test_skill_loop.py b/tests/test_skill_loop.py
new file mode 100644
index 0000000..a6d732c
--- /dev/null
+++ b/tests/test_skill_loop.py
@@ -0,0 +1,64 @@
+"""Integration test: invokes demo_loop.py as a subprocess."""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+
+def test_demo_loop_dry_run(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    """
+    Run helpers/demo_loop.py and verify it:
+    - exits 0
+    - prints "validation passed: plan_state"
+    - prints "validation passed: plan_review"
+    - writes a session file with last_completed_step=post_review
+
+    XDG_STATE_HOME is redirected to ``tmp_path`` so the run neither depends on
+    nor leaves files in the developer's real ~/.local/state directory, and a
+    session file left over from an earlier run cannot make the test pass.
+    """
+    repo = "demo/skill-loop-test"
+    issue = 88888
+    # The child process inherits os.environ, so it writes its session under tmp_path.
+    monkeypatch.setenv("XDG_STATE_HOME", str(tmp_path))
+
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "helpers.demo_loop",
+            "--issue",
+            str(issue),
+            "--repo",
+            repo,
+        ],
+        capture_output=True,
+        text=True,
+        cwd=Path(__file__).parent.parent,
+        check=False,
+    )
+
+    assert result.returncode == 0, (
+        f"demo_loop failed (exit {result.returncode}):\n"
+        f"stdout: {result.stdout}\n"
+        f"stderr: {result.stderr}"
+    )
+
+    assert "validation passed: plan_state" in result.stdout, result.stdout
+    assert "validation passed: plan_review" in result.stdout, result.stdout
+
+    # Verify session state was written with last_completed_step=post_review
+    slug = repo.replace("/", "-").replace(":", "-")
+    session_path = (
+        tmp_path / "coding-review-agent-loop" / "skill-sessions" / slug
+        / f"{issue}.json"
+    )
+    assert session_path.exists(), f"session file not found: {session_path}"
+    data = json.loads(session_path.read_text(encoding="utf-8"))
+    assert data.get("last_completed_step") == "post_review", data

From 366fcaefb4f289628bc6e2a2180d9cd25984b51a Mon Sep 17 00:00:00 2001
From: Wild Wind <wwind123@gmail.com>
Date: Sun, 7 Jun 2026 19:30:46 -0700
Subject: [PATCH 2/2] fix: add AGENT_LOOP_META to skill-posted comments and
 strengthen plan_revision validation

Addresses Codex blocking item-1 from PR #283 round 1 review:

1. Add state_manager attach-metadata subcommand that builds PostedRoundMetadata
   and calls _attach_round_metadata before posting, so skill-posted comments carry
   AGENT_LOOP_META and can be found by build-resume's _resume_plan_round /
   _resume_pr_round. Update demo_loop.py and SKILL.md to use this step.

2. Fix validate_response.py plan_revision validation to check that
   prior_plan_item_dispositions only reference item IDs present in the context
   ledger, matching the check in the headless orchestrator's
   _validate_plan_revision_response (orchestrator.py:1281). Add regression tests.

3. New tests: test_attach_metadata_produces_valid_agent_loop_meta and
   test_attach_metadata_reviewer_found_by_resume verify that skill-posted rounds
   are found by _resume_plan_round; test_plan_revision_with_unknown_prior_item_rejected
   and test_plan_revision_with_known_items_accepted cover the ledger check.

Refs #216

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 SKILL.md                     |  45 +++++++--
 helpers/demo_loop.py         | 130 +++++++++++++++++++++---
 helpers/state_manager.py     | 114 +++++++++++++++++++++
 helpers/validate_response.py |  26 ++++-
 tests/test_skill_helpers.py  | 191 ++++++++++++++++++++++++++++++++++-
 tests/test_skill_loop.py     |   5 +
 6 files changed, 483 insertions(+), 28 deletions(-)

diff --git a/SKILL.md b/SKILL.md
index 60cac4c..f48df89 100644
--- a/SKILL.md
+++ b/SKILL.md
@@ -63,19 +63,37 @@ python -m helpers.validate_response \
   --kind plan_state
 ```
 
-### Step 4 — Save as pending comment
+### Step 4 — Attach AGENT_LOOP_META to the plan comment
+
+The comment posted to GitHub must carry an `AGENT_LOOP_META` marker so that
+`build-resume` can reconstruct the round state in future sessions.  Use
+`attach-metadata` to produce a metadata-tagged version of the plan file:
+
+```bash
+python -m helpers.state_manager attach-metadata \
+  --body-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  --output /tmp/agent-loop-skill/{session-id}/plan-tagged.md \
+  --flow plan --role coder --agent Claude \
+  --round-number {round_number} --state approved \
+  --subject-plan-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  --canonical-plan-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  [--prior-items-file /tmp/agent-loop-skill/{session-id}/prior_items.json]
+```
+
+`prior_items.json` is the `prior_items` array from the `build-resume` JSON
+output.  Omit the flag when `prior_items` is empty (round 1).
+
+### Step 5 — Save as pending comment and post
 
 ```bash
 python -m helpers.state_manager write-pending-comment \
   --issue ISSUE --repo OWNER/REPO \
-  --body /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md
+  --body /tmp/agent-loop-skill/{session-id}/plan-tagged.md
 ```
 
-### Step 5 — Post the plan comment
-
 ```bash
 python -m helpers.gh_ops post-issue-comment \
-  --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/plan-tagged.md \
   --repo OWNER/REPO
 ```
 
@@ -115,11 +133,24 @@ The `context.json` must contain:
 }
 ```
 
-Post the reviewer comment:
+Attach AGENT_LOOP_META to the reviewer comment (subject must match the coder comment):
+
+```bash
+python -m helpers.state_manager attach-metadata \
+  --body-file /tmp/agent-loop-skill/{session-id}/codex-review.md \
+  --output /tmp/agent-loop-skill/{session-id}/codex-review-tagged.md \
+  --flow plan --role reviewer --agent Codex \
+  --round-number {round_number} --state approved \
+  --subject-plan-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \
+  [--prior-items-file /tmp/agent-loop-skill/{session-id}/prior_items.json] \
+  [--dispositions-file /tmp/agent-loop-skill/{session-id}/codex_dispositions.json]
+```
+
+Post the reviewer comment (with metadata):
 
 ```bash
 python -m helpers.gh_ops post-issue-comment \
-  --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/codex-review.md \
+  --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/codex-review-tagged.md \
   --repo OWNER/REPO
 ```
 
diff --git a/helpers/demo_loop.py b/helpers/demo_loop.py
index 5d94114..56e4684 100644
--- a/helpers/demo_loop.py
+++ b/helpers/demo_loop.py
@@ -4,13 +4,17 @@
 Demonstrates:
   1. Claude (host) writes a stub plan.
   2. validate_response validates it as plan_state.
-  3. Codex (dry-run) writes a canned approved plan_review stub.
-  4. validate_response validates it as plan_review.
-  5. gh_ops post-issue-comment --dry-run records the review.
-  6. state_manager write-session records last_completed_step=post_review.
+  3. state_manager attach-metadata adds AGENT_LOOP_META to the plan comment.
+  4. gh_ops post-issue-comment --dry-run records the plan (with metadata).
+  5. Codex (dry-run) writes a canned approved plan_review stub.
+  6. validate_response validates it as plan_review.
+  7. state_manager attach-metadata adds AGENT_LOOP_META to the reviewer comment.
+  8. gh_ops post-issue-comment --dry-run records the reviewer comment.
+  9. state_manager write-session records last_completed_step=post_review.
+ 10. Verify _resume_plan_round can find the round from the metadata-tagged bodies.
 
 Usage:
-  python -m helpers.demo_loop --issue 123 [--dry-run] [--repo OWNER/REPO]
+  python -m helpers.demo_loop --issue 123 [--repo OWNER/REPO]
 """
 
 from __future__ import annotations
@@ -26,8 +30,6 @@
 # Make src importable when run from the repo root
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
-_HELPERS = Path(__file__).parent
-
 _HOST_STUB_PLAN = """\
 ## Plan
 
@@ -61,8 +63,6 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Minimal skill loop demo.")
     parser.add_argument("--issue", type=int, default=123)
     parser.add_argument("--repo", default="demo/repo")
-    parser.add_argument("--dry-run", action="store_true", default=True,
-                        help="Always dry-run for demo (default: True).")
     args = parser.parse_args()
 
     session_id = uuid.uuid4().hex[:8]
@@ -79,7 +79,55 @@ def main() -> None:
     print(result.stdout.strip())
     assert "validation passed: plan_state" in result.stdout, result.stdout
 
-    # Step 3: Codex dry-run produces approved plan_review stub
+    # Step 3: attach AGENT_LOOP_META to the plan comment (coder, round 1)
+    plan_with_meta = tmpdir / "plan_with_meta.md"
+    _run(
+        _py(
+            [
+                "state_manager",
+                "attach-metadata",
+                "--body-file",
+                str(plan_file),
+                "--output",
+                str(plan_with_meta),
+                "--flow",
+                "plan",
+                "--role",
+                "coder",
+                "--agent",
+                "Claude",
+                "--round-number",
+                "1",
+                "--state",
+                "approved",
+                "--subject-plan-file",
+                str(plan_file),
+                "--canonical-plan-file",
+                str(plan_file),
+            ]
+        )
+    )
+    print(f"demo_loop: plan comment with AGENT_LOOP_META: {plan_with_meta}")
+    assert "AGENT_LOOP_META" in plan_with_meta.read_text(encoding="utf-8")
+
+    # Step 4: dry-run post the plan comment (with metadata)
+    _run(
+        _py(
+            [
+                "gh_ops",
+                "post-issue-comment",
+                "--issue",
+                str(args.issue),
+                "--file",
+                str(plan_with_meta),
+                "--repo",
+                args.repo,
+                "--dry-run",
+            ]
+        )
+    )
+
+    # Step 5: Codex dry-run produces approved plan_review stub
     reviewer_output = tmpdir / "codex_review.md"
     _run(
         _py(
@@ -99,7 +147,7 @@ def main() -> None:
     )
     print(f"demo_loop: Codex dry-run output written to {reviewer_output}")
 
-    # Step 4: validate plan_review
+    # Step 6: validate plan_review
     context_file = tmpdir / "context.json"
     context_file.write_text(
         json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}),
@@ -121,7 +169,37 @@ def main() -> None:
     print(result.stdout.strip())
     assert "validation passed: plan_review" in result.stdout, result.stdout
 
-    # Step 5: dry-run post issue comment
+    # Step 7: attach AGENT_LOOP_META to the reviewer comment
+    # Compute subject from the plan file (same subject as the coder comment)
+    reviewer_with_meta = tmpdir / "codex_review_with_meta.md"
+    _run(
+        _py(
+            [
+                "state_manager",
+                "attach-metadata",
+                "--body-file",
+                str(reviewer_output),
+                "--output",
+                str(reviewer_with_meta),
+                "--flow",
+                "plan",
+                "--role",
+                "reviewer",
+                "--agent",
+                "Codex",
+                "--round-number",
+                "1",
+                "--state",
+                "approved",
+                "--subject-plan-file",
+                str(plan_file),
+            ]
+        )
+    )
+    print(f"demo_loop: reviewer comment with AGENT_LOOP_META: {reviewer_with_meta}")
+    assert "AGENT_LOOP_META" in reviewer_with_meta.read_text(encoding="utf-8")
+
+    # Step 8: dry-run post the reviewer comment
     _run(
         _py(
             [
@@ -130,7 +208,7 @@ def main() -> None:
                 "--issue",
                 str(args.issue),
                 "--file",
-                str(reviewer_output),
+                str(reviewer_with_meta),
                 "--repo",
                 args.repo,
                 "--dry-run",
@@ -138,7 +216,7 @@ def main() -> None:
         )
     )
 
-    # Step 6: record session state
+    # Step 9: record session state
     _run(
         _py(
             [
@@ -170,6 +248,30 @@ def main() -> None:
     session_data = json.loads(result.stdout)
     assert session_data.get("last_completed_step") == "post_review", session_data
 
+    # Step 10: Verify _resume_plan_round finds the round from the metadata-tagged comments
+    # This directly tests that attach-metadata produces valid AGENT_LOOP_META.
+    from coding_review_agent_loop.round_state import _resume_plan_round
+
+    class _FakeComment:
+        def __init__(self, body: str) -> None:
+            self.body = body
+
+    fake_comments = [
+        _FakeComment(plan_with_meta.read_text(encoding="utf-8")),
+        _FakeComment(reviewer_with_meta.read_text(encoding="utf-8")),
+    ]
+    resume_result = _resume_plan_round(fake_comments, configured_reviewers=["codex"])
+    assert resume_result is not None, (
+        "build-resume could not find the skill-posted round — AGENT_LOOP_META not recognized"
+    )
+    _plan_text, resumed = resume_result
+    assert resumed.round_number == 1, f"Expected round 1, got {resumed.round_number}"
+    assert len(resumed.completed_reviews) == 1, (
+        f"Expected 1 completed reviewer (Codex), got {len(resumed.completed_reviews)}"
+    )
+    print(f"demo_loop: _resume_plan_round found round {resumed.round_number} with "
+          f"{len(resumed.completed_reviews)} completed reviewer(s)")
+
     print("demo_loop: all steps completed successfully")
     print(f"session state: {json.dumps(session_data, indent=2)}")
 
diff --git a/helpers/state_manager.py b/helpers/state_manager.py
index e9f96e1..faa2f54 100644
--- a/helpers/state_manager.py
+++ b/helpers/state_manager.py
@@ -11,6 +11,21 @@
     _resume_plan_round or _resume_pr_round from the existing library.  Outputs
     a JSON resume descriptor to stdout.
 
+  attach-metadata
+    --body-file PATH --output PATH
+    --flow plan|pr --role coder|reviewer --agent NAME
+    --round-number N --state approved|blocking
+    (--subject SHA | --subject-plan-file PATH)
+    [--prior-items-file PATH]
+    [--dispositions-file PATH]
+    [--new-items-file PATH]
+    [--canonical-plan-file PATH]
+
+    Reads the comment body from --body-file, builds a PostedRoundMetadata
+    object, and writes the body with AGENT_LOOP_META appended to --output.
+    The resulting file can be posted via gh_ops post-issue-comment and will
+    be recognized by build-resume's _resume_plan_round / _resume_pr_round.
+
   write-session
     --issue N --repo REPO --fields JSON
 
@@ -48,10 +63,16 @@
 from coding_review_agent_loop.agents.base import AgentName
 from coding_review_agent_loop.errors import AgentLoopError
 from coding_review_agent_loop.round_state import (
+    PostedRoundMetadata,
+    _attach_round_metadata,
+    _deserialize_disposition,
+    _deserialize_unresolved_item,
+    _plan_subject,
     _resume_plan_round,
     _resume_pr_round,
     _serialize_unresolved_item,
 )
+from coding_review_agent_loop.protocol import ReviewItemDisposition, UnresolvedReviewItem
 
 
 def _session_path(repo: str, issue: int) -> Path:
@@ -192,6 +213,104 @@ def cmd_build_resume(args: argparse.Namespace) -> None:
     print(json.dumps(descriptor, indent=2))
 
 
+def _load_item_list(path: str | None) -> list[object]:
+    """Load a JSON array of serialized items from *path*.
+
+    Returns an empty list when *path* is ``None`` or empty.  If the file
+    cannot be read or decoded, is not valid JSON, or does not hold a
+    top-level array, an error is printed to stderr and the process exits
+    with status 1.
+    """
+    if not path:
+        return []
+    try:
+        loaded = json.loads(Path(path).read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        print(f"state_manager: cannot read {path}: {exc}", file=sys.stderr)
+        sys.exit(1)
+    if not isinstance(loaded, list):
+        print(f"state_manager: {path} must contain a JSON array", file=sys.stderr)
+        sys.exit(1)
+    return loaded
+
+
+def _read_text_or_exit(path: str, label: str) -> str:
+    """Return the UTF-8 text of *path*; on failure report *label* and exit 1."""
+    try:
+        return Path(path).read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as exc:
+        print(f"state_manager: cannot read {label}: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+
+def cmd_attach_metadata(args: argparse.Namespace) -> None:
+    """Attach AGENT_LOOP_META to a comment body and write the result.
+
+    Reads the body from ``args.body_file``, builds a ``PostedRoundMetadata``
+    from the parsed arguments, and writes the augmented body to
+    ``args.output`` (creating parent directories as needed).  The subject is
+    ``args.subject`` when given, otherwise ``_plan_subject`` of the text in
+    ``args.subject_plan_file``.
+
+    Unreadable inputs, malformed item data, a missing subject, or a failed
+    write are reported on stderr and exit with status 1.
+    """
+    body = _read_text_or_exit(args.body_file, "body file")
+
+    # An explicit --subject wins; otherwise derive it from the plan text.
+    if args.subject:
+        subject = args.subject
+    elif args.subject_plan_file:
+        plan_text = _read_text_or_exit(args.subject_plan_file, "subject plan file")
+        subject = _plan_subject(plan_text)
+    else:
+        print("state_manager attach-metadata: provide --subject or --subject-plan-file", file=sys.stderr)
+        sys.exit(1)
+
+    # Load optional item lists
+    raw_prior = _load_item_list(args.prior_items_file)
+    raw_dispositions = _load_item_list(args.dispositions_file)
+    raw_new_items = _load_item_list(args.new_items_file)
+
+    try:
+        prior_items = tuple(_deserialize_unresolved_item(item) for item in raw_prior)
+        dispositions = tuple(_deserialize_disposition(d) for d in raw_dispositions)
+        new_items = tuple(_deserialize_unresolved_item(item) for item in raw_new_items)
+    except (AgentLoopError, KeyError, TypeError, ValueError) as exc:
+        # NOTE(review): the exact exceptions raised by the deserializers are
+        # not visible here; these cover malformed entries — confirm.
+        print(f"state_manager attach-metadata: invalid item data: {exc!r}", file=sys.stderr)
+        sys.exit(1)
+
+    canonical_plan: str | None = None
+    if args.canonical_plan_file:
+        canonical_plan = _read_text_or_exit(args.canonical_plan_file, "canonical plan file")
+
+    metadata = PostedRoundMetadata(
+        flow=args.flow,
+        role=args.role,
+        agent=args.agent,
+        round_number=args.round_number,
+        subject=subject,
+        prior_items=prior_items,
+        dispositions=dispositions,
+        new_items=new_items,
+        state=args.state,
+        canonical_plan=canonical_plan,
+    )
+    augmented = _attach_round_metadata(body, metadata)
+
+    output_path = Path(args.output)
+    try:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_text(augmented, encoding="utf-8")
+    except OSError as exc:
+        print(f"state_manager: cannot write {output_path}: {exc}", file=sys.stderr)
+        sys.exit(1)
+    print(f"metadata attached: {output_path}")
+
+
 def cmd_write_session(args: argparse.Namespace) -> None:
     path = _session_path(args.repo, args.issue)
     existing = _load_session(path)
@@ -232,6 +321,30 @@ def main() -> None:
     parser.add_argument("--gh-cmd", default="gh")
     subparsers = parser.add_subparsers(dest="subcommand", required=True)
 
+    # attach-metadata
+    p_meta = subparsers.add_parser(
+        "attach-metadata",
+        help="Attach AGENT_LOOP_META to a comment body so build-resume can reconstruct the round.",
+    )
+    p_meta.add_argument("--body-file", required=True, help="Input comment body file.")
+    p_meta.add_argument("--output", required=True, help="Output file with AGENT_LOOP_META attached.")
+    p_meta.add_argument("--flow", required=True, choices=["plan", "pr"])
+    p_meta.add_argument("--role", required=True, choices=["coder", "reviewer"])
+    p_meta.add_argument("--agent", required=True, help="Agent display name (e.g. 'Claude', 'Codex').")
+    p_meta.add_argument("--round-number", type=int, required=True)
+    p_meta.add_argument("--state", required=True, choices=["approved", "blocking"])
+    p_meta.add_argument("--subject", default=None, help="Pre-computed subject SHA256 hex string.")
+    p_meta.add_argument("--subject-plan-file", default=None,
+                        help="Compute subject as sha256(plan text) from this file.")
+    p_meta.add_argument("--prior-items-file", default=None,
+                        help="JSON array of serialized UnresolvedReviewItem.")
+    p_meta.add_argument("--dispositions-file", default=None,
+                        help="JSON array of serialized ReviewItemDisposition.")
+    p_meta.add_argument("--new-items-file", default=None,
+                        help="JSON array of serialized UnresolvedReviewItem (new items from this turn).")
+    p_meta.add_argument("--canonical-plan-file", default=None,
+                        help="Plan text file (written as canonical_plan for coder turns).")
+
     # build-resume
     p_resume = subparsers.add_parser("build-resume", help="Build a resume descriptor from GitHub comments.")
     p_resume.add_argument("--issue", type=int, required=True)
@@ -265,6 +378,7 @@ def main() -> None:
 
     args = parser.parse_args()
     dispatch = {
+        "attach-metadata": cmd_attach_metadata,
         "build-resume": cmd_build_resume,
         "write-session": cmd_write_session,
         "read-session": cmd_read_session,
diff --git a/helpers/validate_response.py b/helpers/validate_response.py
index ea3ddb4..fc44329 100644
--- a/helpers/validate_response.py
+++ b/helpers/validate_response.py
@@ -33,14 +33,13 @@
 # Make src importable when run from the repo root as `python -m helpers.validate_response`
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
-from coding_review_agent_loop.errors import AgentLoopError
-from coding_review_agent_loop.protocol import parse_plan_state
+from coding_review_agent_loop.errors import AgentLoopError, UnknownPriorItemDispositionError
+from coding_review_agent_loop.protocol import parse_plan_state, validate_structured_plan_revision
 from coding_review_agent_loop.unresolved_items import (
     _validate_plan_review_response,
     _validate_review_response,
     _validate_coder_followup_response,
 )
-from coding_review_agent_loop.protocol import validate_structured_plan_revision
 from coding_review_agent_loop.round_state import _deserialize_unresolved_item
 from coding_review_agent_loop.github import HumanReviewRequirement
 
@@ -134,9 +133,26 @@ def main() -> None:
                 human_requirements=human_requirements or None,
             )
         elif kind == "plan_revision":
-            result = validate_structured_plan_revision(text)
-            if result is None:
+            parsed = validate_structured_plan_revision(text)
+            if parsed is None:
                 raise AgentLoopError("Response did not parse as a structured plan_revision.")
+            # Validate that dispositions only reference known prior item IDs,
+            # matching the check in the headless orchestrator's _validate_plan_revision_response.
+            if prior_items:
+                allowed_ids = {item.item_id for item in prior_items}
+                unknown = {
+                    disposition.item_id
+                    for disposition in parsed.prior_plan_item_dispositions
+                } - allowed_ids
+                if unknown:
+                    raise UnknownPriorItemDispositionError(
+                        unknown_ids=tuple(sorted(unknown)),
+                        allowed_ids=tuple(sorted(allowed_ids)),
+                        same_round_description=(
+                            "Same-round findings are informational only and must not be "
+                            "dispositioned as prior carried items."
+                        ),
+                    )
     except AgentLoopError as exc:
         print(f"validation failed: {kind}: {exc}", file=sys.stderr)
         sys.exit(1)
diff --git a/tests/test_skill_helpers.py b/tests/test_skill_helpers.py
index 1c835e0..6ac6642 100644
--- a/tests/test_skill_helpers.py
+++ b/tests/test_skill_helpers.py
@@ -11,6 +11,10 @@
 import pytest
 
 HELPERS = Path(__file__).parent.parent / "helpers"
+SRC = Path(__file__).parent.parent / "src"
+
+# Make library importable for direct calls in this test file
+sys.path.insert(0, str(SRC))
 
 
 def _run(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
@@ -95,7 +99,6 @@ def test_valid_plan_review_accepted(self) -> None:
         assert "validation passed: plan_review" in result.stdout
 
     def test_plan_review_with_unknown_prior_item_rejected(self) -> None:
-        # A review that disposes unknown prior item IDs must be rejected.
         review = json.dumps(
             {
                 "schema_version": 1,
@@ -129,9 +132,91 @@ def test_plan_review_with_unknown_prior_item_rejected(self) -> None:
         )
         assert result.returncode != 0
 
+    def test_plan_revision_with_unknown_prior_item_rejected(self) -> None:
+        """A plan_revision that dispositions an ID absent from prior_items must fail."""
+        payload = {
+            "schema_version": 1,
+            "kind": "plan_revision",
+            "state": "blocking",
+            "summary": "Revised plan.",
+            "prior_plan_item_dispositions": [
+                {"item_id": "item-unknown-99", "disposition": "resolved"}
+            ],
+            "plan_steps": ["Step A", "Step B"],
+        }
+        trailer = "\n<!-- AGENT_PLAN_STATE: blocking -->\n-- Anthropic Claude\n"
+        revision_path = _write_tmp(json.dumps(payload) + trailer)
+
+        # The ledger only knows item-1; the revision above names item-unknown-99.
+        ledger_entry = {
+            "item_id": "item-1",
+            "reviewer": "Codex",
+            "source_round": 1,
+            "text": "Some issue.",
+            "status": "blocking",
+            "source_status": "blocking",
+            "notes": [],
+        }
+        context_path = _write_tmp(
+            json.dumps({"prior_items": [ledger_entry], "current_round_items": []}),
+            suffix=".json",
+        )
+
+        cli_args = [
+            "helpers.validate_response",
+            "--file",
+            revision_path,
+            "--kind",
+            "plan_revision",
+            "--context-file",
+            context_path,
+        ]
+        outcome = _run(*cli_args, check=False)
+        assert outcome.returncode != 0
+
+    def test_plan_revision_with_known_items_accepted(self) -> None:
+        """A plan_revision whose dispositions all name ledger items must validate."""
+        ledger_entry = {
+            "item_id": "item-1",
+            "reviewer": "Codex",
+            "source_round": 1,
+            "text": "Some issue.",
+            "status": "blocking",
+            "source_status": "blocking",
+            "notes": [],
+        }
+        payload = {
+            "schema_version": 1,
+            "kind": "plan_revision",
+            "state": "blocking",
+            "summary": "Revised plan.",
+            "prior_plan_item_dispositions": [
+                {"item_id": "item-1", "disposition": "resolved"}
+            ],
+            "plan_steps": ["Step A", "Step B"],
+        }
+        trailer = "\n<!-- AGENT_PLAN_STATE: blocking -->\n-- Anthropic Claude\n"
+        revision_path = _write_tmp(json.dumps(payload) + trailer)
+        context_path = _write_tmp(
+            json.dumps({"prior_items": [ledger_entry], "current_round_items": []}),
+            suffix=".json",
+        )
+
+        cli_args = [
+            "helpers.validate_response",
+            "--file",
+            revision_path,
+            "--kind",
+            "plan_revision",
+            "--context-file",
+            context_path,
+        ]
+        outcome = _run(*cli_args)
+        assert "validation passed: plan_revision" in outcome.stdout
+
 
 # ---------------------------------------------------------------------------
-# helpers/state_manager.py  (session round-trip, no live gh required)
+# helpers/state_manager.py  (session round-trip + attach-metadata)
 # ---------------------------------------------------------------------------
 
 class TestStateManager:
@@ -190,6 +275,108 @@ def test_write_and_clear_pending_comment(self) -> None:
         data = json.loads(result.stdout)
         assert "pending_comment_body" not in data
 
+    def test_attach_metadata_produces_valid_agent_loop_meta(self) -> None:
+        """A coder comment tagged by attach-metadata is recognized by _resume_plan_round."""
+        from coding_review_agent_loop.round_state import _resume_plan_round
+
+        class _Comment:
+            """Minimal stand-in carrying only a ``body`` attribute."""
+
+            def __init__(self, body: str) -> None:
+                self.body = body
+
+        with tempfile.TemporaryDirectory() as workdir:
+            source = Path(workdir) / "plan.md"
+            source.write_text(_VALID_PLAN_STATE, encoding="utf-8")
+            tagged_path = Path(workdir) / "plan_tagged.md"
+
+            cli_args = [
+                "helpers.state_manager",
+                "attach-metadata",
+                "--body-file",
+                str(source),
+                "--output",
+                str(tagged_path),
+                "--flow",
+                "plan",
+                "--role",
+                "coder",
+                "--agent",
+                "Claude",
+                "--round-number",
+                "1",
+                "--state",
+                "approved",
+                "--subject-plan-file",
+                str(source),
+                "--canonical-plan-file",
+                str(source),
+            ]
+            _run(*cli_args)
+
+            tagged_text = tagged_path.read_text(encoding="utf-8")
+            assert "AGENT_LOOP_META" in tagged_text
+
+            # A lone coder comment (no reviewer comments yet) is still expected to resume.
+            resume = _resume_plan_round([_Comment(tagged_text)], configured_reviewers=["codex"])
+            assert resume is not None, "build-resume could not find skill-posted coder round"
+            _plan_text, resumed = resume
+            assert resumed.round_number == 1
+
+    def test_attach_metadata_reviewer_found_by_resume(self) -> None:
+        """Tagged coder and reviewer comments resume as round 1 with one completed review."""
+        from coding_review_agent_loop.round_state import _resume_plan_round
+
+        class _Comment:
+            def __init__(self, body: str) -> None:
+                self.body = body
+
+        with tempfile.TemporaryDirectory() as workdir:
+            root = Path(workdir)
+            plan_src, plan_out = root / "plan.md", root / "plan_tagged.md"
+            review_src, review_out = root / "review.md", root / "review_tagged.md"
+            plan_src.write_text(_VALID_PLAN_STATE, encoding="utf-8")
+            review_src.write_text(_VALID_PLAN_REVIEW, encoding="utf-8")
+
+            # Coder turn: the plan is both the subject and the canonical plan.
+            coder_args = [
+                "helpers.state_manager",
+                "attach-metadata",
+                "--body-file", str(plan_src),
+                "--output", str(plan_out),
+                "--flow", "plan", "--role", "coder", "--agent", "Claude",
+                "--round-number", "1", "--state", "approved",
+                "--subject-plan-file", str(plan_src),
+                "--canonical-plan-file", str(plan_src),
+            ]
+
+            # Reviewer turn: same subject as the coder comment.
+            reviewer_args = [
+                "helpers.state_manager",
+                "attach-metadata",
+                "--body-file", str(review_src),
+                "--output", str(review_out),
+                "--flow", "plan", "--role", "reviewer", "--agent", "Codex",
+                "--round-number", "1", "--state", "approved",
+                "--subject-plan-file", str(plan_src),
+            ]
+
+            for cli_args in (coder_args, reviewer_args):
+                _run(*cli_args)
+
+            comments = [
+                _Comment(plan_out.read_text(encoding="utf-8")),
+                _Comment(review_out.read_text(encoding="utf-8")),
+            ]
+            resume = _resume_plan_round(comments, configured_reviewers=["codex"])
+            assert resume is not None, "build-resume did not find the round"
+            _plan_text, resumed = resume
+            assert resumed.round_number == 1
+            review_count = len(resumed.completed_reviews)
+            assert review_count == 1, (
+                f"Expected 1 completed reviewer (Codex), got {review_count}"
+            )
+
 
 # ---------------------------------------------------------------------------
 # helpers/run_external.py  (dry-run only)
diff --git a/tests/test_skill_loop.py b/tests/test_skill_loop.py
index a6d732c..ab7ece0 100644
--- a/tests/test_skill_loop.py
+++ b/tests/test_skill_loop.py
@@ -17,7 +17,9 @@ def test_demo_loop_dry_run() -> None:
     - exits 0
     - prints "validation passed: plan_state"
     - prints "validation passed: plan_review"
+    - produces metadata-tagged comments with AGENT_LOOP_META
     - writes a session file with last_completed_step=post_review
+    - verifies _resume_plan_round can reconstruct the round from the metadata
     """
     repo = "demo/skill-loop-test"
     issue = 88888
@@ -47,6 +49,9 @@ def test_demo_loop_dry_run() -> None:
     assert "validation passed: plan_state" in result.stdout, result.stdout
     assert "validation passed: plan_review" in result.stdout, result.stdout
 
+    # demo_loop now also verifies _resume_plan_round internally
+    assert "_resume_plan_round found round" in result.stdout, result.stdout
+
     # Verify session state was written with last_completed_step=post_review
     slug = repo.replace("/", "-").replace(":", "-")
     state_home = Path(