From a1c996461ad246d99f20781ec3dbab7d47c4e879 Mon Sep 17 00:00:00 2001 From: Wild Wind Date: Sun, 7 Jun 2026 19:19:29 -0700 Subject: [PATCH 1/2] feat: add Claude Code native skill mode for agent-loop orchestration (#216) Adds a Claude Code skill that lets users run the multi-agent review loop inside an interactive Claude Code session without calling `claude -p` for Claude turns. External agents (Codex, Gemini) are still invoked as subprocesses. New files: - helpers/validate_response.py: validates structured protocol responses using the existing library entry points (_validate_plan_review_response, _validate_review_response, _validate_coder_followup_response, etc.) - helpers/state_manager.py: manages local session state and resume via _resume_plan_round/_resume_pr_round; accepts --reviewers and --head-sha/--pr as required by the existing resume APIs - helpers/run_external.py: runs codex/gemini CLIs; --dry-run writes a canned approved plan_review stub for testing - helpers/gh_ops.py: wraps gh CLI for issue/PR comment operations - helpers/demo_loop.py: standalone dry-run demo (no live Claude/GitHub calls) - SKILL.md: step-by-step skill orchestration instructions for Claude - docs/skill_mode.md: architecture table, billing guidance, limitations - tests/test_skill_helpers.py: unit tests for all helper CLIs - tests/test_skill_loop.py: subprocess integration test for demo_loop.py Fixes #216 Co-Authored-By: Claude Sonnet 4.6 --- SKILL.md | 196 ++++++++++++++++++++++++ docs/skill_mode.md | 137 +++++++++++++++++ helpers/__init__.py | 1 + helpers/demo_loop.py | 178 ++++++++++++++++++++++ helpers/gh_ops.py | 143 ++++++++++++++++++ helpers/run_external.py | 140 ++++++++++++++++++ helpers/state_manager.py | 278 +++++++++++++++++++++++++++++++++++ helpers/validate_response.py | 151 +++++++++++++++++++ tests/test_skill_helpers.py | 222 ++++++++++++++++++++++++++++ tests/test_skill_loop.py | 64 ++++++++ 10 files changed, 1510 insertions(+) create mode 100644 SKILL.md create mode 
100644 docs/skill_mode.md create mode 100644 helpers/__init__.py create mode 100644 helpers/demo_loop.py create mode 100644 helpers/gh_ops.py create mode 100644 helpers/run_external.py create mode 100644 helpers/state_manager.py create mode 100644 helpers/validate_response.py create mode 100644 tests/test_skill_helpers.py create mode 100644 tests/test_skill_loop.py diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..60cac4c --- /dev/null +++ b/SKILL.md @@ -0,0 +1,196 @@ +# Agent Loop Skill — Claude Code Native Mode + +This skill lets you run the `coding-review-agent-loop` orchestration directly inside +an interactive Claude Code session, without calling `claude -p` for Claude turns. + +Claude (you, the host) performs coder/plan turns using your active session context. +External agents (Codex, Gemini) are invoked via their local CLIs as subprocesses. +GitHub operations go through `gh`. + +## Prerequisites + +- `gh` authenticated and configured. +- `codex` CLI installed (for Codex reviewer turns). +- `gemini` CLI installed (for Gemini reviewer turns). +- The `coding-review-agent-loop` package importable from `src/` (run from repo root). + +## How to start a plan loop for an issue + +Provide the following information: + +1. **Repository**: `OWNER/REPO` +2. **Issue number**: e.g. `123` +3. **Reviewers**: e.g. `codex`, `gemini`, or both + +Then follow the steps below. + +--- + +## Orchestration steps + +### Step 1 — Check for an existing session + +```bash +python -m helpers.state_manager build-resume \ + --issue ISSUE --repo OWNER/REPO \ + --reviewers codex gemini \ + --flow plan +``` + +If `round_number` > 1 or `completed_reviewer_names` is non-empty, a prior round +was found and you can skip already-completed reviewer turns. 
+ +### Step 2 — Write the plan (Claude host turn) + +Write the implementation plan to a temp file, e.g.: + +``` +/tmp/agent-loop-skill/{session-id}/plan-{uuid}.md +``` + +The file must end with: + +``` + +-- Anthropic Claude +``` + +### Step 3 — Validate the plan + +```bash +python -m helpers.validate_response \ + --file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + --kind plan_state +``` + +### Step 4 — Save as pending comment + +```bash +python -m helpers.state_manager write-pending-comment \ + --issue ISSUE --repo OWNER/REPO \ + --body /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md +``` + +### Step 5 — Post the plan comment + +```bash +python -m helpers.gh_ops post-issue-comment \ + --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + --repo OWNER/REPO +``` + +```bash +python -m helpers.state_manager clear-pending-comment \ + --issue ISSUE --repo OWNER/REPO +``` + +### Step 6 — Run each reviewer + +For each reviewer (e.g. Codex): + +```bash +python -m helpers.run_external \ + --agent codex \ + --prompt-file /tmp/agent-loop-skill/{session-id}/reviewer-prompt.md \ + --output /tmp/agent-loop-skill/{session-id}/codex-review.md \ + --workdir /path/to/codex/checkout +``` + +Validate the reviewer response: + +```bash +python -m helpers.validate_response \ + --file /tmp/agent-loop-skill/{session-id}/codex-review.md \ + --kind plan_review \ + --context-file /tmp/agent-loop-skill/{session-id}/context.json +``` + +The `context.json` must contain: + +```json +{ + "reviewer": "Codex", + "prior_items": [...], + "current_round_items": [...] 
+} +``` + +Post the reviewer comment: + +```bash +python -m helpers.gh_ops post-issue-comment \ + --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/codex-review.md \ + --repo OWNER/REPO +``` + +### Step 7 — Update session state + +```bash +python -m helpers.state_manager write-session \ + --issue ISSUE --repo OWNER/REPO \ + --fields '{"last_completed_step": "post_review", "round_number": 1}' +``` + +### Step 8 — Decision + +- If all reviewers approved: implementation is complete. +- If any reviewer blocked: perform a new plan revision and loop back to Step 2. +- If clarification is needed: post a clarification-request comment and stop. + +--- + +## PR review mode + +Use `--flow pr` with `build-resume` and pass `--pr PR_NUMBER` (or `--head-sha SHA`) +to operate in PR-review mode. All other steps are the same, using `--kind pr_review` +for validation. + +--- + +## Billing and terms note + +This skill runs Claude turns inside your active interactive Claude Code session. +Whether that counts as interactive or programmatic usage depends on Anthropic's +current terms and product behavior at the time you run it. +Do not use this skill to proxy one user's session to other users, to build +unattended 24/7 automation, or in any way that violates Anthropic's usage policies. + +--- + +## Session state location + +Session state is stored in: + +``` +~/.local/state/coding-review-agent-loop/skill-sessions/{owner-repo}/{issue}.json +``` + +This location is outside git checkouts, so it never dirties any working tree. + +--- + +## Limitations + +- If Claude Code's session ends mid-loop, resume from the last posted GitHub comment + by re-running Step 1 with `build-resume`. +- Long-running Codex/Gemini subprocess progress is not streamed; check the log + file in `/tmp/coding-review-agent-loop/skill-logs/` if a reviewer hangs. +- The structured protocol (AGENT_LOOP_META markers, structured JSON responses) + must match the versions expected by the existing library in `src/`. 
+ +--- + +## Demo + +Run a minimal dry-run demo (no live GitHub or agent calls): + +```bash +python -m helpers.demo_loop --issue 123 --repo demo/repo +``` + +Expected output includes: +``` +validation passed: plan_state +validation passed: plan_review +demo_loop: all steps completed successfully +``` diff --git a/docs/skill_mode.md b/docs/skill_mode.md new file mode 100644 index 0000000..5928db7 --- /dev/null +++ b/docs/skill_mode.md @@ -0,0 +1,137 @@ +# Claude Code Native Skill Mode + +## Overview + +`coding-review-agent-loop` includes a Claude Code skill that lets you run the +multi-agent review loop directly inside an interactive Claude Code session +instead of through `claude -p` subprocesses. + +| Aspect | Headless CLI mode | Skill mode | +|--------|-------------------|------------| +| Claude turns | `claude -p` subprocess (Agent SDK credits) | Active Claude Code session | +| Codex turns | `codex exec` subprocess | Same `codex exec` subprocess | +| Gemini turns | `gemini` subprocess | Same `gemini` subprocess | +| GitHub ops | Python `gh` wrapper | Same `gh` wrapper | +| Session resume | AGENT_LOOP_META in GitHub comments | Same markers + local session JSON | +| Best for | Headless CI / unattended automation | Interactive development sessions | + +## Architecture + +``` +Claude Code (interactive session) +│ +├── helpers/validate_response.py ← validates structured protocol responses +├── helpers/state_manager.py ← session state + GitHub comment resume +├── helpers/run_external.py ← invokes codex/gemini CLIs +├── helpers/gh_ops.py ← GitHub issue/PR comment operations +└── helpers/demo_loop.py ← standalone dry-run demo +``` + +Claude performs coder/plan turns by writing files directly (using its Write +tool or by producing structured JSON in its response). External reviewers +(Codex, Gemini) are still invoked as subprocesses via `run_external.py`. 
+ +## Structured protocol compatibility + +The skill helpers reuse the same library entry points used by the headless CLI: + +- `_validate_plan_review_response` / `_validate_review_response` (unresolved_items) +- `_resume_plan_round` / `_resume_pr_round` (round_state) +- `parse_plan_state` / `validate_structured_plan_revision` (protocol) + +GitHub comment metadata markers (`AGENT_LOOP_META`) written by the skill are +identical to those written by the headless CLI, so mixed-mode operation (start +headless, resume in skill, or vice versa) is supported. + +## Session state + +Local session state is stored at: + +``` +~/.local/state/coding-review-agent-loop/skill-sessions/{owner-repo}/{issue}.json +``` + +This path is outside any git checkout so it never dirties a working tree. +Fields written by `state_manager write-session`: + +| Field | Description | +|-------|-------------| +| `last_completed_step` | Most recently completed orchestration step | +| `session_id` | Current skill session UUID prefix | +| `round_number` | Current plan/PR round number | +| `pending_comment_body` | Path to a comment body not yet posted | + +The `pending_comment_body` field provides crash recovery: if the session ends +after writing the comment file but before posting it, the next `build-resume` +call includes the path so Claude can re-post it. + +## Resume from existing round + +`state_manager build-resume` reads GitHub issue comments, extracts all +`AGENT_LOOP_META` base64 blobs, calls `_resume_plan_round(comments, +configured_reviewers=...)` or `_resume_pr_round(comments, head_sha=..., +configured_reviewers=...)`, and outputs a JSON descriptor: + +```json +{ + "round_number": 2, + "prior_items": [...], + "compact_prior_summaries": [...], + "completed_reviewer_names": ["Codex"], + "pending_comment_body": null, + "current_plan": "..." +} +``` + +The skill then skips already-completed reviewer turns and resumes from where +the last session ended. 
+ +**Important**: `--reviewers` must exactly match the configured reviewer list for +the current invocation. For PR-flow sessions, `--head-sha` or `--pr` is also +required so `_resume_pr_round` can compare the current PR head SHA. + +## Billing and terms + +Running Claude turns inside an interactive Claude Code session may count +differently toward billing than `claude -p` / Agent SDK invocations. Whether +this constitutes "interactive" or "programmatic" use depends on Anthropic's +current terms and product behavior at the time of use. + +**Non-goals / constraints**: +- Do not use this skill to proxy one user's session to other users. +- Do not build unattended 24/7 automation that relies on pretending to be + interactive use. +- Do not market this as free Claude access or billing bypass. +- The existing headless `agent-loop` CLI path is unchanged and unaffected. + +## Install / setup for open-source users + +1. Clone the repository and install in development mode: + ``` + pip install -e ".[dev]" + ``` +2. Copy or symlink `helpers/` and `SKILL.md` into your working directory + (or run from the repo root). +3. Authenticate `gh` and install `codex` / `gemini` CLIs as needed. +4. Run the demo to verify the install: + ``` + python -m helpers.demo_loop --issue 123 --repo demo/repo + ``` + +## Known limitations + +- Reviewer subprocess progress (Codex, Gemini) is not streamed to Claude's + terminal while the subprocess runs. Check logs in + `/tmp/coding-review-agent-loop/skill-logs/`. +- If the Claude Code session ends mid-loop, the next session must call + `build-resume` to reconstruct the round state from GitHub comments. +- The structured protocol versions must match; update both the library and + the skill helpers together when the protocol evolves. +- Future Antigravity CLI migration (#215) may require updates to + `run_external.py` when the `gemini` CLI name or interface changes. 
+ +## Related + +- `SKILL.md` — step-by-step skill orchestration instructions for Claude. +- Issue #216 — original exploration proposal. +- Issue #215 — Antigravity CLI migration for Gemini CLI consumer users. diff --git a/helpers/__init__.py b/helpers/__init__.py new file mode 100644 index 0000000..e594a3c --- /dev/null +++ b/helpers/__init__.py @@ -0,0 +1 @@ +# helpers package — Claude Code skill support scripts diff --git a/helpers/demo_loop.py b/helpers/demo_loop.py new file mode 100644 index 0000000..5d94114 --- /dev/null +++ b/helpers/demo_loop.py @@ -0,0 +1,178 @@ +""" +Minimal standalone demo of the Claude Code skill loop. + +Demonstrates: + 1. Claude (host) writes a stub plan. + 2. validate_response validates it as plan_state. + 3. Codex (dry-run) writes a canned approved plan_review stub. + 4. validate_response validates it as plan_review. + 5. gh_ops post-issue-comment --dry-run records the review. + 6. state_manager write-session records last_completed_step=post_review. + +Usage: + python -m helpers.demo_loop --issue 123 [--dry-run] [--repo OWNER/REPO] +""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +import tempfile +import uuid +from pathlib import Path + +# Make src importable when run from the repo root +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +_HELPERS = Path(__file__).parent + +_HOST_STUB_PLAN = """\ +## Plan + +1. Create helpers/validate_response.py +2. Create helpers/state_manager.py +3. Create helpers/run_external.py +4. Create helpers/gh_ops.py +5. Create helpers/demo_loop.py +6. Create SKILL.md +7. 
Add tests + + +-- Anthropic Claude (skill demo stub) +""" + + +def _run(cmd: list[str], *, check: bool = True) -> subprocess.CompletedProcess[str]: + result = subprocess.run(cmd, capture_output=True, text=True, check=False) + if check and result.returncode != 0: + print(f"demo_loop: command failed: {' '.join(cmd)}", file=sys.stderr) + print(result.stderr, file=sys.stderr) + sys.exit(1) + return result + + +def _py(module_args: list[str]) -> list[str]: + return [sys.executable, "-m", f"helpers.{module_args[0]}", *module_args[1:]] + + +def main() -> None: + parser = argparse.ArgumentParser(description="Minimal skill loop demo.") + parser.add_argument("--issue", type=int, default=123) + parser.add_argument("--repo", default="demo/repo") + parser.add_argument("--dry-run", action="store_true", default=True, + help="Always dry-run for demo (default: True).") + args = parser.parse_args() + + session_id = uuid.uuid4().hex[:8] + tmpdir = Path(tempfile.mkdtemp(prefix=f"skill-demo-{session_id}-")) + print(f"demo_loop: session {session_id}, tmpdir {tmpdir}") + + # Step 1: write host stub plan + plan_file = tmpdir / "plan.md" + plan_file.write_text(_HOST_STUB_PLAN, encoding="utf-8") + print(f"demo_loop: wrote host plan stub to {plan_file}") + + # Step 2: validate plan_state + result = _run(_py(["validate_response", "--file", str(plan_file), "--kind", "plan_state"])) + print(result.stdout.strip()) + assert "validation passed: plan_state" in result.stdout, result.stdout + + # Step 3: Codex dry-run produces approved plan_review stub + reviewer_output = tmpdir / "codex_review.md" + _run( + _py( + [ + "run_external", + "--agent", + "codex", + "--prompt-file", + str(plan_file), + "--output", + str(reviewer_output), + "--workdir", + str(tmpdir), + "--dry-run", + ] + ) + ) + print(f"demo_loop: Codex dry-run output written to {reviewer_output}") + + # Step 4: validate plan_review + context_file = tmpdir / "context.json" + context_file.write_text( + json.dumps({"reviewer": "Codex", 
"prior_items": [], "current_round_items": []}), + encoding="utf-8", + ) + result = _run( + _py( + [ + "validate_response", + "--file", + str(reviewer_output), + "--kind", + "plan_review", + "--context-file", + str(context_file), + ] + ) + ) + print(result.stdout.strip()) + assert "validation passed: plan_review" in result.stdout, result.stdout + + # Step 5: dry-run post issue comment + _run( + _py( + [ + "gh_ops", + "post-issue-comment", + "--issue", + str(args.issue), + "--file", + str(reviewer_output), + "--repo", + args.repo, + "--dry-run", + ] + ) + ) + + # Step 6: record session state + _run( + _py( + [ + "state_manager", + "write-session", + "--issue", + str(args.issue), + "--repo", + args.repo, + "--fields", + json.dumps({"last_completed_step": "post_review", "session_id": session_id}), + ] + ) + ) + + # Verify session was written + result = _run( + _py( + [ + "state_manager", + "read-session", + "--issue", + str(args.issue), + "--repo", + args.repo, + ] + ) + ) + session_data = json.loads(result.stdout) + assert session_data.get("last_completed_step") == "post_review", session_data + + print("demo_loop: all steps completed successfully") + print(f"session state: {json.dumps(session_data, indent=2)}") + + +if __name__ == "__main__": + main() diff --git a/helpers/gh_ops.py b/helpers/gh_ops.py new file mode 100644 index 0000000..e377f92 --- /dev/null +++ b/helpers/gh_ops.py @@ -0,0 +1,143 @@ +""" +GitHub CLI wrapper for skill orchestration. 
+ +Subcommands: + + fetch-issue --issue N --repo REPO + post-issue-comment --issue N --file PATH --repo REPO [--dry-run] + fetch-pr --pr N --repo REPO + post-pr-comment --pr N --file PATH --repo REPO [--dry-run] +""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +from pathlib import Path + + +def _gh(*args_list: str, gh_cmd: str = "gh") -> str: + result = subprocess.run( + [gh_cmd, *args_list], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + print(f"gh_ops: gh error: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) + return result.stdout + + +def cmd_fetch_issue(args: argparse.Namespace) -> None: + output = _gh( + "issue", + "view", + str(args.issue), + "--repo", + args.repo, + "--json", + "number,title,body,comments,state", + gh_cmd=args.gh_cmd, + ) + print(output, end="") + + +def cmd_post_issue_comment(args: argparse.Namespace) -> None: + try: + body = Path(args.file).read_text(encoding="utf-8") + except OSError as exc: + print(f"gh_ops: cannot read comment file: {exc}", file=sys.stderr) + sys.exit(1) + if args.dry_run: + print(f"[dry-run] would post issue comment to {args.repo}#{args.issue}:") + print(body[:400]) + return + _gh( + "issue", + "comment", + str(args.issue), + "--repo", + args.repo, + "--body", + body, + gh_cmd=args.gh_cmd, + ) + print(f"comment posted to {args.repo}#{args.issue}") + + +def cmd_fetch_pr(args: argparse.Namespace) -> None: + output = _gh( + "pr", + "view", + str(args.pr), + "--repo", + args.repo, + "--json", + "number,title,body,headRefOid,state,comments", + gh_cmd=args.gh_cmd, + ) + print(output, end="") + + +def cmd_post_pr_comment(args: argparse.Namespace) -> None: + try: + body = Path(args.file).read_text(encoding="utf-8") + except OSError as exc: + print(f"gh_ops: cannot read comment file: {exc}", file=sys.stderr) + sys.exit(1) + if args.dry_run: + print(f"[dry-run] would post PR comment to {args.repo}#{args.pr}:") + print(body[:400]) + 
return + _gh( + "pr", + "comment", + str(args.pr), + "--repo", + args.repo, + "--body", + body, + gh_cmd=args.gh_cmd, + ) + print(f"comment posted to {args.repo}#PR{args.pr}") + + +def main() -> None: + parser = argparse.ArgumentParser(description="GitHub CLI wrapper for skill orchestration.") + parser.add_argument("--gh-cmd", default="gh") + subparsers = parser.add_subparsers(dest="subcommand", required=True) + + p_fi = subparsers.add_parser("fetch-issue") + p_fi.add_argument("--issue", type=int, required=True) + p_fi.add_argument("--repo", required=True) + + p_pic = subparsers.add_parser("post-issue-comment") + p_pic.add_argument("--issue", type=int, required=True) + p_pic.add_argument("--file", required=True) + p_pic.add_argument("--repo", required=True) + p_pic.add_argument("--dry-run", action="store_true") + + p_fp = subparsers.add_parser("fetch-pr") + p_fp.add_argument("--pr", type=int, required=True) + p_fp.add_argument("--repo", required=True) + + p_ppc = subparsers.add_parser("post-pr-comment") + p_ppc.add_argument("--pr", type=int, required=True) + p_ppc.add_argument("--file", required=True) + p_ppc.add_argument("--repo", required=True) + p_ppc.add_argument("--dry-run", action="store_true") + + args = parser.parse_args() + dispatch = { + "fetch-issue": cmd_fetch_issue, + "post-issue-comment": cmd_post_issue_comment, + "fetch-pr": cmd_fetch_pr, + "post-pr-comment": cmd_post_pr_comment, + } + dispatch[args.subcommand](args) + + +if __name__ == "__main__": + main() diff --git a/helpers/run_external.py b/helpers/run_external.py new file mode 100644 index 0000000..dced704 --- /dev/null +++ b/helpers/run_external.py @@ -0,0 +1,140 @@ +""" +Run an external agent (Codex or Gemini) for one review turn. + +In --dry-run mode, writes a canned approved plan_review stub to --output and exits 0. +In live mode, invokes the agent CLI and writes the response to --output. 
+ +Usage: + python -m helpers.run_external \\ + --agent codex|gemini \\ + --prompt-file PATH \\ + --output PATH \\ + --workdir PATH \\ + [--cmd PATH] \\ + [--dry-run] +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from coding_review_agent_loop.runner import Runner + +_CANNED_PLAN_REVIEW = json.dumps( + { + "schema_version": 1, + "kind": "plan_review", + "state": "approved", + "summary": "Dry-run stub: plan looks good.", + "blocking_plan_issues": [], + "same_plan_followups": [], + "future_followups": [], + "prior_plan_item_dispositions": [], + }, + indent=2, +) + +_CANNED_PLAN_REVIEW_FOOTER = ( + "\n\n-- Codex (dry-run stub)\n" +) + + +def _build_dry_run_response() -> str: + return _CANNED_PLAN_REVIEW + _CANNED_PLAN_REVIEW_FOOTER + + +def main() -> None: + parser = argparse.ArgumentParser(description="Run one external agent turn.") + parser.add_argument("--agent", required=True, choices=["codex", "gemini"]) + parser.add_argument("--prompt-file", required=True, help="Path to prompt text file.") + parser.add_argument("--output", required=True, help="Path to write the agent response.") + parser.add_argument("--workdir", required=True, help="Working directory for the agent.") + parser.add_argument("--cmd", default=None, help="Agent CLI command (overrides default).") + parser.add_argument("--dry-run", action="store_true", help="Write a canned stub and exit.") + args = parser.parse_args() + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + if args.dry_run: + output_path.write_text(_build_dry_run_response(), encoding="utf-8") + print(f"dry-run: wrote canned plan_review stub to {output_path}") + return + + try: + prompt = Path(args.prompt_file).read_text(encoding="utf-8") + except OSError as exc: + print(f"run_external: cannot read prompt file: {exc}", file=sys.stderr) + sys.exit(1) + + workdir = 
Path(args.workdir) + + # Import backends lazily to avoid heavy import in dry-run path + from coding_review_agent_loop.agents.codex import CodexBackend + from coding_review_agent_loop.agents.gemini import GeminiBackend + from coding_review_agent_loop.config import AgentLoopConfig + + agent_name = args.agent + default_cmds = {"codex": "codex", "gemini": "gemini"} + cmd = args.cmd or default_cmds[agent_name] + + # Build a minimal config sufficient for backend.run() + import tempfile + log_dir = Path(tempfile.gettempdir()) / "coding-review-agent-loop" / "skill-logs" + log_dir.mkdir(parents=True, exist_ok=True) + + config = AgentLoopConfig( + repo="skill/run", + claude_dir=workdir, + codex_dir=workdir, + gemini_dir=workdir, + coder="claude", + reviewer=(agent_name,), # type: ignore[arg-type] + base="main", + max_rounds=1, + auto_merge=False, + dry_run=False, + allow_shared_dir=True, + claude_cmd="claude", + codex_cmd=cmd if agent_name == "codex" else "codex", + gemini_cmd=cmd if agent_name == "gemini" else "gemini", + gh_cmd="gh", + claude_args=(), + codex_args=(), + gemini_args=(), + test_command=None, + pre_review_tests=False, + ci_check_name="", + ci_timeout_seconds=300, + ci_poll_interval_seconds=30, + quiet=False, + log_dir=log_dir, + progress_interval_seconds=30, + agent_max_retries=0, + agent_retry_backoff_seconds=(30,), + agent_memory=False, + refresh_agent_memory=False, + agent_memory_dir=log_dir, + refresh_test_profile=False, + auto_agent_dirs=(agent_name,), # type: ignore[arg-type] + ) + + runner = Runner(dry_run=False) + backend = CodexBackend() if agent_name == "codex" else GeminiBackend() + try: + result = backend.run(runner, config, prompt) + except Exception as exc: # noqa: BLE001 + print(f"run_external: agent invocation failed: {exc}", file=sys.stderr) + sys.exit(1) + + output_path.write_text(result.text, encoding="utf-8") + print(f"agent result written to {output_path}") + + +if __name__ == "__main__": + main() diff --git a/helpers/state_manager.py 
b/helpers/state_manager.py new file mode 100644 index 0000000..e9f96e1 --- /dev/null +++ b/helpers/state_manager.py @@ -0,0 +1,278 @@ +""" +Manage Claude Code skill session state and build resume descriptors. + +Subcommands: + + build-resume + --issue N --repo REPO --reviewers REVIEWER [REVIEWER ...] + [--flow plan|pr] [--head-sha SHA | --pr PR_NUMBER] + + Reads GitHub issue comments, extracts AGENT_LOOP_META records, and calls + _resume_plan_round or _resume_pr_round from the existing library. Outputs + a JSON resume descriptor to stdout. + + write-session + --issue N --repo REPO --fields JSON + + Writes (or merges) session state to + ~/.local/state/coding-review-agent-loop/skill-sessions/{repo-slug}/{issue}.json + + read-session + --issue N --repo REPO + + Reads session state JSON to stdout. + + write-pending-comment + --issue N --repo REPO --body PATH + + Writes a pending comment body path to session state. + + clear-pending-comment + --issue N --repo REPO + + Clears the pending_comment_body field from session state. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from coding_review_agent_loop.agents.base import AgentName +from coding_review_agent_loop.errors import AgentLoopError +from coding_review_agent_loop.round_state import ( + _resume_plan_round, + _resume_pr_round, + _serialize_unresolved_item, +) + + +def _session_path(repo: str, issue: int) -> Path: + slug = repo.replace("/", "-").replace(":", "-") + state_home = Path( + os.environ.get("XDG_STATE_HOME", Path.home() / ".local" / "state") + ) + return state_home / "coding-review-agent-loop" / "skill-sessions" / slug / f"{issue}.json" + + +def _load_session(path: Path) -> dict[str, object]: + if not path.exists(): + return {} + try: + return json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return {} + + +def _save_session(path: Path, data: dict[str, object]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(data, indent=2, sort_keys=True), encoding="utf-8") + + +@dataclass +class _FakeComment: + """Minimal object satisfying the comment.body duck-type expected by round_state helpers.""" + body: str + + +def _fetch_issue_comments(repo: str, issue: int, gh_cmd: str = "gh") -> list[_FakeComment]: + result = subprocess.run( + [ + gh_cmd, + "api", + f"repos/{repo}/issues/{issue}/comments", + "--paginate", + "--jq", + ".[].body", + ], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + print( + f"state_manager: gh api failed: {result.stderr.strip()}", + file=sys.stderr, + ) + sys.exit(1) + bodies = result.stdout.strip().split("\n") + return [_FakeComment(body=b) for b in bodies if b] + + +def _fetch_pr_head_sha(repo: str, pr_number: int, gh_cmd: str = "gh") -> str: + result = subprocess.run( + [ + gh_cmd, + "pr", + "view", + 
def _resumed_round_fields(resumed) -> dict[str, object]:
    """Project a resumed-round record onto the descriptor keys it overrides.

    Reads ``round_number``, ``prior_items``, ``compact_prior_summaries`` and
    ``completed_reviews`` from *resumed* and returns JSON-serializable values
    for the matching descriptor keys.
    """
    return {
        "round_number": resumed.round_number,
        "prior_items": [
            _serialize_unresolved_item(item) for item in resumed.prior_items
        ],
        "compact_prior_summaries": list(resumed.compact_prior_summaries),
        "completed_reviewer_names": [
            record.metadata.agent for record in resumed.completed_reviews
        ],
    }


def cmd_build_resume(args: argparse.Namespace) -> None:
    """Print a JSON resume descriptor built from the issue's comments.

    The descriptor starts from "fresh round 1" defaults plus the locally
    stored ``pending_comment_body`` and is overridden with whatever
    ``_resume_plan_round`` / ``_resume_pr_round`` recover from the comments.
    Exits with status 1 if the resume helpers raise ``AgentLoopError``.
    """
    repo: str = args.repo
    issue: int = args.issue
    reviewers: list[AgentName] = args.reviewers
    gh_cmd: str = getattr(args, "gh_cmd", "gh")

    comments = _fetch_issue_comments(repo, issue, gh_cmd=gh_cmd)
    session = _load_session(_session_path(repo, issue))

    descriptor: dict[str, object] = {
        "round_number": 1,
        "prior_items": [],
        "compact_prior_summaries": [],
        "completed_reviewer_names": [],
        "pending_comment_body": session.get("pending_comment_body"),
    }

    try:
        if args.flow == "plan":
            plan_result = _resume_plan_round(comments, configured_reviewers=reviewers)
            if plan_result is not None:
                plan_text, resumed = plan_result
                descriptor.update(_resumed_round_fields(resumed))
                descriptor["current_plan"] = plan_text
        else:
            # PR flow: the head SHA comes from --head-sha, or is looked up from --pr.
            head_sha: str | None = getattr(args, "head_sha", None)
            pr_number: int | None = getattr(args, "pr", None)
            if not head_sha and pr_number:
                head_sha = _fetch_pr_head_sha(repo, pr_number, gh_cmd=gh_cmd)
            # NOTE(review): when neither --head-sha nor --pr is given, head_sha
            # is None here; confirm _resume_pr_round tolerates that.
            resumed = _resume_pr_round(
                comments,
                head_sha=head_sha,
                configured_reviewers=reviewers,
            )
            if resumed is not None:
                descriptor.update(_resumed_round_fields(resumed))
    except AgentLoopError as exc:
        print(f"state_manager: resume error: {exc}", file=sys.stderr)
        sys.exit(1)

    print(json.dumps(descriptor, indent=2))


def cmd_write_session(args: argparse.Namespace) -> None:
    """Merge the JSON object given in ``--fields`` into the stored session.

    Exits with status 1 when ``--fields`` is not valid JSON or is valid JSON
    that is not an object; only a mapping can be merged into the session.
    """
    # Validate the input before touching the session file so a bad argument
    # fails fast and never reaches dict.update() with a non-mapping.
    try:
        updates = json.loads(args.fields)
    except json.JSONDecodeError as exc:
        print(f"state_manager: invalid --fields JSON: {exc}", file=sys.stderr)
        sys.exit(1)
    if not isinstance(updates, dict):
        print(
            "state_manager: --fields must be a JSON object, "
            f"got {type(updates).__name__}",
            file=sys.stderr,
        )
        sys.exit(1)

    path = _session_path(args.repo, args.issue)
    existing = _load_session(path)
    existing.update(updates)
    _save_session(path, existing)
    print(f"session written: {path}")


def cmd_read_session(args: argparse.Namespace) -> None:
    """Print the stored session for ``--repo``/``--issue`` as indented JSON."""
    data = _load_session(_session_path(args.repo, args.issue))
    print(json.dumps(data, indent=2))


def cmd_write_pending_comment(args: argparse.Namespace) -> None:
    """Record ``--body`` under ``pending_comment_body`` in the session."""
    path = _session_path(args.repo, args.issue)
    existing = _load_session(path)
    existing["pending_comment_body"] = str(args.body)
    _save_session(path, existing)
    print(f"pending comment path written: {path}")


def cmd_clear_pending_comment(args: argparse.Namespace) -> None:
    """Remove ``pending_comment_body`` from the session (no-op if absent)."""
    path = _session_path(args.repo, args.issue)
    existing = _load_session(path)
    existing.pop("pending_comment_body", None)
    _save_session(path, existing)
    print(f"pending comment cleared: {path}")


def _add_target_args(sub: argparse.ArgumentParser) -> argparse.ArgumentParser:
    """Add the ``--issue``/``--repo`` pair every subcommand requires."""
    sub.add_argument("--issue", type=int, required=True)
    sub.add_argument("--repo", required=True)
    return sub


def main() -> None:
    """Parse the command line and dispatch to the selected subcommand."""
    parser = argparse.ArgumentParser(description="Manage skill session state.")
    parser.add_argument("--gh-cmd", default="gh")
    subparsers = parser.add_subparsers(dest="subcommand", required=True)

    # build-resume
    p_resume = _add_target_args(
        subparsers.add_parser(
            "build-resume", help="Build a resume descriptor from GitHub comments."
        )
    )
    p_resume.add_argument("--reviewers", nargs="+", required=True)
    p_resume.add_argument("--flow", choices=["plan", "pr"], default="plan")
    p_resume.add_argument("--head-sha", default=None)
    p_resume.add_argument("--pr", type=int, default=None)

    # write-session
    p_write = _add_target_args(
        subparsers.add_parser("write-session", help="Write session state fields.")
    )
    p_write.add_argument("--fields", required=True, help="JSON object of fields to merge.")

    # read-session
    _add_target_args(subparsers.add_parser("read-session", help="Read session state."))

    # write-pending-comment
    p_pending = _add_target_args(
        subparsers.add_parser(
            "write-pending-comment", help="Record a pending comment body path."
        )
    )
    p_pending.add_argument("--body", required=True)

    # clear-pending-comment
    _add_target_args(
        subparsers.add_parser(
            "clear-pending-comment", help="Clear the pending comment body path."
        )
    )

    args = parser.parse_args()
    dispatch = {
        "build-resume": cmd_build_resume,
        "write-session": cmd_write_session,
        "read-session": cmd_read_session,
        "write-pending-comment": cmd_write_pending_comment,
        "clear-pending-comment": cmd_clear_pending_comment,
    }
    dispatch[args.subcommand](args)


if __name__ == "__main__":
    main()
+ +Usage: + python -m helpers.validate_response \\ + --file PATH --kind KIND [--context-file PATH] + +Kinds: + plan_state -- coder plan post; validates AGENT_PLAN_STATE marker + plan_review -- reviewer plan review structured JSON + pr_review -- reviewer PR review structured JSON + coder_followup -- coder follow-up structured JSON + plan_revision -- coder plan revision structured JSON + +The optional --context-file is a JSON file with schema: + { + "reviewer": str, + "prior_items": [...serialized UnresolvedReviewItem...], + "current_round_items": [...serialized UnresolvedReviewItem...], + "human_requirements": [{"id": str, "text": str, "scope": str}, ...] + } + +Exit 0 on success; exit 1 with diagnostic on failure. +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +# Make src importable when run from the repo root as `python -m helpers.validate_response` +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from coding_review_agent_loop.errors import AgentLoopError +from coding_review_agent_loop.protocol import parse_plan_state +from coding_review_agent_loop.unresolved_items import ( + _validate_plan_review_response, + _validate_review_response, + _validate_coder_followup_response, +) +from coding_review_agent_loop.protocol import validate_structured_plan_revision +from coding_review_agent_loop.round_state import _deserialize_unresolved_item +from coding_review_agent_loop.github import HumanReviewRequirement + + +def _load_context(path: str | None) -> dict[str, object]: + """Load optional context JSON file.""" + if path is None: + return {} + try: + return json.loads(Path(path).read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + print(f"validate_response: could not read context file {path}: {exc}", file=sys.stderr) + sys.exit(1) + + +def _deserialize_unresolved_items(raw: list[object]) -> list[object]: + result = [] + for item in raw: + 
result.append(_deserialize_unresolved_item(item)) + return result + + +def _deserialize_human_requirements(raw: list[object]) -> list[HumanReviewRequirement]: + result = [] + for item in raw: + if not isinstance(item, dict): + continue + result.append( + HumanReviewRequirement( + source_type=str(item.get("source_type", "issue")), + author=str(item.get("author", "")) or None, + created_at=str(item.get("created_at", "")) or None, + url=str(item.get("url", "")) or None, + body=str(item.get("body", item.get("text", ""))), + ) + ) + return result + + +def main() -> None: + parser = argparse.ArgumentParser(description="Validate an agent response file.") + parser.add_argument("--file", required=True, help="Path to the response file.") + parser.add_argument( + "--kind", + required=True, + choices=["plan_state", "plan_review", "pr_review", "coder_followup", "plan_revision"], + help="Kind of response to validate.", + ) + parser.add_argument( + "--context-file", + default=None, + help="Optional JSON file with reviewer identity and prior item context.", + ) + args = parser.parse_args() + + try: + text = Path(args.file).read_text(encoding="utf-8") + except OSError as exc: + print(f"validate_response: cannot read {args.file}: {exc}", file=sys.stderr) + sys.exit(1) + + ctx = _load_context(args.context_file) + reviewer = str(ctx.get("reviewer", "Codex")) + prior_items = _deserialize_unresolved_items(list(ctx.get("prior_items", []))) + current_round_items = _deserialize_unresolved_items(list(ctx.get("current_round_items", []))) + raw_human_requirements = list(ctx.get("human_requirements", [])) + human_requirements = _deserialize_human_requirements(raw_human_requirements) + + kind = args.kind + try: + if kind == "plan_state": + parse_plan_state(text) + elif kind == "plan_review": + _validate_plan_review_response( + text, + reviewer=reviewer, + unresolved_items=prior_items, + current_round_items=current_round_items, + ) + elif kind == "pr_review": + _validate_review_response( + text, 
+ reviewer=reviewer, + unresolved_items=prior_items, + current_round_items=current_round_items, + ) + elif kind == "coder_followup": + _validate_coder_followup_response( + text, + unresolved_items=prior_items, + human_requirements=human_requirements or None, + ) + elif kind == "plan_revision": + result = validate_structured_plan_revision(text) + if result is None: + raise AgentLoopError("Response did not parse as a structured plan_revision.") + except AgentLoopError as exc: + print(f"validation failed: {kind}: {exc}", file=sys.stderr) + sys.exit(1) + except Exception as exc: # noqa: BLE001 + print(f"validation error: {kind}: {exc}", file=sys.stderr) + sys.exit(1) + + print(f"validation passed: {kind}") + + +if __name__ == "__main__": + main() diff --git a/tests/test_skill_helpers.py b/tests/test_skill_helpers.py new file mode 100644 index 0000000..1c835e0 --- /dev/null +++ b/tests/test_skill_helpers.py @@ -0,0 +1,222 @@ +"""Unit tests for the Claude Code skill helper CLIs.""" + +from __future__ import annotations + +import json +import subprocess +import sys +import tempfile +from pathlib import Path + +import pytest + +HELPERS = Path(__file__).parent.parent / "helpers" + + +def _run(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]: + result = subprocess.run( + [sys.executable, "-m", *args], + capture_output=True, + text=True, + cwd=Path(__file__).parent.parent, + check=False, + ) + if check and result.returncode != 0: + raise AssertionError( + f"Command {args!r} failed (exit {result.returncode}):\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}" + ) + return result + + +# --------------------------------------------------------------------------- +# helpers/validate_response.py +# --------------------------------------------------------------------------- + +_VALID_PLAN_STATE = """\ +## Plan + +1. Step one + + +-- Anthropic Claude +""" + +_INVALID_PLAN_STATE = "This has no marker at all." 
_VALID_PLAN_REVIEW = json.dumps(
    {
        "schema_version": 1,
        "kind": "plan_review",
        "state": "approved",
        "summary": "Plan looks good.",
        "blocking_plan_issues": [],
        "same_plan_followups": [],
        "future_followups": [],
        "prior_plan_item_dispositions": [],
    }
) + "\n\n-- Codex\n"


def _write_tmp(content: str, suffix: str = ".md", directory: Path | None = None) -> str:
    """Write *content* to a new uniquely named file and return its path.

    Pass pytest's ``tmp_path`` as *directory* so the file is removed with the
    test's temporary directory; without it the file is created in the system
    temp directory and is not cleaned up.
    """
    with tempfile.NamedTemporaryFile(
        "w", suffix=suffix, delete=False, encoding="utf-8", dir=directory
    ) as f:
        f.write(content)
    return f.name


class TestValidateResponse:
    """CLI-level checks for ``python -m helpers.validate_response``."""

    def test_valid_plan_state_accepted(self, tmp_path: Path) -> None:
        path = _write_tmp(_VALID_PLAN_STATE, directory=tmp_path)
        result = _run("helpers.validate_response", "--file", path, "--kind", "plan_state")
        assert "validation passed: plan_state" in result.stdout

    def test_missing_plan_state_marker_rejected(self, tmp_path: Path) -> None:
        path = _write_tmp(_INVALID_PLAN_STATE, directory=tmp_path)
        result = _run(
            "helpers.validate_response", "--file", path, "--kind", "plan_state", check=False
        )
        assert result.returncode != 0
        assert "validation failed: plan_state" in result.stderr

    def test_valid_plan_review_accepted(self, tmp_path: Path) -> None:
        path = _write_tmp(_VALID_PLAN_REVIEW, directory=tmp_path)
        ctx_path = _write_tmp(
            json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}),
            suffix=".json",
            directory=tmp_path,
        )
        result = _run(
            "helpers.validate_response",
            "--file",
            path,
            "--kind",
            "plan_review",
            "--context-file",
            ctx_path,
        )
        assert "validation passed: plan_review" in result.stdout

    def test_plan_review_with_unknown_prior_item_rejected(self, tmp_path: Path) -> None:
        # A review that disposes unknown prior item IDs must be rejected.
        review = json.dumps(
            {
                "schema_version": 1,
                "kind": "plan_review",
                "state": "blocking",
                "summary": "Blocking.",
                "blocking_plan_issues": ["Something bad."],
                "same_plan_followups": [],
                "future_followups": [],
                "prior_plan_item_dispositions": [
                    {"item_id": "item-999", "disposition": "resolved"}
                ],
            }
        ) + "\n\n-- Codex\n"

        path = _write_tmp(review, directory=tmp_path)
        # Empty prior items: item-999 is unknown.
        ctx_path = _write_tmp(
            json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}),
            suffix=".json",
            directory=tmp_path,
        )
        result = _run(
            "helpers.validate_response",
            "--file",
            path,
            "--kind",
            "plan_review",
            "--context-file",
            ctx_path,
            check=False,
        )
        assert result.returncode != 0


# ---------------------------------------------------------------------------
# helpers/state_manager.py (session round-trip, no live gh required)
# ---------------------------------------------------------------------------

class TestStateManager:
    """Session round-trips through the state_manager CLI.

    Each test points XDG_STATE_HOME at its own ``tmp_path`` so nothing is
    written under the developer's real state directory; the subprocess
    inherits the patched environment.
    NOTE(review): assumes the helper's session path honors XDG_STATE_HOME the
    same way ``_session_path`` below does - confirm against state_manager.
    """

    def _session_path(self, repo: str, issue: int) -> Path:
        import os
        slug = repo.replace("/", "-").replace(":", "-")
        state_home = Path(os.environ.get("XDG_STATE_HOME", Path.home() / ".local" / "state"))
        return state_home / "coding-review-agent-loop" / "skill-sessions" / slug / f"{issue}.json"

    def test_write_and_read_session(
        self, monkeypatch: "pytest.MonkeyPatch", tmp_path: Path
    ) -> None:
        monkeypatch.setenv("XDG_STATE_HOME", str(tmp_path))
        repo = "test/skill-repo"
        issue = 9999
        fields = {"last_completed_step": "post_review", "session_id": "abc123"}
        _run(
            "helpers.state_manager",
            "write-session",
            "--issue",
            str(issue),
            "--repo",
            repo,
            "--fields",
            json.dumps(fields),
        )
        result = _run(
            "helpers.state_manager", "read-session", "--issue", str(issue), "--repo", repo
        )
        data = json.loads(result.stdout)
        assert data["last_completed_step"] == "post_review"
        assert data["session_id"] == "abc123"

    def test_write_and_clear_pending_comment(
        self, monkeypatch: "pytest.MonkeyPatch", tmp_path: Path
    ) -> None:
        monkeypatch.setenv("XDG_STATE_HOME", str(tmp_path))
        repo = "test/skill-repo"
        issue = 9999
        # Only the path string is recorded; the file itself is never opened here.
        body_path = "/tmp/pending-comment-body.md"
        _run(
            "helpers.state_manager",
            "write-pending-comment",
            "--issue",
            str(issue),
            "--repo",
            repo,
            "--body",
            body_path,
        )
        result = _run(
            "helpers.state_manager", "read-session", "--issue", str(issue), "--repo", repo
        )
        data = json.loads(result.stdout)
        assert data.get("pending_comment_body") == body_path

        _run(
            "helpers.state_manager",
            "clear-pending-comment",
            "--issue",
            str(issue),
            "--repo",
            repo,
        )
        result = _run(
            "helpers.state_manager", "read-session", "--issue", str(issue), "--repo", repo
        )
        data = json.loads(result.stdout)
        assert "pending_comment_body" not in data


# ---------------------------------------------------------------------------
# helpers/run_external.py (dry-run only)
# ---------------------------------------------------------------------------

class TestRunExternal:
    """Dry-run behaviour of ``python -m helpers.run_external``."""

    def test_dry_run_exits_zero_and_writes_valid_stub(self, tmp_path: Path) -> None:
        prompt_path = tmp_path / "prompt.md"
        prompt_path.write_text("Prompt text.", encoding="utf-8")
        # Pre-create the output file, as the previous NamedTemporaryFile did.
        output_path = tmp_path / "output.md"
        output_path.touch()

        result = _run(
            "helpers.run_external",
            "--agent",
            "codex",
            "--prompt-file",
            str(prompt_path),
            "--output",
            str(output_path),
            "--workdir",
            str(tmp_path),
            "--dry-run",
        )
        assert result.returncode == 0
        content = output_path.read_text(encoding="utf-8")
        # The dry-run stub must contain a valid plan_review JSON and AGENT_PLAN_STATE marker
        assert "AGENT_PLAN_STATE: approved" in content
        assert '"state": "approved"' in content


# --- tests/test_skill_loop.py ------------------------------------------------


def test_demo_loop_dry_run(monkeypatch: "pytest.MonkeyPatch", tmp_path: Path) -> None:
    """
    Run helpers/demo_loop.py and verify it:
      - exits 0
      - prints "validation passed: plan_state"
      - prints "validation passed: plan_review"
      - writes a session file with last_completed_step=post_review

    XDG_STATE_HOME is redirected to ``tmp_path`` so the session file is
    written to (and read back from) a throwaway directory.
    """
    state_home = tmp_path / "state"
    monkeypatch.setenv("XDG_STATE_HOME", str(state_home))
    repo = "demo/skill-loop-test"
    issue = 88888

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "helpers.demo_loop",
            "--issue",
            str(issue),
            "--repo",
            repo,
        ],
        capture_output=True,
        text=True,
        cwd=Path(__file__).parent.parent,
        check=False,
    )

    assert result.returncode == 0, (
        f"demo_loop failed (exit {result.returncode}):\n"
        f"stdout: {result.stdout}\n"
        f"stderr: {result.stderr}"
    )

    assert "validation passed: plan_state" in result.stdout, result.stdout
    assert "validation passed: plan_review" in result.stdout, result.stdout

    # Verify session state was written with last_completed_step=post_review
    slug = repo.replace("/", "-").replace(":", "-")
    session_path = (
        state_home
        / "coding-review-agent-loop"
        / "skill-sessions"
        / slug
        / f"{issue}.json"
    )
    assert session_path.exists(), f"session file not found: {session_path}"
    data = json.loads(session_path.read_text(encoding="utf-8"))
    assert data.get("last_completed_step") == "post_review", data
Fix validate_response.py plan_revision validation to check that prior_plan_item_dispositions only reference item IDs present in the context ledger, matching the check in the headless orchestrator's _validate_plan_revision_response (orchestrator.py:1281). Add regression tests. 3. New tests: test_attach_metadata_produces_valid_agent_loop_meta and test_attach_metadata_reviewer_found_by_resume verify that skill-posted rounds are found by _resume_plan_round; test_plan_revision_with_unknown_prior_item_rejected and test_plan_revision_with_known_items_accepted cover the ledger check. Refs #216 Co-Authored-By: Claude Sonnet 4.6 --- SKILL.md | 45 +++++++-- helpers/demo_loop.py | 130 +++++++++++++++++++++--- helpers/state_manager.py | 114 +++++++++++++++++++++ helpers/validate_response.py | 26 ++++- tests/test_skill_helpers.py | 191 ++++++++++++++++++++++++++++++++++- tests/test_skill_loop.py | 5 + 6 files changed, 483 insertions(+), 28 deletions(-) diff --git a/SKILL.md b/SKILL.md index 60cac4c..f48df89 100644 --- a/SKILL.md +++ b/SKILL.md @@ -63,19 +63,37 @@ python -m helpers.validate_response \ --kind plan_state ``` -### Step 4 — Save as pending comment +### Step 4 — Attach AGENT_LOOP_META to the plan comment + +The comment posted to GitHub must carry an `AGENT_LOOP_META` marker so that +`build-resume` can reconstruct the round state in future sessions. 
Use +`attach-metadata` to produce a metadata-tagged version of the plan file: + +```bash +python -m helpers.state_manager attach-metadata \ + --body-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + --output /tmp/agent-loop-skill/{session-id}/plan-tagged.md \ + --flow plan --role coder --agent Claude \ + --round-number {round_number} --state approved \ + --subject-plan-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + --canonical-plan-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + [--prior-items-file /tmp/agent-loop-skill/{session-id}/prior_items.json] +``` + +`prior_items.json` is the `prior_items` array from the `build-resume` JSON +output. Omit the flag when `prior_items` is empty (round 1). + +### Step 5 — Save as pending comment and post ```bash python -m helpers.state_manager write-pending-comment \ --issue ISSUE --repo OWNER/REPO \ - --body /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md + --body /tmp/agent-loop-skill/{session-id}/plan-tagged.md ``` -### Step 5 — Post the plan comment - ```bash python -m helpers.gh_ops post-issue-comment \ - --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/plan-tagged.md \ --repo OWNER/REPO ``` @@ -115,11 +133,24 @@ The `context.json` must contain: } ``` -Post the reviewer comment: +Attach AGENT_LOOP_META to the reviewer comment (subject must match the coder comment): + +```bash +python -m helpers.state_manager attach-metadata \ + --body-file /tmp/agent-loop-skill/{session-id}/codex-review.md \ + --output /tmp/agent-loop-skill/{session-id}/codex-review-tagged.md \ + --flow plan --role reviewer --agent Codex \ + --round-number {round_number} --state approved \ + --subject-plan-file /tmp/agent-loop-skill/{session-id}/plan-{uuid}.md \ + [--prior-items-file /tmp/agent-loop-skill/{session-id}/prior_items.json] \ + [--dispositions-file /tmp/agent-loop-skill/{session-id}/codex_dispositions.json] +``` + +Post the 
reviewer comment (with metadata): ```bash python -m helpers.gh_ops post-issue-comment \ - --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/codex-review.md \ + --issue ISSUE --file /tmp/agent-loop-skill/{session-id}/codex-review-tagged.md \ --repo OWNER/REPO ``` diff --git a/helpers/demo_loop.py b/helpers/demo_loop.py index 5d94114..56e4684 100644 --- a/helpers/demo_loop.py +++ b/helpers/demo_loop.py @@ -4,13 +4,17 @@ Demonstrates: 1. Claude (host) writes a stub plan. 2. validate_response validates it as plan_state. - 3. Codex (dry-run) writes a canned approved plan_review stub. - 4. validate_response validates it as plan_review. - 5. gh_ops post-issue-comment --dry-run records the review. - 6. state_manager write-session records last_completed_step=post_review. + 3. state_manager attach-metadata adds AGENT_LOOP_META to the plan comment. + 4. gh_ops post-issue-comment --dry-run records the plan (with metadata). + 5. Codex (dry-run) writes a canned approved plan_review stub. + 6. validate_response validates it as plan_review. + 7. state_manager attach-metadata adds AGENT_LOOP_META to the reviewer comment. + 8. gh_ops post-issue-comment --dry-run records the reviewer comment. + 9. state_manager write-session records last_completed_step=post_review. + 10. Verify _resume_plan_round can find the round from the metadata-tagged bodies. 
Usage: - python -m helpers.demo_loop --issue 123 [--dry-run] [--repo OWNER/REPO] + python -m helpers.demo_loop --issue 123 [--repo OWNER/REPO] """ from __future__ import annotations @@ -26,8 +30,6 @@ # Make src importable when run from the repo root sys.path.insert(0, str(Path(__file__).parent.parent / "src")) -_HELPERS = Path(__file__).parent - _HOST_STUB_PLAN = """\ ## Plan @@ -61,8 +63,6 @@ def main() -> None: parser = argparse.ArgumentParser(description="Minimal skill loop demo.") parser.add_argument("--issue", type=int, default=123) parser.add_argument("--repo", default="demo/repo") - parser.add_argument("--dry-run", action="store_true", default=True, - help="Always dry-run for demo (default: True).") args = parser.parse_args() session_id = uuid.uuid4().hex[:8] @@ -79,7 +79,55 @@ def main() -> None: print(result.stdout.strip()) assert "validation passed: plan_state" in result.stdout, result.stdout - # Step 3: Codex dry-run produces approved plan_review stub + # Step 3: attach AGENT_LOOP_META to the plan comment (coder, round 1) + plan_with_meta = tmpdir / "plan_with_meta.md" + _run( + _py( + [ + "state_manager", + "attach-metadata", + "--body-file", + str(plan_file), + "--output", + str(plan_with_meta), + "--flow", + "plan", + "--role", + "coder", + "--agent", + "Claude", + "--round-number", + "1", + "--state", + "approved", + "--subject-plan-file", + str(plan_file), + "--canonical-plan-file", + str(plan_file), + ] + ) + ) + print(f"demo_loop: plan comment with AGENT_LOOP_META: {plan_with_meta}") + assert "AGENT_LOOP_META" in plan_with_meta.read_text(encoding="utf-8") + + # Step 4: dry-run post the plan comment (with metadata) + _run( + _py( + [ + "gh_ops", + "post-issue-comment", + "--issue", + str(args.issue), + "--file", + str(plan_with_meta), + "--repo", + args.repo, + "--dry-run", + ] + ) + ) + + # Step 5: Codex dry-run produces approved plan_review stub reviewer_output = tmpdir / "codex_review.md" _run( _py( @@ -99,7 +147,7 @@ def main() -> None: ) 
print(f"demo_loop: Codex dry-run output written to {reviewer_output}") - # Step 4: validate plan_review + # Step 6: validate plan_review context_file = tmpdir / "context.json" context_file.write_text( json.dumps({"reviewer": "Codex", "prior_items": [], "current_round_items": []}), @@ -121,7 +169,37 @@ def main() -> None: print(result.stdout.strip()) assert "validation passed: plan_review" in result.stdout, result.stdout - # Step 5: dry-run post issue comment + # Step 7: attach AGENT_LOOP_META to the reviewer comment + # Compute subject from the plan file (same subject as the coder comment) + reviewer_with_meta = tmpdir / "codex_review_with_meta.md" + _run( + _py( + [ + "state_manager", + "attach-metadata", + "--body-file", + str(reviewer_output), + "--output", + str(reviewer_with_meta), + "--flow", + "plan", + "--role", + "reviewer", + "--agent", + "Codex", + "--round-number", + "1", + "--state", + "approved", + "--subject-plan-file", + str(plan_file), + ] + ) + ) + print(f"demo_loop: reviewer comment with AGENT_LOOP_META: {reviewer_with_meta}") + assert "AGENT_LOOP_META" in reviewer_with_meta.read_text(encoding="utf-8") + + # Step 8: dry-run post the reviewer comment _run( _py( [ @@ -130,7 +208,7 @@ def main() -> None: "--issue", str(args.issue), "--file", - str(reviewer_output), + str(reviewer_with_meta), "--repo", args.repo, "--dry-run", @@ -138,7 +216,7 @@ def main() -> None: ) ) - # Step 6: record session state + # Step 9: record session state _run( _py( [ @@ -170,6 +248,30 @@ def main() -> None: session_data = json.loads(result.stdout) assert session_data.get("last_completed_step") == "post_review", session_data + # Step 10: Verify _resume_plan_round finds the round from the metadata-tagged comments + # This directly tests that attach-metadata produces valid AGENT_LOOP_META. 
+ from coding_review_agent_loop.round_state import _resume_plan_round + + class _FakeComment: + def __init__(self, body: str) -> None: + self.body = body + + fake_comments = [ + _FakeComment(plan_with_meta.read_text(encoding="utf-8")), + _FakeComment(reviewer_with_meta.read_text(encoding="utf-8")), + ] + resume_result = _resume_plan_round(fake_comments, configured_reviewers=["codex"]) + assert resume_result is not None, ( + "build-resume could not find the skill-posted round — AGENT_LOOP_META not recognized" + ) + _plan_text, resumed = resume_result + assert resumed.round_number == 1, f"Expected round 1, got {resumed.round_number}" + assert len(resumed.completed_reviews) == 1, ( + f"Expected 1 completed reviewer (Codex), got {len(resumed.completed_reviews)}" + ) + print(f"demo_loop: _resume_plan_round found round {resumed.round_number} with " + f"{len(resumed.completed_reviews)} completed reviewer(s)") + print("demo_loop: all steps completed successfully") print(f"session state: {json.dumps(session_data, indent=2)}") diff --git a/helpers/state_manager.py b/helpers/state_manager.py index e9f96e1..faa2f54 100644 --- a/helpers/state_manager.py +++ b/helpers/state_manager.py @@ -11,6 +11,21 @@ _resume_plan_round or _resume_pr_round from the existing library. Outputs a JSON resume descriptor to stdout. + attach-metadata + --body-file PATH --output PATH + --flow plan|pr --role coder|reviewer --agent NAME + --round-number N --state approved|blocking + (--subject SHA | --subject-plan-file PATH) + [--prior-items-file PATH] + [--dispositions-file PATH] + [--new-items-file PATH] + [--canonical-plan-file PATH] + + Reads the comment body from --body-file, builds a PostedRoundMetadata + object, and writes the body with AGENT_LOOP_META appended to --output. + The resulting file can be posted via gh_ops post-issue-comment and will + be recognized by build-resume's _resume_plan_round / _resume_pr_round. 
+ write-session --issue N --repo REPO --fields JSON @@ -48,10 +63,16 @@ from coding_review_agent_loop.agents.base import AgentName from coding_review_agent_loop.errors import AgentLoopError from coding_review_agent_loop.round_state import ( + PostedRoundMetadata, + _attach_round_metadata, + _deserialize_disposition, + _deserialize_unresolved_item, + _plan_subject, _resume_plan_round, _resume_pr_round, _serialize_unresolved_item, ) +from coding_review_agent_loop.protocol import ReviewItemDisposition, UnresolvedReviewItem def _session_path(repo: str, issue: int) -> Path: @@ -192,6 +213,74 @@ def cmd_build_resume(args: argparse.Namespace) -> None: print(json.dumps(descriptor, indent=2)) +def _load_item_list(path: str | None) -> list[object]: + if not path: + return [] + try: + return json.loads(Path(path).read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + print(f"state_manager: cannot read {path}: {exc}", file=sys.stderr) + sys.exit(1) + + +def cmd_attach_metadata(args: argparse.Namespace) -> None: + try: + body = Path(args.body_file).read_text(encoding="utf-8") + except OSError as exc: + print(f"state_manager: cannot read body file: {exc}", file=sys.stderr) + sys.exit(1) + + # Compute subject + if args.subject: + subject = args.subject + elif args.subject_plan_file: + try: + plan_text = Path(args.subject_plan_file).read_text(encoding="utf-8") + except OSError as exc: + print(f"state_manager: cannot read subject plan file: {exc}", file=sys.stderr) + sys.exit(1) + subject = _plan_subject(plan_text) + else: + print("state_manager attach-metadata: provide --subject or --subject-plan-file", file=sys.stderr) + sys.exit(1) + + # Load optional item lists + raw_prior = _load_item_list(args.prior_items_file) + raw_dispositions = _load_item_list(args.dispositions_file) + raw_new_items = _load_item_list(args.new_items_file) + + prior_items = tuple(_deserialize_unresolved_item(item) for item in raw_prior) + dispositions = 
tuple(_deserialize_disposition(d) for d in raw_dispositions) + new_items = tuple(_deserialize_unresolved_item(item) for item in raw_new_items) + + canonical_plan: str | None = None + if args.canonical_plan_file: + try: + canonical_plan = Path(args.canonical_plan_file).read_text(encoding="utf-8") + except OSError as exc: + print(f"state_manager: cannot read canonical plan file: {exc}", file=sys.stderr) + sys.exit(1) + + metadata = PostedRoundMetadata( + flow=args.flow, + role=args.role, + agent=args.agent, + round_number=args.round_number, + subject=subject, + prior_items=prior_items, + dispositions=dispositions, + new_items=new_items, + state=args.state, + canonical_plan=canonical_plan, + ) + augmented = _attach_round_metadata(body, metadata) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(augmented, encoding="utf-8") + print(f"metadata attached: {output_path}") + + def cmd_write_session(args: argparse.Namespace) -> None: path = _session_path(args.repo, args.issue) existing = _load_session(path) @@ -232,6 +321,30 @@ def main() -> None: parser.add_argument("--gh-cmd", default="gh") subparsers = parser.add_subparsers(dest="subcommand", required=True) + # attach-metadata + p_meta = subparsers.add_parser( + "attach-metadata", + help="Attach AGENT_LOOP_META to a comment body so build-resume can reconstruct the round.", + ) + p_meta.add_argument("--body-file", required=True, help="Input comment body file.") + p_meta.add_argument("--output", required=True, help="Output file with AGENT_LOOP_META attached.") + p_meta.add_argument("--flow", required=True, choices=["plan", "pr"]) + p_meta.add_argument("--role", required=True, choices=["coder", "reviewer"]) + p_meta.add_argument("--agent", required=True, help="Agent display name (e.g. 
'Claude', 'Codex').") + p_meta.add_argument("--round-number", type=int, required=True) + p_meta.add_argument("--state", required=True, choices=["approved", "blocking"]) + p_meta.add_argument("--subject", default=None, help="Pre-computed subject SHA256 hex string.") + p_meta.add_argument("--subject-plan-file", default=None, + help="Compute subject as sha256(plan text) from this file.") + p_meta.add_argument("--prior-items-file", default=None, + help="JSON array of serialized UnresolvedReviewItem.") + p_meta.add_argument("--dispositions-file", default=None, + help="JSON array of serialized ReviewItemDisposition.") + p_meta.add_argument("--new-items-file", default=None, + help="JSON array of serialized UnresolvedReviewItem (new items from this turn).") + p_meta.add_argument("--canonical-plan-file", default=None, + help="Plan text file (written as canonical_plan for coder turns).") + # build-resume p_resume = subparsers.add_parser("build-resume", help="Build a resume descriptor from GitHub comments.") p_resume.add_argument("--issue", type=int, required=True) @@ -265,6 +378,7 @@ def main() -> None: args = parser.parse_args() dispatch = { + "attach-metadata": cmd_attach_metadata, "build-resume": cmd_build_resume, "write-session": cmd_write_session, "read-session": cmd_read_session, diff --git a/helpers/validate_response.py b/helpers/validate_response.py index ea3ddb4..fc44329 100644 --- a/helpers/validate_response.py +++ b/helpers/validate_response.py @@ -33,14 +33,13 @@ # Make src importable when run from the repo root as `python -m helpers.validate_response` sys.path.insert(0, str(Path(__file__).parent.parent / "src")) -from coding_review_agent_loop.errors import AgentLoopError -from coding_review_agent_loop.protocol import parse_plan_state +from coding_review_agent_loop.errors import AgentLoopError, UnknownPriorItemDispositionError +from coding_review_agent_loop.protocol import parse_plan_state, validate_structured_plan_revision from 
coding_review_agent_loop.unresolved_items import ( _validate_plan_review_response, _validate_review_response, _validate_coder_followup_response, ) -from coding_review_agent_loop.protocol import validate_structured_plan_revision from coding_review_agent_loop.round_state import _deserialize_unresolved_item from coding_review_agent_loop.github import HumanReviewRequirement @@ -134,9 +133,26 @@ def main() -> None: human_requirements=human_requirements or None, ) elif kind == "plan_revision": - result = validate_structured_plan_revision(text) - if result is None: + parsed = validate_structured_plan_revision(text) + if parsed is None: raise AgentLoopError("Response did not parse as a structured plan_revision.") + # Validate that dispositions only reference known prior item IDs, + # matching the check in the headless orchestrator's _validate_plan_revision_response. + if prior_items: + allowed_ids = {item.item_id for item in prior_items} + unknown = { + disposition.item_id + for disposition in parsed.prior_plan_item_dispositions + } - allowed_ids + if unknown: + raise UnknownPriorItemDispositionError( + unknown_ids=tuple(sorted(unknown)), + allowed_ids=tuple(sorted(allowed_ids)), + same_round_description=( + "Same-round findings are informational only and must not be " + "dispositioned as prior carried items." 
+ ), + ) except AgentLoopError as exc: print(f"validation failed: {kind}: {exc}", file=sys.stderr) sys.exit(1) diff --git a/tests/test_skill_helpers.py b/tests/test_skill_helpers.py index 1c835e0..6ac6642 100644 --- a/tests/test_skill_helpers.py +++ b/tests/test_skill_helpers.py @@ -11,6 +11,10 @@ import pytest HELPERS = Path(__file__).parent.parent / "helpers" +SRC = Path(__file__).parent.parent / "src" + +# Make library importable for direct calls in this test file +sys.path.insert(0, str(SRC)) def _run(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]: @@ -95,7 +99,6 @@ def test_valid_plan_review_accepted(self) -> None: assert "validation passed: plan_review" in result.stdout def test_plan_review_with_unknown_prior_item_rejected(self) -> None: - # A review that disposes unknown prior item IDs must be rejected. review = json.dumps( { "schema_version": 1, @@ -129,9 +132,91 @@ def test_plan_review_with_unknown_prior_item_rejected(self) -> None: ) assert result.returncode != 0 + def test_plan_revision_with_unknown_prior_item_rejected(self) -> None: + """plan_revision must reject dispositions for item IDs not in the prior-items ledger.""" + revision = json.dumps( + { + "schema_version": 1, + "kind": "plan_revision", + "state": "blocking", + "summary": "Revised plan.", + "prior_plan_item_dispositions": [ + {"item_id": "item-unknown-99", "disposition": "resolved"} + ], + "plan_steps": ["Step A", "Step B"], + } + ) + "\n\n-- Anthropic Claude\n" + + path = _write_tmp(revision) + # context has item-1 but revision references item-unknown-99 + prior_item = { + "item_id": "item-1", + "reviewer": "Codex", + "source_round": 1, + "text": "Some issue.", + "status": "blocking", + "source_status": "blocking", + "notes": [], + } + ctx_path = _write_tmp( + json.dumps({"prior_items": [prior_item], "current_round_items": []}), + suffix=".json", + ) + result = _run( + "helpers.validate_response", + "--file", + path, + "--kind", + "plan_revision", + "--context-file", + 
ctx_path, + check=False, + ) + assert result.returncode != 0 + + def test_plan_revision_with_known_items_accepted(self) -> None: + """plan_revision with only known prior item IDs must be accepted.""" + prior_item = { + "item_id": "item-1", + "reviewer": "Codex", + "source_round": 1, + "text": "Some issue.", + "status": "blocking", + "source_status": "blocking", + "notes": [], + } + revision = json.dumps( + { + "schema_version": 1, + "kind": "plan_revision", + "state": "blocking", + "summary": "Revised plan.", + "prior_plan_item_dispositions": [ + {"item_id": "item-1", "disposition": "resolved"} + ], + "plan_steps": ["Step A", "Step B"], + } + ) + "\n\n-- Anthropic Claude\n" + + path = _write_tmp(revision) + ctx_path = _write_tmp( + json.dumps({"prior_items": [prior_item], "current_round_items": []}), + suffix=".json", + ) + result = _run( + "helpers.validate_response", + "--file", + path, + "--kind", + "plan_revision", + "--context-file", + ctx_path, + ) + assert "validation passed: plan_revision" in result.stdout + # --------------------------------------------------------------------------- -# helpers/state_manager.py (session round-trip, no live gh required) +# helpers/state_manager.py (session round-trip + attach-metadata) # --------------------------------------------------------------------------- class TestStateManager: @@ -190,6 +275,108 @@ def test_write_and_clear_pending_comment(self) -> None: data = json.loads(result.stdout) assert "pending_comment_body" not in data + def test_attach_metadata_produces_valid_agent_loop_meta(self) -> None: + """attach-metadata must embed AGENT_LOOP_META that _resume_plan_round recognizes.""" + plan_body = _VALID_PLAN_STATE + + with tempfile.TemporaryDirectory() as tmpdir: + body_file = Path(tmpdir) / "plan.md" + body_file.write_text(plan_body, encoding="utf-8") + output_file = Path(tmpdir) / "plan_tagged.md" + + _run( + "helpers.state_manager", + "attach-metadata", + "--body-file", + str(body_file), + "--output", + 
str(output_file), + "--flow", + "plan", + "--role", + "coder", + "--agent", + "Claude", + "--round-number", + "1", + "--state", + "approved", + "--subject-plan-file", + str(body_file), + "--canonical-plan-file", + str(body_file), + ) + + tagged = output_file.read_text(encoding="utf-8") + assert "AGENT_LOOP_META" in tagged + + # Verify _resume_plan_round can reconstruct from this comment alone + from coding_review_agent_loop.round_state import _resume_plan_round + + class _FC: + def __init__(self, body: str) -> None: + self.body = body + + result = _resume_plan_round([_FC(tagged)], configured_reviewers=["codex"]) + # A coder comment with no reviewer comments → returns the round so reviewers can run + assert result is not None, "build-resume could not find skill-posted coder round" + _plan_text, resumed = result + assert resumed.round_number == 1 + + def test_attach_metadata_reviewer_found_by_resume(self) -> None: + """Coder + reviewer comments both with AGENT_LOOP_META → resume finds completed reviewer.""" + plan_body = _VALID_PLAN_STATE + reviewer_body = _VALID_PLAN_REVIEW + + with tempfile.TemporaryDirectory() as tmpdir: + plan_file = Path(tmpdir) / "plan.md" + plan_file.write_text(plan_body, encoding="utf-8") + plan_tagged = Path(tmpdir) / "plan_tagged.md" + review_file = Path(tmpdir) / "review.md" + review_file.write_text(reviewer_body, encoding="utf-8") + review_tagged = Path(tmpdir) / "review_tagged.md" + + # Attach coder metadata + _run( + "helpers.state_manager", + "attach-metadata", + "--body-file", str(plan_file), + "--output", str(plan_tagged), + "--flow", "plan", "--role", "coder", "--agent", "Claude", + "--round-number", "1", "--state", "approved", + "--subject-plan-file", str(plan_file), + "--canonical-plan-file", str(plan_file), + ) + + # Attach reviewer metadata (same subject) + _run( + "helpers.state_manager", + "attach-metadata", + "--body-file", str(review_file), + "--output", str(review_tagged), + "--flow", "plan", "--role", "reviewer", 
"--agent", "Codex", + "--round-number", "1", "--state", "approved", + "--subject-plan-file", str(plan_file), + ) + + from coding_review_agent_loop.round_state import _resume_plan_round + + class _FC: + def __init__(self, body: str) -> None: + self.body = body + + result = _resume_plan_round( + [_FC(plan_tagged.read_text(encoding="utf-8")), + _FC(review_tagged.read_text(encoding="utf-8"))], + configured_reviewers=["codex"], + ) + assert result is not None, "build-resume did not find the round" + _plan_text, resumed = result + assert resumed.round_number == 1 + assert len(resumed.completed_reviews) == 1, ( + f"Expected 1 completed reviewer (Codex), got {len(resumed.completed_reviews)}" + ) + # --------------------------------------------------------------------------- # helpers/run_external.py (dry-run only) diff --git a/tests/test_skill_loop.py b/tests/test_skill_loop.py index a6d732c..ab7ece0 100644 --- a/tests/test_skill_loop.py +++ b/tests/test_skill_loop.py @@ -17,7 +17,9 @@ def test_demo_loop_dry_run() -> None: - exits 0 - prints "validation passed: plan_state" - prints "validation passed: plan_review" + - produces metadata-tagged comments with AGENT_LOOP_META - writes a session file with last_completed_step=post_review + - verifies _resume_plan_round can reconstruct the round from the metadata """ repo = "demo/skill-loop-test" issue = 88888 @@ -47,6 +49,9 @@ def test_demo_loop_dry_run() -> None: assert "validation passed: plan_state" in result.stdout, result.stdout assert "validation passed: plan_review" in result.stdout, result.stdout + # demo_loop now also verifies _resume_plan_round internally + assert "_resume_plan_round found round" in result.stdout, result.stdout + # Verify session state was written with last_completed_step=post_review slug = repo.replace("/", "-").replace(":", "-") state_home = Path(