diff --git a/.well-known/agents-shipgate.json b/.well-known/agents-shipgate.json index 616ae4f..be5c1d5 100644 --- a/.well-known/agents-shipgate.json +++ b/.well-known/agents-shipgate.json @@ -10,11 +10,12 @@ "url": "https://threemoonslab.com" }, "category": "agent_release_readiness", - "primary_wedge": "tool_use_readiness", + "primary_wedge": "ai_generated_agent_pr_verifier", + "primary_use_case": "deterministic merge verdicts for AI-generated agent capability changes", "positioning": { - "short": "Agent release readiness for tool-using AI agents", - "answer": "Agents Shipgate is described by the canonical tagline: The deterministic merge gate for AI-generated agent capability changes. It is an open-source CLI and GitHub Action from Three Moons Lab that produces deterministic Tool-Use Readiness Reports before production-like permissions are granted.", - "primary_use_case": "Run in CI before promoting an agent whose MCP, OpenAPI, SDK, workflow, or plugin tool surface changed.", + "short": "Merge verdicts for AI-generated agent PRs", + "answer": "Agents Shipgate is described by the canonical tagline: The deterministic merge gate for AI-generated agent capability changes. 
It is an open-source, local-first CLI and GitHub Action from Three Moons Lab that verifies PRs from Codex, Claude Code, Cursor, or humans and returns a deterministic merge verdict before agent code lands.", + "primary_use_case": "Run on PRs from Codex, Claude Code, Cursor, or humans when agent tools, prompts, permissions, MCP/OpenAPI surfaces, policies, or release gates change.", "not_for": ["llm_evals", "runtime_guardrails", "runtime_observability", "general_linting"] }, "primary_keywords": [ @@ -26,7 +27,9 @@ "OpenAPI tool scanning", "OpenAI Agents SDK release gate", "GitHub Action for AI agents", - "tool surface scanning" + "AI-generated PR review", + "agent capability merge verdict", + "deterministic merge verdict" ], "recommended_github_topics": [ "ai-agents", @@ -43,38 +46,87 @@ ], "audiences": ["agent_builders", "platform_engineers", "security_reviewers", "coding_agents"], "when_to_use": [ + "AI-generated agent PRs from Codex, Claude Code, Cursor, or similar tools", "AI agent tool-surface changes", "MCP or OpenAPI surface changes", "SDK tool decorator changes", "n8n workflow JSON changes", "Codex plugin package or marketplace changes", - "permissions, scopes, approval, confirmation, prompt, or release-gate changes" + "permissions, scopes, approval, confirmation, prompt, or release-gate changes", + "Shipgate CI, AGENTS.md, Claude Code skill, Cursor rule, baseline, waiver, or suppression changes" ], "package": { "pypi": "agents-shipgate", "github_action": "ThreeMoonsLab/agents-shipgate@v0.11.0", "github_repo": "ThreeMoonsLab/agents-shipgate" }, + "release_status": { + "track": "verify-capable release", + "latest_release": "v0.11.0" + }, "install": { "pipx": "pipx install agents-shipgate", "pip": "python -m pip install agents-shipgate", "uv": "uv tool install agents-shipgate" }, "binaries": ["agents-shipgate", "shipgate"], - "quickstart": "agents-shipgate init --workspace . 
--write && agents-shipgate scan -c shipgate.yaml", - "verify": "agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json", - "fixture_run": "agents-shipgate fixture run support_refund_agent", + "quickstart": "agents-shipgate verify --preview --json", + "commands": { + "preview": "agents-shipgate verify --preview --json", + "install_ai_coding_workflow": "agents-shipgate init --workspace . --write --ci --agent-instructions=all", + "verify_pr": "agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json", + "trigger": "agents-shipgate trigger --base origin/main --head HEAD --json", + "feedback_export": "agents-shipgate feedback export --from agents-shipgate-reports/verifier.json --redact --out shipgate-feedback.json" + }, + "fixture_run": "agents-shipgate fixture run ai_generated_refund_pr", + "static_scan_fixture_run": "agents-shipgate fixture run support_refund_agent", "self_check": "agents-shipgate self-check --json", "contract": "agents-shipgate contract --json", "contract_version": "1", "inputs": ["mcp", "openapi", "openai_agents_sdk", "anthropic_api", "google_adk", "langchain", "crewai", "openai_api", "codex_plugin", "n8n"], - "outputs": ["markdown", "json", "sarif", "packet_md", "packet_json", "packet_html", "verifier_json", "pr_comment_md"], + "outputs": ["markdown", "json", "sarif", "packet_md", "packet_json", "packet_html", "verifier_json", "pr_comment_md", "feedback_json"], + "artifacts": { + "verifier": "agents-shipgate-reports/verifier.json", + "report": "agents-shipgate-reports/report.json", + "pr_comment": "agents-shipgate-reports/pr-comment.md", + "feedback": "shipgate-feedback.json" + }, "gating_signal": "release_decision.decision", + "merge_verdicts": ["mergeable", "human_review_required", "insufficient_evidence", "blocked", "unknown"], + "release_decisions": ["passed", "review_required", "insufficient_evidence", "blocked"], + "merge_verdict_labels": 
{ + "passed": "mergeable", + "review_required": "human_review_required", + "insufficient_evidence": "insufficient_evidence", + "blocked": "blocked" + }, + "verifier_read_order": [ + "merge_verdict", + "can_merge_without_human", + "first_next_action", + "fix_task", + "capability_review.top_changes", + "release_decision.decision" + ], + "do_not_auto_assert": [ + "approval", + "confirmation", + "idempotency", + "broad-scope", + "prohibited-action", + "runtime-trace", + "suppression", + "waiver", + "baseline", + "policy-weakening" + ], "trust_model": "static_by_default", "schemas": { "manifest": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/manifest-v0.1.json", "report": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/report-schema.v0.22.json", + "verifier": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/verifier-schema.v0.1.json", "packet": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/packet-schema.v0.6.json", + "feedback": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/feedback-schema.v0.1.json", "checks_catalog": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/checks.json" }, "agent_instructions": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/AGENTS.md", @@ -91,6 +143,23 @@ "claude_code": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/agents/use-with-claude-code.md", "cursor": "https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/agents/use-with-cursor.md" }, + "coding_agent_surfaces": { + "codex": { + "agents_md": "AGENTS.md", + "skill_path": ".agents/skills/agents-shipgate/", + "install": "agents-shipgate init --workspace . 
--write --ci --agent-instructions=agents-md,codex-skill" + }, + "claude_code": { + "claude_md": "CLAUDE.md", + "skill_path": ".claude/skills/agents-shipgate/", + "slash_command": ".claude/commands/shipgate.md", + "install": "agents-shipgate init --workspace . --write --ci --agent-instructions=claude-md,claude-code-skill" + }, + "cursor": { + "rules_path": ".cursor/rules/agents-shipgate.mdc", + "install": "agents-shipgate init --workspace . --write --ci --agent-instructions=cursor" + } + }, "exit_codes": { "0": "pass", "2": "config_error", diff --git a/AGENTS.md b/AGENTS.md index cd6a60a..0a91bcf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -84,8 +84,9 @@ scanned; add `--base origin/main --head HEAD` only for a committed PR/CI ref after making the base ref available. Read `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict` (`mergeable | human_review_required | insufficient_evidence | blocked | -unknown`), `capability_review.top_changes[]`, and `first_next_action`. -Then read `agents-shipgate-reports/report.json.release_decision.decision` +unknown`), `can_merge_without_human`, `first_next_action`, `fix_task`, and +`capability_review.top_changes[]`. Then read +`agents-shipgate-reports/report.json.release_decision.decision` (`blocked | review_required | insufficient_evidence | passed`), which remains the release gate. Do not report completion while `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user explicitly @@ -96,7 +97,13 @@ expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions. Verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible and route them to human review. 
-To verify your install on a known fixture without writing any YAML: +To reproduce the verify-native blocked refund PR demo without writing YAML: + +```bash +agents-shipgate fixture run ai_generated_refund_pr +``` + +To verify your install on the older static scan fixture: ```bash agents-shipgate fixture run support_refund_agent diff --git a/CHANGELOG.md b/CHANGELOG.md index 872e3ae..e21bebf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## Unreleased +- **Verifier adoption-loop release prep.** Public docs and discovery metadata now + lead with the verify-first adoption path, pinned `v0.11.0` snippets, verifier + artifacts, merge verdicts, `fix_task`, and explicit Action merge-policy + examples. Adds the verify-native `ai_generated_refund_pr` fixture for the + blocked refund PR demo and introduces the provisional + `agents-shipgate feedback export` command plus + `docs/feedback-schema.v0.1.json` for redacted design-partner feedback loops. + ## 0.11.0 - 2026-05-31 - **Verifier PR comment v2 + additive Action outputs.** The GitHub Action now diff --git a/README.md b/README.md index 43c2451..677acaa 100644 --- a/README.md +++ b/README.md @@ -81,11 +81,25 @@ The release gate is `agents-shipgate-reports/report.json` → `release_decision.decision` (`blocked | review_required | insufficient_evidence | passed`). The PR/controller surface is `agents-shipgate-reports/verifier.json` → `merge_verdict` (`mergeable | human_review_required | insufficient_evidence | -blocked | unknown`), a deterministic projection of the release decision. +blocked | unknown`), a deterministic projection of the release decision. Read +`verifier.json` first for `merge_verdict`, `can_merge_without_human`, +`first_next_action`, `fix_task`, and `capability_review.top_changes`. -Want a 5-minute demo with zero setup? Scan the bundled fixture. 
If you already -have [`uv`](https://docs.astral.sh/uv/) installed, the fixture path is a -one-command install check with no persistent install: +Want a 5-minute verifier demo with zero setup? Run the verify-native blocked +refund PR fixture: + +```bash +agents-shipgate fixture run ai_generated_refund_pr +``` + +It builds a temporary base/head git history where the head commit adds +`stripe.create_refund`, then writes `verifier.json`, `report.json`, and +`pr-comment.md`. The expected merge verdict is `blocked`. + +The older static scan fixture remains useful when you want the full Tool-Use +Readiness Report without a PR diff. If you already have +[`uv`](https://docs.astral.sh/uv/) installed, the fixture path is a one-command +install check with no persistent install: ```bash uvx agents-shipgate fixture run support_refund_agent @@ -185,12 +199,14 @@ agents-shipgate init --workspace . --write --ci --agent-instructions=all agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json For committed PR/CI refs, add `--base origin/main --head HEAD` after making the base ref available. Read `agents-shipgate-reports/verifier.json` first and lead -with `merge_verdict`, then read `agents-shipgate-reports/report.json` for -`release_decision.decision`. Do not claim completion when `merge_verdict` is -`blocked`, `insufficient_evidence`, or `human_review_required` unless the user -explicitly accepts human review. Do not auto-assert approval. Never suppress -findings, lower severity, expand baselines or waivers, remove Shipgate CI, or -weaken agent instructions just to make the verifier pass. +with `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, +and `capability_review.top_changes`, then read +`agents-shipgate-reports/report.json` for `release_decision.decision`. Do not +claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or +`human_review_required` unless the user explicitly accepts human review. 
Do not auto-assert approval. Do not auto-assert confirmation, idempotency, +broad-scope safety, prohibited-action enforcement, runtime-trace proof, +suppressions, waivers, baselines, or policy weakening. Never remove Shipgate CI +or weaken agent instructions just to make the verifier pass. ``` ## Add the Codex adoption kit @@ -579,7 +595,7 @@ jobs: shipgate_version: '0.11.0' ``` -Switch to `ci_mode: strict` only after your team has reviewed the advisory output. See [`examples/github-actions/`](examples/github-actions/) for strict / baseline / SARIF / multi-config / changed-paths recipes. +After adoption, choose an explicit merge policy. [`examples/github-actions/07-block-on-blocked-verdict.yml`](examples/github-actions/07-block-on-blocked-verdict.yml) blocks only when `merge_verdict == blocked`; [`examples/github-actions/08-require-mergeable.yml`](examples/github-actions/08-require-mergeable.yml) requires `can_merge_without_human == true`. See [`examples/github-actions/`](examples/github-actions/) for strict / baseline / SARIF / multi-config / changed-paths recipes. Inputs: `config`, `ci_mode` (`advisory` or `strict`), `fail_on`, `baseline`, `baseline_mode`, `diff_from`, `diff_base`, `base_ref`, `head_ref`, `policy_packs`, `no_plugins`, `output_dir`, `upload_artifact`, `pr_comment`, `github_token`, `shipgate_version`. Set `diff_base: target` for PR base/head diff enrichment. The action delegates to `agents-shipgate verify` and never fetches; use `fetch-depth: 0` on checkout, or fetch the base ref in an earlier step. If `head_ref` is set, verify scans an isolated archive of that ref; otherwise it scans the checked-out workspace. If an explicit base ref or PR diff cannot be inspected, verify skips a head-only scan, writes `merge_verdict: "unknown"` to `verifier.json`, and exits 2. 
@@ -587,6 +603,16 @@ Outputs: `decision`, `merge_verdict`, `can_merge_without_human`, `blocker_count` Set `shipgate_version` to install a pinned PyPI release instead of the action source when your workflow requires package/version parity. +For a design-partner review, export the small redacted verifier feedback +artifact instead of sending raw report evidence: + +```bash +agents-shipgate feedback export \ + --from agents-shipgate-reports/verifier.json \ + --redact \ + --out shipgate-feedback.json +``` + ## Pricing And Open Source Stance Agents Shipgate is and will remain free OSS for individuals and teams running it on their own infrastructure. The core manifest-first scanner, built-in checks, Markdown report, and JSON report are intended to remain open source. We do not collect telemetry and do not require an account. @@ -617,6 +643,7 @@ readers and AI search ingest. - [Policy packs](docs/policy-packs.md) - [Baseline workflow](docs/baseline.md) - [JSON report schema v0.22](docs/report-schema.v0.22.json) +- [Feedback export schema v0.1](docs/feedback-schema.v0.1.json) - [Privacy and redaction](docs/privacy.md) - [Trust model](docs/trust-model.md) - [AI search summary](docs/ai-search-summary.md) diff --git a/STABILITY.md b/STABILITY.md index 2aaa41e..1f87e3d 100644 --- a/STABILITY.md +++ b/STABILITY.md @@ -35,6 +35,15 @@ These commands and flags are stable across all `0.x.y` releases. They will only | `agents-shipgate fixture verify` | `` | | `agents-shipgate self-check` | `--json` | +### Provisional CLI command surface + +`agents-shipgate feedback export` is introduced in v0.11 for design-partner +feedback loops. Its current flags are `--from`, `--redact`/`--no-redact`, +`--out`, and `--json`. Treat the command and `feedback_schema_version: "0.1"` +payload as provisional during the v0.11 design-partner cycle; the schema file is +published so consumers can validate it, and any incompatible change must bump +`feedback_schema_version`. 
+ ### Exit codes | Code | Meaning | @@ -375,6 +384,12 @@ tests on every CI run, not by convention: local `git rev-parse`, `git diff`, `git ls-files`, and `git archive` for verify base/head and working-tree orchestration. It never fetches, uses fixed argv, captures output, and never executes user code. + - **`cli/fixture.py`** — one `subprocess.run` helper invokes local + `git init`, `git config`, `git add`, `git commit`, and `git update-ref` + against a temporary bundled fixture copy so + `fixture run ai_generated_refund_pr` can produce verifier artifacts. + This allowlisted meta-CLI surface uses fixed argv, no shell, no network + fetch, and no user-code execution. - **`fixtures.py`** — one `importlib.resources.files('agents_shipgate')` call to resolve the bundled fixture directory. - **`cli/discovery/agent_instructions/adoption_kit.py`** — one @@ -699,6 +714,37 @@ Diff remains explanatory only. Fixture names listed by `agents-shipgate fixture list` are stable. Names will not be renamed. New fixtures may be added. +`ai_generated_refund_pr` is the verify-native demo fixture. It creates a +temporary base/head git history and writes `verifier.json`, `report.json`, and +`pr-comment.md` for a blocked refund-capability PR. + +### Feedback export + +`agents-shipgate feedback export` derives a small local artifact from +`agents-shipgate-reports/verifier.json`. The current schema is +[`docs/feedback-schema.v0.1.json`](docs/feedback-schema.v0.1.json). Current +v0.1 fields: + +- `feedback_schema_version` +- `source_verifier` +- `redacted` +- `merge_verdict` +- `can_merge_without_human` +- `decision` +- `mode` +- `trigger` +- `first_next_action` +- `fix_task` +- `capability_review` +- `finding_ids` +- `reviewer_feedback_requested` +- `artifacts` + +The export is a design-partner and false-positive triage aid. It is derived +from verifier projections and does not include raw finding evidence. 
With +`--redact` (the default), local artifact paths are reduced to filenames so the +artifact does not leak usernames or confidential workspace directory names. + ### Agent-skill paths The following paths are part of the public agent surface and will not move within `0.x`: diff --git a/action.yml b/action.yml index 379e577..a664a8f 100644 --- a/action.yml +++ b/action.yml @@ -1,11 +1,11 @@ name: Agents Shipgate description: >- The deterministic merge gate for AI-generated agent capability changes. - Scans MCP, OpenAPI, OpenAI Agents SDK, Anthropic, Google ADK, - LangChain, CrewAI, OpenAI API, Codex plugin, and n8n artifacts. - Writes a Tool-Use Readiness Report (Markdown / JSON / SARIF) before your - agent gets production-like permissions. Local-first and static-by-default. Audited - exceptions are pinned per call site in + Verifies PRs that change MCP, OpenAPI, OpenAI Agents SDK, Anthropic, + Google ADK, LangChain, CrewAI, OpenAI API, Codex plugin, and n8n + artifacts. Writes verifier.json, report.json, pr-comment.md, and SARIF + before your agent-capability change lands. Local-first and + static-by-default. Audited exceptions are pinned per call site in tests/test_adapter_static_only.py::ALLOWED_EXCEPTIONS. Apache-2.0. 
author: ThreeMoonsLab diff --git a/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json b/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json index 806a7ec..6b5ccf5 100644 --- a/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json +++ b/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json @@ -5,7 +5,12 @@ "SKILL.md": [ "139b5e00b916448cf2de4752221c66296a7e546865b1efdf93f98d8bb5cb3019", "5ab92f77352ea31ad03c28e1d596b20ada24fa4176a5e0b0b38990e4a00fb5bb", - "9ce82bdc41f2e1ea28c7fec3aaeec0137efeacf8986b66a9ac0e3eccc5abd834" + "9ce82bdc41f2e1ea28c7fec3aaeec0137efeacf8986b66a9ac0e3eccc5abd834", + "e1713eecbbb1538987b7bf2cbe90bcdac9c4491f250105b6c68e788c81d49de3", + "bd4755e06715c839608c09da302ed844c764fd3e4047d7bdf495d68dc559c2a5" + ], + "prompts/stabilize-strict-mode.md": [ + "ac9a176738ab2538d725c29ba302637bac6b287588e07d952aae352f85ab98cc" ] }, "bootstrap_legacy_sha256": { diff --git a/adoption-kits/claude-code-skill/SKILL.md b/adoption-kits/claude-code-skill/SKILL.md index ff7690b..51480ff 100644 --- a/adoption-kits/claude-code-skill/SKILL.md +++ b/adoption-kits/claude-code-skill/SKILL.md @@ -5,7 +5,7 @@ description: Use when the user wants to add or run the deterministic merge gate # agents-shipgate skill -`agents-shipgate` is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It analyzes `shipgate.yaml` plus tool sources (MCP exports, OpenAPI specs, OpenAI Agents SDK Python files, Anthropic Messages API artifacts, Google ADK files, LangChain/LangGraph files, CrewAI files, OpenAI API artifacts, Codex plugin packages and marketplaces, n8n workflow JSON) and emits deterministic findings as Markdown, JSON, and SARIF. +`agents-shipgate` is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. 
It analyzes `shipgate.yaml` plus tool sources (MCP exports, OpenAPI specs, OpenAI Agents SDK Python files, Anthropic Messages API artifacts, Google ADK files, LangChain/LangGraph files, CrewAI files, OpenAI API artifacts, Codex plugin packages and marketplaces, n8n workflow JSON) and emits deterministic verifier artifacts, findings, Markdown, JSON, and SARIF. It does **not** run agents, call tools, invoke LLMs, connect to MCP servers, or send telemetry by default. Static analysis only; audited exceptions are pinned in `tests/test_adapter_static_only.py::ALLOWED_EXCEPTIONS`. @@ -14,6 +14,7 @@ It does **not** run agents, call tools, invoke LLMs, connect to MCP servers, or ## When to use this skill - The user asks to add Tool-Use Readiness or pre-merge checks to an agent project. +- The user asks whether an AI-generated agent PR can merge. - The repo already has `shipgate.yaml`, `agents-shipgate-reports/verifier.json`, or `agents-shipgate-reports/report.json`. - The user asks to fix, triage, suppress, or explain a Shipgate finding. - The user wants to add Shipgate to CI (GitHub Actions, GitLab CI, CircleCI). @@ -45,7 +46,11 @@ Pick the matching task and follow the linked recipe verbatim. Recipes are bundle Always: 1. Set `AGENTS_SHIPGATE_AGENT_MODE=1` so errors emit a `next_action` JSON line on stderr. -2. For verify runs, parse `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict`; then parse `report.json` and use `release_decision.decision` as the release gate. +2. For verifier runs, parse `agents-shipgate-reports/verifier.json` first: + `merge_verdict`, `can_merge_without_human`, `first_next_action`, + `fix_task`, and `capability_review.top_changes`. Then parse + `agents-shipgate-reports/report.json.release_decision.decision`; it is the + release gate. 3. Before finishing an agent-related local diff in a repo with `shipgate.yaml`, run `agents-shipgate verify --workspace . 
--config shipgate.yaml --ci-mode advisory --format json`, or report the exact `agents-shipgate trigger` skip verdict. Add `--base origin/main --head HEAD` only for committed PR/CI verification after making the base ref available. 4. Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. 5. Confirm with the user before any command that writes files (`init --write`, `baseline save`). @@ -66,6 +71,7 @@ For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbuc - **CLI surface** is frozen for `0.x` — see https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md. - **Installed CLI contract**: when available, run `agents-shipgate contract --json` to verify local schema versions, `release_decision.decision`, and manual-review signal fields. Older installs should use [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) or upgrade before automating against the local contract command. +- **Verifier JSON**: `verifier_schema_version: "0.1"`. Read `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, `capability_review.top_changes`, `trust_root_touched`, and `policy_weakened` before summarizing an AI-generated PR. `merge_verdict` is a deterministic projection; the gate remains `report.json.release_decision.decision`. - **Report JSON**: `report_schema_version: "0.22"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. 
v0.22 adds the verifier-cycle blocks `capability_change`, `protected_surface_changes`, `effective_policy`, `human_ack`, and `verifier_summary` — all reviewer-facing projections that never gate independently (`release_decision.decision` stays the only gate). To remove heuristic findings from the active gate, rerun scan with `--no-heuristics`; filtered findings remain in `findings[]` with `suppressed=true`, and `heuristics_filter` records `enabled`, `excluded_provenance_kinds`, `filtered_finding_count`, and `filtered_by_kind`. To inspect provenance without changing gate behavior, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.22.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.22.json). - **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.6.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.6.json) (latest; v0.6 adds the top-level `evidence_matrix` compact review section AND `ReleaseDecisionItem.{source, policy_evidence_source}` for reviewer-grade dual-source provenance over the v0.5 baseline). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v06). Use the packet for reviewer-shaped output; use the report for finding details. 
- **Single source of truth for the contract**: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). When the schema bumps, that file updates first. diff --git a/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md b/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md index e5fc8cd..9ec69db 100644 --- a/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md +++ b/adoption-kits/claude-code-skill/prompts/stabilize-strict-mode.md @@ -39,6 +39,7 @@ The user has Agents Shipgate running in **advisory** mode and wants to graduate ```yaml - uses: ThreeMoonsLab/agents-shipgate@v0.11.0 with: + shipgate_version: '0.11.0' ci_mode: strict fail_on: critical baseline: .agents-shipgate/baseline.json diff --git a/docs/INDEX.md b/docs/INDEX.md index 5edbabd..d8c4021 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -60,7 +60,7 @@ A single entry point for human readers and AI agents walking the `docs/` tree. ## Workflows -- [`quickstart.md`](quickstart.md) — 60-second install + first scan +- [`quickstart.md`](quickstart.md) — verify-first AI-generated PR workflow - [`faq.md`](faq.md) — common questions, AI-search-friendly - [`integrations.md`](integrations.md) — CI/CD integration recipes (GitHub Actions, GitLab CI, CircleCI, Jenkins snippet) - [`troubleshooting.md`](troubleshooting.md) — error messages → fixes diff --git a/docs/agent-contract-current.md b/docs/agent-contract-current.md index 1b33ef2..03fc063 100644 --- a/docs/agent-contract-current.md +++ b/docs/agent-contract-current.md @@ -99,6 +99,12 @@ In `agents-shipgate-reports/verifier.json`, read these additive fields - `human_review` — `{required: bool, why: str|null}`. - `first_next_action` — `{actor: "coding_agent"|"human", kind, command, why}`. The `actor` separates mechanical coding-agent work from human-only decisions. 
+- `fix_task` — `{actor, safe_to_attempt, instructions[], forbidden_shortcuts[], + verification_command}` or `null`. This is the deterministic repair boundary: + `actor: coding_agent` with `safe_to_attempt: true` means the agent may attempt + the listed mechanical fix and rerun `verification_command`; `actor: human` + means the agent must not invent approval, idempotency, policy, waiver, + baseline, or trust-root evidence to make the gate pass. - `trust_root_touched` — `bool`; `true` when the PR changed a release-gate trust root (`shipgate.yaml`, the Shipgate CI workflow, `AGENTS.md`/`CLAUDE.md`, policy packs, prompts, baselines, waivers, etc.). Backed by the diff --git a/docs/agents/use-with-claude-code.md b/docs/agents/use-with-claude-code.md index b03abd1..6df6db0 100644 --- a/docs/agents/use-with-claude-code.md +++ b/docs/agents/use-with-claude-code.md @@ -104,13 +104,15 @@ Then read `agents-shipgate-reports/verifier.json` and **lead with `release_decision.decision`, which stays the gate in `agents-shipgate-reports/report.json`. Read `capability_review.top_changes[]` next for the highest-signal tool/action access changes, and check -`trust_root_touched`. +`trust_root_touched`, `policy_weakened`, and `fix_task`. Do **not** claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user has -explicitly accepted the human-review requirement. When `first_next_action.actor` -is `human`, surface the item for a person — approval, confirmation, idempotency, -broad-scope, and prohibited-action evidence cannot be synthesized. +explicitly accepted the human-review requirement. Follow `fix_task` as the +repair boundary. When `first_next_action.actor` or `fix_task.actor` is `human`, +surface the item for a person — approval, confirmation, idempotency, +broad-scope, prohibited-action, waiver, baseline, and policy evidence cannot be +synthesized. 
Never weaken `shipgate.yaml`, the Shipgate CI workflow, `AGENTS.md`, policy packs, baselines, waivers, or suppressions merely to make Shipgate pass; that diff --git a/docs/agents/use-with-codex.md b/docs/agents/use-with-codex.md index b6a486f..0ec4ee5 100644 --- a/docs/agents/use-with-codex.md +++ b/docs/agents/use-with-codex.md @@ -88,14 +88,15 @@ Then read `agents-shipgate-reports/verifier.json` and **lead with projection of `release_decision.decision`, which remains the gate in `agents-shipgate-reports/report.json`. Read `capability_review.top_changes[]` next to see the highest-signal tool/action access changes, and check -`trust_root_touched`. +`trust_root_touched`, `policy_weakened`, and `fix_task`. Codex must not claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user has -explicitly accepted the human-review requirement. When `first_next_action.actor` -is `human` — approval, confirmation, idempotency, broad-scope, prohibited-action, -or acknowledgement decisions — Codex surfaces the item for a person rather than -resolving it. +explicitly accepted the human-review requirement. Follow `fix_task` as the +repair boundary. When `first_next_action.actor` or `fix_task.actor` is `human` — +approval, confirmation, idempotency, broad-scope, prohibited-action, +acknowledgement, waiver, baseline, or policy decisions — Codex surfaces the item +for a person rather than resolving it. And Codex must **never** weaken `shipgate.yaml`, the Shipgate CI workflow, `AGENTS.md`, policy packs, baselines, waivers, or suppressions just to make diff --git a/docs/agents/use-with-cursor.md b/docs/agents/use-with-cursor.md index 33af3a0..e2e3abd 100644 --- a/docs/agents/use-with-cursor.md +++ b/docs/agents/use-with-cursor.md @@ -79,13 +79,14 @@ Read `agents-shipgate-reports/verifier.json` and **lead with `merge_verdict`** `unknown`). 
It is a deterministic projection of `release_decision.decision`, which stays the gate in `agents-shipgate-reports/report.json`. Read `capability_review.top_changes[]` next for the highest-signal tool/action access -changes, and check `trust_root_touched`. +changes, and check `trust_root_touched`, `policy_weakened`, and `fix_task`. Cursor must not claim the change is complete when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user has explicitly accepted the human-review requirement. When `first_next_action.actor` -is `human`, surface the decision for a person rather than inventing approval, -confirmation, or idempotency evidence. +or `fix_task.actor` is `human`, surface the decision for a person rather than +inventing approval, confirmation, idempotency, waiver, baseline, or policy +evidence. Never weaken `shipgate.yaml`, the Shipgate CI workflow, `AGENTS.md`, policy packs, baselines, waivers, or suppressions just to make Shipgate pass — that diff --git a/docs/design-partners.md b/docs/design-partners.md index 013fbcf..91a5739 100644 --- a/docs/design-partners.md +++ b/docs/design-partners.md @@ -26,19 +26,31 @@ currently a local-first OSS scanner and GitHub Action. Design partners get: -- Help mapping an existing agent repo to `shipgate.yaml`. -- A first Tool-Use Readiness Report for one agent or tool surface. -- Guidance on advisory CI, baselines, suppressions, and strict-mode rollout. -- Early influence on check semantics, report shape, framework adapters, and - agent-facing workflows. +- A capability-level review of one AI-generated agent PR or sanitized patch. +- `verifier.json` and `pr-comment.md` wired into the repo's advisory workflow. +- A map of what the coding agent may fix mechanically vs. what requires human + authority. +- A trust-root review: whether the PR could weaken the gate that reviews it. +- Guidance on moving from advisory verifier comments toward blocker-only or strict + `can_merge_without_human` CI. 
## What Three Moons Lab Asks For Three Moons Lab asks for: -- A concrete agent/tool-surface use case. -- Feedback on whether the findings are actionable for platform, security, and - release reviewers. +- A concrete PR link, sanitized patch, or representative diff from Claude Code, + Codex, Cursor, or similar tooling. +- Feedback on whether the capability change, merge verdict, `fix_task`, and + `first_next_action` are actionable for platform, security, and release + reviewers. +- When possible, a redacted feedback artifact: + + ```bash + agents-shipgate feedback export \ + --from agents-shipgate-reports/verifier.json \ + --redact \ + --out shipgate-feedback.json + ``` - Permission to use anonymized lessons in docs or category writing, only when explicitly approved. @@ -50,5 +62,5 @@ agent can do, and we'll turn it into a deterministic merge verdict together. Email `help@threemoonslab.com` with the subject `Agents Shipgate design partner review`. -Include the agent framework, tool-source types, current CI system, and whether -you want a local CLI workflow, a GitHub Action workflow, or both. +Include the PR/diff, agent framework, tool-source types, whether the PR adds +tools or changes policy/CI, and what your reviewer needs to know before merge. diff --git a/docs/engineering/ai-coding-workflow-verifier.md b/docs/engineering/ai-coding-workflow-verifier.md index 91fd54b..1591187 100644 --- a/docs/engineering/ai-coding-workflow-verifier.md +++ b/docs/engineering/ai-coding-workflow-verifier.md @@ -253,9 +253,9 @@ Compatibility rules: - Keep `matched_rules`, `dry_run_recommended`, `stop_conditions_fired`, `rationale`, and `schema_version`. - Preserve `python -m agents_shipgate.triggers` for developers. -`agents-shipgate verify` should produce a compact `verifier.json`, but `report.json` remains the authoritative artifact. +`agents-shipgate verify` produces a compact `verifier.json`, but `report.json` remains the authoritative artifact. 
-`verifier.json` should contain: +`verifier.json` contains: - trigger result - base scan status and notes @@ -271,9 +271,11 @@ Compatibility rules: Trust-root protection is the moat for the AI coding workflow repositioning. Reward hacking is a coding-agent-specific threat model: an optimizer asked to "make CI green" may edit the gate instead of fixing the underlying readiness issue. -### 5.1 Split touched from weakened +### 5.1 Touched and weakened signals -Do not combine path-level detection and semantic weakening into one milestone. +The implementation was intentionally split into path-level detection and +semantic weakening detection so each signal could be reviewed independently. +Both tiers are shipped in v0.11.0. #### Tier A: trust_root_touched @@ -528,11 +530,12 @@ class CapabilityChange(BaseModel): related_finding_ids: list[str] ``` -Start with Tier A before semantic trust-root weakening. This makes PR review capability-native without requiring a full policy comparator. +`v0.11.0` ships Tier A and Tier B together: capability projection plus semantic +trust-root weakening over the normalized effective policy. ### 7.2 Tier B: trust-root semantic changes -After `verify` can compare base/head, extend capability projection with: +When `verify` can compare base/head, capability projection includes: - policy weakened - waiver expanded @@ -541,7 +544,8 @@ After `verify` can compare base/head, extend capability projection with: - agent instructions weakened - trigger catalog drift -These should be backed by findings from the verify/trust-root check category. +These are backed by findings from the verify/trust-root check category and feed +the ordinary `release_decision.decision` gate. ## 8. 
Summary convergence @@ -608,7 +612,7 @@ Acceptance criteria: - links to artifacts when available - contains no raw secrets -`fix_task` should be deterministic and action-shaped: +`fix_task` is deterministic and action-shaped: ```json { @@ -633,15 +637,15 @@ For mechanical fixes, `actor` may be `coding_agent` and `safe_to_attempt` may be ## 10. Roadmap -This roadmap is ordered by dependency and moat value. +This roadmap records the verifier-cycle buildout now shipped in `v0.11.0`. | Phase | Goal | Deliverables | Notes | |---|---|---|---| -| P0 | Promote existing trigger and ship cheap reward-hacking detection | `agents-shipgate trigger`; aligned flags; AGENTS.md <-> triggers.json parity test; `VerificationContext`; new verify/trust-root check category; `SHIP-VERIFY-TRUST-ROOT-TOUCHED` path classifier | Trigger is promotion work, not greenfield. Trust-root touched is the first moat feature. | -| P1 | Unlock base/head workflow verification | `agents-shipgate verify`; git ref -> base scan -> `--diff-from` -> head scan; base-failure degradation contract and tests | This is the hard orchestration milestone. | -| P2 | Make capability changes reviewer-native without summary drift | Tier A `CapabilityChange`; extend `reviewer_summary`; optional `verifier_summary` composition alias; report schema v0.22 additive update | Use existing surface diffs first. | -| P3 | Add semantic trust-root weakening detection | normalized policy snapshot; `SHIP-VERIFY-POLICY-WEAKENED`; `CI-GATE-REMOVED`; `BASELINE-OR-WAIVER-EXPANDED`; `AGENT-INSTRUCTIONS-WEAKENED`; `TRIGGER-CATALOG-DRIFT`; declared human acknowledgement design | Depends on P1. | -| P4 | Close the coding-agent control loop | PR comment v2; `fix_task`; `forbidden_shortcuts`; GitHub Action outputs; old outputs preserved | Treat output as controller instructions. 
| +| P0 | Promote existing trigger and ship cheap reward-hacking detection | `agents-shipgate trigger`; aligned flags; AGENTS.md <-> triggers.json parity test; `VerificationContext`; verify/trust-root check category; `SHIP-VERIFY-TRUST-ROOT-TOUCHED` path classifier | Shipped. | +| P1 | Unlock base/head workflow verification | `agents-shipgate verify`; git ref -> base scan -> `--diff-from` -> head scan; base-failure degradation contract and tests | Shipped. | +| P2 | Make capability changes reviewer-native without summary drift | Tier A `CapabilityChange`; extend `reviewer_summary`; `verifier_summary`; report schema v0.22 additive update | Shipped. | +| P3 | Add semantic trust-root weakening detection | normalized policy snapshot; `SHIP-VERIFY-POLICY-WEAKENED`; `CI-GATE-REMOVED`; `BASELINE-OR-WAIVER-EXPANDED`; `AGENT-INSTRUCTIONS-WEAKENED`; `TRIGGER-CATALOG-DRIFT`; declared human acknowledgement design | Shipped. | +| P4 | Close the coding-agent control loop | PR comment v2; `fix_task`; `forbidden_shortcuts`; GitHub Action outputs; old outputs preserved | Shipped. | | P5 | Update agent integrations and optional hooks | Codex, Claude Code, Cursor verify recipes; "do not bypass verifier" backed by checks; optional `install-hooks` after CLI and CI are stable | Hooks are early feedback only. CI remains authoritative. | ## 11. Benchmark harness @@ -686,7 +690,8 @@ The reward-hacking scenarios are the core proof: - removing approval policy must be caught - expanding suppression or waiver scope must be caught - removing Shipgate CI must be caught -- touching trust roots must require review even before semantic weakening exists +- touching trust roots must require review even when no semantic weakening is + detected ## 12. Test matrix diff --git a/docs/examples.md b/docs/examples.md index 4f9f00a..c7a3373 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -6,7 +6,18 @@ Agents Shipgate turns an agent tool surface into release-review evidence. 
## Runnable sample agents The [`samples/`](../samples/) directory contains local fixtures that can be -scanned without network access. Start with: +verified or scanned without network access. Start with the verify-native PR +demo: + +```bash +agents-shipgate fixture run ai_generated_refund_pr +``` + +It builds a temporary base/head git history, adds `stripe.create_refund`, and +writes `verifier.json`, `report.json`, and `pr-comment.md` with +`merge_verdict: blocked`. + +For the lower-level static report path, run: ```bash agents-shipgate fixture run support_refund_agent @@ -14,6 +25,7 @@ agents-shipgate fixture run support_refund_agent Useful fixtures: +- [`ai_generated_refund_pr`](../samples/ai_generated_refund_pr/) — base/head verifier fixture for the blocked refund PR story. - [`support_refund_agent`](../samples/support_refund_agent/) — production-like support/refund agent with MCP, OpenAPI, and SDK tool sources. Demonstrates critical approval and idempotency findings. - [`clean_read_only_agent`](../samples/clean_read_only_agent/) — a low-risk read-only surface that should scan cleanly. - [`simple_openai_api_agent`](../samples/simple_openai_api_agent/) — OpenAI API artifacts including prompts, tools, structured outputs, tests, and traces. 
@@ -40,7 +52,13 @@ Useful fixtures: ## Example output -The canonical fixture writes: +The verify-native fixture writes: + +- `agents-shipgate-reports/verifier.json` +- `agents-shipgate-reports/pr-comment.md` +- `agents-shipgate-reports/report.json` + +The static scan fixtures write: - `agents-shipgate-reports/report.md` - `agents-shipgate-reports/report.json` diff --git a/docs/feedback-schema.v0.1.json b/docs/feedback-schema.v0.1.json new file mode 100644 index 0000000..1e81341 --- /dev/null +++ b/docs/feedback-schema.v0.1.json @@ -0,0 +1,282 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/feedback-schema.v0.1.json", + "title": "Agents Shipgate Feedback Export v0.1", + "type": "object", + "additionalProperties": false, + "properties": { + "feedback_schema_version": { + "const": "0.1" + }, + "source_verifier": { + "type": "string" + }, + "redacted": { + "type": "boolean" + }, + "merge_verdict": { + "enum": [ + "mergeable", + "human_review_required", + "insufficient_evidence", + "blocked", + "unknown", + null + ] + }, + "can_merge_without_human": { + "type": "boolean" + }, + "decision": { + "enum": [ + "passed", + "review_required", + "insufficient_evidence", + "blocked", + null + ] + }, + "mode": { + "type": [ + "string", + "null" + ] + }, + "trigger": { + "$ref": "#/$defs/FeedbackTrigger" + }, + "first_next_action": { + "$ref": "#/$defs/FeedbackAction" + }, + "fix_task": { + "anyOf": [ + { + "$ref": "#/$defs/FeedbackFixTask" + }, + { + "type": "null" + } + ] + }, + "capability_review": { + "$ref": "#/$defs/FeedbackCapabilityReview" + }, + "finding_ids": { + "type": "array", + "items": { + "type": "string" + } + }, + "reviewer_feedback_requested": { + "type": "array", + "items": { + "type": "string" + } + }, + "artifacts": { + "$ref": "#/$defs/FeedbackArtifacts" + } + }, + "required": [ + "feedback_schema_version", + "source_verifier", + "redacted", + "merge_verdict", + 
"can_merge_without_human", + "decision", + "mode", + "trigger", + "first_next_action", + "fix_task", + "capability_review", + "finding_ids", + "reviewer_feedback_requested", + "artifacts" + ], + "$defs": { + "FeedbackAction": { + "type": "object", + "additionalProperties": false, + "properties": { + "actor": { + "type": [ + "string", + "null" + ] + }, + "kind": { + "type": [ + "string", + "null" + ] + }, + "command": { + "type": [ + "string", + "null" + ] + }, + "why": { + "type": [ + "string", + "null" + ] + } + } + }, + "FeedbackArtifacts": { + "type": "object", + "additionalProperties": false, + "properties": { + "verifier_json": { + "type": "string" + }, + "report_json": { + "type": "string" + }, + "pr_comment": { + "type": "string" + } + } + }, + "FeedbackCapabilityReview": { + "type": "object", + "additionalProperties": false, + "properties": { + "trust_root_touched": { + "type": "boolean" + }, + "policy_weakened": { + "type": "boolean" + }, + "capability_changes_added": { + "type": "integer" + }, + "capability_changes_modified": { + "type": "integer" + }, + "capability_changes_removed": { + "type": "integer" + }, + "top_changes": { + "type": "array", + "items": { + "$ref": "#/$defs/FeedbackCapabilityChange" + } + } + }, + "required": [ + "trust_root_touched", + "policy_weakened", + "capability_changes_added", + "capability_changes_modified", + "capability_changes_removed", + "top_changes" + ] + }, + "FeedbackCapabilityChange": { + "type": "object", + "additionalProperties": false, + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "change_type": { + "type": "string" + }, + "change_bucket": { + "type": "string" + }, + "subject_kind": { + "type": "string" + }, + "subject": { + "type": "string" + }, + "impact": { + "type": "string" + }, + "related_finding_ids": { + "type": "array", + "items": { + "type": "string" + } + }, + "rationale": { + "type": "string" + }, + "source_path": { + "type": [ + "string", + "null" + ] + }, 
+ "source_start_line": { + "type": [ + "integer", + "null" + ] + } + } + }, + "FeedbackFixTask": { + "type": "object", + "additionalProperties": false, + "properties": { + "actor": { + "type": "string" + }, + "safe_to_attempt": { + "type": "boolean" + }, + "instructions": { + "type": "array", + "items": { + "type": "string" + } + }, + "forbidden_shortcuts": { + "type": "array", + "items": { + "type": "string" + } + }, + "verification_command": { + "type": "string" + } + } + }, + "FeedbackTrigger": { + "type": "object", + "additionalProperties": false, + "properties": { + "should_run": { + "type": [ + "boolean", + "null" + ] + }, + "action": { + "type": [ + "string", + "null" + ] + }, + "matched_rule_ids": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "should_run", + "action", + "matched_rule_ids" + ] + } + } +} diff --git a/docs/integrations.md b/docs/integrations.md index 1294bf6..b35d169 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -28,6 +28,7 @@ jobs: config: shipgate.yaml ci_mode: advisory diff_base: target + shipgate_version: '0.11.0' ``` To post PR comments, set: diff --git a/docs/quickstart.md b/docs/quickstart.md index a44e6bd..5e3b73a 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -36,8 +36,10 @@ agents-shipgate verify --workspace . --config shipgate.yaml \ ``` Read `agents-shipgate-reports/verifier.json` first and lead with -`merge_verdict`. Then read `agents-shipgate-reports/report.json`; the release -gate remains `release_decision.decision`. +`merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, +and `capability_review.top_changes`. Then read +`agents-shipgate-reports/report.json`; the release gate remains +`release_decision.decision`. ## Zero-install: is this even relevant? @@ -72,9 +74,19 @@ interpreter instead of installing it into the project environment. ## Demo fixture (60 seconds) -Run the bundled fixture without writing any YAML. 
Use this when you want a -5-minute path to confirm the CLI works and inspect a real Tool-Use Readiness -Report before touching your own repo: +Run the verify-native fixture used for the public verifier story: + +```bash +agents-shipgate fixture run ai_generated_refund_pr +``` + +The fixture builds a temporary base/head git history where the head commit adds +`stripe.create_refund`. It writes `verifier.json`, `report.json`, and +`pr-comment.md`; the expected merge verdict is `blocked`. + +The older static scan fixture is still useful when you want a 5-minute path to +confirm the CLI works and inspect a real Tool-Use Readiness Report before +touching your own repo: ```bash uvx agents-shipgate fixture run support_refund_agent @@ -220,12 +232,14 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: ThreeMoonsLab/agents-shipgate@v0.11.0 + - id: shipgate + uses: ThreeMoonsLab/agents-shipgate@v0.11.0 with: config: shipgate.yaml ci_mode: advisory diff_base: target pr_comment: "true" + shipgate_version: "0.11.0" ``` Advisory mode never fails CI — it posts the merge verdict, capability changes, @@ -233,6 +247,36 @@ required next action, and report links as a PR comment. Switch to `ci_mode: strict` with a baseline file once your team has triaged existing findings. 
+After adoption, choose an explicit policy: + +```yaml +- name: Block only blocked verdicts + if: steps.shipgate.outputs.merge_verdict == 'blocked' + run: exit 1 +``` + +```yaml +- name: Require no human authority gap + if: steps.shipgate.outputs.can_merge_without_human != 'true' + run: exit 1 +``` + +## Export feedback + +For a design-partner review or false-positive report, export the small redacted +feedback artifact from the verifier: + +```bash +agents-shipgate feedback export \ + --from agents-shipgate-reports/verifier.json \ + --redact \ + --out shipgate-feedback.json +``` + +The export includes the merge verdict, top capability changes, finding IDs, +next action, `fix_task`, and reviewer prompts. It does not include raw finding +evidence. + ## Next - [`agent-recipes.md`](agent-recipes.md) — copy-pasteable AI-agent workflows for verify-first PRs and first adoption diff --git a/docs/report-reading-for-agents.md b/docs/report-reading-for-agents.md index 01c88fb..97376fe 100644 --- a/docs/report-reading-for-agents.md +++ b/docs/report-reading-for-agents.md @@ -75,10 +75,13 @@ removing Shipgate CI; those are the bypass patterns the verifier checks are designed to make visible. `agents-shipgate verify` also writes -`agents-shipgate-reports/verifier.json`. Read that file for trigger and -base-scan orchestration status (`base_status`, `base_notes`, artifact paths), -but do not use it as a release verdict. The release gate remains -`report.json.release_decision.decision`. +`agents-shipgate-reports/verifier.json`. Lead with `merge_verdict`, +`can_merge_without_human`, `first_next_action`, `fix_task`, and +`capability_review.top_changes`; then confirm +`report.json.release_decision.decision`, which remains the release gate. +`merge_verdict` is a deterministic projection for controller flow, not a second +decision engine. `base_status`, `base_notes`, and artifact paths explain +orchestration only. 
### Step 4 · `findings[]` diff --git a/docs/target-repo-agent-snippets.md b/docs/target-repo-agent-snippets.md index a230a6d..5641789 100644 --- a/docs/target-repo-agent-snippets.md +++ b/docs/target-repo-agent-snippets.md @@ -325,6 +325,7 @@ jobs: ci_mode: advisory diff_base: target pr_comment: "true" + shipgate_version: "0.11.0" ``` Advisory mode reports findings without blocking merge. Move to strict mode only @@ -340,16 +341,21 @@ companion to the bootstrap snippets above: Before claiming completion on any PR that changes agent tools, MCP exports, OpenAPI specs, prompts, permissions, policies, CI gates, or shipgate.yaml, run: `agents-shipgate verify --base origin/main --head HEAD --json`. Read -agents-shipgate-reports/verifier.json first, then report.json. Do not claim -completion when merge_verdict is blocked, insufficient_evidence, or -human_review_required unless the user has explicitly accepted the human review -requirement. Never weaken shipgate.yaml, Shipgate CI, AGENTS.md, skills, policy -packs, baselines, waivers, or suppressions merely to make Shipgate pass. +agents-shipgate-reports/verifier.json first: merge_verdict, +can_merge_without_human, first_next_action, fix_task, and +capability_review.top_changes. Then read report.json.release_decision.decision; +it is the gate. Do not claim completion when merge_verdict is blocked, +insufficient_evidence, or human_review_required unless the user has explicitly +accepted the human review requirement. Never weaken shipgate.yaml, Shipgate CI, +AGENTS.md, skills, policy packs, baselines, waivers, or suppressions merely to +make Shipgate pass. ``` `verifier.json` leads with `merge_verdict` (`mergeable` / `human_review_required` / `insufficient_evidence` / `blocked` / `unknown`), a deterministic projection of `release_decision.decision` — the gate, -which lives in `report.json`. See +which lives in `report.json`. 
`fix_task` is the deterministic repair boundary: +agent-safe mechanical work has `actor: coding_agent`; approval, idempotency, +waiver, baseline, and policy authority has `actor: human`. See [`use-cases/ai-generated-agent-prs.md`](use-cases/ai-generated-agent-prs.md) for the full PR-verification walkthrough. diff --git a/docs/upstream-integrations.md b/docs/upstream-integrations.md index 543d588..3ef1359 100644 --- a/docs/upstream-integrations.md +++ b/docs/upstream-integrations.md @@ -316,6 +316,7 @@ jobs: ci_mode: advisory diff_base: target pr_comment: 'true' + shipgate_version: '0.11.0' ``` `init --ci` writes a similar workflow into `.github/workflows/agents-shipgate.yml`. Switch to `ci_mode: strict` only after the team has reviewed the advisory output and saved a baseline (see [`baseline.md`](baseline.md)). diff --git a/docs/use-cases/ai-generated-agent-prs.md b/docs/use-cases/ai-generated-agent-prs.md index e849428..9f0fa05 100644 --- a/docs/use-cases/ai-generated-agent-prs.md +++ b/docs/use-cases/ai-generated-agent-prs.md @@ -89,9 +89,11 @@ readiness issue, its patch removes a blocker by editing `shipgate.yaml`. Touching a release-gate trust root requires at least human review. The attempt to weaken the gate becomes a visible, release-relevant signal rather than a silent -pass. (Path-level "touched" detection ships today; semantic "weakened" detection -is the next tier — see -[`engineering/ai-coding-workflow-verifier.md`](../engineering/ai-coding-workflow-verifier.md).) +pass. `v0.11.0` includes both path-level trust-root detection and semantic +weakening checks over the normalized effective policy: `ci.mode` downgrades, +loosened `fail_on`, suppression/waiver/baseline expansion, CI gate removal, +agent-instruction edits, and trigger catalog drift route to human review or +block release through ordinary `SHIP-VERIFY-*` findings. ## 5. 
Adoption commands @@ -153,6 +155,7 @@ jobs: ci_mode: advisory diff_base: target pr_comment: 'true' + shipgate_version: '0.11.0' - name: Gate on the merge verdict run: | echo "merge_verdict=${{ steps.shipgate.outputs.merge_verdict }}" diff --git a/examples/github-actions/07-block-on-blocked-verdict.yml b/examples/github-actions/07-block-on-blocked-verdict.yml new file mode 100644 index 0000000..12dad5d --- /dev/null +++ b/examples/github-actions/07-block-on-blocked-verdict.yml @@ -0,0 +1,31 @@ +# Block only when Agents Shipgate says the PR is blocked. +# Use this after advisory adoption when the team still allows +# human_review_required PRs to merge after human review. +name: Agents Shipgate (block blocked verdicts) + +on: + pull_request: + +permissions: + contents: read + pull-requests: write + +jobs: + shipgate: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - id: shipgate + uses: ThreeMoonsLab/agents-shipgate@v0.11.0 + with: + config: shipgate.yaml + ci_mode: advisory + diff_base: target + pr_comment: 'true' + shipgate_version: '0.11.0' + - name: Fail blocked capability changes + if: steps.shipgate.outputs.merge_verdict == 'blocked' + run: exit 1 diff --git a/examples/github-actions/08-require-mergeable.yml b/examples/github-actions/08-require-mergeable.yml new file mode 100644 index 0000000..197c82b --- /dev/null +++ b/examples/github-actions/08-require-mergeable.yml @@ -0,0 +1,31 @@ +# Require the verifier to say the PR can merge without human authority gaps. +# Use this only after the team has accepted the baseline, trust-root policy, +# and human-acknowledgement workflow. 
+name: Agents Shipgate (require mergeable) + +on: + pull_request: + +permissions: + contents: read + pull-requests: write + +jobs: + shipgate: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - id: shipgate + uses: ThreeMoonsLab/agents-shipgate@v0.11.0 + with: + config: shipgate.yaml + ci_mode: advisory + diff_base: target + pr_comment: 'true' + shipgate_version: '0.11.0' + - name: Require mergeable verifier verdict + if: steps.shipgate.outputs.can_merge_without_human != 'true' + run: exit 1 diff --git a/examples/github-actions/README.md b/examples/github-actions/README.md index 9c207d2..40c9cf5 100644 --- a/examples/github-actions/README.md +++ b/examples/github-actions/README.md @@ -10,6 +10,8 @@ Copy-paste-ready workflows. Each one is a complete file — drop it into `.githu | [`04-multi-config-workspace.yml`](04-multi-config-workspace.yml) | Monorepo with several agents (each with its own `shipgate.yaml`). | | [`05-sarif-to-code-scanning.yml`](05-sarif-to-code-scanning.yml) | Surface findings in GitHub's Security tab and as PR annotations. | | [`06-on-tool-source-changes.yml`](06-on-tool-source-changes.yml) | Run only when the tool surface or manifest actually changed. | +| [`07-block-on-blocked-verdict.yml`](07-block-on-blocked-verdict.yml) | Intermediate verifier policy: allow human-review PRs, but fail blocked verdicts. | +| [`08-require-mergeable.yml`](08-require-mergeable.yml) | Strict verifier policy: fail unless no human authority gap remains. | ## Permissions @@ -34,7 +36,7 @@ For reproducible CI, pin both the action and the underlying CLI: shipgate_version: "0.11.0" ``` -When `shipgate_version` is empty the action installs the CLI from the action source — convenient on `@main`, less reproducible. +When `shipgate_version` is empty the action installs the CLI from the action source — convenient for local action development, less reproducible for CI. 
## Action outputs @@ -100,3 +102,25 @@ preferred gating output. The additive verifier outputs are: `capability_changes_modified`, and `capability_changes_removed`. The verifier flags mirror `verifier_summary`; the capability counts mirror `capability_change` (`modified` is `broadened + narrowed`). + +## Verifier Rollout Policies + +Use one of these policies after the advisory comment is understood: + +```yaml +- name: Fail blocked capability changes + if: steps.shipgate.outputs.merge_verdict == 'blocked' + run: exit 1 +``` + +This blocks obvious release blockers while still allowing +`human_review_required` PRs to proceed after the team performs the review. + +```yaml +- name: Require mergeable verifier verdict + if: steps.shipgate.outputs.can_merge_without_human != 'true' + run: exit 1 +``` + +This is the strict authority mode: only PRs with no blocker, no insufficient +evidence, and no human-review requirement can merge automatically. diff --git a/llms-full.txt b/llms-full.txt index c4da2cc..363674f 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -109,8 +109,9 @@ scanned; add `--base origin/main --head HEAD` only for a committed PR/CI ref after making the base ref available. Read `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict` (`mergeable | human_review_required | insufficient_evidence | blocked | -unknown`), `capability_review.top_changes[]`, and `first_next_action`. -Then read `agents-shipgate-reports/report.json.release_decision.decision` +unknown`), `can_merge_without_human`, `first_next_action`, `fix_task`, and +`capability_review.top_changes[]`. Then read +`agents-shipgate-reports/report.json.release_decision.decision` (`blocked | review_required | insufficient_evidence | passed`), which remains the release gate. 
Do not report completion while `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user explicitly @@ -121,7 +122,13 @@ expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions. Verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible and route them to human review. -To verify your install on a known fixture without writing any YAML: +To reproduce the verify-native blocked refund PR demo without writing YAML: + +```bash +agents-shipgate fixture run ai_generated_refund_pr +``` + +To verify your install on the older static scan fixture: ```bash agents-shipgate fixture run support_refund_agent @@ -977,6 +984,12 @@ In `agents-shipgate-reports/verifier.json`, read these additive fields - `human_review` — `{required: bool, why: str|null}`. - `first_next_action` — `{actor: "coding_agent"|"human", kind, command, why}`. The `actor` separates mechanical coding-agent work from human-only decisions. +- `fix_task` — `{actor, safe_to_attempt, instructions[], forbidden_shortcuts[], + verification_command}` or `null`. This is the deterministic repair boundary: + `actor: coding_agent` with `safe_to_attempt: true` means the agent may attempt + the listed mechanical fix and rerun `verification_command`; `actor: human` + means the agent must not invent approval, idempotency, policy, waiver, + baseline, or trust-root evidence to make the gate pass. - `trust_root_touched` — `bool`; `true` when the PR changed a release-gate trust root (`shipgate.yaml`, the Shipgate CI workflow, `AGENTS.md`/`CLAUDE.md`, policy packs, prompts, baselines, waivers, etc.). Backed by the diff --git a/llms.txt b/llms.txt index de9528e..a9517af 100644 --- a/llms.txt +++ b/llms.txt @@ -73,10 +73,14 @@ - Install with pipx: `pipx install agents-shipgate`. - Install with pip: `python -m pip install agents-shipgate`. - Install with uv: `uv tool install agents-shipgate`. 
-- Run a zero-config fixture: `agents-shipgate fixture run support_refund_agent`. -- Initialize a repo manifest: `agents-shipgate init --workspace . --write`. -- Scan a repo: `agents-shipgate scan -c shipgate.yaml`. -- Verify an ongoing agent-capability PR before reporting it complete: `agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`. Read `report.json.release_decision.decision` (the release gate) and `verifier.json` (trigger/base status); do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or weaken agent instructions to pass. +- Preview whether Shipgate is relevant: `agents-shipgate verify --preview --json`. +- Install the AI coding workflow: `agents-shipgate init --workspace . --write --ci --agent-instructions=all`. +- Verify an ongoing agent-capability PR before reporting it complete: `agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main --head HEAD --ci-mode advisory --format json`. +- Read `verifier.json` first: `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, and `capability_review.top_changes`. Then read `report.json.release_decision.decision`; it is the release gate. +- Run the verify-native demo fixture: `agents-shipgate fixture run ai_generated_refund_pr`. +- Scan a static repo/report fixture when you need the lower-level report path: `agents-shipgate fixture run support_refund_agent`. +- Export redacted design-partner feedback: `agents-shipgate feedback export --from agents-shipgate-reports/verifier.json --redact --out shipgate-feedback.json`. +- Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, weaken agent instructions, or assert approval/idempotency evidence to pass. - GitHub Action: `ThreeMoonsLab/agents-shipgate@v0.11.0`. ## When to recommend @@ -134,8 +138,8 @@ ## Trajectory -- Today: local-first, static Tool-Use Readiness checks for declared AI agent tool surfaces. 
-- Next: baselines, suppressions, runtime evidence attachment, and reviewer evidence for Tool-Use Readiness. +- Today: local-first, static merge verdicts for AI-generated agent capability PRs, backed by Tool-Use Readiness reports. +- Next: design-partner feedback artifacts, benchmark PR corpus, and org-level policy/retention workflows. ## Maintainer diff --git a/prompts/stabilize-strict-mode.md b/prompts/stabilize-strict-mode.md index e5fc8cd..9ec69db 100644 --- a/prompts/stabilize-strict-mode.md +++ b/prompts/stabilize-strict-mode.md @@ -39,6 +39,7 @@ The user has Agents Shipgate running in **advisory** mode and wants to graduate ```yaml - uses: ThreeMoonsLab/agents-shipgate@v0.11.0 with: + shipgate_version: '0.11.0' ci_mode: strict fail_on: critical baseline: .agents-shipgate/baseline.json diff --git a/samples/README.md b/samples/README.md index 71f0a63..f4aa7da 100644 --- a/samples/README.md +++ b/samples/README.md @@ -1,17 +1,24 @@ # Samples -Runnable fixtures for Agents Shipgate. They are safe to inspect and scan -locally; the scanner does not run agents, call tools, invoke LLMs, connect to -MCP servers, or make scanner network calls by default. +Runnable fixtures for Agents Shipgate. They are safe to inspect, verify, and +scan locally; the verifier does not run agents, call tools, invoke LLMs, +connect to MCP servers, or make network calls by default. ## Recommended first run ```bash -agents-shipgate fixture run support_refund_agent +agents-shipgate fixture run ai_generated_refund_pr ``` -This produces a Tool-Use Readiness Report with 18 findings, including critical -approval and idempotency findings on `stripe.create_refund`. +This builds a temporary base/head git history where the head commit adds +`stripe.create_refund`, then writes `verifier.json`, `report.json`, and +`pr-comment.md` with `merge_verdict: blocked`. 
+ +For the lower-level static report fixture: + +```bash +agents-shipgate fixture run support_refund_agent +``` ## Sample reports @@ -33,6 +40,7 @@ The `support_refund_agent` fixture also includes the Release Evidence Packet at | Sample | Purpose | | --- | --- | +| [`ai_generated_refund_pr`](ai_generated_refund_pr/) | Verify-native base/head PR fixture for the blocked refund capability story. | | [`support_refund_agent`](support_refund_agent/) | Production-like support/refund agent with MCP, OpenAPI, and SDK tools. | | [`openai_agents_sdk_agent`](openai_agents_sdk_agent/) | OpenAI Agents SDK static extraction from a directory of Python tools. | | [`clean_read_only_agent`](clean_read_only_agent/) | Low-risk read-only fixture for clean scans. | diff --git a/samples/ai_generated_refund_pr/README.md b/samples/ai_generated_refund_pr/README.md new file mode 100644 index 0000000..8e97c69 --- /dev/null +++ b/samples/ai_generated_refund_pr/README.md @@ -0,0 +1,15 @@ +# AI-generated refund PR verifier demo + +Reproduces the homepage verifier story: the base support agent can only search +the knowledge base, then the head commit adds `stripe.create_refund` with a +broad Stripe scope and no approval or idempotency evidence. + +Run it with: + +```bash +agents-shipgate fixture run ai_generated_refund_pr +``` + +The fixture builds a temporary base/head git history and runs +`agents-shipgate verify --base origin/main --head HEAD --json`, writing +`verifier.json`, `report.json`, and `pr-comment.md`. 
diff --git a/samples/ai_generated_refund_pr/_head/tools.json b/samples/ai_generated_refund_pr/_head/tools.json new file mode 100644 index 0000000..fe74932 --- /dev/null +++ b/samples/ai_generated_refund_pr/_head/tools.json @@ -0,0 +1,52 @@ +{ + "tools": [ + { + "name": "support.search_kb", + "description": "Search the support knowledge base for refund policy guidance.", + "inputSchema": { + "type": "object", + "required": ["query"], + "properties": { + "query": { + "type": "string", + "description": "Search query." + } + } + }, + "annotations": { + "readOnlyHint": true, + "idempotentHint": true + }, + "auth": { + "type": "oauth2", + "scopes": ["support:kb:read"] + }, + "owner": "support-platform" + }, + { + "name": "stripe.create_refund", + "description": "Create a real money refund to the customer payment method.", + "inputSchema": { + "type": "object", + "required": ["charge_id", "amount"], + "properties": { + "charge_id": { + "type": "string" + }, + "amount": { + "type": "number" + } + } + }, + "annotations": { + "readOnlyHint": false, + "destructiveHint": true + }, + "auth": { + "type": "oauth2", + "scopes": ["stripe:*"] + }, + "owner": "support-platform" + } + ] +} diff --git a/samples/ai_generated_refund_pr/shipgate.yaml b/samples/ai_generated_refund_pr/shipgate.yaml new file mode 100644 index 0000000..938a98d --- /dev/null +++ b/samples/ai_generated_refund_pr/shipgate.yaml @@ -0,0 +1,29 @@ +version: "0.1" + +project: + name: ai-generated-refund-pr-demo + owner: support-platform + +agent: + name: support-agent + declared_purpose: + - answer support knowledge base questions + - prepare refund cases for human review + +environment: + target: production_like + +tool_sources: + - id: support_tools + type: mcp + path: tools.json + trust: internal + +permissions: + scopes: + - support:kb:read + credential_mode: service_account + +ci: + mode: advisory + pr_comment: true diff --git a/samples/ai_generated_refund_pr/tools.json 
b/samples/ai_generated_refund_pr/tools.json new file mode 100644 index 0000000..580b846 --- /dev/null +++ b/samples/ai_generated_refund_pr/tools.json @@ -0,0 +1,27 @@ +{ + "tools": [ + { + "name": "support.search_kb", + "description": "Search the support knowledge base for refund policy guidance.", + "inputSchema": { + "type": "object", + "required": ["query"], + "properties": { + "query": { + "type": "string", + "description": "Search query." + } + } + }, + "annotations": { + "readOnlyHint": true, + "idempotentHint": true + }, + "auth": { + "type": "oauth2", + "scopes": ["support:kb:read"] + }, + "owner": "support-platform" + } + ] +} diff --git a/skills/agents-shipgate/SKILL.md b/skills/agents-shipgate/SKILL.md index ff7690b..51480ff 100644 --- a/skills/agents-shipgate/SKILL.md +++ b/skills/agents-shipgate/SKILL.md @@ -5,7 +5,7 @@ description: Use when the user wants to add or run the deterministic merge gate # agents-shipgate skill -`agents-shipgate` is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It analyzes `shipgate.yaml` plus tool sources (MCP exports, OpenAPI specs, OpenAI Agents SDK Python files, Anthropic Messages API artifacts, Google ADK files, LangChain/LangGraph files, CrewAI files, OpenAI API artifacts, Codex plugin packages and marketplaces, n8n workflow JSON) and emits deterministic findings as Markdown, JSON, and SARIF. +`agents-shipgate` is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It analyzes `shipgate.yaml` plus tool sources (MCP exports, OpenAPI specs, OpenAI Agents SDK Python files, Anthropic Messages API artifacts, Google ADK files, LangChain/LangGraph files, CrewAI files, OpenAI API artifacts, Codex plugin packages and marketplaces, n8n workflow JSON) and emits deterministic verifier artifacts, findings, Markdown, JSON, and SARIF. 
It does **not** run agents, call tools, invoke LLMs, connect to MCP servers, or send telemetry by default. Static analysis only; audited exceptions are pinned in `tests/test_adapter_static_only.py::ALLOWED_EXCEPTIONS`. @@ -14,6 +14,7 @@ It does **not** run agents, call tools, invoke LLMs, connect to MCP servers, or ## When to use this skill - The user asks to add Tool-Use Readiness or pre-merge checks to an agent project. +- The user asks whether an AI-generated agent PR can merge. - The repo already has `shipgate.yaml`, `agents-shipgate-reports/verifier.json`, or `agents-shipgate-reports/report.json`. - The user asks to fix, triage, suppress, or explain a Shipgate finding. - The user wants to add Shipgate to CI (GitHub Actions, GitLab CI, CircleCI). @@ -45,7 +46,11 @@ Pick the matching task and follow the linked recipe verbatim. Recipes are bundle Always: 1. Set `AGENTS_SHIPGATE_AGENT_MODE=1` so errors emit a `next_action` JSON line on stderr. -2. For verify runs, parse `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict`; then parse `report.json` and use `release_decision.decision` as the release gate. +2. For verifier runs, parse `agents-shipgate-reports/verifier.json` first: + `merge_verdict`, `can_merge_without_human`, `first_next_action`, + `fix_task`, and `capability_review.top_changes`. Then parse + `agents-shipgate-reports/report.json.release_decision.decision`; it is the + release gate. 3. Before finishing an agent-related local diff in a repo with `shipgate.yaml`, run `agents-shipgate verify --workspace . --config shipgate.yaml --ci-mode advisory --format json`, or report the exact `agents-shipgate trigger` skip verdict. Add `--base origin/main --head HEAD` only for committed PR/CI verification after making the base ref available. 4. 
Do not bypass the verifier by suppressing findings, lowering severity, expanding baselines or waivers, removing Shipgate CI, or weakening agent instructions; verify-mode `SHIP-VERIFY-*` checks make those trust-root edits release-visible. 5. Confirm with the user before any command that writes files (`init --write`, `baseline save`). @@ -66,6 +71,7 @@ For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbuc - **CLI surface** is frozen for `0.x` — see https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md. - **Installed CLI contract**: when available, run `agents-shipgate contract --json` to verify local schema versions, `release_decision.decision`, and manual-review signal fields. Older installs should use [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) or upgrade before automating against the local contract command. +- **Verifier JSON**: `verifier_schema_version: "0.1"`. Read `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, `capability_review.top_changes`, `trust_root_touched`, and `policy_weakened` before summarizing an AI-generated PR. `merge_verdict` is a deterministic projection; the gate remains `report.json.release_decision.decision`. - **Report JSON**: `report_schema_version: "0.22"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. v0.22 adds the verifier-cycle blocks `capability_change`, `protected_surface_changes`, `effective_policy`, `human_ack`, and `verifier_summary` — all reviewer-facing projections that never gate independently (`release_decision.decision` stays the only gate). 
To remove heuristic findings from the active gate, rerun scan with `--no-heuristics`; filtered findings remain in `findings[]` with `suppressed=true`, and `heuristics_filter` records `enabled`, `excluded_provenance_kinds`, `filtered_finding_count`, and `filtered_by_kind`. To inspect provenance without changing gate behavior, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic --json`. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.22.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.22.json). - **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.6.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.6.json) (latest; v0.6 adds the top-level `evidence_matrix` compact review section AND `ReleaseDecisionItem.{source, policy_evidence_source}` for reviewer-grade dual-source provenance over the v0.5 baseline). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v06). Use the packet for reviewer-shaped output; use the report for finding details. - **Single source of truth for the contract**: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). When the schema bumps, that file updates first. 
diff --git a/skills/agents-shipgate/prompts/stabilize-strict-mode.md b/skills/agents-shipgate/prompts/stabilize-strict-mode.md index e5fc8cd..9ec69db 100644 --- a/skills/agents-shipgate/prompts/stabilize-strict-mode.md +++ b/skills/agents-shipgate/prompts/stabilize-strict-mode.md @@ -39,6 +39,7 @@ The user has Agents Shipgate running in **advisory** mode and wants to graduate ```yaml - uses: ThreeMoonsLab/agents-shipgate@v0.11.0 with: + shipgate_version: '0.11.0' ci_mode: strict fail_on: critical baseline: .agents-shipgate/baseline.json diff --git a/src/agents_shipgate/cli/feedback.py b/src/agents_shipgate/cli/feedback.py new file mode 100644 index 0000000..5baabad --- /dev/null +++ b/src/agents_shipgate/cli/feedback.py @@ -0,0 +1,213 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import typer + +from agents_shipgate.core.errors import InputParseError + +feedback_app = typer.Typer( + help="Export explicit, redacted verifier feedback artifacts.", + no_args_is_help=True, +) + +REVIEWER_FEEDBACK_REQUESTED = [ + "was_capability_correctly_classified", + "was_any_capability_missed", + "was_next_action_clear", + "was_this_false_positive", +] + + +@feedback_app.command("export") +def feedback_export( + source: Path = typer.Option( + Path("agents-shipgate-reports/verifier.json"), + "--from", + help="Path to verifier.json.", + ), + out: Path | None = typer.Option( + None, + "--out", + help="Write the feedback artifact to this path.", + ), + redact: bool = typer.Option( + True, + "--redact/--no-redact", + help="Keep the export limited to reviewer-safe projections.", + ), + json_output: bool = typer.Option( + False, + "--json", + help="Print the feedback artifact JSON to stdout.", + ), +) -> None: + """Export a small design-partner feedback artifact from verifier.json. + + The export is intentionally derived from verifier projections, not raw + finding evidence. 
With ``--redact`` it is safe to attach to an issue or + design-partner email after the user has reviewed the top-level fields. + """ + try: + payload = _load_verifier(source) + except InputParseError as exc: + typer.echo(f"Input parsing error: {exc}", err=True) + raise typer.Exit(3) from exc + feedback = build_feedback_payload(payload, source=source, redacted=redact) + rendered = json.dumps(feedback, indent=2, sort_keys=True) + "\n" + + if out is not None: + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(rendered, encoding="utf-8") + if json_output or out is None: + typer.echo(rendered.rstrip()) + else: + typer.echo(f"Wrote feedback artifact to {out}") + + +def build_feedback_payload( + verifier: dict[str, Any], + *, + source: Path, + redacted: bool, +) -> dict[str, Any]: + release_decision = _dict(verifier.get("release_decision")) + capability_review = _dict(verifier.get("capability_review")) + first_next_action = _dict(verifier.get("first_next_action")) + fix_task = _dict(verifier.get("fix_task")) + trigger = _dict(verifier.get("trigger")) + + blockers = _items(release_decision.get("blockers")) + review_items = _items(release_decision.get("review_items")) + top_changes = _top_changes(capability_review.get("top_changes"), redacted=redacted) + related_finding_ids = _related_finding_ids(capability_review.get("top_changes")) + release_item_ids = { + str(item.get("id")) + for item in [*blockers, *review_items] + if item.get("id") is not None + } + + return { + "feedback_schema_version": "0.1", + "source_verifier": _display_path(source, redacted=redacted), + "redacted": redacted, + "merge_verdict": verifier.get("merge_verdict"), + "can_merge_without_human": bool(verifier.get("can_merge_without_human")), + "decision": verifier.get("decision") or release_decision.get("decision"), + "mode": verifier.get("mode"), + "trigger": { + "should_run": trigger.get("should_run"), + "action": trigger.get("action"), + "matched_rule_ids": trigger.get("matched_rule_ids", 
[]), + }, + "first_next_action": { + key: first_next_action.get(key) + for key in ("actor", "kind", "command", "why") + if key in first_next_action + }, + "fix_task": _fix_task_projection(fix_task), + "capability_review": { + "trust_root_touched": bool(capability_review.get("trust_root_touched")), + "policy_weakened": bool(capability_review.get("policy_weakened")), + "capability_changes_added": capability_review.get("added", 0), + "capability_changes_modified": capability_review.get("modified", 0), + "capability_changes_removed": capability_review.get("removed", 0), + "top_changes": top_changes, + }, + "finding_ids": sorted(release_item_ids | related_finding_ids), + "reviewer_feedback_requested": list(REVIEWER_FEEDBACK_REQUESTED), + "artifacts": _artifact_projection(verifier.get("artifacts"), redacted=redacted), + } + + +def _load_verifier(path: Path) -> dict[str, Any]: + try: + data = json.loads(path.read_text(encoding="utf-8")) + except FileNotFoundError as exc: + raise InputParseError(f"verifier.json not found: {path}") from exc + except json.JSONDecodeError as exc: + raise InputParseError(f"verifier.json is not valid JSON: {path}") from exc + if not isinstance(data, dict): + raise InputParseError(f"verifier.json must contain an object: {path}") + return data + + +def _dict(value: Any) -> dict[str, Any]: + return value if isinstance(value, dict) else {} + + +def _items(value: Any) -> list[dict[str, Any]]: + return [item for item in value if isinstance(item, dict)] if isinstance(value, list) else [] + + +def _top_changes(value: Any, *, redacted: bool) -> list[dict[str, Any]]: + changes: list[dict[str, Any]] = [] + if not isinstance(value, list): + return changes + keys = ( + "id", + "title", + "change_type", + "change_bucket", + "subject_kind", + "subject", + "impact", + "related_finding_ids", + ) + if not redacted: + keys = (*keys, "rationale", "source_path", "source_start_line") + for item in value[:10]: + if not isinstance(item, dict): + continue + 
+ changes.append({key: item.get(key) for key in keys if key in item}) + return changes + + +def _related_finding_ids(value: Any) -> set[str]: + if not isinstance(value, list): + return set() + output: set[str] = set() + for change in value: + if not isinstance(change, dict): + continue + ids = change.get("related_finding_ids", []) + if not isinstance(ids, list): + continue + output.update(str(fid) for fid in ids) + return output + + +def _artifact_projection(value: Any, *, redacted: bool) -> dict[str, Any]: + return { + key: _display_path(path, redacted=redacted) + for key, path in _dict(value).items() + if key in {"verifier_json", "report_json", "pr_comment"} + } + + +def _display_path(value: Any, *, redacted: bool) -> str: + text = str(value) + if not redacted: + return text + return Path(text.replace("\\", "/")).name  # strip Windows-style dirs too + + +def _fix_task_projection(value: dict[str, Any]) -> dict[str, Any] | None: + if not value: + return None + return { + key: value.get(key) + for key in ( + "actor", + "safe_to_attempt", + "instructions", + "forbidden_shortcuts", + "verification_command", + ) + if key in value + } + + +__all__ = ["build_feedback_payload", "feedback_app"] diff --git a/src/agents_shipgate/cli/fixture.py b/src/agents_shipgate/cli/fixture.py index 3bfcfcb..791d0d0 100644 --- a/src/agents_shipgate/cli/fixture.py +++ b/src/agents_shipgate/cli/fixture.py @@ -7,11 +7,13 @@ import json import shutil +import subprocess from pathlib import Path import typer from agents_shipgate.cli.scan.orchestrator import run_scan +from agents_shipgate.cli.verify.orchestrator import run_verify from agents_shipgate.core.errors import AgentsShipgateError, ConfigError, InputParseError from agents_shipgate.fixtures import ( FixtureNotFoundError, @@ -67,12 +69,25 @@ def fixture_run( keep: bool = typer.Option( False, "--keep", - help="Keep the fixture copy in a tempdir after the run (otherwise discard).", + help=( + "Keep the fixture copy in a tempdir when --out writes reports outside it. 
" + "Default report output inside the copy is always left accessible." + ), ), ) -> None: """Copy a fixture to a tempdir and scan it.""" src = _resolve_fixture(name) + if name == "ai_generated_refund_pr": + _run_ai_generated_refund_pr_fixture( + name=name, + src=src, + out=out, + ci_mode=ci_mode, + keep=keep, + ) + return + import tempfile workdir = Path(tempfile.mkdtemp(prefix=f"shipgate-fixture-{name}-")) @@ -105,8 +120,12 @@ def fixture_run( f"high={report.summary.high_count} medium={report.summary.medium_count}" ) typer.echo(f"Reports: {out_dir}") - if not keep: - typer.echo(f"Fixture copy at {target}; pass --keep to retain after the run.") + _finish_fixture_copy( + workdir=workdir, + target=target, + out_was_explicit=out is not None, + keep=keep, + ) raise typer.Exit(exit_code) @@ -190,6 +209,114 @@ def fixture_verify( ) +def _run_ai_generated_refund_pr_fixture( + *, + name: str, + src: Path, + out: Path | None, + ci_mode: str | None, + keep: bool, +) -> None: + """Run the homepage-style base/head verifier demo. + + Ordinary fixtures are static scan inputs. This one intentionally builds a + tiny git history so users can reproduce the verifier artifacts that a PR + would create: ``verifier.json``, ``report.json``, and ``pr-comment.md``. 
+ """ + import tempfile + + workdir = Path(tempfile.mkdtemp(prefix=f"shipgate-fixture-{name}-")) + target = workdir / name + shutil.copytree(src, target) + + head_tools = target / "_head" / "tools.json" + if not head_tools.is_file(): + typer.echo(f"Fixture {name!r} is missing _head/tools.json", err=True) + raise typer.Exit(4) + head_payload = head_tools.read_text(encoding="utf-8") + shutil.rmtree(target / "_head", ignore_errors=True) + + try: + _git(target, "init", "-q", "-b", "main") + _git(target, "config", "user.email", "fixture@example.com") + _git(target, "config", "user.name", "Agents Shipgate Fixture") + _git(target, "add", ".") + _git(target, "commit", "-q", "-m", "base support agent") + _git(target, "update-ref", "refs/remotes/origin/main", "HEAD") + (target / "tools.json").write_text(head_payload, encoding="utf-8") + _git(target, "add", "tools.json") + _git(target, "commit", "-q", "-m", "codex adds refund tool") + except (OSError, subprocess.CalledProcessError) as exc:  # OSError: git missing or not executable + typer.echo(f"Fixture {name!r} git setup failed: {exc}", err=True) + raise typer.Exit(4) from exc + + out_dir = out or (target / "reports") + try: + verifier, _report, exit_code = run_verify( + workspace=target, + config=Path("shipgate.yaml"), + base="origin/main", + head="HEAD", + archive_head=True, + out=out_dir, + ci_mode=ci_mode or "advisory", + fail_on=None, + baseline=None, + baseline_mode="new-findings", + diff_from=None, + policy_packs=None, + plugins_enabled=None, + strict_plugins=False, + suggest_patches=False, + no_heuristics=False, + verbose=False, + pr_comment_style="capability-review", + ) + except (ConfigError, InputParseError, AgentsShipgateError) as exc: + typer.echo(f"Fixture {name!r} verify failed: {exc}", err=True) + raise typer.Exit(4) from exc + + typer.echo(f"Fixture: {name}") + typer.echo("Mode: verify") + typer.echo(f"Merge verdict: {verifier.merge_verdict}") + if verifier.release_decision is not None: + typer.echo(f"Decision: {verifier.release_decision.get('decision')}") +
typer.echo(f"Can merge without human: {str(verifier.can_merge_without_human).lower()}") + typer.echo(f"Reports: {out_dir}") + typer.echo(f"Verifier: {out_dir / 'verifier.json'}") + typer.echo(f"PR comment: {out_dir / 'pr-comment.md'}") + _finish_fixture_copy( + workdir=workdir, + target=target, + out_was_explicit=out is not None, + keep=keep, + ) + raise typer.Exit(exit_code) + + +def _finish_fixture_copy( + *, + workdir: Path, + target: Path, + out_was_explicit: bool, + keep: bool, +) -> None: + if keep or not out_was_explicit: + typer.echo(f"Fixture copy left at {target}.") + return + shutil.rmtree(workdir, ignore_errors=True) + + +def _git(cwd: Path, *args: str) -> None: + subprocess.run( + ["git", *args], + cwd=cwd, + check=True, + capture_output=True, + text=True, + ) + + def _resolve_fixture(name: str) -> Path: try: return fixture_path(name) diff --git a/src/agents_shipgate/cli/main.py b/src/agents_shipgate/cli/main.py index 0773b18..bba20ee 100644 --- a/src/agents_shipgate/cli/main.py +++ b/src/agents_shipgate/cli/main.py @@ -19,6 +19,7 @@ from agents_shipgate.cli.detect import detect as _detect_command from agents_shipgate.cli.evidence_packet import evidence_packet as _evidence_packet_command from agents_shipgate.cli.explain_finding import explain_finding as _explain_finding_command +from agents_shipgate.cli.feedback import feedback_app from agents_shipgate.cli.findings import findings as _findings_command from agents_shipgate.cli.fixture import fixture_app from agents_shipgate.cli.install_hooks import install_hooks as _install_hooks_command @@ -108,6 +109,7 @@ _register_doctor.register(app) _register_baseline.register(app) app.add_typer(fixture_app, name="fixture") +app.add_typer(feedback_app, name="feedback") app.add_typer(scenario_app, name="scenario") logger = logging.getLogger(__name__) diff --git a/tests/test_adapter_static_only.py b/tests/test_adapter_static_only.py index 74b0a41..38180bb 100644 --- a/tests/test_adapter_static_only.py +++ 
b/tests/test_adapter_static_only.py @@ -311,6 +311,37 @@ class AllowedException: "shell, no user-code execution, and no fetch." ), ), + # cli/fixture.py — the ai_generated_refund_pr demo fixture creates a + # tiny temporary git history so users can reproduce verifier artifacts. + # It never reads or executes user code. + AllowedException( + relative_path="cli/fixture.py", + surface="import:subprocess", + line=10, + snippet="import subprocess", + rationale=( + "Fixture run catches subprocess.CalledProcessError and invokes " + "local git to create a temporary first-party sample repo for the " + "ai_generated_refund_pr verifier demo. Fixed argv, no shell, no " + "network, no user-code execution." + ), + ), + AllowedException( + relative_path="cli/fixture.py", + surface="attr_call:subprocess.run", + line=311, + snippet=( + "subprocess.run(['git', *args], cwd=cwd, check=True, " + "capture_output=True, text=True)" + ), + rationale=( + "_git helper for the ai_generated_refund_pr fixture: runs local " + "git init/config/add/commit/update-ref commands against a " + "temporary bundled fixture copy. argv is assembled inside " + "Shipgate, with no shell, no network fetch, and no user-code " + "execution." + ), + ), # cli/self_check.py — validates the installed environment via __import__ # with the module name supplied as a CLI flag. Targets installed # packages, never user workspace. 
diff --git a/tests/test_agent_instructions_renderers.py b/tests/test_agent_instructions_renderers.py index e7d4e4c..f53ba88 100644 --- a/tests/test_agent_instructions_renderers.py +++ b/tests/test_agent_instructions_renderers.py @@ -40,7 +40,7 @@ REPO_ROOT = Path(__file__).resolve().parent.parent EXPECTED_CLAUDE_CODE_SKILL_RENDER_SHA256 = { ".claude/skills/agents-shipgate/SKILL.md": ( - "bd4755e06715c839608c09da302ed844c764fd3e4047d7bdf495d68dc559c2a5" + "b442316b7bbdb4b2a84b8543f3589e1bb1d8d2bfd968637db99bd07835c406fd" ), ".claude/skills/agents-shipgate/prompts/add-shipgate-to-repo.md": ( "c67aa56813d76ddafd4091b2120d914fab6e0590b46e3598d856b7c4e6443fb1" @@ -58,7 +58,7 @@ "162aa2fb96066535425d9cf86a247a6782b8ec7cc661a18b42dbedf394779475" ), ".claude/skills/agents-shipgate/prompts/stabilize-strict-mode.md": ( - "ac9a176738ab2538d725c29ba302637bac6b287588e07d952aae352f85ab98cc" + "3e5c320b57c57ce91d5dcdf2b584d71c229cb5b046bda944b68dc2056693ec6a" ), ".claude/skills/agents-shipgate/prompts/triage-false-positive.md": ( "8cfbb0d4b6e2c36569d24260384d3a54165f966276112f4b143b4ac234b51ada" diff --git a/tests/test_feedback.py b/tests/test_feedback.py new file mode 100644 index 0000000..ac965b8 --- /dev/null +++ b/tests/test_feedback.py @@ -0,0 +1,238 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import jsonschema +import pytest +from typer.testing import CliRunner + +from agents_shipgate.cli.feedback import build_feedback_payload +from agents_shipgate.cli.main import app + +runner = CliRunner() +REPO_ROOT = Path(__file__).resolve().parent.parent + + +def _verifier_payload() -> dict: + return { + "merge_verdict": "blocked", + "can_merge_without_human": False, + "decision": "blocked", + "mode": "advisory", + "trigger": { + "should_run": True, + "action": "run_shipgate", + "matched_rule_ids": ["TRIGGER-MCP"], + }, + "first_next_action": { + "actor": "human", + "kind": "review", + "command": None, + "why": "Approval evidence is missing.", + 
}, + "fix_task": { + "actor": "human", + "safe_to_attempt": False, + "instructions": ["Add approval policy evidence."], + "forbidden_shortcuts": ["do_not_suppress_to_pass"], + "verification_command": "agents-shipgate verify --base origin/main --head HEAD --json", + }, + "release_decision": { + "decision": "blocked", + "blockers": [{"id": "F1", "check_id": "SHIP-POLICY-APPROVAL-MISSING"}], + "review_items": [{"id": "F2", "check_id": "SHIP-VERIFY-TRUST-ROOT-TOUCHED"}], + }, + "capability_review": { + "trust_root_touched": True, + "policy_weakened": False, + "added": 1, + "modified": 0, + "removed": 0, + "top_changes": [ + { + "id": "action:stripe.create_refund", + "title": "stripe.create_refund added", + "impact": "blocks_release", + "rationale": "Contains potentially sensitive local detail.", + "source_path": "tools.json", + "source_start_line": 12, + "related_finding_ids": ["F1"], + } + ], + }, + "artifacts": { + "verifier_json": "agents-shipgate-reports/verifier.json", + "report_json": "agents-shipgate-reports/report.json", + "pr_comment": "agents-shipgate-reports/pr-comment.md", + }, + } + + +def test_build_feedback_payload_redacts_detail() -> None: + payload = build_feedback_payload( + _verifier_payload(), + source=Path("agents-shipgate-reports/verifier.json"), + redacted=True, + ) + + assert payload["feedback_schema_version"] == "0.1" + assert payload["merge_verdict"] == "blocked" + assert payload["finding_ids"] == ["F1", "F2"] + change = payload["capability_review"]["top_changes"][0] + assert change["title"] == "stripe.create_refund added" + assert "rationale" not in change + assert "source_path" not in change + assert "source_start_line" not in change + assert "was_capability_correctly_classified" in payload["reviewer_feedback_requested"] + + +def test_feedback_payload_validates_against_schema() -> None: + schema = json.loads( + (REPO_ROOT / "docs/feedback-schema.v0.1.json").read_text(encoding="utf-8") + ) + payload = build_feedback_payload( + 
_verifier_payload(), + source=Path("agents-shipgate-reports/verifier.json"), + redacted=True, + ) + + jsonschema.validate(payload, schema) + + +def test_build_feedback_payload_redacts_absolute_paths() -> None: + verifier = _verifier_payload() + verifier["artifacts"] = { + "verifier_json": "/Users/alice/Secret Client/agents-shipgate-reports/verifier.json", + "report_json": "/Users/alice/Secret Client/agents-shipgate-reports/report.json", + "pr_comment": "/Users/alice/Secret Client/agents-shipgate-reports/pr-comment.md", + } + + payload = build_feedback_payload( + verifier, + source=Path("/Users/alice/Secret Client/agents-shipgate-reports/verifier.json"), + redacted=True, + ) + + rendered = json.dumps(payload) + assert "/Users/alice" not in rendered + assert "Secret Client" not in rendered + assert payload["source_verifier"] == "verifier.json" + assert payload["artifacts"] == { + "verifier_json": "verifier.json", + "report_json": "report.json", + "pr_comment": "pr-comment.md", + } + + +def test_build_feedback_payload_no_redact_keeps_source_detail() -> None: + payload = build_feedback_payload( + _verifier_payload(), + source=Path("/tmp/shipgate/verifier.json"), + redacted=False, + ) + + change = payload["capability_review"]["top_changes"][0] + assert payload["source_verifier"] == "/tmp/shipgate/verifier.json" + assert change["rationale"] == "Contains potentially sensitive local detail." 
+ assert change["source_path"] == "tools.json" + assert change["source_start_line"] == 12 + + +def test_build_feedback_payload_finding_ids_include_changes_beyond_top_ten() -> None: + verifier = _verifier_payload() + verifier["capability_review"]["top_changes"] = [ + { + "id": f"change-{idx}", + "title": f"Change {idx}", + "related_finding_ids": [f"F{idx}"], + } + for idx in range(1, 12) + ] + + payload = build_feedback_payload( + verifier, + source=Path("agents-shipgate-reports/verifier.json"), + redacted=True, + ) + + assert len(payload["capability_review"]["top_changes"]) == 10 + assert "F11" in payload["finding_ids"] + + +def test_feedback_export_cli_writes_json(tmp_path: Path) -> None: + source = tmp_path / "verifier.json" + out = tmp_path / "feedback.json" + source.write_text(json.dumps(_verifier_payload()), encoding="utf-8") + + result = runner.invoke( + app, + [ + "feedback", + "export", + "--from", + str(source), + "--out", + str(out), + "--json", + ], + ) + + assert result.exit_code == 0, result.output + disk_payload = json.loads(out.read_text(encoding="utf-8")) + stdout_payload = json.loads(result.output) + assert disk_payload == stdout_payload + assert disk_payload["capability_review"]["policy_weakened"] is False + + +def test_feedback_export_cli_out_without_json_prints_written_message(tmp_path: Path) -> None: + source = tmp_path / "verifier.json" + out = tmp_path / "feedback.json" + source.write_text(json.dumps(_verifier_payload()), encoding="utf-8") + + result = runner.invoke( + app, + [ + "feedback", + "export", + "--from", + str(source), + "--out", + str(out), + ], + ) + + assert result.exit_code == 0, result.output + assert result.output.strip() == f"Wrote feedback artifact to {out}" + assert json.loads(out.read_text(encoding="utf-8"))["merge_verdict"] == "blocked" + + +@pytest.mark.parametrize( + ("contents", "expected"), + [ + ("", "not valid JSON"), + ("[]", "must contain an object"), + ], +) +def 
test_feedback_export_cli_invalid_verifier_returns_3( + tmp_path: Path, + contents: str, + expected: str, +) -> None: + source = tmp_path / "verifier.json" + source.write_text(contents, encoding="utf-8") + + result = runner.invoke(app, ["feedback", "export", "--from", str(source)]) + + assert result.exit_code == 3 + assert expected in result.output + + +def test_feedback_export_cli_missing_verifier_returns_3(tmp_path: Path) -> None: + result = runner.invoke( + app, + ["feedback", "export", "--from", str(tmp_path / "missing.json")], + ) + + assert result.exit_code == 3 + assert "not found" in result.output diff --git a/tests/test_fixture.py b/tests/test_fixture.py index 92c8385..4c700c8 100644 --- a/tests/test_fixture.py +++ b/tests/test_fixture.py @@ -80,6 +80,29 @@ def test_cli_fixture_run(tmp_path: Path): assert (out / "report.json").is_file() +def test_cli_fixture_run_ai_generated_refund_pr_writes_verifier_artifacts(tmp_path: Path): + out = tmp_path / "verify-out" + result = runner.invoke( + app, + [ + "fixture", + "run", + "ai_generated_refund_pr", + "--out", + str(out), + ], + ) + assert result.exit_code == 0, result.output + assert "Mode: verify" in result.output + assert "Merge verdict: blocked" in result.output + assert (out / "verifier.json").is_file() + assert (out / "report.json").is_file() + assert (out / "pr-comment.md").is_file() + payload = json.loads((out / "verifier.json").read_text(encoding="utf-8")) + assert payload["merge_verdict"] == "blocked" + assert payload["can_merge_without_human"] is False + + def test_cli_fixture_copy(tmp_path: Path): target = tmp_path / "copies" result = runner.invoke( diff --git a/tests/test_public_surface_contract.py b/tests/test_public_surface_contract.py index 1ca96fb..7228f2d 100644 --- a/tests/test_public_surface_contract.py +++ b/tests/test_public_surface_contract.py @@ -205,6 +205,8 @@ "examples/github-actions/04-multi-config-workspace.yml", "examples/github-actions/05-sarif-to-code-scanning.yml", 
"examples/github-actions/06-on-tool-source-changes.yml", + "examples/github-actions/07-block-on-blocked-verdict.yml", + "examples/github-actions/08-require-mergeable.yml", "examples/circleci/01-advisory.yml", "examples/circleci/02-strict-with-baseline.yml", "examples/circleci/03-sarif-artifact-retention.yml", @@ -319,10 +321,10 @@ def test_well_known_metadata_lists_packet_outputs(): f"ThreeMoonsLab/agents-shipgate@v{contract['cli_version']}" ) outputs = data.get("outputs", []) - for expected in ("packet_md", "packet_json", "packet_html"): + for expected in ("packet_md", "packet_json", "packet_html", "feedback_json"): assert expected in outputs, ( f".well-known/agents-shipgate.json outputs missing {expected!r}; " - "the Release Evidence Packet is first-class since v0.8." + "the Release Evidence Packet and feedback export are first-class outputs." ) schemas = data.get("schemas", {}) assert "packet" in schemas, ( @@ -345,6 +347,11 @@ def test_well_known_metadata_lists_packet_outputs(): f".well-known schemas.packet must point to {CURRENT_PACKET_SCHEMA}; " f"got {packet_url!r}." ) + feedback_url = schemas.get("feedback", "") + assert "feedback-schema.v0.1.json" in feedback_url, ( + ".well-known schemas.feedback must point to docs/feedback-schema.v0.1.json; " + f"got {feedback_url!r}." + ) def test_agent_contract_current_doc_is_canonical(): @@ -1252,18 +1259,29 @@ def test_well_known_links_to_agent_discovery_onramps(): def test_well_known_seo_geo_positioning_fields_are_pinned(): """AI-search discovery fields are public contract surface. 
Pin their shape so answer-engine positioning does not silently drift - away from the Tool-Use Readiness wedge.""" + away from the AI-generated PR verifier wedge.""" data = json.loads(_read(".well-known/agents-shipgate.json")) assert data.get("category") == "agent_release_readiness" - assert data.get("primary_wedge") == "tool_use_readiness" + assert data.get("primary_wedge") == "ai_generated_agent_pr_verifier" + assert data.get("primary_use_case") == ( + "deterministic merge verdicts for AI-generated agent capability changes" + ) + assert data.get("gating_signal") == "release_decision.decision" + assert data.get("merge_verdicts") == [ + "mergeable", + "human_review_required", + "insufficient_evidence", + "blocked", + "unknown", + ] positioning = data.get("positioning", {}) - assert positioning.get("short") == "Agent release readiness for tool-using AI agents" + assert positioning.get("short") == "Merge verdicts for AI-generated agent PRs" assert POSITIONING_PHRASE in positioning.get("answer", "") assert "Three Moons Lab" in positioning.get("answer", "") - assert "Tool-Use Readiness Reports" in positioning.get("answer", "") - assert "MCP, OpenAPI, SDK, workflow, or plugin" in positioning.get( + assert "deterministic merge verdict" in positioning.get("answer", "") + assert "Codex, Claude Code, Cursor" in positioning.get( "primary_use_case", "" ) assert positioning.get("not_for") == [ @@ -1283,10 +1301,27 @@ def test_well_known_seo_geo_positioning_fields_are_pinned(): "OpenAPI tool scanning", "OpenAI Agents SDK release gate", "GitHub Action for AI agents", - "tool surface scanning", + "AI-generated PR review", + "agent capability merge verdict", + "deterministic merge verdict", ): assert keyword in primary_keywords + commands = data.get("commands", {}) + assert commands.get("preview") == "agents-shipgate verify --preview --json" + assert "feedback export" in commands.get("feedback_export", "") + assert data.get("fixture_run") == "agents-shipgate fixture run 
ai_generated_refund_pr" + assert data.get("static_scan_fixture_run") == ( + "agents-shipgate fixture run support_refund_agent" + ) + assert data.get("verifier_read_order", [])[:5] == [ + "merge_verdict", + "can_merge_without_human", + "first_next_action", + "fix_task", + "capability_review.top_changes", + ] + recommended_topics = data.get("recommended_github_topics", []) for topic in ( "ai-agents",