Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Unreleased

## 0.11.0 - 2026-05-31

- **Verifier adoption-loop release prep.** Public docs and discovery metadata now
lead with the verify-first adoption path, pinned `v0.11.0` snippets, verifier
artifacts, merge verdicts, `fix_task`, and explicit Action merge-policy
Expand All @@ -10,8 +12,6 @@
`agents-shipgate feedback export` command plus
`docs/feedback-schema.v0.1.json` for redacted design-partner feedback loops.

## 0.11.0 - 2026-05-31

- **Verifier PR comment v2 + additive Action outputs.** The GitHub Action now
defaults to the verifier workflow (`verify_mode: verify`) and the
capability-review PR comment (`pr_comment_style: capability-review`) for the
Expand Down
2 changes: 1 addition & 1 deletion samples/support_refund_agent/expected/packet.html

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion samples/support_refund_agent/expected/packet.json
Original file line number Diff line number Diff line change
Expand Up @@ -1293,7 +1293,9 @@
"6 active finding(s) came from heuristic provenance (keyword_heuristic=6, regex_heuristic=0); review the finding evidence before acting."
],
"headline": "Agents Shipgate is an advisory tool: the deterministic merge gate for AI-generated agent capability changes, run as a local-first, static Tool-Use Readiness review. The packet below is derived from a scan; it does not, by itself, prove the following properties:",
"low_confidence_tools": [],
"low_confidence_tools": [
"send_email_preview"
],
"source_warnings": [
"MCP source declares wildcard tool exposure"
],
Expand Down
2 changes: 1 addition & 1 deletion samples/support_refund_agent/expected/packet.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ Agents Shipgate is an advisory tool: the deterministic merge gate for AI-generat

- Source warnings:
- MCP source declares wildcard tool exposure
- Low-confidence tool extractions: none
- Low-confidence tool extractions: `send\_email\_preview`
- Suppressed findings in effect: none
- Memory isolation is not modeled by the v0.1 manifest schema; no static evidence is available.
- 6 active finding\(s\) came from heuristic provenance \(keyword\_heuristic=6, regex\_heuristic=0\); review the finding evidence before acting.
2 changes: 1 addition & 1 deletion src/agents_shipgate/packet/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ def _build_not_proven(
) -> NotProvenSection:
suppressed_ids = sorted(f.id for f in findings if f.suppressed and f.id)
low_confidence_tools = sorted(
tool.name for tool in tools if tool.extraction_confidence == "low"
tool.name for tool in tools if tool.extraction_confidence != "high"
)
additional = [
"Memory isolation is not modeled by the v0.1 manifest schema; "
Expand Down
156 changes: 155 additions & 1 deletion tests/test_evidence_packet.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import pytest
from typer.testing import CliRunner

from agents_shipgate.ci.release_decision import build_release_decision
from agents_shipgate.cli.main import app
from agents_shipgate.cli.scan import run_scan
from agents_shipgate.core.disclaimers import HITL_RUNTIME_CONTROL_DISCLAIMER
from agents_shipgate.core.domain import Tool
from agents_shipgate.packet import (
EvidencePacket,
PacketSchemaError,
Expand All @@ -38,7 +40,12 @@
PACKET_NON_PROOF_HEADLINE,
)
from agents_shipgate.packet.evidence_matrix import build_evidence_matrix
from agents_shipgate.schemas.report import Finding
from agents_shipgate.schemas.report import (
Finding,
ReadinessReport,
ReportSummary,
ToolSurfaceSummary,
)

SAMPLE_CONFIG = Path("samples/support_refund_agent/shipgate.yaml")
EXPECTED_DIR = Path("samples/support_refund_agent/expected")
Expand All @@ -49,6 +56,78 @@
GENERATED_AT = "2026-01-01T00:00:00+00:00"


def _minimal_packet_with_not_proven(
    section,
    *,
    low_confidence_tool_count: int = 0,
) -> EvidencePacket:
    """Wrap ``section`` in an otherwise placeholder ``EvidencePacket``.

    Every packet section other than ``not_proven`` is populated with fixed
    stub values; ``not_proven`` is set to ``section`` unchanged.

    Args:
        section: Value stored as the packet's ``not_proven`` section.
        low_confidence_tool_count: Count recorded on the release decision's
            evidence coverage. A truthy count selects the
            ``insufficient_evidence`` decision; otherwise ``passed``.

    Returns:
        The assembled ``EvidencePacket``.
    """
    from agents_shipgate.schemas.packet import (
        ApprovalCoverageSection,
        CapabilityIntentDiff,
        DynamicScenariosSection,
        HighRiskSurfaceSection,
        HumanInTheLoopEvidence,
        IdempotencyRiskSection,
        MemoryIsolationStatus,
        ReleaseDecisionSection,
        ScopeCoverageSection,
    )
    from agents_shipgate.schemas.report import (
        BaselineDelta,
        EvidenceCoverageDecision,
        FailPolicy,
    )

    # The decision and its display verdict always move together.
    if low_confidence_tool_count:
        decision_value = "insufficient_evidence"
        verdict_label = "INSUFFICIENT EVIDENCE"
    else:
        decision_value = "passed"
        verdict_label = "PASSED"

    coverage = EvidenceCoverageDecision(
        level="static",
        human_review_recommended=low_confidence_tool_count > 0,
        source_warning_count=0,
        low_confidence_tool_count=low_confidence_tool_count,
    )
    baseline = BaselineDelta(enabled=False)
    policy = FailPolicy(
        ci_mode="advisory",
        fail_on=[],
        new_findings_only=False,
        would_fail_ci=False,
        exit_code=0,
    )
    release = ReleaseDecisionSection(
        decision=decision_value,
        verdict=verdict_label,
        reason="Evidence coverage below threshold.",
        evidence_coverage=coverage,
        baseline_delta=baseline,
        fail_policy=policy,
    )

    # Stub sections, built in the same order they are passed to the packet.
    intent = CapabilityIntentDiff(
        status="not_declared",
        declared_purpose=[],
        prohibited_actions=[],
        observed_tools=[],
        rows=[],
        divergence_findings=[],
    )
    surface = HighRiskSurfaceSection(
        status="informational",
        total_tools=0,
        high_risk_count=0,
        tools=[],
    )
    approval = ApprovalCoverageSection(status="informational")
    idempotency = IdempotencyRiskSection(status="informational")
    scope = ScopeCoverageSection(status="informational")
    memory = MemoryIsolationStatus()
    hitl = HumanInTheLoopEvidence(status="not_declared")
    scenarios = DynamicScenariosSection(status="informational")

    return EvidencePacket(
        generated_at=GENERATED_AT,
        run_id="r",
        project={"name": "p"},
        agent={"name": "a"},
        environment={"target": "local"},
        release_decision=release,
        capability_intent=intent,
        high_risk_surface=surface,
        approval_coverage=approval,
        idempotency_risk=idempotency,
        scope_coverage=scope,
        memory_isolation=memory,
        human_in_the_loop=hitl,
        dynamic_scenarios=scenarios,
        not_proven=section,
    )


def _scan_with_packet(tmp_path: Path) -> tuple[Path, EvidencePacket]:
"""Run scan against the support_refund_agent fixture and return
``(out_dir, parsed_packet)``."""
Expand Down Expand Up @@ -182,6 +261,81 @@ def test_not_proven_residuals_include_non_static_provenance():
assert "external policy packs" in residuals


def test_not_proven_low_confidence_residuals_match_release_decision_count():
    """Not-proven low-confidence tools agree with the release decision count.

    Given one tool at each of ``high``, ``medium`` and ``low`` extraction
    confidence, the not-proven section must list the medium and low tools,
    the release decision must report the same number of low-confidence
    tools, and both the markdown and HTML renderings must name the medium
    tool while omitting the high-confidence one.
    """
    # (id, name, source_type, extraction_confidence)
    tool_specs = [
        ("high", "high_confidence_inventory", "mcp", "high"),
        ("medium", "medium_confidence_sdk", "sdk_function", "medium"),
        ("low", "low_confidence_sdk", "sdk_function", "low"),
    ]
    tools = [
        Tool(
            id=tool_id,
            name=tool_name,
            source_type=source_type,
            extraction_confidence=confidence,
        )
        for tool_id, tool_name, source_type, confidence in tool_specs
    ]

    section = _build_not_proven([], source_warnings=[], tools=tools)

    expected_names = ["low_confidence_sdk", "medium_confidence_sdk"]
    assert section.low_confidence_tools == expected_names

    summary = ReportSummary(
        status="human_review_recommended",
        critical_count=0,
        high_count=0,
        medium_count=0,
        human_review_recommended=True,
        evidence_coverage="static",
    )
    surface = ToolSurfaceSummary(
        total_tools=len(tools),
        high_risk_tools=0,
    )
    report = ReadinessReport(
        run_id="r",
        project={"name": "p"},
        agent={"name": "a"},
        environment={"target": "local"},
        summary=summary,
        tool_surface=surface,
        findings=[],
        source_warnings=[],
    )
    decision = build_release_decision(
        report=report,
        tools=tools,
        ci_mode="advisory",
        fail_on=None,
        new_findings_only=False,
    )

    reported_count = decision.evidence_coverage.low_confidence_tool_count
    assert reported_count == len(section.low_confidence_tools)

    packet = _minimal_packet_with_not_proven(
        section,
        low_confidence_tool_count=reported_count,
    )
    md = render_packet_markdown(packet)
    html = render_packet_html(packet)

    # Neither rendering may claim there are no low-confidence extractions.
    for rendered in (md, html):
        assert "Low-confidence tool extractions: none" not in rendered

    # Markdown escapes underscores; HTML wraps the name in <code>.
    assert "`medium\\_confidence\\_sdk`" in md
    assert "<code>medium_confidence_sdk</code>" in html

    # The high-confidence tool must not appear in either rendering.
    for rendered in (md, html):
        assert "high_confidence_inventory" not in rendered


def test_evidence_matrix_uses_release_decision_only_for_blocking_and_review():
payload = {
"release_decision": {
Expand Down