ThreeMoonsLab · pengfei-threemoonslab · Jun 1, 2026 · Jun 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+## 0.11.0 - 2026-05-31
+
 - **Verifier adoption-loop release prep.** Public docs and discovery metadata now
   lead with the verify-first adoption path, pinned `v0.11.0` snippets, verifier
   artifacts, merge verdicts, `fix_task`, and explicit Action merge-policy
@@ -10,8 +12,6 @@
   `agents-shipgate feedback export` command plus
   `docs/feedback-schema.v0.1.json` for redacted design-partner feedback loops.
 
-## 0.11.0 - 2026-05-31
-
 - **Verifier PR comment v2 + additive Action outputs.** The GitHub Action now
   defaults to the verifier workflow (`verify_mode: verify`) and the
   capability-review PR comment (`pr_comment_style: capability-review`) for the

diff --git a/samples/support_refund_agent/expected/packet.html b/samples/support_refund_agent/expected/packet.html
diff --git a/samples/support_refund_agent/expected/packet.json b/samples/support_refund_agent/expected/packet.json
@@ -1293,7 +1293,9 @@
       "6 active finding(s) came from heuristic provenance (keyword_heuristic=6, regex_heuristic=0); review the finding evidence before acting."
     ],
     "headline": "Agents Shipgate is an advisory tool: the deterministic merge gate for AI-generated agent capability changes, run as a local-first, static Tool-Use Readiness review. The packet below is derived from a scan; it does not, by itself, prove the following properties:",
-    "low_confidence_tools": [],
+    "low_confidence_tools": [
+      "send_email_preview"
+    ],
     "source_warnings": [
       "MCP source declares wildcard tool exposure"
     ],

diff --git a/samples/support_refund_agent/expected/packet.md b/samples/support_refund_agent/expected/packet.md
@@ -234,7 +234,7 @@ Agents Shipgate is an advisory tool: the deterministic merge gate for AI-generat
 
 - Source warnings:
   - MCP source declares wildcard tool exposure
-- Low-confidence tool extractions: none
+- Low-confidence tool extractions: `send\_email\_preview`
 - Suppressed findings in effect: none
 - Memory isolation is not modeled by the v0.1 manifest schema; no static evidence is available.
 - 6 active finding\(s\) came from heuristic provenance \(keyword\_heuristic=6, regex\_heuristic=0\); review the finding evidence before acting.
diff --git a/src/agents_shipgate/packet/builder.py b/src/agents_shipgate/packet/builder.py
@@ -1080,7 +1080,7 @@ def _build_not_proven(
 ) -> NotProvenSection:
     suppressed_ids = sorted(f.id for f in findings if f.suppressed and f.id)
     low_confidence_tools = sorted(
-        tool.name for tool in tools if tool.extraction_confidence == "low"
+        tool.name for tool in tools if tool.extraction_confidence != "high"
     )
     additional = [
         "Memory isolation is not modeled by the v0.1 manifest schema; "

diff --git a/tests/test_evidence_packet.py b/tests/test_evidence_packet.py
@@ -21,9 +21,11 @@
 import pytest
 from typer.testing import CliRunner
 
+from agents_shipgate.ci.release_decision import build_release_decision
 from agents_shipgate.cli.main import app
 from agents_shipgate.cli.scan import run_scan
 from agents_shipgate.core.disclaimers import HITL_RUNTIME_CONTROL_DISCLAIMER
+from agents_shipgate.core.domain import Tool
 from agents_shipgate.packet import (
     EvidencePacket,
     PacketSchemaError,
@@ -38,7 +40,12 @@
     PACKET_NON_PROOF_HEADLINE,
 )
 from agents_shipgate.packet.evidence_matrix import build_evidence_matrix
-from agents_shipgate.schemas.report import Finding
+from agents_shipgate.schemas.report import (
+    Finding,
+    ReadinessReport,
+    ReportSummary,
+    ToolSurfaceSummary,
+)
 
 SAMPLE_CONFIG = Path("samples/support_refund_agent/shipgate.yaml")
 EXPECTED_DIR = Path("samples/support_refund_agent/expected")
@@ -49,6 +56,78 @@
 GENERATED_AT = "2026-01-01T00:00:00+00:00"
 
 
+def _minimal_packet_with_not_proven(  # wrap `section` in a stub EvidencePacket
+    section,  # passed through as the packet's not_proven field
+    *,
+    low_confidence_tool_count: int = 0,  # non-zero selects insufficient_evidence
+) -> EvidencePacket:
+    from agents_shipgate.schemas.packet import (
+        ApprovalCoverageSection,
+        CapabilityIntentDiff,
+        DynamicScenariosSection,
+        HighRiskSurfaceSection,
+        HumanInTheLoopEvidence,
+        IdempotencyRiskSection,
+        MemoryIsolationStatus,
+        ReleaseDecisionSection,
+        ScopeCoverageSection,
+    )
+    from agents_shipgate.schemas.report import (
+        BaselineDelta,
+        EvidenceCoverageDecision,
+        FailPolicy,
+    )
+
+    decision = ReleaseDecisionSection(
+        decision="insufficient_evidence" if low_confidence_tool_count else "passed",
+        verdict="INSUFFICIENT EVIDENCE" if low_confidence_tool_count else "PASSED",
+        reason="Evidence coverage below threshold.",  # same text for both decisions
+        evidence_coverage=EvidenceCoverageDecision(
+            level="static",
+            human_review_recommended=low_confidence_tool_count > 0,
+            source_warning_count=0,
+            low_confidence_tool_count=low_confidence_tool_count,
+        ),
+        baseline_delta=BaselineDelta(enabled=False),
+        fail_policy=FailPolicy(  # advisory; would not fail CI, exit code 0
+            ci_mode="advisory",
+            fail_on=[],
+            new_findings_only=False,
+            would_fail_ci=False,
+            exit_code=0,
+        ),
+    )
+    return EvidencePacket(
+        generated_at=GENERATED_AT,
+        run_id="r",
+        project={"name": "p"},
+        agent={"name": "a"},
+        environment={"target": "local"},
+        release_decision=decision,
+        capability_intent=CapabilityIntentDiff(  # nothing declared or observed
+            status="not_declared",
+            declared_purpose=[],
+            prohibited_actions=[],
+            observed_tools=[],
+            rows=[],
+            divergence_findings=[],
+        ),
+        high_risk_surface=HighRiskSurfaceSection(  # zero tools
+            status="informational",
+            total_tools=0,
+            high_risk_count=0,
+            tools=[],
+        ),
+        approval_coverage=ApprovalCoverageSection(status="informational"),
+        idempotency_risk=IdempotencyRiskSection(status="informational"),
+        scope_coverage=ScopeCoverageSection(status="informational"),
+        memory_isolation=MemoryIsolationStatus(),
+        human_in_the_loop=HumanInTheLoopEvidence(status="not_declared"),
+        dynamic_scenarios=DynamicScenariosSection(status="informational"),
+        not_proven=section,  # caller-supplied section
+    )
+
+
 def _scan_with_packet(tmp_path: Path) -> tuple[Path, EvidencePacket]:
     """Run scan against the support_refund_agent fixture and return
     ``(out_dir, parsed_packet)``."""
@@ -182,6 +261,81 @@ def test_not_proven_residuals_include_non_static_provenance():
     assert "external policy packs" in residuals
 
 
+def test_not_proven_low_confidence_residuals_match_release_decision_count():
+    tools = [  # one tool per extraction_confidence value: high, medium, low
+        Tool(
+            id="high",
+            name="high_confidence_inventory",
+            source_type="mcp",
+            extraction_confidence="high",
+        ),
+        Tool(
+            id="medium",
+            name="medium_confidence_sdk",
+            source_type="sdk_function",
+            extraction_confidence="medium",
+        ),
+        Tool(
+            id="low",
+            name="low_confidence_sdk",
+            source_type="sdk_function",
+            extraction_confidence="low",
+        ),
+    ]
+
+    section = _build_not_proven([], source_warnings=[], tools=tools)  # no findings
+
+    assert section.low_confidence_tools == [  # sorted names; only "high" is left out
+        "low_confidence_sdk",
+        "medium_confidence_sdk",
+    ]
+    report = ReadinessReport(
+        run_id="r",
+        project={"name": "p"},
+        agent={"name": "a"},
+        environment={"target": "local"},
+        summary=ReportSummary(
+            status="human_review_recommended",
+            critical_count=0,
+            high_count=0,
+            medium_count=0,
+            human_review_recommended=True,
+            evidence_coverage="static",
+        ),
+        tool_surface=ToolSurfaceSummary(
+            total_tools=len(tools),
+            high_risk_tools=0,
+        ),
+        findings=[],
+        source_warnings=[],
+    )
+    decision = build_release_decision(  # built from the same `tools` list
+        report=report,
+        tools=tools,
+        ci_mode="advisory",
+        fail_on=None,
+        new_findings_only=False,
+    )
+
+    assert decision.evidence_coverage.low_confidence_tool_count == len(
+        section.low_confidence_tools
+    )
+
+    packet = _minimal_packet_with_not_proven(  # feed the computed count back in
+        section,
+        low_confidence_tool_count=decision.evidence_coverage.low_confidence_tool_count,
+    )
+    md = render_packet_markdown(packet)
+    html = render_packet_html(packet)
+
+    assert "Low-confidence tool extractions: none" not in md  # list is non-empty
+    assert "Low-confidence tool extractions: none" not in html
+    assert "`medium\\_confidence\\_sdk`" in md  # Markdown output escapes underscores
+    assert "<code>medium_confidence_sdk</code>" in html
+    assert "high_confidence_inventory" not in md  # the "high" tool is not listed
+    assert "high_confidence_inventory" not in html
+
+
 def test_evidence_matrix_uses_release_decision_only_for_blocking_and_review():
     payload = {
         "release_decision": {