From 0aecd8f132776385a2e9080062e759db77187ae7 Mon Sep 17 00:00:00 2001
From: NiveditJain <nivedit@exosphere.host>
Date: Sat, 9 May 2026 22:16:20 -0700
Subject: [PATCH 1/2] [luv-342] feat: enforce Pi Stop policies via
 before_agent_start handoff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pi's `agent_end` event has no Result type — by the time the handler fires,
Pi's agent loop has already exited, so a deny return cannot keep Pi running
the way Claude's exit-2-from-Stop does. Empirically: a user on Pi with
`require-commit-before-stop` enabled saw the deny reason propagate but Pi
exited anyway.

Fix: the `pi-extension/index.ts` shim now captures the deny `reason` from
`agent_end` into a per-`sessionId` in-memory map and re-injects it as a
`systemPrompt` suffix on the next `before_agent_start` (Pi v0.73.x — fires
after a new user prompt, before the agent loop runs). The map is one-shot
per drain and is cleared on every `session_shutdown` reason (including
`quit`), so a stale gate cannot leak into a fresh session in the same
process. `policy-evaluator.ts` emits the `MANDATORY ACTION REQUIRED from
failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" &&
eventType === "Stop"` (deny + instruct paths), mirroring the
Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the
existing flat `{permission, reason}` shape.

Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop.

Verified:
- Unit: 1603/1603 (incl. 7 new shim tests)
- E2E: 298/298 (incl. new dirty-repo Pi case)
- Live binary smoke in dirty repo emits the MANDATORY ACTION reason JSON

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 CHANGELOG.md                                  |   5 +
 CLAUDE.md                                     |  21 +--
 .../e2e/hooks/pi-integration.e2e.test.ts      |  33 +++++
 __tests__/hooks/pi-extension-shim.test.ts     | 134 +++++++++++++++++-
 pi-extension/index.ts                         |  85 +++++++++--
 src/hooks/policy-evaluator.ts                 |  36 ++++-
 6 files changed, 293 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4ac8941..ace71bf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## 0.0.10-beta.12 — 2026-05-10
+
+### Features
+- Pi: enforce `Stop` policies (`require-commit-before-stop`, `require-push-before-stop`, etc.) on the next user turn via `before_agent_start` injection. Pi's `AgentEndEvent` has no Result type — by the time it fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop can. Empirically observed: a user on Pi with `require-commit-before-stop` enabled saw the deny `reason` ("You have uncommitted changes …") propagate to Pi but Pi exited anyway. Fix: the `pi-extension/index.ts` shim captures any deny `reason` from `agent_end` into a per-`sessionId` in-memory map, then on the next `before_agent_start` (Pi v0.73.x — fires after the user submits a prompt, before the agent loop runs) returns `{systemPrompt: <event.systemPrompt> + "\n\n" + reason}` so the LLM sees a `MANDATORY ACTION REQUIRED` directive at the top of its next turn. The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session started in the same Pi process. `policy-evaluator.ts` now emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (both the deny and instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop (kill the agent and the gate is missed). New shim unit tests cover capture/drain/one-shot/`session_shutdown`-clear/no-pending-noop/missing-systemPrompt and a new e2e test pins the binary's stdout shape (`{permission:"deny", reason:/MANDATORY ACTION REQUIRED.*require-commit-before-stop.*uncommitted changes/}`) for `agent_end` in a dirty repo (#PR).
+
 ## 0.0.10-beta.11 — 2026-05-10
 
 ### Fixes
diff --git a/CLAUDE.md b/CLAUDE.md
index 95872b29..4bcb7d96 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -218,18 +218,19 @@ pi-extension. Same self-reference caveat applies — do **not** install the
 standard `npx` form from inside this repo.
 
 **Pi limitations vs. Claude semantics** (verified against pi-coding-agent
-v0.72.1 d.ts; the `pi-extension/` shim subscribes to 7 events but Pi's API
+v0.73.1 d.ts; the `pi-extension/` shim subscribes to 8 events but Pi's API
 caps what each handler can do):
 
-| Pi event           | → Claude event   | Veto / mutate? | Notes |
-|--------------------|------------------|----------------|-------|
-| `tool_call`        | PreToolUse       | ✅ block      | Full deny support via `{block, reason}`. |
-| `user_bash`        | PreToolUse       | ✅ block      | Full deny support. |
-| `input`            | UserPromptSubmit | ✅ block      | Full deny support. |
-| `session_start`    | SessionStart     | observation   | No return-value effect on Pi. |
-| `tool_result`      | PostToolUse      | observation   | `ToolResultEventResult` exposes `{content, details, isError}` for mutation but no `block`. PostToolUse is observation/sanitize anyway, matching Claude semantics. |
-| `agent_end`        | Stop             | observation   | Pi's agent loop has already exited; we cannot keep Pi running the way Claude's exit-2-from-Stop can. `require-*-before-stop` policies still RUN — their findings land in the activity store + stderr — but the stop is not vetoed. |
-| `session_shutdown` | SessionEnd       | observation   | Symmetry only. |
+| Pi event             | → Claude event   | Veto / mutate?  | Notes |
+|----------------------|------------------|-----------------|-------|
+| `tool_call`          | PreToolUse       | ✅ block        | Full deny support via `{block, reason}`. |
+| `user_bash`          | PreToolUse       | ✅ block        | Full deny support. |
+| `input`              | UserPromptSubmit | ✅ block        | Full deny support. |
+| `session_start`      | SessionStart     | observation     | No return-value effect on Pi. |
+| `tool_result`        | PostToolUse      | observation     | `ToolResultEventResult` exposes `{content, details, isError}` for mutation but no `block`. PostToolUse is observation/sanitize anyway, matching Claude semantics. |
+| `agent_end`          | Stop             | shifted (next turn) | Pi's `AgentEndEvent` has no Result type — we cannot retry the same loop the way Claude's exit-2-from-Stop can. The shim captures any deny `reason` and stashes it keyed by sessionId for the next `before_agent_start` handler to drain. The 5 `require-*-before-stop` builtins thus enforce by gating the NEXT user turn's system prompt. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop. |
+| `before_agent_start` | (Pi-only handoff) | systemPrompt   | Drains any pending Stop deny captured at `agent_end`, returning `{systemPrompt: <event.systemPrompt> + "\n\n" + reason}` so the LLM sees the MANDATORY ACTION directive before its next turn. Multiple extensions chain. No injection when no block is pending. |
+| `session_shutdown`   | SessionEnd       | observation     | Symmetry only. Also clears any pending stop-block for the session id (every reason, not just `new`/`resume`/`fork`). |
 
 **Instruct (`additionalContext`) on Pi `tool_call`** — Pi's
 `ToolCallEventResult` shape is `{block?, reason?}` only; there's no
diff --git a/__tests__/e2e/hooks/pi-integration.e2e.test.ts b/__tests__/e2e/hooks/pi-integration.e2e.test.ts
index 2ee54d9a..b0c64de9 100644
--- a/__tests__/e2e/hooks/pi-integration.e2e.test.ts
+++ b/__tests__/e2e/hooks/pi-integration.e2e.test.ts
@@ -208,6 +208,39 @@ describe("E2E: Pi integration — hook protocol (handler-only)", () => {
     }
   });
 
+  it("agent_end with require-commit-before-stop in a dirty repo emits MANDATORY ACTION reason", () => {
+    const env = createPiEnv();
+    try {
+      // Make env.cwd a git repo with an uncommitted file so
+      // require-commit-before-stop returns deny. This is the bridge to the
+      // shim-side handoff: the binary's stdout MUST be a Pi-flat
+      // `{permission:"deny", reason}` payload whose reason carries the
+      // "MANDATORY ACTION REQUIRED" wrapper. The shim captures that
+      // reason on agent_end and re-injects it via before_agent_start (the
+      // in-process map handoff is covered by the shim unit tests).
+      execSync("git init -q && git config user.email t@t && git config user.name t", { cwd: env.cwd });
+      writeFileSync(resolve(env.cwd, "dirty.txt"), "uncommitted\n");
+      writeConfig(env.cwd, ["require-commit-before-stop"]);
+      const result = runHook(
+        "agent_end",
+        PiPayloads.agentEnd(env.cwd),
+        { homeDir: env.home, cli: "pi" },
+      );
+      // Stop on Pi uses the MANDATORY ACTION wrapping (not the generic
+      // `Blocked <displayTool> by failproofai because: …` wording that
+      // assertPiDeny matches), so we inline the deny shape checks here.
+      expect(result.exitCode).toBe(0);
+      expect(result.parsed?.permission).toBe("deny");
+      expect(result.parsed?.hookSpecificOutput).toBeUndefined();
+      const reason = String(result.parsed?.reason ?? "");
+      expect(reason).toMatch(/MANDATORY ACTION REQUIRED/);
+      expect(reason).toMatch(/require-commit-before-stop/);
+      expect(reason).toMatch(/uncommitted changes/i);
+    } finally {
+      env.cleanup();
+    }
+  });
+
   it("agent-settings guard: Bash read of .pi/settings.json is denied", () => {
     const env = createPiEnv();
     try {
diff --git a/__tests__/hooks/pi-extension-shim.test.ts b/__tests__/hooks/pi-extension-shim.test.ts
index b73656c8..1f4a4d33 100644
--- a/__tests__/hooks/pi-extension-shim.test.ts
+++ b/__tests__/hooks/pi-extension-shim.test.ts
@@ -24,10 +24,23 @@ interface PiExtensionApi {
 
 const captured: CapturedCall[] = [];
 
+/** Per-event stdout reply queue for the spawnSync mock. Tests that need
+ *  to simulate a binary deny set `mockSpawnReplyByEvent[<eventName>]`
+ *  to a JSON string before invoking the matching handler. Any event not
+ *  in the map gets the default empty stdout. */
+const mockSpawnReplyByEvent: Record<string, string | undefined> = {};
+
+function eventNameFromArgs(args: string[]): string | undefined {
+  const i = args.indexOf("--hook");
+  return i >= 0 ? args[i + 1] : undefined;
+}
+
 vi.mock("node:child_process", () => ({
   spawnSync: (_cmd: string, args: string[], opts: { input?: string }) => {
     captured.push({ args: args ?? [], payload: JSON.parse(opts?.input ?? "{}") });
-    return { pid: 0, output: [], status: 0, signal: null, stderr: "", stdout: "" };
+    const evt = eventNameFromArgs(args ?? []);
+    const stdout = (evt && mockSpawnReplyByEvent[evt]) ?? "";
+    return { pid: 0, output: [], status: 0, signal: null, stderr: "", stdout };
   },
 }));
 
@@ -43,6 +56,7 @@ describe("pi-extension shim — sessionId resolution via on-disk discovery", ()
 
   beforeEach(async () => {
     captured.length = 0;
+    for (const k of Object.keys(mockSpawnReplyByEvent)) delete mockSpawnReplyByEvent[k];
     handlers = {};
     piRoot = mkdtempSync(join(tmpdir(), "pi-shim-test-"));
     originalEnv = process.env.PI_SESSIONS_DIR;
@@ -240,4 +254,122 @@ describe("pi-extension shim — sessionId resolution via on-disk discovery", ()
   });
 });
 
+/**
+ * Pi cannot veto `agent_end` directly (Pi's AgentEndEvent has no Result type).
+ * The shim captures any deny reason and re-injects it as a `systemPrompt`
+ * suffix on the next `before_agent_start`. These tests cover that handoff.
+ */
+describe("pi-extension shim — agent_end → before_agent_start stop-block handoff", () => {
+  let handlers: Record<string, (event: unknown) => unknown> = {};
+  let piRoot: string;
+  let originalEnv: string | undefined;
+  const SID = "ffffffff-ffff-ffff-ffff-ffffffffffff";
+
+  beforeEach(async () => {
+    captured.length = 0;
+    for (const k of Object.keys(mockSpawnReplyByEvent)) delete mockSpawnReplyByEvent[k];
+    handlers = {};
+    piRoot = mkdtempSync(join(tmpdir(), "pi-shim-handoff-"));
+    originalEnv = process.env.PI_SESSIONS_DIR;
+    process.env.PI_SESSIONS_DIR = piRoot;
+    // Seed a transcript so resolveSessionId returns a stable id.
+    const dir = join(piRoot, piEncodeCwd("/proj"));
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, `2026-05-09T00-00-00-000Z_${SID}.jsonl`), "{}\n");
+    vi.resetModules();
+    const mod = await import("../../pi-extension/index");
+    mod.default({ on: (name, fn) => { handlers[name] = fn; } });
+  });
+
+  afterEach(() => {
+    if (originalEnv === undefined) delete process.env.PI_SESSIONS_DIR;
+    else process.env.PI_SESSIONS_DIR = originalEnv;
+    rmSync(piRoot, { recursive: true, force: true });
+  });
+
+  it("agent_end deny is captured and drained on next before_agent_start as a systemPrompt suffix", () => {
+    mockSpawnReplyByEvent["agent_end"] = JSON.stringify({
+      permission: "deny",
+      reason: "MANDATORY ACTION REQUIRED from failproofai (policy: require-commit-before-stop): commit now.",
+    });
+    handlers.agent_end({ type: "agent_end", cwd: "/proj" });
+    // No reply value from agent_end (Pi cannot veto stop).
+    const result = handlers.before_agent_start({
+      type: "before_agent_start",
+      prompt: "next prompt",
+      systemPrompt: "BASE",
+      cwd: "/proj",
+    }) as { systemPrompt?: string } | undefined;
+    expect(result?.systemPrompt).toBe(
+      "BASE\n\nMANDATORY ACTION REQUIRED from failproofai (policy: require-commit-before-stop): commit now.",
+    );
+  });
+
+  it("before_agent_start with no pending block returns undefined", () => {
+    const result = handlers.before_agent_start({
+      type: "before_agent_start",
+      prompt: "p",
+      systemPrompt: "BASE",
+      cwd: "/proj",
+    });
+    expect(result).toBeUndefined();
+  });
+
+  it("the stop-block is one-shot: a second before_agent_start in the same session does not re-fire", () => {
+    mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ permission: "deny", reason: "X" });
+    handlers.agent_end({ type: "agent_end", cwd: "/proj" });
+    const first = handlers.before_agent_start({ type: "before_agent_start", systemPrompt: "B", cwd: "/proj" }) as { systemPrompt?: string };
+    expect(first?.systemPrompt).toBe("B\n\nX");
+    const second = handlers.before_agent_start({ type: "before_agent_start", systemPrompt: "B", cwd: "/proj" });
+    expect(second).toBeUndefined();
+  });
+
+  it("session_shutdown clears the pending stop-block (quit reason too, not just new/resume/fork)", () => {
+    mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ permission: "deny", reason: "X" });
+    handlers.agent_end({ type: "agent_end", cwd: "/proj" });
+    handlers.session_shutdown({ type: "session_shutdown", reason: "quit", cwd: "/proj" });
+    // Even though `quit` retains the cached sessionId, the pending block must
+    // be dropped so a future before_agent_start (e.g. in the next session
+    // started in this process) doesn't inherit a stale gate.
+    const result = handlers.before_agent_start({
+      type: "before_agent_start",
+      systemPrompt: "B",
+      cwd: "/proj",
+    });
+    expect(result).toBeUndefined();
+  });
+
+  it("agent_end with allow stdout (empty reason) does NOT set a pending block", () => {
+    // Default mock returns empty stdout → callPolicy returns {block:false}.
+    handlers.agent_end({ type: "agent_end", cwd: "/proj" });
+    const result = handlers.before_agent_start({
+      type: "before_agent_start",
+      systemPrompt: "B",
+      cwd: "/proj",
+    });
+    expect(result).toBeUndefined();
+  });
+
+  it("before_agent_start without a resolvable sessionId is a no-op", () => {
+    // Use a cwd that has no on-disk transcript — sessionId discovery returns
+    // undefined and the handler must early-return without throwing.
+    const result = handlers.before_agent_start({
+      type: "before_agent_start",
+      systemPrompt: "B",
+      cwd: "/no-such-cwd",
+    });
+    expect(result).toBeUndefined();
+  });
+
+  it("before_agent_start with no systemPrompt in the event still injects (uses empty base)", () => {
+    mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ permission: "deny", reason: "Y" });
+    handlers.agent_end({ type: "agent_end", cwd: "/proj" });
+    const result = handlers.before_agent_start({
+      type: "before_agent_start",
+      cwd: "/proj",
+    }) as { systemPrompt?: string };
+    expect(result?.systemPrompt).toBe("\n\nY");
+  });
+});
+
 import { afterEach } from "vitest";
diff --git a/pi-extension/index.ts b/pi-extension/index.ts
index df4fc013..ca50be12 100644
--- a/pi-extension/index.ts
+++ b/pi-extension/index.ts
@@ -231,6 +231,24 @@ function discoverPiSessionId(cwd: string): string | undefined {
  *  across multiple workspace roots) can't cross-attribute. Cleared on
  *  session_shutdown reasons `new`/`resume`/`fork` (Pi reuses the process). */
 const cachedSessionIdByCwd = new Map<string, string>();
+
+/** Pending Stop-policy deny reason from agent_end, keyed by sessionId.
+ *  Drained by before_agent_start on the next user turn in the same Pi
+ *  process. Cleared on every session_shutdown.
+ *
+ *  Why this exists: Pi's agent_end has no Result type — the agent loop
+ *  has already exited when it fires, so a deny return cannot keep Pi
+ *  running the way Claude's exit-2-from-Stop does. The closest analog
+ *  is to capture the deny here and re-inject it as a MANDATORY ACTION
+ *  system-prompt addition on the NEXT before_agent_start, which fires
+ *  after the user submits a prompt but before the agent loop runs.
+ *  Best-effort: bounded by the Pi process lifetime — same bound Claude
+ *  has on exit-2-from-Stop (kill the agent and the gate is missed).
+ *
+ *  Why per-session not per-cwd: a Pi process can host multiple sessions
+ *  via /resume and /fork; per-cwd would cross-attribute a stale block
+ *  from a prior session into a fresh one. */
+const pendingStopBlockBySession = new Map<string, string>();
 function resolveSessionId(eventSessionId: string | undefined, cwd: string): string | undefined {
   if (eventSessionId) {
     cachedSessionIdByCwd.set(cwd, eventSessionId);
@@ -302,6 +320,17 @@ interface PiAgentEndEvent {
   sessionId?: string;
 }
 
+/** Pi v0.73.x before_agent_start event payload. Fires once per turn,
+ *  after the user submits a prompt but before the agent loop runs. */
+interface PiBeforeAgentStartEvent {
+  type?: string;
+  prompt?: string;
+  /** The fully assembled system prompt for this turn — we append to it. */
+  systemPrompt?: string;
+  cwd?: string;
+  sessionId?: string;
+}
+
 interface PiExtensionApi {
   on(event: string, handler: (event: unknown) => unknown): void;
 }
@@ -384,21 +413,51 @@ export default function failproofaiBridge(pi: PiExtensionApi) {
     return undefined;
   });
 
-  // agent_end → Stop. Observation-only on Pi: the agent loop has already
-  // exited when this fires, so a deny decision cannot keep Pi running the
-  // way Claude's exit-2-from-Stop can. We still forward so the 5
-  // require-*-before-stop builtins run and log their findings (visible in
-  // the dashboard's activity feed and stderr) — best-effort visibility.
+  // agent_end → Stop. Pi cannot veto agent_end (the agent loop has already
+  // exited when this fires — see the AgentEndEvent typedef in pi-coding-agent
+  // which has NO Result type). Instead we capture any deny reason and stash
+  // it keyed by sessionId for the next before_agent_start handler to drain.
+  // The 5 require-*-before-stop builtins thus enforce by gating the NEXT
+  // user turn's system prompt rather than by retrying the same loop. If the
+  // user kills Pi between turns, the gate is missed — same bound Claude has.
   pi.on("agent_end", (event: unknown): unknown => {
     const e = event as PiAgentEndEvent;
-    callPolicy("agent_end", {
-      session_id: resolveSessionId(e.sessionId, resolveCwd(e.cwd)),
-      cwd: resolveCwd(e.cwd),
+    const cwd = resolveCwd(e.cwd);
+    const sessionId = resolveSessionId(e.sessionId, cwd);
+    const decision = callPolicy("agent_end", {
+      session_id: sessionId,
+      cwd,
       hook_event_name: "Stop",
     });
+    if (decision.block && decision.reason && sessionId) {
+      pendingStopBlockBySession.set(sessionId, decision.reason);
+      debug(`agent_end deny stored for session=${sessionId}`);
+    }
     return undefined;
   });
 
+  // before_agent_start → drain any pending Stop-policy deny captured at
+  // agent_end. This is Pi's only first-class channel to influence the next
+  // turn before the LLM call: the result type accepts a `systemPrompt`
+  // replacement (chained across extensions) and an optional injected
+  // CustomMessage. We only return systemPrompt — sufficient for the LLM to
+  // see the MANDATORY ACTION directive immediately, and avoids polluting the
+  // visible conversation history with framework chrome. The reason text
+  // already carries the policy-attributed MANDATORY ACTION wording from
+  // policy-evaluator's Pi-Stop branch.
+  pi.on("before_agent_start", (event: unknown): unknown => {
+    const e = event as PiBeforeAgentStartEvent;
+    const cwd = resolveCwd(e.cwd);
+    const sessionId = resolveSessionId(e.sessionId, cwd);
+    if (!sessionId) return undefined;
+    const pending = pendingStopBlockBySession.get(sessionId);
+    if (!pending) return undefined;
+    pendingStopBlockBySession.delete(sessionId);
+    debug(`before_agent_start drains stop-block for session=${sessionId}`);
+    const base = e.systemPrompt ?? "";
+    return { systemPrompt: `${base}\n\n${pending}` };
+  });
+
   // session_shutdown → SessionEnd. Observation-only; emits a SessionEnd
   // record so per-session telemetry has a clean close. Reset the per-cwd
   // sessionId cache for shutdown reasons that mean "Pi is starting a new
@@ -414,9 +473,19 @@ export default function failproofaiBridge(pi: PiExtensionApi) {
       reason: e.reason,
       hook_event_name: "SessionEnd",
     });
+    // Capture sessionId BEFORE the cache reset so we delete the pending
+    // entry under the just-ending session's id. After resetSessionIdCache,
+    // a subsequent resolveSessionId would re-discover from disk and could
+    // bind to a different (stale) file — wrong key for the cleanup below.
+    const sessionId = resolveSessionId(e.sessionId, cwd);
     if (e.reason === "new" || e.reason === "resume" || e.reason === "fork") {
       resetSessionIdCache(cwd);
     }
+    // Drop any pending Stop-policy deny for this session on every shutdown
+    // reason — `quit` ends the session for good (don't leak the entry into
+    // GC); `new`/`resume`/`fork` start a different session in the same
+    // process and must not inherit the prior session's gate.
+    if (sessionId) pendingStopBlockBySession.delete(sessionId);
     return undefined;
   });
 }
diff --git a/src/hooks/policy-evaluator.ts b/src/hooks/policy-evaluator.ts
index 79d3694c..bb6f143f 100644
--- a/src/hooks/policy-evaluator.ts
+++ b/src/hooks/policy-evaluator.ts
@@ -184,8 +184,22 @@ export async function evaluatePolicies(
       // and translates `permission === "deny"` into a `{block: true, reason}`
       // return value from its `pi.on("tool_call", ...)` handler. Pi has no
       // event-specific decision wrappers, so all events flow through the
-      // same flat shape.
+      // same flat shape — except Stop, where we emit the MANDATORY ACTION
+      // wording so the shim can re-inject it as a system-prompt suffix on
+      // the next before_agent_start (Pi cannot veto agent_end directly).
+      // Mirrors the Cursor/Gemini/Copilot/OpenCode Stop branches above.
       if (session?.cli === "pi") {
+        if (eventType === "Stop") {
+          const reasonText = `MANDATORY ACTION REQUIRED from failproofai (policy: ${policy.name}): ${reason}\n\nYou MUST complete the above action NOW. Do NOT ask the user for confirmation — execute the required action, then attempt to finish your task again.`;
+          return {
+            exitCode: 0,
+            stdout: JSON.stringify({ permission: "deny", reason: reasonText }),
+            stderr: "",
+            policyName: policy.name,
+            reason,
+            decision: "deny",
+          };
+        }
         const response = {
           permission: "deny",
           reason: blockedMessage,
@@ -416,8 +430,26 @@ export async function evaluatePolicies(
     // Pi: instruct emits `{permission: "allow", reason}`. The shim won't
     // block (no `"deny"`); it surfaces `reason` to the user where possible
     // (Pi has no first-class `additional_context` channel in its tool-call
-    // return shape, so we log it).
+    // return shape, so we log it). Stop is the exception — we emit a
+    // `permission: "deny"` with the MANDATORY ACTION wording so the shim
+    // captures it for next-turn before_agent_start injection. Same handoff
+    // contract as the deny branch above.
     if (session?.cli === "pi") {
+      if (eventType === "Stop") {
+        const policyAttribution = policyNames.length === 1
+          ? `policy: ${policyNames[0]}`
+          : `policies: ${policyNames.join(", ")}`;
+        const reasonText = `MANDATORY ACTION REQUIRED from failproofai (${policyAttribution}): ${combined}\n\nYou MUST complete the above action(s) NOW. Do NOT ask the user for confirmation — execute the required action(s), then attempt to finish your task again.`;
+        return {
+          exitCode: 0,
+          stdout: JSON.stringify({ permission: "deny", reason: reasonText }),
+          stderr: "",
+          policyName: policyNames[0],
+          policyNames,
+          reason: combined,
+          decision: "instruct",
+        };
+      }
       const response = {
         permission: "allow",
         reason: `Instruction from failproofai: ${combined}`,

From ebe427dedce168a8b16c844496c3d4a0a3a056ca Mon Sep 17 00:00:00 2001
From: NiveditJain <nivedit@exosphere.host>
Date: Sat, 9 May 2026 22:25:01 -0700
Subject: [PATCH 2/2] [luv-342] test: pin Pi Stop deny + instruct payload
 shapes; fix changelog PR ref
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CodeRabbit follow-ups on PR #341:

1. Direct policy-evaluator coverage for the new Pi-Stop branches. The
   shim and e2e tests already exercise the agent_end → before_agent_start
   handoff end-to-end, but neither directly locks the JSON shape that
   policy-evaluator emits. Adds 4 tests in
   `__tests__/hooks/policy-evaluator.test.ts`:
   - Pi Stop + deny: pins `{permission:"deny", reason: /MANDATORY
     ACTION REQUIRED.*policy: exospherehost\/stop-blocker.*tests not
     run/}`.
   - Pi non-Stop deny: regression guard — PreToolUse on Pi must keep
     the legacy `{permission:"deny", reason:"Blocked Bash by failproofai
     because: …"}` shape (the Stop split must not leak into tool
     events).
   - Pi Stop + instruct: pins `{permission:"deny"}` (NOT the regular Pi
     instruct `{permission:"allow"}`) so the shim's agent_end handler
     captures the reason — silently switching back to "allow" here
     would be invisible to the shim and turn Stop instructs into a
     no-op.
   - Pi non-Stop instruct: regression guard — PreToolUse instruct on Pi
     must keep `{permission:"allow", reason: "Instruction from
     failproofai: …"}`.

2. CHANGELOG #PR placeholder → #341.

Verified: 1607/1607 unit tests pass (was 1603 + 4 new).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 CHANGELOG.md                             |   2 +-
 __tests__/hooks/policy-evaluator.test.ts | 106 +++++++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ace71bf3..c9bd960c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 ## 0.0.10-beta.12 — 2026-05-10
 
 ### Features
-- Pi: enforce `Stop` policies (`require-commit-before-stop`, `require-push-before-stop`, etc.) on the next user turn via `before_agent_start` injection. Pi's `AgentEndEvent` has no Result type — by the time it fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop can. Empirically observed: a user on Pi with `require-commit-before-stop` enabled saw the deny `reason` ("You have uncommitted changes …") propagate to Pi but Pi exited anyway. Fix: the `pi-extension/index.ts` shim captures any deny `reason` from `agent_end` into a per-`sessionId` in-memory map, then on the next `before_agent_start` (Pi v0.73.x — fires after the user submits a prompt, before the agent loop runs) returns `{systemPrompt: <event.systemPrompt> + "\n\n" + reason}` so the LLM sees a `MANDATORY ACTION REQUIRED` directive at the top of its next turn. The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session started in the same Pi process. `policy-evaluator.ts` now emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (both the deny and instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop (kill the agent and the gate is missed). New shim unit tests cover capture/drain/one-shot/`session_shutdown`-clear/no-pending-noop/missing-systemPrompt and a new e2e test pins the binary's stdout shape (`{permission:"deny", reason:/MANDATORY ACTION REQUIRED.*require-commit-before-stop.*uncommitted changes/}`) for `agent_end` in a dirty repo (#PR).
+- Pi: enforce `Stop` policies (`require-commit-before-stop`, `require-push-before-stop`, etc.) on the next user turn via `before_agent_start` injection. Pi's `AgentEndEvent` has no Result type — by the time it fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop can. Empirically observed: a user on Pi with `require-commit-before-stop` enabled saw the deny `reason` ("You have uncommitted changes …") propagate to Pi but Pi exited anyway. Fix: the `pi-extension/index.ts` shim captures any deny `reason` from `agent_end` into a per-`sessionId` in-memory map, then on the next `before_agent_start` (Pi v0.73.x — fires after the user submits a prompt, before the agent loop runs) returns `{systemPrompt: <event.systemPrompt> + "\n\n" + reason}` so the LLM sees a `MANDATORY ACTION REQUIRED` directive at the top of its next turn. The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session started in the same Pi process. `policy-evaluator.ts` now emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (both the deny and instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop (kill the agent and the gate is missed). New shim unit tests cover capture/drain/one-shot/`session_shutdown`-clear/no-pending-noop/missing-systemPrompt; new policy-evaluator unit tests pin the Pi-Stop deny + instruct payload shapes and verify non-Stop Pi events keep the legacy shape; a new e2e test pins the binary's stdout shape (`{permission:"deny", reason:/MANDATORY ACTION REQUIRED.*require-commit-before-stop.*uncommitted changes/}`) for `agent_end` in a dirty repo (#341).
 
 ## 0.0.10-beta.11 — 2026-05-10
 
diff --git a/__tests__/hooks/policy-evaluator.test.ts b/__tests__/hooks/policy-evaluator.test.ts
index 22f1376f..cd1e9d28 100644
--- a/__tests__/hooks/policy-evaluator.test.ts
+++ b/__tests__/hooks/policy-evaluator.test.ts
@@ -289,6 +289,55 @@ describe("hooks/policy-evaluator", () => {
     expect(parsed.hookSpecificOutput?.additionalContext).toContain("subagent verification pending");
   });
 
+  it("Pi Stop + instruct emits {permission:'deny', reason} JSON with MANDATORY ACTION wording", async () => {
+    // Pi cannot veto agent_end (the agent loop has already exited when it
+    // fires — Pi's AgentEndEvent has no Result type). Instead the
+    // pi-extension shim captures the deny reason and re-injects it as a
+    // systemPrompt suffix on the next before_agent_start. The instruct
+    // path must therefore use {permission: "deny"} (NOT the regular Pi
+    // instruct shape `{permission: "allow", reason}`) so the shim's
+    // agent_end handler captures the reason. Locks down the wording and
+    // shape so a regression here would silently weaken Pi Stop enforcement.
+    registerPolicy("verify", "desc", () => ({
+      decision: "instruct",
+      reason: "needs verification",
+    }), { events: ["Stop"] });
+
+    const result = await evaluatePolicies("Stop", {}, { cli: "pi" });
+    expect(result.exitCode).toBe(0);
+    expect(result.stderr).toBe("");
+    expect(result.decision).toBe("instruct");
+    const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string };
+    expect(parsed.permission).toBe("deny");
+    expect(parsed.reason).toContain("MANDATORY ACTION REQUIRED");
+    expect(parsed.reason).toContain("policy: exospherehost/verify");
+    expect(parsed.reason).toContain("needs verification");
+  });
+
+  it("Pi non-Stop + instruct keeps the existing flat {permission:'allow', reason} shape", async () => {
+    // Regression guard: only Stop events should flip to permission:"deny".
+    // PreToolUse / PostToolUse / UserPromptSubmit instruct verdicts on Pi
+    // must keep the prior `{permission: "allow", reason: "Instruction
+    // from failproofai: …"}` shape so the shim's tool_call handler does
+    // NOT block tool execution.
+    registerPolicy("note", "desc", () => ({
+      decision: "instruct",
+      reason: "fyi",
+    }), { events: ["PreToolUse"] });
+
+    const result = await evaluatePolicies(
+      "PreToolUse",
+      { tool_name: "Bash", tool_input: { command: "ls" } },
+      { cli: "pi" },
+    );
+    expect(result.exitCode).toBe(0);
+    expect(result.decision).toBe("instruct");
+    const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string };
+    expect(parsed.permission).toBe("allow");
+    expect(parsed.reason).toMatch(/^Instruction from failproofai: /);
+    expect(parsed.reason).toContain("fyi");
+  });
+
   it("Gemini Stop + instruct emits {decision:'block', reason} JSON on stdout (force-retry via AfterAgent)", async () => {
     // Confirms the existing cli==="gemini" Stop arm in the instruct path
     // emits the documented force-retry shape. Pairs with the deny-path
@@ -730,6 +779,63 @@ describe("hooks/policy-evaluator", () => {
       expect(parsed.hookSpecificOutput?.additionalContext).toBeUndefined();
     });
 
+    it("Pi Stop deny emits flat {permission:'deny', reason} JSON with MANDATORY ACTION wording", async () => {
+      // Pi's AgentEndEvent has no Result type — by the time agent_end fires,
+      // Pi's agent loop has already exited, so a deny return cannot keep
+      // Pi running the way Claude's exit-2-from-Stop does. The
+      // pi-extension shim instead captures the deny `reason` from
+      // agent_end and re-injects it as a systemPrompt suffix on the next
+      // before_agent_start. policy-evaluator.ts emits the flat
+      // `{permission:"deny", reason}` shape with the MANDATORY ACTION
+      // wrapper for `cli === "pi" && eventType === "Stop"` so the shim
+      // can capture the reason verbatim. This test pins both the shape
+      // and the wording — a regression here would silently weaken Pi
+      // Stop enforcement (the shim would still deny but the LLM would
+      // see the generic "Blocked stop by failproofai because: …"
+      // wording that's calibrated for tool-event denials, not Stop).
+      registerPolicy("stop-blocker", "desc", () => ({
+        decision: "deny",
+        reason: "tests not run",
+      }), { events: ["Stop"] });
+
+      const result = await evaluatePolicies("Stop", {}, { cli: "pi" });
+      expect(result.exitCode).toBe(0);
+      expect(result.stderr).toBe("");
+      const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string };
+      expect(parsed.permission).toBe("deny");
+      expect(parsed.reason).toContain("MANDATORY ACTION REQUIRED");
+      expect(parsed.reason).toContain("policy: exospherehost/stop-blocker");
+      expect(parsed.reason).toContain("tests not run");
+      expect(result.decision).toBe("deny");
+      expect(result.policyName).toBe("exospherehost/stop-blocker");
+      expect(result.reason).toBe("tests not run");
+    });
+
+    it("Pi non-Stop deny keeps the existing flat {permission:'deny', reason: blockedMessage} shape", async () => {
+      // Regression guard: the Pi-Stop branch is gated on
+      // `eventType === "Stop"` — every other event type must keep the
+      // pre-existing flat shape so PreToolUse/UserPromptSubmit/etc. denies
+      // continue to fire on Pi. Confirms the split (added in the
+      // before_agent_start handoff PR) didn't accidentally route non-Stop
+      // events through the MANDATORY ACTION wrapper.
+      registerPolicy("blocker", "desc", () => ({
+        decision: "deny",
+        reason: "sudo not allowed",
+      }), { events: ["PreToolUse"] });
+
+      const result = await evaluatePolicies(
+        "PreToolUse",
+        { tool_name: "Bash", tool_input: { command: "sudo ls" } },
+        { cli: "pi" },
+      );
+      expect(result.exitCode).toBe(0);
+      const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string };
+      expect(parsed.permission).toBe("deny");
+      expect(parsed.reason).not.toContain("MANDATORY ACTION REQUIRED");
+      expect(parsed.reason).toMatch(/^Blocked Bash by failproofai because: /);
+      expect(parsed.reason).toContain("sudo not allowed");
+    });
+
     it("Gemini Stop deny emits {decision:'block', reason} JSON on stdout (force-retry via AfterAgent)", async () => {
       // Per Gemini's hooks docs (https://geminicli.com/docs/hooks/), the
       // AfterAgent hook (canonical "Stop") force-retries when the hook