From 0aecd8f132776385a2e9080062e759db77187ae7 Mon Sep 17 00:00:00 2001 From: NiveditJain Date: Sat, 9 May 2026 22:16:20 -0700 Subject: [PATCH 1/2] [luv-342] feat: enforce Pi Stop policies via before_agent_start handoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pi's `agent_end` event has no Result type — by the time the handler fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop does. Empirically: a user on Pi with `require-commit-before-stop` enabled saw the deny reason propagate but Pi exited anyway. Fix: the `pi-extension/index.ts` shim now captures the deny `reason` from `agent_end` into a per-`sessionId` in-memory map and re-injects it as a `systemPrompt` suffix on the next `before_agent_start` (Pi v0.73.x — fires after a new user prompt, before the agent loop runs). The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session in the same process. `policy-evaluator.ts` emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (deny + instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop. Verified: - Unit: 1603/1603 (incl. 7 new shim tests) - E2E: 298/298 (incl. new dirty-repo Pi case) - Live binary smoke in dirty repo emits the MANDATORY ACTION reason JSON Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 5 + CLAUDE.md | 21 +-- .../e2e/hooks/pi-integration.e2e.test.ts | 33 +++++ __tests__/hooks/pi-extension-shim.test.ts | 134 +++++++++++++++++- pi-extension/index.ts | 85 +++++++++-- src/hooks/policy-evaluator.ts | 36 ++++- 6 files changed, 293 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4ac8941..ace71bf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.0.10-beta.12 — 2026-05-10 + +### Features +- Pi: enforce `Stop` policies (`require-commit-before-stop`, `require-push-before-stop`, etc.) on the next user turn via `before_agent_start` injection. Pi's `AgentEndEvent` has no Result type — by the time it fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop can. Empirically observed: a user on Pi with `require-commit-before-stop` enabled saw the deny `reason` ("You have uncommitted changes …") propagate to Pi but Pi exited anyway. Fix: the `pi-extension/index.ts` shim captures any deny `reason` from `agent_end` into a per-`sessionId` in-memory map, then on the next `before_agent_start` (Pi v0.73.x — fires after the user submits a prompt, before the agent loop runs) returns `{systemPrompt: + "\n\n" + reason}` so the LLM sees a `MANDATORY ACTION REQUIRED` directive at the top of its next turn. The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session started in the same Pi process. `policy-evaluator.ts` now emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (both the deny and instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop (kill the agent and the gate is missed). New shim unit tests cover capture/drain/one-shot/`session_shutdown`-clear/no-pending-noop/missing-systemPrompt and a new e2e test pins the binary's stdout shape (`{permission:"deny", reason:/MANDATORY ACTION REQUIRED.*require-commit-before-stop.*uncommitted changes/}`) for `agent_end` in a dirty repo (#PR). + ## 0.0.10-beta.11 — 2026-05-10 ### Fixes diff --git a/CLAUDE.md b/CLAUDE.md index 95872b29..4bcb7d96 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -218,18 +218,19 @@ pi-extension. Same self-reference caveat applies — do **not** install the standard `npx` form from inside this repo. **Pi limitations vs. Claude semantics** (verified against pi-coding-agent -v0.72.1 d.ts; the `pi-extension/` shim subscribes to 7 events but Pi's API +v0.73.1 d.ts; the `pi-extension/` shim subscribes to 8 events but Pi's API caps what each handler can do): -| Pi event | → Claude event | Veto / mutate? | Notes | -|--------------------|------------------|----------------|-------| -| `tool_call` | PreToolUse | ✅ block | Full deny support via `{block, reason}`. | -| `user_bash` | PreToolUse | ✅ block | Full deny support. | -| `input` | UserPromptSubmit | ✅ block | Full deny support. | -| `session_start` | SessionStart | observation | No return-value effect on Pi. | -| `tool_result` | PostToolUse | observation | `ToolResultEventResult` exposes `{content, details, isError}` for mutation but no `block`. PostToolUse is observation/sanitize anyway, matching Claude semantics. | -| `agent_end` | Stop | observation | Pi's agent loop has already exited; we cannot keep Pi running the way Claude's exit-2-from-Stop can. `require-*-before-stop` policies still RUN — their findings land in the activity store + stderr — but the stop is not vetoed. | -| `session_shutdown` | SessionEnd | observation | Symmetry only. | +| Pi event | → Claude event | Veto / mutate? | Notes | +|----------------------|------------------|-----------------|-------| +| `tool_call` | PreToolUse | ✅ block | Full deny support via `{block, reason}`. | +| `user_bash` | PreToolUse | ✅ block | Full deny support. | +| `input` | UserPromptSubmit | ✅ block | Full deny support. | +| `session_start` | SessionStart | observation | No return-value effect on Pi. | +| `tool_result` | PostToolUse | observation | `ToolResultEventResult` exposes `{content, details, isError}` for mutation but no `block`. PostToolUse is observation/sanitize anyway, matching Claude semantics. | +| `agent_end` | Stop | shifted (next turn) | Pi's `AgentEndEvent` has no Result type — we cannot retry the same loop the way Claude's exit-2-from-Stop can. The shim captures any deny `reason` and stashes it keyed by sessionId for the next `before_agent_start` handler to drain. The 5 `require-*-before-stop` builtins thus enforce by gating the NEXT user turn's system prompt. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop. | +| `before_agent_start` | (Pi-only handoff) | systemPrompt | Drains any pending Stop deny captured at `agent_end`, returning `{systemPrompt: + "\n\n" + reason}` so the LLM sees the MANDATORY ACTION directive before its next turn. Multiple extensions chain. No injection when no block is pending. | +| `session_shutdown` | SessionEnd | observation | Symmetry only. Also clears any pending stop-block for the session id (every reason, not just `new`/`resume`/`fork`). | **Instruct (`additionalContext`) on Pi `tool_call`** — Pi's `ToolCallEventResult` shape is `{block?, reason?}` only; there's no diff --git a/__tests__/e2e/hooks/pi-integration.e2e.test.ts b/__tests__/e2e/hooks/pi-integration.e2e.test.ts index 2ee54d9a..b0c64de9 100644 --- a/__tests__/e2e/hooks/pi-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/pi-integration.e2e.test.ts @@ -208,6 +208,39 @@ describe("E2E: Pi integration — hook protocol (handler-only)", () => { } }); + it("agent_end with require-commit-before-stop in a dirty repo emits MANDATORY ACTION reason", () => { + const env = createPiEnv(); + try { + // Make env.cwd a git repo with an uncommitted file so + // require-commit-before-stop returns deny. This is the bridge to the + // shim-side handoff: the binary's stdout MUST be a Pi-flat + // `{permission:"deny", reason}` payload whose reason carries the + // "MANDATORY ACTION REQUIRED" wrapper. The shim captures that + // reason on agent_end and re-injects it via before_agent_start (the + // in-process map handoff is covered by the shim unit tests). + execSync("git init -q && git config user.email t@t && git config user.name t", { cwd: env.cwd }); + writeFileSync(resolve(env.cwd, "dirty.txt"), "uncommitted\n"); + writeConfig(env.cwd, ["require-commit-before-stop"]); + const result = runHook( + "agent_end", + PiPayloads.agentEnd(env.cwd), + { homeDir: env.home, cli: "pi" }, + ); + // Stop on Pi uses the MANDATORY ACTION wrapping (not the generic + // `Blocked by failproofai because: …` wording that + // assertPiDeny matches), so we inline the deny shape checks here. + expect(result.exitCode).toBe(0); + expect(result.parsed?.permission).toBe("deny"); + expect(result.parsed?.hookSpecificOutput).toBeUndefined(); + const reason = String(result.parsed?.reason ?? ""); + expect(reason).toMatch(/MANDATORY ACTION REQUIRED/); + expect(reason).toMatch(/require-commit-before-stop/); + expect(reason).toMatch(/uncommitted changes/i); + } finally { + env.cleanup(); + } + }); + it("agent-settings guard: Bash read of .pi/settings.json is denied", () => { const env = createPiEnv(); try { diff --git a/__tests__/hooks/pi-extension-shim.test.ts b/__tests__/hooks/pi-extension-shim.test.ts index b73656c8..1f4a4d33 100644 --- a/__tests__/hooks/pi-extension-shim.test.ts +++ b/__tests__/hooks/pi-extension-shim.test.ts @@ -24,10 +24,23 @@ interface PiExtensionApi { const captured: CapturedCall[] = []; +/** Per-event stdout reply queue for the spawnSync mock. Tests that need + * to simulate a binary deny set `mockSpawnReplyByEvent[]` + * to a JSON string before invoking the matching handler. Any event not + * in the map gets the default empty stdout. */ +const mockSpawnReplyByEvent: Record = {}; + +function eventNameFromArgs(args: string[]): string | undefined { + const i = args.indexOf("--hook"); + return i >= 0 ? args[i + 1] : undefined; +} + vi.mock("node:child_process", () => ({ spawnSync: (_cmd: string, args: string[], opts: { input?: string }) => { captured.push({ args: args ?? [], payload: JSON.parse(opts?.input ?? "{}") }); - return { pid: 0, output: [], status: 0, signal: null, stderr: "", stdout: "" }; + const evt = eventNameFromArgs(args ?? []); + const stdout = (evt && mockSpawnReplyByEvent[evt]) ?? ""; + return { pid: 0, output: [], status: 0, signal: null, stderr: "", stdout }; }, })); @@ -43,6 +56,7 @@ describe("pi-extension shim — sessionId resolution via on-disk discovery", () beforeEach(async () => { captured.length = 0; + for (const k of Object.keys(mockSpawnReplyByEvent)) delete mockSpawnReplyByEvent[k]; handlers = {}; piRoot = mkdtempSync(join(tmpdir(), "pi-shim-test-")); originalEnv = process.env.PI_SESSIONS_DIR; @@ -240,4 +254,122 @@ describe("pi-extension shim — sessionId resolution via on-disk discovery", () }); }); +/** + * Pi cannot veto `agent_end` directly (Pi's AgentEndEvent has no Result type). + * The shim captures any deny reason and re-injects it as a `systemPrompt` + * suffix on the next `before_agent_start`. These tests cover that handoff. + */ +describe("pi-extension shim — agent_end → before_agent_start stop-block handoff", () => { + let handlers: Record unknown> = {}; + let piRoot: string; + let originalEnv: string | undefined; + const SID = "ffffffff-ffff-ffff-ffff-ffffffffffff"; + + beforeEach(async () => { + captured.length = 0; + for (const k of Object.keys(mockSpawnReplyByEvent)) delete mockSpawnReplyByEvent[k]; + handlers = {}; + piRoot = mkdtempSync(join(tmpdir(), "pi-shim-handoff-")); + originalEnv = process.env.PI_SESSIONS_DIR; + process.env.PI_SESSIONS_DIR = piRoot; + // Seed a transcript so resolveSessionId returns a stable id. + const dir = join(piRoot, piEncodeCwd("/proj")); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `2026-05-09T00-00-00-000Z_${SID}.jsonl`), "{}\n"); + vi.resetModules(); + const mod = await import("../../pi-extension/index"); + mod.default({ on: (name, fn) => { handlers[name] = fn; } }); + }); + + afterEach(() => { + if (originalEnv === undefined) delete process.env.PI_SESSIONS_DIR; + else process.env.PI_SESSIONS_DIR = originalEnv; + rmSync(piRoot, { recursive: true, force: true }); + }); + + it("agent_end deny is captured and drained on next before_agent_start as a systemPrompt suffix", () => { + mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ + permission: "deny", + reason: "MANDATORY ACTION REQUIRED from failproofai (policy: require-commit-before-stop): commit now.", + }); + handlers.agent_end({ type: "agent_end", cwd: "/proj" }); + // No reply value from agent_end (Pi cannot veto stop). + const result = handlers.before_agent_start({ + type: "before_agent_start", + prompt: "next prompt", + systemPrompt: "BASE", + cwd: "/proj", + }) as { systemPrompt?: string } | undefined; + expect(result?.systemPrompt).toBe( + "BASE\n\nMANDATORY ACTION REQUIRED from failproofai (policy: require-commit-before-stop): commit now.", + ); + }); + + it("before_agent_start with no pending block returns undefined", () => { + const result = handlers.before_agent_start({ + type: "before_agent_start", + prompt: "p", + systemPrompt: "BASE", + cwd: "/proj", + }); + expect(result).toBeUndefined(); + }); + + it("the stop-block is one-shot: a second before_agent_start in the same session does not re-fire", () => { + mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ permission: "deny", reason: "X" }); + handlers.agent_end({ type: "agent_end", cwd: "/proj" }); + const first = handlers.before_agent_start({ type: "before_agent_start", systemPrompt: "B", cwd: "/proj" }) as { systemPrompt?: string }; + expect(first?.systemPrompt).toBe("B\n\nX"); + const second = handlers.before_agent_start({ type: "before_agent_start", systemPrompt: "B", cwd: "/proj" }); + expect(second).toBeUndefined(); + }); + + it("session_shutdown clears the pending stop-block (quit reason too, not just new/resume/fork)", () => { + mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ permission: "deny", reason: "X" }); + handlers.agent_end({ type: "agent_end", cwd: "/proj" }); + handlers.session_shutdown({ type: "session_shutdown", reason: "quit", cwd: "/proj" }); + // Even though `quit` retains the cached sessionId, the pending block must + // be dropped so a future before_agent_start (e.g. in the next session + // started in this process) doesn't inherit a stale gate. + const result = handlers.before_agent_start({ + type: "before_agent_start", + systemPrompt: "B", + cwd: "/proj", + }); + expect(result).toBeUndefined(); + }); + + it("agent_end with allow stdout (empty reason) does NOT set a pending block", () => { + // Default mock returns empty stdout → callPolicy returns {block:false}. + handlers.agent_end({ type: "agent_end", cwd: "/proj" }); + const result = handlers.before_agent_start({ + type: "before_agent_start", + systemPrompt: "B", + cwd: "/proj", + }); + expect(result).toBeUndefined(); + }); + + it("before_agent_start without a resolvable sessionId is a no-op", () => { + // Use a cwd that has no on-disk transcript — sessionId discovery returns + // undefined and the handler must early-return without throwing. + const result = handlers.before_agent_start({ + type: "before_agent_start", + systemPrompt: "B", + cwd: "/no-such-cwd", + }); + expect(result).toBeUndefined(); + }); + + it("before_agent_start with no systemPrompt in the event still injects (uses empty base)", () => { + mockSpawnReplyByEvent["agent_end"] = JSON.stringify({ permission: "deny", reason: "Y" }); + handlers.agent_end({ type: "agent_end", cwd: "/proj" }); + const result = handlers.before_agent_start({ + type: "before_agent_start", + cwd: "/proj", + }) as { systemPrompt?: string }; + expect(result?.systemPrompt).toBe("\n\nY"); + }); +}); + import { afterEach } from "vitest"; diff --git a/pi-extension/index.ts b/pi-extension/index.ts index df4fc013..ca50be12 100644 --- a/pi-extension/index.ts +++ b/pi-extension/index.ts @@ -231,6 +231,24 @@ function discoverPiSessionId(cwd: string): string | undefined { * across multiple workspace roots) can't cross-attribute. Cleared on * session_shutdown reasons `new`/`resume`/`fork` (Pi reuses the process). */ const cachedSessionIdByCwd = new Map(); + +/** Pending Stop-policy deny reason from agent_end, keyed by sessionId. + * Drained by before_agent_start on the next user turn in the same Pi + * process. Cleared on every session_shutdown. + * + * Why this exists: Pi's agent_end has no Result type — the agent loop + * has already exited when it fires, so a deny return cannot keep Pi + * running the way Claude's exit-2-from-Stop does. The closest analog + * is to capture the deny here and re-inject it as a MANDATORY ACTION + * system-prompt addition on the NEXT before_agent_start, which fires + * after the user submits a prompt but before the agent loop runs. + * Best-effort: bounded by the Pi process lifetime — same bound Claude + * has on exit-2-from-Stop (kill the agent and the gate is missed). + * + * Why per-session not per-cwd: a Pi process can host multiple sessions + * via /resume and /fork; per-cwd would cross-attribute a stale block + * from a prior session into a fresh one. */ +const pendingStopBlockBySession = new Map(); function resolveSessionId(eventSessionId: string | undefined, cwd: string): string | undefined { if (eventSessionId) { cachedSessionIdByCwd.set(cwd, eventSessionId); @@ -302,6 +320,17 @@ interface PiAgentEndEvent { sessionId?: string; } +/** Pi v0.73.x before_agent_start event payload. Fires once per turn, + * after the user submits a prompt but before the agent loop runs. */ +interface PiBeforeAgentStartEvent { + type?: string; + prompt?: string; + /** The fully assembled system prompt for this turn — we append to it. */ + systemPrompt?: string; + cwd?: string; + sessionId?: string; +} + interface PiExtensionApi { on(event: string, handler: (event: unknown) => unknown): void; } @@ -384,21 +413,51 @@ export default function failproofaiBridge(pi: PiExtensionApi) { return undefined; }); - // agent_end → Stop. Observation-only on Pi: the agent loop has already - // exited when this fires, so a deny decision cannot keep Pi running the - // way Claude's exit-2-from-Stop can. We still forward so the 5 - // require-*-before-stop builtins run and log their findings (visible in - // the dashboard's activity feed and stderr) — best-effort visibility. + // agent_end → Stop. Pi cannot veto agent_end (the agent loop has already + // exited when this fires — see the AgentEndEvent typedef in pi-coding-agent + // which has NO Result type). Instead we capture any deny reason and stash + // it keyed by sessionId for the next before_agent_start handler to drain. + // The 5 require-*-before-stop builtins thus enforce by gating the NEXT + // user turn's system prompt rather than by retrying the same loop. If the + // user kills Pi between turns, the gate is missed — same bound Claude has. pi.on("agent_end", (event: unknown): unknown => { const e = event as PiAgentEndEvent; - callPolicy("agent_end", { - session_id: resolveSessionId(e.sessionId, resolveCwd(e.cwd)), - cwd: resolveCwd(e.cwd), + const cwd = resolveCwd(e.cwd); + const sessionId = resolveSessionId(e.sessionId, cwd); + const decision = callPolicy("agent_end", { + session_id: sessionId, + cwd, hook_event_name: "Stop", }); + if (decision.block && decision.reason && sessionId) { + pendingStopBlockBySession.set(sessionId, decision.reason); + debug(`agent_end deny stored for session=${sessionId}`); + } return undefined; }); + // before_agent_start → drain any pending Stop-policy deny captured at + // agent_end. This is Pi's only first-class channel to influence the next + // turn before the LLM call: the result type accepts a `systemPrompt` + // replacement (chained across extensions) and an optional injected + // CustomMessage. We only return systemPrompt — sufficient for the LLM to + // see the MANDATORY ACTION directive immediately, and avoids polluting the + // visible conversation history with framework chrome. The reason text + // already carries the policy-attributed MANDATORY ACTION wording from + // policy-evaluator's Pi-Stop branch. + pi.on("before_agent_start", (event: unknown): unknown => { + const e = event as PiBeforeAgentStartEvent; + const cwd = resolveCwd(e.cwd); + const sessionId = resolveSessionId(e.sessionId, cwd); + if (!sessionId) return undefined; + const pending = pendingStopBlockBySession.get(sessionId); + if (!pending) return undefined; + pendingStopBlockBySession.delete(sessionId); + debug(`before_agent_start drains stop-block for session=${sessionId}`); + const base = e.systemPrompt ?? ""; + return { systemPrompt: `${base}\n\n${pending}` }; + }); + // session_shutdown → SessionEnd. Observation-only; emits a SessionEnd // record so per-session telemetry has a clean close. Reset the per-cwd // sessionId cache for shutdown reasons that mean "Pi is starting a new @@ -414,9 +473,19 @@ export default function failproofaiBridge(pi: PiExtensionApi) { reason: e.reason, hook_event_name: "SessionEnd", }); + // Capture sessionId BEFORE the cache reset so we delete the pending + // entry under the just-ending session's id. After resetSessionIdCache, + // a subsequent resolveSessionId would re-discover from disk and could + // bind to a different (stale) file — wrong key for the cleanup below. + const sessionId = resolveSessionId(e.sessionId, cwd); if (e.reason === "new" || e.reason === "resume" || e.reason === "fork") { resetSessionIdCache(cwd); } + // Drop any pending Stop-policy deny for this session on every shutdown + // reason — `quit` ends the session for good (don't leak the entry into + // GC); `new`/`resume`/`fork` start a different session in the same + // process and must not inherit the prior session's gate. + if (sessionId) pendingStopBlockBySession.delete(sessionId); return undefined; }); } diff --git a/src/hooks/policy-evaluator.ts b/src/hooks/policy-evaluator.ts index 79d3694c..bb6f143f 100644 --- a/src/hooks/policy-evaluator.ts +++ b/src/hooks/policy-evaluator.ts @@ -184,8 +184,22 @@ export async function evaluatePolicies( // and translates `permission === "deny"` into a `{block: true, reason}` // return value from its `pi.on("tool_call", ...)` handler. Pi has no // event-specific decision wrappers, so all events flow through the - // same flat shape. + // same flat shape — except Stop, where we emit the MANDATORY ACTION + // wording so the shim can re-inject it as a system-prompt suffix on + // the next before_agent_start (Pi cannot veto agent_end directly). + // Mirrors the Cursor/Gemini/Copilot/OpenCode Stop branches above. if (session?.cli === "pi") { + if (eventType === "Stop") { + const reasonText = `MANDATORY ACTION REQUIRED from failproofai (policy: ${policy.name}): ${reason}\n\nYou MUST complete the above action NOW. Do NOT ask the user for confirmation — execute the required action, then attempt to finish your task again.`; + return { + exitCode: 0, + stdout: JSON.stringify({ permission: "deny", reason: reasonText }), + stderr: "", + policyName: policy.name, + reason, + decision: "deny", + }; + } const response = { permission: "deny", reason: blockedMessage, @@ -416,8 +430,26 @@ export async function evaluatePolicies( // Pi: instruct emits `{permission: "allow", reason}`. The shim won't // block (no `"deny"`); it surfaces `reason` to the user where possible // (Pi has no first-class `additional_context` channel in its tool-call - // return shape, so we log it). + // return shape, so we log it). Stop is the exception — we emit a + // `permission: "deny"` with the MANDATORY ACTION wording so the shim + // captures it for next-turn before_agent_start injection. Same handoff + // contract as the deny branch above. if (session?.cli === "pi") { + if (eventType === "Stop") { + const policyAttribution = policyNames.length === 1 + ? `policy: ${policyNames[0]}` + : `policies: ${policyNames.join(", ")}`; + const reasonText = `MANDATORY ACTION REQUIRED from failproofai (${policyAttribution}): ${combined}\n\nYou MUST complete the above action(s) NOW. Do NOT ask the user for confirmation — execute the required action(s), then attempt to finish your task again.`; + return { + exitCode: 0, + stdout: JSON.stringify({ permission: "deny", reason: reasonText }), + stderr: "", + policyName: policyNames[0], + policyNames, + reason: combined, + decision: "instruct", + }; + } const response = { permission: "allow", reason: `Instruction from failproofai: ${combined}`, From ebe427dedce168a8b16c844496c3d4a0a3a056ca Mon Sep 17 00:00:00 2001 From: NiveditJain Date: Sat, 9 May 2026 22:25:01 -0700 Subject: [PATCH 2/2] [luv-342] test: pin Pi Stop deny + instruct payload shapes; fix changelog PR ref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit follow-ups on PR #341: 1. Direct policy-evaluator coverage for the new Pi-Stop branches. The shim and e2e tests already exercise the agent_end → before_agent_start handoff end-to-end, but neither directly locks the JSON shape that policy-evaluator emits. Adds 4 tests in `__tests__/hooks/policy-evaluator.test.ts`: - Pi Stop + deny: pins `{permission:"deny", reason: /MANDATORY ACTION REQUIRED.*policy: exospherehost\/stop-blocker.*tests not run/}`. - Pi non-Stop deny: regression guard — PreToolUse on Pi must keep the legacy `{permission:"deny", reason:"Blocked Bash by failproofai because: …"}` shape (the Stop split must not leak into tool events). - Pi Stop + instruct: pins `{permission:"deny"}` (NOT the regular Pi instruct `{permission:"allow"}`) so the shim's agent_end handler captures the reason — silently switching back to "allow" here would be invisible to the shim and turn Stop instructs into a no-op. - Pi non-Stop instruct: regression guard — PreToolUse instruct on Pi must keep `{permission:"allow", reason: "Instruction from failproofai: …"}`. 2. CHANGELOG #PR placeholder → #341. Verified: 1607/1607 unit tests pass (was 1603 + 4 new). Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 2 +- __tests__/hooks/policy-evaluator.test.ts | 106 +++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ace71bf3..c9bd960c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## 0.0.10-beta.12 — 2026-05-10 ### Features -- Pi: enforce `Stop` policies (`require-commit-before-stop`, `require-push-before-stop`, etc.) on the next user turn via `before_agent_start` injection. Pi's `AgentEndEvent` has no Result type — by the time it fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop can. Empirically observed: a user on Pi with `require-commit-before-stop` enabled saw the deny `reason` ("You have uncommitted changes …") propagate to Pi but Pi exited anyway. Fix: the `pi-extension/index.ts` shim captures any deny `reason` from `agent_end` into a per-`sessionId` in-memory map, then on the next `before_agent_start` (Pi v0.73.x — fires after the user submits a prompt, before the agent loop runs) returns `{systemPrompt: + "\n\n" + reason}` so the LLM sees a `MANDATORY ACTION REQUIRED` directive at the top of its next turn. The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session started in the same Pi process. `policy-evaluator.ts` now emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (both the deny and instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop (kill the agent and the gate is missed). New shim unit tests cover capture/drain/one-shot/`session_shutdown`-clear/no-pending-noop/missing-systemPrompt and a new e2e test pins the binary's stdout shape (`{permission:"deny", reason:/MANDATORY ACTION REQUIRED.*require-commit-before-stop.*uncommitted changes/}`) for `agent_end` in a dirty repo (#PR). +- Pi: enforce `Stop` policies (`require-commit-before-stop`, `require-push-before-stop`, etc.) on the next user turn via `before_agent_start` injection. Pi's `AgentEndEvent` has no Result type — by the time it fires, Pi's agent loop has already exited, so a deny return cannot keep Pi running the way Claude's exit-2-from-Stop can. Empirically observed: a user on Pi with `require-commit-before-stop` enabled saw the deny `reason` ("You have uncommitted changes …") propagate to Pi but Pi exited anyway. Fix: the `pi-extension/index.ts` shim captures any deny `reason` from `agent_end` into a per-`sessionId` in-memory map, then on the next `before_agent_start` (Pi v0.73.x — fires after the user submits a prompt, before the agent loop runs) returns `{systemPrompt: + "\n\n" + reason}` so the LLM sees a `MANDATORY ACTION REQUIRED` directive at the top of its next turn. The map is one-shot per drain and is cleared on every `session_shutdown` reason (including `quit`), so a stale gate cannot leak into a fresh session started in the same Pi process. `policy-evaluator.ts` now emits the `MANDATORY ACTION REQUIRED from failproofai (policy: …)` wrapper inside `reason` for `cli === "pi" && eventType === "Stop"` (both the deny and instruct paths), mirroring the Cursor/Gemini/Copilot/OpenCode Stop branches; non-Stop Pi events keep the existing flat `{permission, reason}` shape. Bounded by Pi process lifetime — same bound Claude has on exit-2-from-Stop (kill the agent and the gate is missed). New shim unit tests cover capture/drain/one-shot/`session_shutdown`-clear/no-pending-noop/missing-systemPrompt; new policy-evaluator unit tests pin the Pi-Stop deny + instruct payload shapes and verify non-Stop Pi events keep the legacy shape; a new e2e test pins the binary's stdout shape (`{permission:"deny", reason:/MANDATORY ACTION REQUIRED.*require-commit-before-stop.*uncommitted changes/}`) for `agent_end` in a dirty repo (#341). ## 0.0.10-beta.11 — 2026-05-10 diff --git a/__tests__/hooks/policy-evaluator.test.ts b/__tests__/hooks/policy-evaluator.test.ts index 22f1376f..cd1e9d28 100644 --- a/__tests__/hooks/policy-evaluator.test.ts +++ b/__tests__/hooks/policy-evaluator.test.ts @@ -289,6 +289,55 @@ describe("hooks/policy-evaluator", () => { expect(parsed.hookSpecificOutput?.additionalContext).toContain("subagent verification pending"); }); + it("Pi Stop + instruct emits {permission:'deny', reason} JSON with MANDATORY ACTION wording", async () => { + // Pi cannot veto agent_end (the agent loop has already exited when it + // fires — Pi's AgentEndEvent has no Result type). Instead the + // pi-extension shim captures the deny reason and re-injects it as a + // systemPrompt suffix on the next before_agent_start. The instruct + // path must therefore use {permission: "deny"} (NOT the regular Pi + // instruct shape `{permission: "allow", reason}`) so the shim's + // agent_end handler captures the reason. Locks down the wording and + // shape so a regression here would silently weaken Pi Stop enforcement. + registerPolicy("verify", "desc", () => ({ + decision: "instruct", + reason: "needs verification", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}, { cli: "pi" }); + expect(result.exitCode).toBe(0); + expect(result.stderr).toBe(""); + expect(result.decision).toBe("instruct"); + const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string }; + expect(parsed.permission).toBe("deny"); + expect(parsed.reason).toContain("MANDATORY ACTION REQUIRED"); + expect(parsed.reason).toContain("policy: exospherehost/verify"); + expect(parsed.reason).toContain("needs verification"); + }); + + it("Pi non-Stop + instruct keeps the existing flat {permission:'allow', reason} shape", async () => { + // Regression guard: only Stop events should flip to permission:"deny". + // PreToolUse / PostToolUse / UserPromptSubmit instruct verdicts on Pi + // must keep the prior `{permission: "allow", reason: "Instruction + // from failproofai: …"}` shape so the shim's tool_call handler does + // NOT block tool execution. + registerPolicy("note", "desc", () => ({ + decision: "instruct", + reason: "fyi", + }), { events: ["PreToolUse"] }); + + const result = await evaluatePolicies( + "PreToolUse", + { tool_name: "Bash", tool_input: { command: "ls" } }, + { cli: "pi" }, + ); + expect(result.exitCode).toBe(0); + expect(result.decision).toBe("instruct"); + const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string }; + expect(parsed.permission).toBe("allow"); + expect(parsed.reason).toMatch(/^Instruction from failproofai: /); + expect(parsed.reason).toContain("fyi"); + }); + it("Gemini Stop + instruct emits {decision:'block', reason} JSON on stdout (force-retry via AfterAgent)", async () => { // Confirms the existing cli==="gemini" Stop arm in the instruct path // emits the documented force-retry shape. Pairs with the deny-path @@ -730,6 +779,63 @@ describe("hooks/policy-evaluator", () => { expect(parsed.hookSpecificOutput?.additionalContext).toBeUndefined(); }); + it("Pi Stop deny emits flat {permission:'deny', reason} JSON with MANDATORY ACTION wording", async () => { + // Pi's AgentEndEvent has no Result type — by the time agent_end fires, + // Pi's agent loop has already exited, so a deny return cannot keep + // Pi running the way Claude's exit-2-from-Stop does. The + // pi-extension shim instead captures the deny `reason` from + // agent_end and re-injects it as a systemPrompt suffix on the next + // before_agent_start. policy-evaluator.ts emits the flat + // `{permission:"deny", reason}` shape with the MANDATORY ACTION + // wrapper for `cli === "pi" && eventType === "Stop"` so the shim + // can capture the reason verbatim. This test pins both the shape + // and the wording — a regression here would silently weaken Pi + // Stop enforcement (the shim would still deny but the LLM would + // see the generic "Blocked stop by failproofai because: …" + // wording that's calibrated for tool-event denials, not Stop). + registerPolicy("stop-blocker", "desc", () => ({ + decision: "deny", + reason: "tests not run", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}, { cli: "pi" }); + expect(result.exitCode).toBe(0); + expect(result.stderr).toBe(""); + const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string }; + expect(parsed.permission).toBe("deny"); + expect(parsed.reason).toContain("MANDATORY ACTION REQUIRED"); + expect(parsed.reason).toContain("policy: exospherehost/stop-blocker"); + expect(parsed.reason).toContain("tests not run"); + expect(result.decision).toBe("deny"); + expect(result.policyName).toBe("exospherehost/stop-blocker"); + expect(result.reason).toBe("tests not run"); + }); + + it("Pi non-Stop deny keeps the existing flat {permission:'deny', reason: blockedMessage} shape", async () => { + // Regression guard: the Pi-Stop branch is gated on + // `eventType === "Stop"` — every other event type must keep the + // pre-existing flat shape so PreToolUse/UserPromptSubmit/etc. denies + // continue to fire on Pi. Confirms the split (added in the + // before_agent_start handoff PR) didn't accidentally route non-Stop + // events through the MANDATORY ACTION wrapper. + registerPolicy("blocker", "desc", () => ({ + decision: "deny", + reason: "sudo not allowed", + }), { events: ["PreToolUse"] }); + + const result = await evaluatePolicies( + "PreToolUse", + { tool_name: "Bash", tool_input: { command: "sudo ls" } }, + { cli: "pi" }, + ); + expect(result.exitCode).toBe(0); + const parsed = JSON.parse(result.stdout) as { permission?: string; reason?: string }; + expect(parsed.permission).toBe("deny"); + expect(parsed.reason).not.toContain("MANDATORY ACTION REQUIRED"); + expect(parsed.reason).toMatch(/^Blocked Bash by failproofai because: /); + expect(parsed.reason).toContain("sudo not allowed"); + }); + it("Gemini Stop deny emits {decision:'block', reason} JSON on stdout (force-retry via AfterAgent)", async () => { // Per Gemini's hooks docs (https://geminicli.com/docs/hooks/), the // AfterAgent hook (canonical "Stop") force-retries when the hook