exospherehost · NiveditJain · May 9, 2026 · May 9, 2026 · May 9, 2026 · May 9, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## Unreleased
 
+## 0.0.10-beta.8 — 2026-05-08
+
+### Fixes
+- Make `require-*-before-stop` policies actually enforce on OpenCode (sst/opencode). Empirically observed: a Stop hook fired (visible in the dashboard activity feed) but the agent stopped without retry, identical failure mode to Cursor pre-#318 and Copilot pre-#299. Root cause: the OpenCode plugin shim subscribes to `session.idle` (canonical `Stop`) and `policy-evaluator.ts` had no `cli === "opencode"` branch for `Stop` / `SubagentStop`, so OpenCode fell through to the generic Claude-shape `exitCode: 2 + stderr` response — which the shim's `applyDecision` turns into `throw new Error(reason)` from inside the `session.idle` event callback. By that point OpenCode has already gone idle; the throw is logged at most. The shim does have a working force-retry channel: when stdout JSON contains `hookSpecificOutput.additionalContext`, it submits the text via `client.session.prompt({path: {id: sessionID}, body: {parts: [{type: "text", text}]}})`, which becomes a new user message that re-triggers the agent loop — exactly the same model as Cursor's `followup_message` (#318) and Copilot's `{decision: "block", reason}` (#299). New `cli === "opencode" && eventType in {Stop, SubagentStop}` arms in both the deny and instruct paths of `policy-evaluator.ts` emit `{hookSpecificOutput: {additionalContext: <MANDATORY ACTION reasonText>}}` ahead of the generic Stop fall-through. The shim's `applyDecision` is promoted to `async` and now awaits `client.session.prompt` for `Stop` / `SubagentStop` events specifically (fire-and-forget for tool events, so we don't add SDK round-trip latency on the hot path) — without the await, OpenCode could tear down the plugin context before the SDK call completes. New unit tests in `policy-evaluator.test.ts` pin the deny + instruct shapes for opencode (Stop and SubagentStop) plus a regression test confirming PreToolUse still uses the Claude `permissionDecision: "deny"` shape; new tests in `opencode-plugin-shim.test.ts` assert the shim awaits `client.session.prompt` on `session.idle` (handler stays pending until the SDK call resolves), swallows SDK rejection (agent is exiting anyway), and still throws on exit-2 stderr (back-compat with stale binaries). Pi `agent_end` remains observation-only by upstream design — Pi's agent loop has already exited when `agent_end` fires and the `AgentEndEventResult` exposes no `block` field, documented in `CLAUDE.md`. Gemini `AfterAgent` (canonical `Stop`) was already correctly emitting `{decision: "block", reason}`; new unit tests in `policy-evaluator.test.ts` pin both the deny and instruct shapes to prevent regression (#323).
+
 ## 0.0.10-beta.7 — 2026-05-08
 
 ### Fixes

diff --git a/__tests__/hooks/opencode-plugin-shim.test.ts b/__tests__/hooks/opencode-plugin-shim.test.ts
@@ -385,6 +385,68 @@ describe("OpenCode plugin shim — translation of binary response to plugin acti
     await hooks["permission.ask"]!({ tool: "bash", sessionID: "s" }, out);
     expect(out.status).toBe("deny");
   });
+
+  it("event session.idle + additionalContext → AWAITS client.session.prompt (Stop force-retry)", async () => {
+    // For Stop events the prompt is the only force-retry channel — it MUST
+    // land before the plugin handler returns or OpenCode tears down the
+    // plugin context. Verify by making session.prompt return a Promise that
+    // resolves on a tick we control: if the handler awaits, it won't return
+    // until we tick.
+    let resolvePrompt: () => void = () => {};
+    const promptPromise = new Promise<void>((res) => { resolvePrompt = res; });
+    const client = { session: { prompt: vi.fn().mockReturnValue(promptPromise) } };
+
+    responses.push({
+      status: 0,
+      stdout: JSON.stringify({ hookSpecificOutput: { additionalContext: "Run tests before stopping" } }),
+      stderr: "",
+    });
+    const { plugin } = await setup();
+    const hooks = await plugin({ client, directory: "/repo" });
+
+    // Fire the handler and observe that it does NOT resolve until prompt resolves.
+    let handlerSettled = false;
+    const handlerDone = (async () => {
+      await hooks.event!({ event: { type: "session.idle", properties: { sessionID: "ses_1" } } });
+      handlerSettled = true;
+    })();
+
+    // Yield to the microtask queue; the handler should still be pending.
+    await new Promise((r) => setImmediate(r));
+    expect(handlerSettled).toBe(false);
+    expect(client.session.prompt).toHaveBeenCalledTimes(1);
+    const arg = client.session.prompt.mock.calls[0][0] as { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } };
+    expect(arg.path.id).toBe("ses_1");
+    expect(arg.body.parts[0]).toEqual({ type: "text", text: "Run tests before stopping" });
+
+    // Resolve the SDK call; the handler should now finish.
+    resolvePrompt();
+    await handlerDone;
+    expect(handlerSettled).toBe(true);
+  });
+
+  it("event session.idle + SDK rejection on prompt is swallowed (agent already exiting)", async () => {
+    responses.push({
+      status: 0,
+      stdout: JSON.stringify({ hookSpecificOutput: { additionalContext: "..." } }),
+      stderr: "",
+    });
+    const client = { session: { prompt: vi.fn().mockRejectedValue(new Error("network")) } };
+    const { plugin } = await setup();
+    const hooks = await plugin({ client, directory: "/repo" });
+    await expect(
+      hooks.event!({ event: { type: "session.idle", properties: { sessionID: "ses_1" } } }),
+    ).resolves.toBeUndefined();
+  });
+
+  it("event session.idle + exit 2 → still throws (back-compat with stale binaries)", async () => {
+    responses.push({ status: 2, stdout: "", stderr: "MANDATORY: commit before stopping" });
+    const { plugin } = await setup();
+    const hooks = await plugin({ client: fakeClient(), directory: "/repo" });
+    await expect(
+      hooks.event!({ event: { type: "session.idle", properties: { sessionID: "ses_1" } } }),
+    ).rejects.toThrow(/MANDATORY: commit before stopping/);
+  });
 });
 
 describe("OpenCode plugin shim — spawn options and registration", () => {

diff --git a/__tests__/hooks/policy-evaluator.test.ts b/__tests__/hooks/policy-evaluator.test.ts
@@ -251,6 +251,63 @@ describe("hooks/policy-evaluator", () => {
     expect(parsed.followup_message).toContain("subagent verification pending");
   });
 
+  it("OpenCode Stop + instruct emits {hookSpecificOutput.additionalContext} JSON on stdout (NOT exit 2)", async () => {
+    // Mirrors the Copilot/Cursor instruct-on-Stop fixes: without a per-CLI
+    // branch, opencode instruct verdicts fall through to exit-2 + stderr,
+    // which the OpenCode shim throws from inside session.idle — a no-op
+    // because the agent has already gone idle. The shim's only working
+    // force-retry channel is hookSpecificOutput.additionalContext routed
+    // through client.session.prompt.
+    registerPolicy("verify", "desc", () => ({
+      decision: "instruct",
+      reason: "needs verification",
+    }), { events: ["Stop"] });
+
+    const result = await evaluatePolicies("Stop", {}, { cli: "opencode" });
+    expect(result.exitCode).toBe(0);
+    expect(result.stderr).toBe("");
+    expect(result.decision).toBe("instruct");
+    const parsed = JSON.parse(result.stdout) as { hookSpecificOutput?: { additionalContext?: string } };
+    expect(parsed.hookSpecificOutput?.additionalContext).toContain("MANDATORY ACTION REQUIRED");
+    expect(parsed.hookSpecificOutput?.additionalContext).toContain("needs verification");
+  });
+
+  it("OpenCode SubagentStop + instruct also emits {hookSpecificOutput.additionalContext} JSON", async () => {
+    // Forward-compat parity with the SubagentStop deny test — see comment
+    // there for why we widen even though OpenCode doesn't yet expose
+    // subagent boundaries to plugins.
+    registerPolicy("verify", "desc", () => ({
+      decision: "instruct",
+      reason: "subagent verification pending",
+    }), { events: ["SubagentStop"] });
+
+    const result = await evaluatePolicies("SubagentStop", {}, { cli: "opencode" });
+    expect(result.exitCode).toBe(0);
+    expect(result.stderr).toBe("");
+    expect(result.decision).toBe("instruct");
+    const parsed = JSON.parse(result.stdout) as { hookSpecificOutput?: { additionalContext?: string } };
+    expect(parsed.hookSpecificOutput?.additionalContext).toContain("subagent verification pending");
+  });
+
+  it("Gemini Stop + instruct emits {decision:'block', reason} JSON on stdout (force-retry via AfterAgent)", async () => {
+    // Confirms the existing cli==="gemini" Stop arm in the instruct path
+    // emits the documented force-retry shape. Pairs with the deny-path
+    // test in the "Stop event deny format" describe block.
+    registerPolicy("verify", "desc", () => ({
+      decision: "instruct",
+      reason: "needs verification",
+    }), { events: ["Stop"] });
+
+    const result = await evaluatePolicies("Stop", {}, { cli: "gemini" });
+    expect(result.exitCode).toBe(0);
+    expect(result.stderr).toBe("");
+    expect(result.decision).toBe("instruct");
+    const parsed = JSON.parse(result.stdout) as { decision?: string; reason?: string };
+    expect(parsed.decision).toBe("block");
+    expect(parsed.reason).toContain("MANDATORY ACTION REQUIRED");
+    expect(parsed.reason).toContain("needs verification");
+  });
+
   it("accumulates multiple instruct messages", async () => {
     registerPolicy("first", "desc", () => ({
       decision: "instruct",
@@ -605,6 +662,93 @@ describe("hooks/policy-evaluator", () => {
       await evaluatePolicies("Stop", {});
       expect(secondPolicyCalled.value).toBe(false);
     });
+
+    it("OpenCode Stop deny emits {hookSpecificOutput.additionalContext} JSON on stdout (NOT exit 2)", async () => {
+      // OpenCode's `session.idle` event is notification-only — by the time
+      // the plugin handler fires, the agent has already gone idle and a
+      // thrown error from the handler does not force-retry. The shim's only
+      // working force-retry channel is `client.session.prompt(...)`, which
+      // it routes through `hookSpecificOutput.additionalContext`. Without
+      // this branch, the 5 require-*-before-stop builtins were
+      // observation-only on OpenCode — the deny was logged in the dashboard
+      // but the agent stopped silently.
+      registerPolicy("stop-blocker", "desc", () => ({
+        decision: "deny",
+        reason: "changes not committed",
+      }), { events: ["Stop"] });
+
+      const result = await evaluatePolicies("Stop", {}, { cli: "opencode" });
+      expect(result.exitCode).toBe(0);
+      expect(result.stderr).toBe("");
+      const parsed = JSON.parse(result.stdout) as { hookSpecificOutput?: { additionalContext?: string; permissionDecision?: string } };
+      expect(parsed.hookSpecificOutput?.additionalContext).toContain("MANDATORY ACTION REQUIRED");
+      expect(parsed.hookSpecificOutput?.additionalContext).toContain("changes not committed");
+      // Load-bearing: must NOT emit exit-2 (the shim would `throw` from
+      // session.idle, which OpenCode logs but ignores) or
+      // permissionDecision: "deny" (the shim would also throw on that path).
+      expect(parsed.hookSpecificOutput?.permissionDecision).toBeUndefined();
+      expect(result.decision).toBe("deny");
+    });
+
+    it("OpenCode SubagentStop deny also emits {hookSpecificOutput.additionalContext} JSON (forward-compat)", async () => {
+      // OpenCode does not yet expose subagent boundaries to plugins, but
+      // custom policies subscribing to SubagentStop should still get the
+      // force-retry shape if OpenCode adds the bus event later. Mirrors the
+      // Cursor + Copilot SubagentStop widening.
+      registerPolicy("subagent-blocker", "desc", () => ({
+        decision: "deny",
+        reason: "subagent left work undone",
+      }), { events: ["SubagentStop"] });
+
+      const result = await evaluatePolicies("SubagentStop", {}, { cli: "opencode" });
+      expect(result.exitCode).toBe(0);
+      const parsed = JSON.parse(result.stdout) as { hookSpecificOutput?: { additionalContext?: string } };
+      expect(parsed.hookSpecificOutput?.additionalContext).toContain("MANDATORY ACTION REQUIRED");
+      expect(parsed.hookSpecificOutput?.additionalContext).toContain("subagent left work undone");
+      expect(result.decision).toBe("deny");
+    });
+
+    it("OpenCode PreToolUse deny still uses Claude permissionDecision shape (regression: tool-event path unchanged)", async () => {
+      // The Stop arm is added INSIDE the cli==="opencode" branch so the
+      // generic Claude shape below it is unchanged for tool events. The
+      // shim's applyDecision throws on permissionDecision: "deny" — that's
+      // the working path for tool events.
+      registerPolicy("tool-blocker", "desc", () => ({
+        decision: "deny",
+        reason: "blocked tool",
+      }), { events: ["PreToolUse"] });
+
+      const result = await evaluatePolicies(
+        "PreToolUse",
+        { tool_name: "Bash", tool_input: { command: "ls" } },
+        { cli: "opencode" },
+      );
+      expect(result.exitCode).toBe(0);
+      const parsed = JSON.parse(result.stdout) as { hookSpecificOutput?: { permissionDecision?: string; additionalContext?: string } };
+      expect(parsed.hookSpecificOutput?.permissionDecision).toBe("deny");
+      // No additionalContext on tool events — that's a Stop-only shape.
+      expect(parsed.hookSpecificOutput?.additionalContext).toBeUndefined();
+    });
+
+    it("Gemini Stop deny emits {decision:'block', reason} JSON on stdout (force-retry via AfterAgent)", async () => {
+      // Per Gemini's hooks docs (https://geminicli.com/docs/hooks/), the
+      // AfterAgent hook (canonical "Stop") force-retries when the hook
+      // returns `{decision: "block", reason}` on stdout. Exit-2 is per-
+      // action only ("turn continues") and would NOT trigger the retry.
+      registerPolicy("stop-blocker", "desc", () => ({
+        decision: "deny",
+        reason: "tests not run",
+      }), { events: ["Stop"] });
+
+      const result = await evaluatePolicies("Stop", {}, { cli: "gemini" });
+      expect(result.exitCode).toBe(0);
+      expect(result.stderr).toBe("");
+      const parsed = JSON.parse(result.stdout) as { decision?: string; reason?: string };
+      expect(parsed.decision).toBe("block");
+      expect(parsed.reason).toContain("MANDATORY ACTION REQUIRED");
+      expect(parsed.reason).toContain("tests not run");
+      expect(result.decision).toBe("deny");
+    });
   });
 
   describe("workflow policy chain integration", () => {