fix(hooks): use input budget as percentage base for context limits

isac322 · claude · isac322 · commit 4a5a1b56ecfc · 2026-05-07T22:22:55.000+09:00
DCP previously computed maxContextLimit/minContextLimit percentages from limit.context unconditionally — ignoring limit.input when present and not subtracting limit.output otherwise. This produced thresholds higher than the model's actual safe input window:

- The OpenAI GPT-5 line (gpt-5.4, gpt-5.4-mini, gpt-5.5, …) defines limit.input separately from limit.context; the percentage base was inflated to limit.context, so nudges fired later than the user intended.
- Shared-pool models (Anthropic, gpt-4o, Gemini, Grok, …) have no explicit limit.input; using limit.context as the base allows input + worst-case output to exceed the context window, risking context_length_exceeded.

Match opencode core's session/overflow.ts::usable() convention: limit.input ?? max(0, limit.context - limit.output).

Closes #512

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/lib/hooks.ts b/lib/hooks.ts
@@ -38,6 +38,7 @@ import { type HostPermissionSnapshot } from "./host-permissions"
 import { compressPermission, syncCompressPermissionState } from "./compress-permission"
 import { checkSession, ensureSessionInitialized, saveSessionState, syncToolCache } from "./state"
 import { cacheSystemPromptTokens } from "./ui/utils"
+import { computeInputBudget } from "./input-budget"
 
 const INTERNAL_AGENT_SIGNATURES = [
     "You are a title generator",
@@ -52,11 +53,14 @@ export function createSystemPromptHandler(
     prompts: PromptStore,
 ) {
     return async (
-        input: { sessionID?: string; model: { limit: { context: number } } },
+        input: {
+            sessionID?: string
+            model: { limit: { context: number; input?: number; output?: number } }
+        },
         output: { system: string[] },
     ) => {
         if (input.model?.limit?.context) {
-            state.modelContextLimit = input.model.limit.context
+            state.modelContextLimit = computeInputBudget(input.model.limit)
             logger.debug("Cached model context limit", { limit: state.modelContextLimit })
         }
 
diff --git a/lib/input-budget.ts b/lib/input-budget.ts
@@ -0,0 +1,23 @@
+/**
+ * Compute a model's input token budget: the base that DCP percentage
+ * thresholds (`maxContextLimit`, `minContextLimit`) resolve against.
+ *
+ * Resolution order:
+ *   1. `limit.context` is falsy (e.g. 0): return 0, ignoring the other fields.
+ *   2. `limit.input` is neither null nor undefined (split-budget models such
+ *      as the OpenAI GPT-5 line): return it unchanged, even when it differs
+ *      from `limit.context - limit.output`.
+ *   3. Otherwise (shared-pool models): return `limit.context - limit.output`,
+ *      treating a missing `limit.output` as 0 and clamping the result at 0.
+ *
+ * Intended to mirror opencode core (`session/overflow.ts::usable()`).
+ * NOTE(review): `??` passes an explicit `input: 0` through as a 0 budget — confirm intended.
+ */
+export function computeInputBudget(limit: {
+    context: number
+    input?: number
+    output?: number
+}): number {
+    if (!limit.context) return 0
+    return limit.input ?? Math.max(0, limit.context - (limit.output ?? 0))
+}
diff --git a/tests/input-budget.test.ts b/tests/input-budget.test.ts
@@ -0,0 +1,38 @@
+import assert from "node:assert/strict"
+import test from "node:test"
+import { computeInputBudget } from "../lib/input-budget"
+
+test("computeInputBudget uses limit.input when defined (split-budget OpenAI models)", () => {
+    // gpt-5.4-mini, gpt-5.5: 400K context, 272K input, 128K output — `input` is returned as-is
+    assert.equal(computeInputBudget({ context: 400000, input: 272000, output: 128000 }), 272000)
+    // gpt-5.4: 1.05M context, 922K input, 128K output — `input` is returned as-is
+    assert.equal(computeInputBudget({ context: 1050000, input: 922000, output: 128000 }), 922000)
+})
+
+test("computeInputBudget subtracts output from context when limit.input is undefined (shared-pool models)", () => {
+    // claude-opus-4-7: 1M context, 128K output, no explicit input limit → 1,000,000 - 128,000
+    assert.equal(computeInputBudget({ context: 1000000, output: 128000 }), 872000)
+    // claude-haiku-4-5: 200K context, 64K output → 200,000 - 64,000
+    assert.equal(computeInputBudget({ context: 200000, output: 64000 }), 136000)
+    // gpt-4o: 128K context, 16384 output → 128,000 - 16,384
+    assert.equal(computeInputBudget({ context: 128000, output: 16384 }), 111616)
+})
+
+test("computeInputBudget treats missing output as 0", () => {
+    assert.equal(computeInputBudget({ context: 200000 }), 200000) // nothing to subtract: budget equals context
+})
+
+test("computeInputBudget returns 0 when context is 0", () => {
+    assert.equal(computeInputBudget({ context: 0, input: 100, output: 50 }), 0) // context guard runs before `input` is read
+})
+
+test("computeInputBudget never returns negative when output exceeds context", () => {
+    assert.equal(computeInputBudget({ context: 100, output: 200 }), 0) // 100 - 200 is clamped to 0
+})
+
+test("computeInputBudget prefers explicit input over the context-minus-output fallback", () => {
+    // If both `input` and `output` are present, `input` wins regardless of what
+    // `context - output` would compute to (800 here). Defensive against models
+    // where the numbers don't satisfy `input + output = context`.
+    assert.equal(computeInputBudget({ context: 1000, input: 500, output: 200 }), 500)
+})