diff --git a/CHANGELOG.md b/CHANGELOG.md index fc88eec5..873698d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ ### Fixed +- 放宽 `/v1/responses` 的 missing-tool-call full-history replay guard 阈值:从 250KB / 80 items 提到 2MB / 1000 items。原阈值会把 Codex CLI `/compact` 之后正常的 client-driven full replay(实测 300-800KB / 100-800 items)误判成 runaway 风暴,连续 413 卡死对话。新阈值仍能挡真正失控的 multi-MB 重试循环,但不再误伤合法 fallback;测试侧把 padding 从 80 提到 1010 items 让 guard 仍能在新阈值下触发(`src/routes/shared/proxy-handler.ts`、`tests/integration/proxy-handler.test.ts`)。 - 修复个别账号被 Cloudflare Bot Management `__cf_bm` cookie 反噬导致 `/codex/responses` 全 404、`/codex/usage` 仍正常的"配额没限却用不了"假死状态:根因是 proxy 此前在 warmup `GET /codex/usage` 时通过 `captureCookies` 把 CF 偶发下发的 `__cf_bm` 收进 jar,而 `__cf_bm` 是绑死 (IP + UA + TLS fingerprint + 时序) 的 30 分钟会话指纹 cookie——一旦 fingerprint 漂(proxy pool 切出口 IP / cookie 过期 / 时序变化),CF 在重保护路径就用空 body 404(CF "stealth deny" 模式)拒绝该 cookie 持有者,而轻保护路径继续放行,造成 `cachedQuota.rate_limit.limit_reached=false / used 78%` 但 `/codex/responses` 14 连 404 的诊断矛盾;24 个号里只这一个号偶然命中过 CF 下发,其它 cookie jar 全空的号反而都正常。修复两层:(1) `src/proxy/cookie-jar.ts` 加 `CAPTURABLE_COOKIE_NAMES = {cf_clearance}` 白名单,`captureRaw` 主动丢弃 `__cf_bm` 等非白名单 cookie,从源头不让毒 cookie 入 jar;admin API 的手动 `set()` 不受白名单约束方便调试。(2) `src/proxy/error-classification.ts` 新增 `isCfPathBlockError`(404 + trimmed body 为空);`src/auth/cf-path-block-tracker.ts` 用 1 小时滑动窗口计数器追踪每个 entryId 的连续 CF block 次数;`src/routes/shared/proxy-error-handler.ts` 在 generic respond 之前加新分支——命中 CF block 时清这个号的 cookie jar、记录计数、`releaseBeforeRetry: true` 让请求 fail over 到不同号,同号 1h 内累计 ≥ 3 次自动 `markStatus("disabled")` 并 `appendErrorLog({ name: "CfPathBlockAutoDisable" })` 进 Errors tab;`src/services/account-mutation.ts` 在 dashboard re-enable 时 `resetCfPathBlock` 清计数避免历史欠账。新增 `tests/unit/auth/cf-path-block-tracker.test.ts`(4 个,计数 / 窗口过期 / reset / peek)、`tests/unit/proxy/error-classification.test.ts` `isCfPathBlockError` 一节(4 个分支)、`tests/unit/routes/shared/proxy-error-handler.test.ts` CF block retry/disable 路径(2 个,含非空 body 不误判),`tests/unit/proxy/cookie-jar.test.ts` 改写为白名单语义(+2,旧 `session_id` / `expired` 用例改用 `cf_clearance` 测通用 Max-Age 解析)。Full suite 2258 全绿(`src/proxy/cookie-jar.ts`、`src/proxy/error-classification.ts`、`src/auth/cf-path-block-tracker.ts`、`src/routes/shared/proxy-error-handler.ts`、`src/routes/shared/proxy-handler.ts`、`src/services/account-mutation.ts`、`tests/unit/proxy/cookie-jar.test.ts`、`tests/unit/proxy/error-classification.test.ts`、`tests/unit/auth/cf-path-block-tracker.test.ts`、`tests/unit/routes/shared/proxy-error-handler.test.ts`) - `/v1/responses` passthrough streaming / non-streaming paths now collect `function_call.call_id` from `response.output_item.done` and forward it through response metadata so implicit resume can validate following `function_call_output` turns instead of falling back to full-history replay. Oversized missing-tool-call replays are guarded with 413, and regression coverage now proves the issue red/green across the Responses format adapter (`src/routes/responses.ts`, `src/routes/shared/proxy-handler.ts`, `tests/unit/routes/responses-passthrough-metadata.test.ts`, `tests/integration/proxy-handler.test.ts`). - Release bump workflows now require runtime file changes in addition to meaningful commit subjects before tagging a beta or stable build. This prevents squash-promotion history divergence from re-counting old dev commits, and prevents workflow/docs/test-only fixes from producing empty Electron releases (`.github/workflows/bump-electron.yml`, `.github/workflows/bump-electron-beta.yml`, `tests/unit/ci/package-boundary.test.ts`). diff --git a/src/routes/shared/proxy-handler.ts b/src/routes/shared/proxy-handler.ts index a241f308..9357a8ff 100644 --- a/src/routes/shared/proxy-handler.ts +++ b/src/routes/shared/proxy-handler.ts @@ -118,8 +118,12 @@ export async function handleProxyRequest(options: HandleProxyRequestOptions): Pr // Guard: when implicit resume fails due to missing tool calls, block runaway // full-history replays that would burn massive token budgets silently. - const PAYLOAD_GUARD_BYTES = 250_000; - const PAYLOAD_GUARD_ITEMS = 80; + // Relaxed thresholds: legitimate client-driven full replays (e.g. after + // Codex CLI /compact) regularly hit 300-800KB / 100-800 items, and the + // previous 250KB / 80-item gate was 413'ing them. Real runaway loops + // typically blow past several MB before the issue becomes obvious. + const PAYLOAD_GUARD_BYTES = 2_000_000; + const PAYLOAD_GUARD_ITEMS = 1000; if ( implicitResume.evaluation.reason === "missing_tool_calls" || implicitResume.evaluation.reason === "unanswered_tool_calls" diff --git a/tests/integration/proxy-handler.test.ts b/tests/integration/proxy-handler.test.ts index 3d461f2f..6d6c9496 100644 --- a/tests/integration/proxy-handler.test.ts +++ b/tests/integration/proxy-handler.test.ts @@ -317,7 +317,7 @@ describe("proxy-handler integration", () => { { role: "user", content: "first" }, { type: "function_call", call_id: "call_expected", name: "read_file", arguments: "{}" }, { type: "function_call_output", call_id: "call_missing", output: "{}" }, - ...Array.from({ length: 80 }, (_, index) => ({ + ...Array.from({ length: 1010 }, (_, index) => ({ role: "user" as const, content: `padding message ${index}`, })),