diff --git a/CHANGELOG.md b/CHANGELOG.md index 26130d94..fc88eec5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ ### Fixed +- 修复个别账号被 Cloudflare Bot Management `__cf_bm` cookie 反噬导致 `/codex/responses` 全 404、`/codex/usage` 仍正常的"配额没限却用不了"假死状态:根因是 proxy 此前在 warmup `GET /codex/usage` 时通过 `captureCookies` 把 CF 偶发下发的 `__cf_bm` 收进 jar,而 `__cf_bm` 是绑死 (IP + UA + TLS fingerprint + 时序) 的 30 分钟会话指纹 cookie——一旦 fingerprint 漂(proxy pool 切出口 IP / cookie 过期 / 时序变化),CF 在重保护路径就用空 body 404(CF "stealth deny" 模式)拒绝该 cookie 持有者,而轻保护路径继续放行,造成 `cachedQuota.rate_limit.limit_reached=false / used 78%` 但 `/codex/responses` 14 连 404 的诊断矛盾;24 个号里只这一个号偶然命中过 CF 下发,其它 cookie jar 全空的号反而都正常。修复两层:(1) `src/proxy/cookie-jar.ts` 加 `CAPTURABLE_COOKIE_NAMES = {cf_clearance}` 白名单,`captureRaw` 主动丢弃 `__cf_bm` 等非白名单 cookie,从源头不让毒 cookie 入 jar;admin API 的手动 `set()` 不受白名单约束方便调试。(2) `src/proxy/error-classification.ts` 新增 `isCfPathBlockError`(404 + trimmed body 为空);`src/auth/cf-path-block-tracker.ts` 用 1 小时滑动窗口计数器追踪每个 entryId 的连续 CF block 次数;`src/routes/shared/proxy-error-handler.ts` 在 generic respond 之前加新分支——命中 CF block 时清这个号的 cookie jar、记录计数、`releaseBeforeRetry: true` 让请求 fail over 到不同号,同号 1h 内累计 ≥ 3 次自动 `markStatus("disabled")` 并 `appendErrorLog({ name: "CfPathBlockAutoDisable" })` 进 Errors tab;`src/services/account-mutation.ts` 在 dashboard re-enable 时 `resetCfPathBlock` 清计数避免历史欠账。新增 `tests/unit/auth/cf-path-block-tracker.test.ts`(4 个,计数 / 窗口过期 / reset / peek)、`tests/unit/proxy/error-classification.test.ts` `isCfPathBlockError` 一节(4 个分支)、`tests/unit/routes/shared/proxy-error-handler.test.ts` CF block retry/disable 路径(2 个,含非空 body 不误判),`tests/unit/proxy/cookie-jar.test.ts` 改写为白名单语义(+2,旧 `session_id` / `expired` 用例改用 `cf_clearance` 测通用 Max-Age 解析)。Full suite 2258 
全绿(`src/proxy/cookie-jar.ts`、`src/proxy/error-classification.ts`、`src/auth/cf-path-block-tracker.ts`、`src/routes/shared/proxy-error-handler.ts`、`src/routes/shared/proxy-handler.ts`、`src/services/account-mutation.ts`、`tests/unit/proxy/cookie-jar.test.ts`、`tests/unit/proxy/error-classification.test.ts`、`tests/unit/auth/cf-path-block-tracker.test.ts`、`tests/unit/routes/shared/proxy-error-handler.test.ts`) - `/v1/responses` passthrough streaming / non-streaming paths now collect `function_call.call_id` from `response.output_item.done` and forward it through response metadata so implicit resume can validate following `function_call_output` turns instead of falling back to full-history replay. Oversized missing-tool-call replays are guarded with 413, and regression coverage now proves the issue red/green across the Responses format adapter (`src/routes/responses.ts`, `src/routes/shared/proxy-handler.ts`, `tests/unit/routes/responses-passthrough-metadata.test.ts`, `tests/integration/proxy-handler.test.ts`). - Release bump workflows now require runtime file changes in addition to meaningful commit subjects before tagging a beta or stable build. This prevents squash-promotion history divergence from re-counting old dev commits, and prevents workflow/docs/test-only fixes from producing empty Electron releases (`.github/workflows/bump-electron.yml`, `.github/workflows/bump-electron-beta.yml`, `tests/unit/ci/package-boundary.test.ts`). - Release bump workflows now skip the release-notes workflow hotfix subject itself, so promoting the stable-notes CI fix to `master` does not create an empty desktop release on the next scheduled bump (`.github/workflows/bump-electron.yml`, `.github/workflows/bump-electron-beta.yml`, `tests/unit/ci/package-boundary.test.ts`). 
/**
 * Tracks Cloudflare path-block 404s per account entry inside a sliding
 * time window.
 *
 * Background: Cloudflare's Bot Management can answer a "trust this client"
 * mismatch with an empty-body 404 on the guarded path (e.g.
 * /codex/responses) while leaving lighter paths (e.g. /codex/usage)
 * reachable. The proxy reacts by clearing the account's cookie jar and
 * retrying on a different account; this tracker watches for the
 * pathological case where cookie clearing doesn't help (repeated CF
 * blocks even with an empty jar). The caller compares the returned count
 * against its own threshold and disables the account when it is reached.
 *
 * Only blocks recorded within the last `STALE_MS` are counted. Older
 * blocks drop out of the window individually, so isolated CF blips that
 * are spread over hours or days never add up to a spurious disable —
 * even when each blip lands less than one window after the previous one.
 */

/** Width of the sliding window in milliseconds (1h). */
const STALE_MS = 60 * 60 * 1000;

/** entryId -> timestamps (ms) of the blocks recorded for that entry. */
const blockTimes = new Map<string, number[]>();

/**
 * Timestamps for `entryId` that still fall inside the window ending at
 * `now`. Always returns a fresh array and never touches the stored state.
 */
function liveBlockTimes(entryId: string, now: number): number[] {
  const recorded = blockTimes.get(entryId) ?? [];
  // A block that is exactly STALE_MS old still counts (inclusive boundary).
  return recorded.filter((at) => now - at <= STALE_MS);
}

/**
 * Record one CF path-block 404 for the given entryId.
 *
 * @param entryId Account entry the block was observed on.
 * @param now Timestamp of the block in ms; defaults to `Date.now()`.
 * @returns Number of blocks for this entry inside the window ending at
 *   `now`, including the one just recorded.
 */
export function recordCfPathBlock(entryId: string, now: number = Date.now()): number {
  const live = liveBlockTimes(entryId, now);
  live.push(now);
  // Storing the pruned list keeps each entry bounded to one window's worth.
  blockTimes.set(entryId, live);
  return live.length;
}

/** Reset the counter for an entry (e.g. on manual re-activation). */
export function resetCfPathBlock(entryId: string): void {
  blockTimes.delete(entryId);
}

/**
 * Current in-window count for an entry, without mutating any state.
 *
 * @returns 0 when the entry is unknown or all of its blocks are stale.
 */
export function peekCfPathBlock(entryId: string, now: number = Date.now()): number {
  return liveBlockTimes(entryId, now).length;
}

/** Visible for tests: drops the state of every entry. */
export function _resetAllCfPathBlocks(): void {
  blockTimes.clear();
}
// ── src/proxy/cookie-jar.ts ─────────────────────────────────────────

/**
 * Names of cookies that may be auto-captured from Set-Cookie response
 * headers; `captureRaw` skips every cookie whose name is not listed here.
 *
 * Why an allow-list: `__cf_bm` is Cloudflare's Bot Management *session*
 * cookie, not a challenge-pass token. Replaying it ties later requests to
 * the (IP + UA + TLS fingerprint + timing) seen when it was issued; once
 * any of those drift, heavily guarded paths such as /codex/responses
 * answer with an empty-body 404 while /codex/usage stays reachable, so the
 * account ends up worse off than with no cookie at all. Only
 * `cf_clearance` (the positive challenge-pass) is worth replaying.
 *
 * Manual `set()` via the admin API is NOT filtered by this list, so
 * operators can still inject arbitrary cookies for debugging.
 *
 * NOTE(review): this only filters new captures — confirm that jars
 * persisted before this change cannot still carry a `__cf_bm` entry.
 */
const CAPTURABLE_COOKIE_NAMES = new Set(["cf_clearance"]);

/** Critical cookie names that trigger immediate persistence on change */
const CRITICAL_COOKIES = new Set(["cf_clearance"]);

// ── src/proxy/error-classification.ts ───────────────────────────────

/**
 * Reports whether `err` looks like a Cloudflare path-level bot block: a
 * Codex-like error with status 404 whose body is empty after trimming
 * whitespace.
 *
 * Cloudflare's Bot Management can "hide" a guarded path (e.g.
 * /codex/responses) behind a body-less 404 when the session's `__cf_bm`
 * cookie or fingerprint no longer matches what it issued. The empty body
 * is the distinguishing signal; the check relies on genuine upstream 404s
 * carrying a JSON error payload.
 *
 * @param err Any thrown value; values that are not Codex-like yield `false`.
 */
export function isCfPathBlockError(err: unknown): boolean {
  if (!isCodexLike(err) || err.status !== 404) return false;
  return err.body.trim() === "";
}
*/ export function isModelNotSupportedError(err: CodexLikeError): boolean { if (err.status < 400 || err.status >= 500 || err.status === 429) return false; diff --git a/src/routes/shared/proxy-error-handler.ts b/src/routes/shared/proxy-error-handler.ts index 64a4919e..fe9bc7c7 100644 --- a/src/routes/shared/proxy-error-handler.ts +++ b/src/routes/shared/proxy-error-handler.ts @@ -9,12 +9,19 @@ import type { AccountPool } from "../../auth/account-pool.js"; import { extractRetryAfterSec, isBanError, + isCfPathBlockError, isQuotaExhaustedError, isTokenInvalidError, isModelNotSupportedError, } from "../../proxy/error-classification.js"; import type { CodexApiError } from "../../proxy/codex-types.js"; import type { StatusCode } from "hono/utils/http-status"; +import type { CookieJar } from "../../proxy/cookie-jar.js"; +import { recordCfPathBlock } from "../../auth/cf-path-block-tracker.js"; +import { appendErrorLog } from "../../logs/error-log.js"; + +/** Consecutive CF path-blocks before the account is auto-disabled. */ +const CF_PATH_BLOCK_DISABLE_THRESHOLD = 3; /** Clamp an HTTP status to a valid error StatusCode, defaulting to 502 for non-error codes. */ export function toErrorStatus(status: number): StatusCode { @@ -54,6 +61,7 @@ export function handleCodexApiError( model: string, tag: string, modelRetried: boolean, + cookieJar?: CookieJar, ): ErrorAction { const email = pool.getEntry(entryId)?.email ?? "?"; @@ -119,7 +127,44 @@ export function handleCodexApiError( return { action: "retry", status: 401, message: err.message }; } - // 6. Generic error — return to client (preserve original body for passthrough) + // 6. Cloudflare path block (empty-body 404). CF's Bot Management can + // "hide" the /codex/responses path by returning 404 with no body when + // the captured __cf_bm cookie no longer matches the request + // fingerprint. Clear the cookie jar (so the next attempt is a clean, + // fingerprint-only request) and retry on a different account. 
After + // the threshold is reached within the sliding window, disable the + // account so session affinity stops pinning a dying conversation to + // it. + if (isCfPathBlockError(err)) { + cookieJar?.clear(entryId); + const blockCount = recordCfPathBlock(entryId); + if (blockCount >= CF_PATH_BLOCK_DISABLE_THRESHOLD) { + pool.markStatus(entryId, "disabled"); + console.warn( + `[${tag}] Account ${entryId} (${email}) | Cloudflare path-block 404 ×${blockCount} — auto-disabling account`, + ); + appendErrorLog({ + source: "server", + error: { + name: "CfPathBlockAutoDisable", + message: `Account auto-disabled after ${blockCount} consecutive Cloudflare path-block 404s on /codex/responses`, + }, + context: { entryId, email, model, tag, blockCount }, + }); + } else { + console.warn( + `[${tag}] Account ${entryId} (${email}) | Cloudflare path-block 404 ×${blockCount}, cleared cookies and retrying...`, + ); + } + return { + action: "retry", + releaseBeforeRetry: true, + status: 502, + message: "Upstream blocked the request (Cloudflare path-block)", + }; + } + + // 7. 
Generic error — return to client (preserve original body for passthrough) const status = toErrorStatus(err.status); return { action: "respond", status, message: err.message, errorBody: err.body }; } diff --git a/src/routes/shared/proxy-handler.ts b/src/routes/shared/proxy-handler.ts index 1bcbea31..a241f308 100644 --- a/src/routes/shared/proxy-handler.ts +++ b/src/routes/shared/proxy-handler.ts @@ -252,7 +252,7 @@ export async function handleProxyRequest(options: HandleProxyRequestOptions): Pr } const decision = handleCodexApiError( - err, accountPool, entryId, req.codexRequest.model, fmt.tag, modelRetried, + err, accountPool, entryId, req.codexRequest.model, fmt.tag, modelRetried, cookieJar, ); const errorRetryTransition = applyProxyErrorRetryTransition({ diff --git a/src/services/account-mutation.ts b/src/services/account-mutation.ts index a4de4914..6a670727 100644 --- a/src/services/account-mutation.ts +++ b/src/services/account-mutation.ts @@ -4,6 +4,7 @@ */ import type { AccountPool } from "../auth/account-pool.js"; +import { resetCfPathBlock } from "../auth/cf-path-block-tracker.js"; export interface DeleteResult { deleted: number; @@ -57,6 +58,9 @@ export class AccountMutationService { const entry = this.pool.getEntry(id); if (entry) { this.pool.markStatus(id, status); + // Re-enabling clears any in-memory CF block streak so the account + // gets a fresh allowance against the auto-disable threshold. 
import { beforeEach, describe, expect, it } from "vitest";
import {
  _resetAllCfPathBlocks,
  peekCfPathBlock,
  recordCfPathBlock,
  resetCfPathBlock,
} from "@src/auth/cf-path-block-tracker.js";

/** One hour in ms — the width of the tracker's window. */
const WINDOW_MS = 60 * 60 * 1000;

describe("cf-path-block-tracker", () => {
  // The tracker keeps module-level state, so wipe it before every case.
  beforeEach(() => {
    _resetAllCfPathBlocks();
  });

  it("increments per entry independently", () => {
    const seen = ["a", "a", "b", "a"].map((id) => recordCfPathBlock(id));
    expect(seen).toEqual([1, 2, 1, 3]);
  });

  it("resets after sliding window expires", () => {
    const first = 1_000_000;
    const second = first + 1000;
    expect(recordCfPathBlock("a", first)).toBe(1);
    expect(recordCfPathBlock("a", second)).toBe(2);

    // More than one full window after the most recent block (`second`).
    const pastWindow = second + WINDOW_MS + 1;
    expect(recordCfPathBlock("a", pastWindow)).toBe(1);
  });

  it("resetCfPathBlock clears the counter", () => {
    for (let i = 0; i < 2; i++) recordCfPathBlock("a");

    resetCfPathBlock("a");

    expect(peekCfPathBlock("a")).toBe(0);
    expect(recordCfPathBlock("a")).toBe(1);
  });

  it("peek returns 0 for unknown entry and stale entry", () => {
    expect(peekCfPathBlock("ghost")).toBe(0);

    const recordedAt = 1_000_000;
    recordCfPathBlock("a", recordedAt);

    expect(peekCfPathBlock("a", recordedAt)).toBe(1);
    expect(peekCfPathBlock("a", recordedAt + WINDOW_MS + 1)).toBe(0);
  });
});
+ jar.captureRaw("acct1", ["cf_clearance=v1; Max-Age=3600"]); + expect(jar.getCookieHeader("acct1")).toContain("cf_clearance=v1"); + jar.captureRaw("acct1", ["cf_clearance=v2; Max-Age=0"]); + expect(jar.getCookieHeader("acct1")).toBeNull(); }); it("does nothing with empty array", () => { jar.captureRaw("acct1", []); expect(jar.getCookieHeader("acct1")).toBeNull(); }); + + it("filters out non-whitelisted cookies (e.g. __cf_bm)", () => { + // __cf_bm is Cloudflare Bot Management — when captured and replayed it + // becomes a "suspicious session" tag (bound to IP+UA+TLS at issue time) + // and triggers path-level 404s on /codex/responses. Only cf_clearance + // (the positive challenge-pass token) should be captured automatically. + jar.captureRaw("acct1", [ + "__cf_bm=poison; Path=/; Max-Age=1800; HttpOnly", + "cf_clearance=ok; Path=/; Max-Age=3600", + "session_id=abc; Path=/; HttpOnly", + ]); + const raw = jar.get("acct1"); + expect(raw).not.toBeNull(); + expect(raw).not.toHaveProperty("__cf_bm"); + expect(raw).not.toHaveProperty("session_id"); + expect(raw).toEqual({ cf_clearance: "ok" }); + }); + + it("manual set() still accepts arbitrary cookies (debugging / overrides)", () => { + // The whitelist only applies to auto-capture from Set-Cookie headers. + // Operators may still inject any cookie manually via the admin API. 
describe("isCfPathBlockError", () => {
  it("matches empty-body 404 (Cloudflare stealth deny)", () => {
    // Empty and whitespace-only bodies are all treated as "no body".
    for (const blank of ["", " ", "\n"]) {
      expect(isCfPathBlockError(new CodexApiError(404, blank))).toBe(true);
    }
  });

  it("does not match 404 with a real error body", () => {
    const payload = JSON.stringify({ error: { message: "Not found" } });
    const realNotFound = new CodexApiError(404, payload);
    expect(isCfPathBlockError(realNotFound)).toBe(false);
  });

  it("does not match other empty-body statuses", () => {
    for (const status of [403, 502]) {
      expect(isCfPathBlockError(new CodexApiError(status, ""))).toBe(false);
    }
  });

  it("returns false for non-CodexApiError", () => {
    const notCodexErrors: unknown[] = [new Error("404"), null];
    for (const candidate of notCodexErrors) {
      expect(isCfPathBlockError(candidate)).toBe(false);
    }
  });
});
/** Minimal stand-in for the part of CookieJar the error handler calls. */
interface MockJar {
  clear: ReturnType<typeof vi.fn>;
}

/** Builds a jar whose `clear` is a spy, so tests can assert on it. */
function createMockJar(): MockJar {
  return { clear: vi.fn() };
}

// The two cases below live inside `describe("handleCodexApiError", ...)`,
// which supplies `pool`, `entryId`, `model` and `tag`.

  it("Cloudflare path-block (empty-body 404): clears cookies, retries, disables after threshold", () => {
    _resetAllCfPathBlocks();
    const jar = createMockJar();
    const err = new CodexApiError(404, "");
    const hitBlock = (): ErrorAction =>
      handleCodexApiError(err, pool as never, entryId, model, tag, false, jar as never);

    // 1st block: clear cookies, retry on a different account, no disable.
    const first = hitBlock();
    expect(first.action).toBe("retry");
    expect(first.releaseBeforeRetry).toBe(true);
    expect(jar.clear).toHaveBeenCalledWith(entryId);
    expect(pool.markStatus).not.toHaveBeenCalled();

    // 2nd block: still below the threshold.
    const second = hitBlock();
    expect(second.action).toBe("retry");
    expect(pool.markStatus).not.toHaveBeenCalled();

    // 3rd block: threshold reached — the account is disabled.
    const third = hitBlock();
    expect(pool.markStatus).toHaveBeenCalledWith(entryId, "disabled");
    // Still a retry (with the account released) so the request can fail
    // over to another account on the same orchestration loop.
    expect(third.action).toBe("retry");
    expect(third.releaseBeforeRetry).toBe(true);
    // Cookies are cleared on every block, including the disabling one.
    expect(jar.clear).toHaveBeenCalledTimes(3);
  });

  it("Cloudflare path-block branch ignores non-empty 404 bodies", () => {
    _resetAllCfPathBlocks();
    const jar = createMockJar();
    const body = JSON.stringify({ error: { message: "real not found" } });
    const err = new CodexApiError(404, body);

    const result = handleCodexApiError(err, pool as never, entryId, model, tag, false, jar as never);

    // Falls through to the generic respond path: no cookie clear, no disable.
    expect(result.action).toBe("respond");
    expect(result.status).toBe(404);
    expect(jar.clear).not.toHaveBeenCalled();
    expect(pool.markStatus).not.toHaveBeenCalled();
  });