diff --git a/deploy/k8s/ingress/ingress.yaml b/deploy/k8s/ingress/ingress.yaml index 89d2630a4..61b37ec26 100644 --- a/deploy/k8s/ingress/ingress.yaml +++ b/deploy/k8s/ingress/ingress.yaml @@ -5,8 +5,8 @@ metadata: namespace: {{NAMESPACE}} annotations: # 流式响应需要的长超时和禁用缓冲 (nginx-ingress) - nginx.ingress.kubernetes.io/proxy-read-timeout: "600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "600" + nginx.ingress.kubernetes.io/proxy-read-timeout: "3700" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3700" nginx.ingress.kubernetes.io/proxy-buffering: "off" nginx.ingress.kubernetes.io/proxy-request-buffering: "off" # 客户端 IP 透传依赖 ingress controller 级 forwarded-header / real-ip 配置。 diff --git a/docs/k8s-deployment.md b/docs/k8s-deployment.md index 91969a582..84826a4ea 100644 --- a/docs/k8s-deployment.md +++ b/docs/k8s-deployment.md @@ -216,22 +216,40 @@ bash scripts/deploy-k8s.sh --replicas 3 --hpa-min 3 --hpa-max 10 -y ### Codex `/v1/responses` WebSocket 反代 `/v1/responses` 端点对 Codex 客户端会走 **WebSocket 升级**(其余路径仍是 HTTP)。 -反代必须显式放行 Upgrade/Connection 头并放宽 idle timeout,否则会出现 +同一条连接会连续承载多个 `response.create`,并依赖上游连接本地缓存支持 +`store=false` + `previous_response_id` 续接。反代必须显式放行 Upgrade/Connection +头并放宽 idle timeout,否则会出现 `stream disconnected before completion: WebSocket protocol error: Connection reset without closing handshake`(直连 CCH 正常,经反代失败时多半是这一项)。 **Nginx** +建议在 `http {}` 中先定义连接头映射,避免普通 HTTP 请求也被写死为 +`Connection: upgrade`: + +```nginx +map $http_upgrade $connection_upgrade { + default upgrade; + '' close; +} +``` + ```nginx location /v1/responses { proxy_pass http://cch_upstream; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; + proxy_set_header Connection $connection_upgrade; proxy_set_header Host $host; - proxy_read_timeout 600s; # Codex 长 reasoning 流式响应时间 - proxy_send_timeout 600s; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 3700s; # OpenAI Responses WS 连接上限约 60 分钟 + proxy_send_timeout 3700s; + proxy_connect_timeout 300s; proxy_buffering off; # 关闭缓冲,SSE / WS 才能实时 + proxy_request_buffering off; + client_max_body_size 100m; # 避免 Codex 大上下文在反代层被截断 } ``` @@ -243,8 +261,10 @@ location /v1/responses { ```yaml metadata: annotations: - nginx.ingress.kubernetes.io/proxy-read-timeout: "600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "600" + nginx.ingress.kubernetes.io/proxy-read-timeout: "3700" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3700" + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-request-buffering: "off" ``` **Cloudflare / 其它 CDN** diff --git a/messages/en/settings/prices.json b/messages/en/settings/prices.json index 534410124..cb206b446 100644 --- a/messages/en/settings/prices.json +++ b/messages/en/settings/prices.json @@ -172,10 +172,10 @@ "deleteConfirm": "Are you sure you want to delete model {name}? This action cannot be undone.", "form": { "modelName": "Model ID", - "modelNamePlaceholder": "e.g., gpt-5.3-codex", + "modelNamePlaceholder": "e.g., gpt-5.4", "modelNameRequired": "Model ID is required", "displayName": "Display Name (Optional)", - "displayNamePlaceholder": "e.g., GPT-5.3 Codex", + "displayNamePlaceholder": "e.g., GPT-5.4 Codex", "type": "Type", "provider": "Provider", "providerPlaceholder": "e.g., openai", diff --git a/messages/en/settings/providers/form/modelSelect.json b/messages/en/settings/providers/form/modelSelect.json index 9a34815a7..23f77b387 100644 --- a/messages/en/settings/providers/form/modelSelect.json +++ b/messages/en/settings/providers/form/modelSelect.json @@ -10,7 +10,7 @@ "loading": "Loading...", "manualAdd": "Manually Add Model", "manualDesc": "Support adding any model name (not limited to price table)", - "manualPlaceholder": "Enter model name (e.g. gpt-5-turbo)", + "manualPlaceholder": "Enter model name (e.g. gpt-5.4)", "notFound": "Model not found", "openai": "OpenAI", "providerFilterAll": "All Providers", diff --git a/messages/en/usage.json b/messages/en/usage.json index c022cc62c..419669bc0 100644 --- a/messages/en/usage.json +++ b/messages/en/usage.json @@ -544,7 +544,7 @@ "importantPoints": [ "Create an API key in the cch console and set the CCH_API_KEY environment variable", "cchClaude/openai use ${resolvedOrigin}/v1; cchGemini uses ${resolvedOrigin}/v1beta", - "When selecting models, use provider_id/model_id (e.g. openai/gpt-5.2 or cchClaude/claude-sonnet-4-5-20250929)" + "When selecting models, use provider_id/model_id (e.g. openai/gpt-5.4 or cchClaude/claude-sonnet-4-5-20250929)" ] }, @@ -622,7 +622,7 @@ "steps": [ "Restart Droid", "Enter the /model command", - "Select GPT-5-Codex [cch] or Sonnet 4.5 [cch]", + "Select GPT-5.4 [cch] or Sonnet 4.5 [cch]", "Start using!" ] } diff --git a/messages/ja/settings/prices.json b/messages/ja/settings/prices.json index 5265e9518..82a9a94d9 100644 --- a/messages/ja/settings/prices.json +++ b/messages/ja/settings/prices.json @@ -172,10 +172,10 @@ "deleteConfirm": "モデル {name} を削除してもよろしいですか?この操作は元に戻せません。", "form": { "modelName": "モデルID", - "modelNamePlaceholder": "例: gpt-5.3-codex", + "modelNamePlaceholder": "例: gpt-5.4", "modelNameRequired": "モデルIDは必須です", "displayName": "表示名 (任意)", - "displayNamePlaceholder": "例: GPT-5.3 Codex", + "displayNamePlaceholder": "例: GPT-5.4 Codex", "type": "タイプ", "provider": "プロバイダー", "providerPlaceholder": "例: openai", diff --git a/messages/ja/settings/providers/form/modelSelect.json b/messages/ja/settings/providers/form/modelSelect.json index 9bc1993dc..801a9e6a0 100644 --- a/messages/ja/settings/providers/form/modelSelect.json +++ b/messages/ja/settings/providers/form/modelSelect.json @@ -10,7 +10,7 @@ "loading": "読み込み中...", "manualAdd": "手動でモデルを追加", "manualDesc": "任意のモデル名を追加できます(価格表のモデルに限定されません)", - "manualPlaceholder": "モデル名を入力(例:gpt-5-turbo)", + "manualPlaceholder": "モデル名を入力(例:gpt-5.4)", "notFound": "モデルが見つかりません", "openai": "OpenAI", "providerFilterAll": "すべてのプロバイダー", diff --git a/messages/ja/settings/providers/form/strings.json b/messages/ja/settings/providers/form/strings.json index 63267fc76..a5b92cd23 100644 --- a/messages/ja/settings/providers/form/strings.json +++ b/messages/ja/settings/providers/form/strings.json @@ -82,7 +82,7 @@ "modelWhitelistLoading": "読み込み中...", "modelWhitelistManualAdd": "モデルを手動追加", "modelWhitelistManualDesc": "価格表に限定せず、任意のモデル名を追加できます", - "modelWhitelistManualPlaceholder": "モデル名を入力 (例: gpt-5-turbo)", + "modelWhitelistManualPlaceholder": "モデル名を入力 (例: gpt-5.4)", "modelWhitelistNotFound": "モデルが見つかりません", "modelWhitelistSearchPlaceholder": "モデル名を検索...", "modelWhitelistSelectAll": "すべて選択 ({count})", diff --git a/messages/ja/usage.json b/messages/ja/usage.json index e72c8dffb..871f163e5 100644 --- a/messages/ja/usage.json +++ b/messages/ja/usage.json @@ -544,7 +544,7 @@ "importantPoints": [ "cch の管理画面で API Key を作成し、環境変数 CCH_API_KEY を設定してください", "cchClaude/openai は ${resolvedOrigin}/v1、cchGemini は ${resolvedOrigin}/v1beta を baseURL に使用します", - "モデル選択は provider_id/model_id 形式(例:openai/gpt-5.2 または cchClaude/claude-sonnet-4-5-20250929)" + "モデル選択は provider_id/model_id 形式(例:openai/gpt-5.4 または cchClaude/claude-sonnet-4-5-20250929)" ] }, @@ -622,7 +622,7 @@ "steps": [ "Droid を再起動", "/model コマンドを入力", - "GPT-5-Codex [cch] または Sonnet 4.5 [cch] を選択", + "GPT-5.4 [cch] または Sonnet 4.5 [cch] を選択", "使用開始!" ] } diff --git a/messages/ru/settings/prices.json b/messages/ru/settings/prices.json index 94e00aa80..cd9c9ef55 100644 --- a/messages/ru/settings/prices.json +++ b/messages/ru/settings/prices.json @@ -172,10 +172,10 @@ "deleteConfirm": "Удалить модель {name}? Это действие необратимо.", "form": { "modelName": "ID модели", - "modelNamePlaceholder": "например: gpt-5.3-codex", + "modelNamePlaceholder": "например: gpt-5.4", "modelNameRequired": "ID модели обязателен", "displayName": "Отображаемое имя (необязательно)", - "displayNamePlaceholder": "например: GPT-5.3 Codex", + "displayNamePlaceholder": "например: GPT-5.4 Codex", "type": "Тип", "provider": "Поставщик", "providerPlaceholder": "например: openai", diff --git a/messages/ru/settings/providers/form/modelSelect.json b/messages/ru/settings/providers/form/modelSelect.json index fd2763cdf..3bb7e58fe 100644 --- a/messages/ru/settings/providers/form/modelSelect.json +++ b/messages/ru/settings/providers/form/modelSelect.json @@ -10,7 +10,7 @@ "loading": "Загрузка...", "manualAdd": "Добавить модель вручную", "manualDesc": "Поддержка добавления любого названия модели (не ограничено прайс-листом)", - "manualPlaceholder": "Введите название модели (например, gpt-5-turbo)", + "manualPlaceholder": "Введите название модели (например, gpt-5.4)", "notFound": "Модели не найдены", "openai": "OpenAI", "providerFilterAll": "Все провайдеры", diff --git a/messages/ru/settings/providers/form/strings.json b/messages/ru/settings/providers/form/strings.json index b1191c1cb..7f0c9a113 100644 --- a/messages/ru/settings/providers/form/strings.json +++ b/messages/ru/settings/providers/form/strings.json @@ -82,7 +82,7 @@ "modelWhitelistLoading": "Загрузка...", "modelWhitelistManualAdd": "Добавить модель вручную", "modelWhitelistManualDesc": "Поддерживает добавление любого имени модели (не ограничено прайс-листом)", - "modelWhitelistManualPlaceholder": "Введите имя модели (например, gpt-5-turbo)", + "modelWhitelistManualPlaceholder": "Введите имя модели (например, gpt-5.4)", "modelWhitelistNotFound": "Модели не найдены", "modelWhitelistSearchPlaceholder": "Поиск по имени модели...", "modelWhitelistSelectAll": "Выбрать все ({count})", diff --git a/messages/ru/usage.json b/messages/ru/usage.json index babe42cc5..9e6971559 100644 --- a/messages/ru/usage.json +++ b/messages/ru/usage.json @@ -544,7 +544,7 @@ "importantPoints": [ "Создайте API key в панели cch и задайте переменную окружения CCH_API_KEY", "cchClaude/openai используют ${resolvedOrigin}/v1; cchGemini использует ${resolvedOrigin}/v1beta", - "При выборе модели используйте provider_id/model_id (например, openai/gpt-5.2 или cchClaude/claude-sonnet-4-5-20250929)" + "При выборе модели используйте provider_id/model_id (например, openai/gpt-5.4 или cchClaude/claude-sonnet-4-5-20250929)" ] }, @@ -622,7 +622,7 @@ "steps": [ "Перезагрузите Droid", "Введите команду /model", - "Выберите GPT-5-Codex [cch] или Sonnet 4.5 [cch]", + "Выберите GPT-5.4 [cch] или Sonnet 4.5 [cch]", "Начните использовать!" ] } diff --git a/messages/zh-CN/settings/prices.json b/messages/zh-CN/settings/prices.json index eb8cbac0b..4af7bbf8d 100644 --- a/messages/zh-CN/settings/prices.json +++ b/messages/zh-CN/settings/prices.json @@ -172,10 +172,10 @@ "deleteConfirm": "确定要删除模型 {name} 吗?此操作不可撤销。", "form": { "modelName": "模型 ID", - "modelNamePlaceholder": "例如: gpt-5.3-codex", + "modelNamePlaceholder": "例如: gpt-5.4", "modelNameRequired": "模型 ID 不能为空", "displayName": "展示名称(可选)", - "displayNamePlaceholder": "例如: GPT-5.3 Codex", + "displayNamePlaceholder": "例如: GPT-5.4 Codex", "type": "类型", "provider": "供应商", "providerPlaceholder": "例如: openai", diff --git a/messages/zh-CN/settings/providers/form/modelSelect.json b/messages/zh-CN/settings/providers/form/modelSelect.json index 60ac497fb..c58aa6ea6 100644 --- a/messages/zh-CN/settings/providers/form/modelSelect.json +++ b/messages/zh-CN/settings/providers/form/modelSelect.json @@ -13,7 +13,7 @@ "exactMatchHint": "这里选中的模型会作为精确匹配规则加入白名单;前缀、后缀、关键词和正则规则仍在下方高级编辑区维护。", "fallbackNotice": "当前无法获取上游模型列表,已自动切换到本地价格表目录。", "manualAdd": "手动添加模型", - "manualPlaceholder": "输入模型名称(如 gpt-5-turbo)", + "manualPlaceholder": "输入模型名称(如 gpt-5.4)", "manualDesc": "支持添加任意模型名称(不限于价格表中的模型)", "claude": "Claude", "openai": "OpenAI", diff --git a/messages/zh-CN/settings/providers/form/strings.json b/messages/zh-CN/settings/providers/form/strings.json index f1e2e54b2..154f1ae09 100644 --- a/messages/zh-CN/settings/providers/form/strings.json +++ b/messages/zh-CN/settings/providers/form/strings.json @@ -105,7 +105,7 @@ "modelWhitelistSelectAll": "全选 ({count})", "modelWhitelistClear": "清空", "modelWhitelistManualAdd": "手动添加模型", - "modelWhitelistManualPlaceholder": "输入模型名称(如 gpt-5-turbo)", + "modelWhitelistManualPlaceholder": "输入模型名称(如 gpt-5.4)", "modelWhitelistManualDesc": "支持添加任意模型名称(不限于价格表中的模型)", "modelWhitelistAllowAll": "允许所有 {type} 模型", "modelWhitelistAllowAllClause": "允许所有 Claude 模型", diff --git a/messages/zh-CN/usage.json b/messages/zh-CN/usage.json index 4bd2c86ff..6bdb3829f 100644 --- a/messages/zh-CN/usage.json +++ b/messages/zh-CN/usage.json @@ -540,7 +540,7 @@ "importantPoints": [ "请先在 cch 后台创建 API Key,并设置环境变量 CCH_API_KEY", "cchClaude/openai 使用 ${resolvedOrigin}/v1,cchGemini 使用 ${resolvedOrigin}/v1beta", - "模型选择时使用 provider_id/model_id 格式(例如 openai/gpt-5.2 或 cchClaude/claude-sonnet-4-5-20250929)" + "模型选择时使用 provider_id/model_id 格式(例如 openai/gpt-5.4 或 cchClaude/claude-sonnet-4-5-20250929)" ] }, @@ -618,7 +618,7 @@ "steps": [ "重启 Droid", "输入 /model 命令", - "选择 GPT-5-Codex [cch] 或 Sonnet 4.5 [cch]", + "选择 GPT-5.4 [cch] 或 Sonnet 4.5 [cch]", "开始使用!" ] } diff --git a/messages/zh-TW/settings/prices.json b/messages/zh-TW/settings/prices.json index d37f5d5be..68022a36d 100644 --- a/messages/zh-TW/settings/prices.json +++ b/messages/zh-TW/settings/prices.json @@ -172,10 +172,10 @@ "deleteConfirm": "確定要刪除模型 {name} 嗎?此操作無法復原。", "form": { "modelName": "模型識別碼", - "modelNamePlaceholder": "例如:gpt-5.3-codex", + "modelNamePlaceholder": "例如:gpt-5.4", "modelNameRequired": "模型 ID 為必填", "displayName": "顯示名稱(選填)", - "displayNamePlaceholder": "例如:GPT-5.3 Codex", + "displayNamePlaceholder": "例如:GPT-5.4 Codex", "type": "類型", "provider": "供應商", "providerPlaceholder": "例如:openai", diff --git a/messages/zh-TW/settings/providers/form/modelSelect.json b/messages/zh-TW/settings/providers/form/modelSelect.json index 0525b31d5..cfb705451 100644 --- a/messages/zh-TW/settings/providers/form/modelSelect.json +++ b/messages/zh-TW/settings/providers/form/modelSelect.json @@ -10,7 +10,7 @@ "loading": "載入中...", "manualAdd": "手動新增模型", "manualDesc": "支援新增任意模型名稱(不限於價格表中的模型)", - "manualPlaceholder": "輸入模型名稱(例如 gpt-5-turbo)", + "manualPlaceholder": "輸入模型名稱(例如 gpt-5.4)", "notFound": "找不到模型", "openai": "OpenAI", "providerFilterAll": "全部供應商", diff --git a/messages/zh-TW/settings/providers/form/strings.json b/messages/zh-TW/settings/providers/form/strings.json index 71bd1f47a..3b6f56a8f 100644 --- a/messages/zh-TW/settings/providers/form/strings.json +++ b/messages/zh-TW/settings/providers/form/strings.json @@ -82,7 +82,7 @@ "modelWhitelistLoading": "載入中...", "modelWhitelistManualAdd": "手動新增模型", "modelWhitelistManualDesc": "支援新增任意模型名稱(不限於價格表中的模型)", - "modelWhitelistManualPlaceholder": "輸入模型名稱(例如 gpt-5-turbo)", + "modelWhitelistManualPlaceholder": "輸入模型名稱(例如 gpt-5.4)", "modelWhitelistNotFound": "未找到模型", "modelWhitelistSearchPlaceholder": "搜尋模型名稱...", "modelWhitelistSelectAll": "全選({count})", diff --git a/messages/zh-TW/usage.json b/messages/zh-TW/usage.json index ec5c9afc5..e2ea4ea04 100644 --- a/messages/zh-TW/usage.json +++ b/messages/zh-TW/usage.json @@ -540,7 +540,7 @@ "importantPoints": [ "請先在 cch 後台創建 API Key,並設置環境變量 CCH_API_KEY", "cchClaude/openai 使用 ${resolvedOrigin}/v1,cchGemini 使用 ${resolvedOrigin}/v1beta", - "模型選擇時使用 provider_id/model_id 格式(例如 openai/gpt-5.2 或 cchClaude/claude-sonnet-4-5-20250929)" + "模型選擇時使用 provider_id/model_id 格式(例如 openai/gpt-5.4 或 cchClaude/claude-sonnet-4-5-20250929)" ] }, @@ -618,7 +618,7 @@ "steps": [ "重啟 Droid", "輸入 /model 命令", - "選擇 GPT-5-Codex [cch] 或 Sonnet 4.5 [cch]", + "選擇 GPT-5.4 [cch] 或 Sonnet 4.5 [cch]", "開始使用!" ] } diff --git a/server.js b/server.js index 08bb4a9ff..71d44b87c 100644 --- a/server.js +++ b/server.js @@ -7,10 +7,11 @@ // // Architecture: this server is a thin tunnel. For each client WebSocket frame, // we build an equivalent HTTP POST against the same app's /v1/responses -// endpoint (with an x-cch-client-transport header) so that auth, provider -// selection, guard pipeline, forwarder, circuit breakers, observability, and -// all existing TypeScript business logic run exactly once. Upstream WebSocket -// attempts live inside that TypeScript pipeline (forwarder), not here. +// endpoint (with x-cch-client-transport and x-cch-responses-ws-session headers) +// so that auth, provider selection, guard pipeline, forwarder, circuit +// breakers, observability, and all existing TypeScript business logic run +// exactly once. Upstream WebSocket attempts and per-client upstream reuse live +// inside that TypeScript pipeline (forwarder), not here. // // Compatibility: // - Non-WebSocket clients: unaffected. HTTP still flows through Next.js. @@ -25,7 +26,13 @@ const http = require("node:http"); const { randomUUID } = require("node:crypto"); const { parse } = require("node:url"); -const dev = process.env.NODE_ENV !== "production"; +function isNextDevMode(nodeEnv) { + return nodeEnv !== "production"; +} + +// 保留既有本地语义:只有显式 production 才服务已构建产物;Docker/K8s +// 镜像会显式设置 NODE_ENV=production 和 PORT=3000。 +const dev = isNextDevMode(process.env.NODE_ENV); const hostname = process.env.HOSTNAME || "0.0.0.0"; const port = parseInt(process.env.PORT || (dev ? "13500" : "3000"), 10); @@ -39,6 +46,7 @@ const INTERNAL_TUNNEL_HOST = const WS_PATH = "/v1/responses"; const CLIENT_TRANSPORT_HEADER = "x-cch-client-transport"; const WS_FORWARD_FLAG_HEADER = "x-cch-responses-ws-forward"; +const WS_SESSION_HEADER = "x-cch-responses-ws-session"; const INTERNAL_SECRET_HEADER = "x-cch-internal-secret"; const INTERNAL_SECRET_ENV = "CCH_RESPONSES_WS_INTERNAL_SECRET"; @@ -119,6 +127,7 @@ function sanitizedRequestPath(rawUrl) { async function handleWebSocketConnection(ws, req) { const url = new URL(req.url, `http://${req.headers.host || "localhost"}`); const queryModel = url.searchParams.get("model"); + const responsesWsSessionId = randomUUID(); let inFlight = false; const pending = []; let pendingBytes = 0; @@ -128,17 +137,32 @@ async function handleWebSocketConnection(ws, req) { // (and provider concurrency / breaker counters) keep running for minutes. let currentInternalReq = null; - const finalize = () => { - if (closed) return; - closed = true; - if (currentInternalReq) { - try { - currentInternalReq.destroy(); - } catch { - // ignore + const abortCurrentInternalReq = () => { + if (!currentInternalReq) return; + const reqToDestroy = currentInternalReq; + currentInternalReq = null; + try { + if (!reqToDestroy.destroyed) { + reqToDestroy.destroy(); } - currentInternalReq = null; + } catch { + // ignore } + }; + + const cleanupUpstreamWsSession = () => { + const cleanup = globalThis.__cchCleanupResponsesWsSession; + if (typeof cleanup !== "function") return; + try { + cleanup(responsesWsSessionId); + } catch (err) { + log("warn", "ws_upstream_session_cleanup_failed", { + error: String(err && err.message ? err.message : err), + }); + } + }; + + const dropPendingFrames = () => { if (pending.length > 0) { log("warn", "ws_pending_dropped_on_close", { droppedFrames: pending.length, @@ -149,6 +173,14 @@ async function handleWebSocketConnection(ws, req) { pendingBytes = 0; }; + const finalize = () => { + if (closed) return; + closed = true; + abortCurrentInternalReq(); + dropPendingFrames(); + cleanupUpstreamWsSession(); + }; + // Synchronously mark the connection closed so any pipelined frame in // `pending` is dropped *before* drain() can dispatch another upstream // request. Without this the gap between ws.close() and the async @@ -156,20 +188,20 @@ async function handleWebSocketConnection(ws, req) { // and run `forwardToInternalHttp` against the upstream — work the client // can never receive (safeSend would fail) but the provider would still bill. const requestClose = (code, reason) => { - if (closed || (ws && ws.readyState >= 2) /* CLOSING | CLOSED */) { + if (closed) { + abortCurrentInternalReq(); + dropPendingFrames(); + return; + } + if (ws && ws.readyState >= 2) { // Already closing/closed; just make sure local state matches. - if (!closed) finalize(); + finalize(); return; } closed = true; - if (pending.length > 0) { - log("warn", "ws_pending_dropped_on_close", { - droppedFrames: pending.length, - droppedBytes: pendingBytes, - }); - } - pending.length = 0; - pendingBytes = 0; + abortCurrentInternalReq(); + dropPendingFrames(); + cleanupUpstreamWsSession(); log("info", "ws_client_close_initiated", { code, reason }); try { ws.close(code, reason); @@ -191,11 +223,7 @@ async function handleWebSocketConnection(ws, req) { if (typeof raw !== "string") { emitErrorEvent(ws, "invalid_frame_type", "Only text WebSocket frames are supported"); - try { - ws.close(1003, "binary_not_supported"); - } catch { - // ignore - } + requestClose(1003, "binary_not_supported"); return; } @@ -243,6 +271,7 @@ async function handleWebSocketConnection(ws, req) { ws, req, body, + responsesWsSessionId, (clientReq) => { currentInternalReq = clientReq; }, @@ -270,11 +299,7 @@ async function handleWebSocketConnection(ws, req) { error: String(err && err.message ? err.message : err), }); emitErrorEvent(ws, "internal_error", "Failed to process queued request"); - try { - ws.close(1011, "internal_error"); - } catch { - // ignore - } + requestClose(1011, "internal_error"); }); } } @@ -284,11 +309,7 @@ async function handleWebSocketConnection(ws, req) { if (closed) return; if (isBinary) { emitErrorEvent(ws, "invalid_frame_type", "Only text WebSocket frames are supported"); - try { - ws.close(1003, "binary_not_supported"); - } catch { - // ignore - } + requestClose(1003, "binary_not_supported"); return; } const text = data.toString("utf8"); @@ -300,11 +321,7 @@ async function handleWebSocketConnection(ws, req) { attemptedFrameSize: size, }); emitErrorEvent(ws, "too_many_requests", "Pending frame limit exceeded"); - try { - ws.close(1008, "too_many_requests"); - } catch { - // ignore - } + requestClose(1008, "too_many_requests"); return; } pending.push(text); @@ -314,16 +331,19 @@ async function handleWebSocketConnection(ws, req) { error: String(err && err.message ? err.message : err), }); emitErrorEvent(ws, "internal_error", "Failed to process request"); - try { - ws.close(1011, "internal_error"); - } catch { - // ignore - } + requestClose(1011, "internal_error"); }); }); } -async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, requestClose) { +async function forwardToInternalHttp( + ws, + originalReq, + body, + responsesWsSessionId, + registerInternalReq, + requestClose +) { // requestClose(code, reason) initiates the WebSocket closing handshake AND // synchronously marks the client connection closed so the caller's pending // queue stops dispatching follow-up frames against the upstream. Tests that @@ -373,6 +393,9 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, internalHeaders["content-type"] = "application/json"; internalHeaders[CLIENT_TRANSPORT_HEADER] = "websocket"; internalHeaders[WS_FORWARD_FLAG_HEADER] = "1"; + if (typeof responsesWsSessionId === "string" && responsesWsSessionId.length > 0) { + internalHeaders[WS_SESSION_HEADER] = responsesWsSessionId; + } // Per-process loopback secret. Read from process.env so it can be picked // up by any code path that needs to verify (the TS forwarder reads the // same env var via `internal-secret.ts`). The secret is generated at @@ -403,6 +426,13 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, (res) => { const contentType = (res.headers["content-type"] || "").toLowerCase(); const isSse = contentType.includes("text/event-stream"); + let responseSettled = false; + const settleResponse = () => { + if (responseSettled) return false; + responseSettled = true; + resolve(); + return true; + }; if (!isSse) { // Upstream returned non-stream JSON (e.g. error response). Collect @@ -410,6 +440,7 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, const chunks = []; res.on("data", (c) => chunks.push(c)); res.on("end", () => { + if (responseSettled) return; const text = Buffer.concat(chunks).toString("utf8"); let parsed; try { @@ -421,30 +452,45 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, if (isHttpError) { safeSend(ws, { type: "error", + status: res.statusCode, error: typeof parsed === "object" && parsed && parsed.error ? parsed.error : { code: `http_${res.statusCode}`, message: text.slice(0, 512) }, }); - initiateClose(1011, `http_${res.statusCode}`); + log("info", "ws_terminal_event_sent", { + type: "error", + source: "json", + status: res.statusCode, + }); } else { safeSend(ws, { type: "response.completed", response: parsed, }); log("info", "ws_terminal_event_sent", { type: "response.completed", source: "json" }); - initiateClose(1000, "response_completed"); } - resolve(); + settleResponse(); }); res.on("error", (err) => { + if (responseSettled) return; emitErrorEvent( ws, "internal_response_error", String(err && err.message ? err.message : err) ); initiateClose(1011, "internal_response_error"); - resolve(); + settleResponse(); + }); + res.on("close", () => { + if (responseSettled) return; + emitErrorEvent( + ws, + "internal_response_closed", + "Internal response closed before a complete JSON body was received" + ); + initiateClose(1011, "internal_response_closed"); + settleResponse(); }); return; } @@ -456,6 +502,14 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, let sawTerminal = false; let terminalEventType = null; const EVENT_DELIMITER = /\r?\n\r?\n/; + const failIfUnsettled = (code, message, closeReason) => { + if (responseSettled) return; + if (!sawTerminal) { + emitErrorEvent(ws, code, message); + initiateClose(1011, closeReason); + } + settleResponse(); + }; const flushEvents = () => { const parts = buffer.split(EVENT_DELIMITER); @@ -502,10 +556,12 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, res.setEncoding("utf8"); res.on("data", (chunk) => { + if (responseSettled) return; buffer += chunk; flushEvents(); }); res.on("end", () => { + if (responseSettled) return; // Flush any remaining buffered event if (buffer.trim().length > 0) { buffer += "\n\n"; @@ -519,25 +575,27 @@ async function forwardToInternalHttp(ws, originalReq, body, registerInternalReq, ); initiateClose(1011, "stream_ended_without_terminal"); } else { - // Use 1000 for normal terminal types and the synthesized [DONE] - // path; reserve 1011 for the explicit upstream "error" terminal - // so the client distinguishes a clean response from a failure. - const isErrorTerminal = terminalEventType === "error"; - initiateClose( - isErrorTerminal ? 1011 : 1000, - isErrorTerminal ? "upstream_error" : "response_completed" - ); + // OpenAI Responses WebSocket mode is persistent: after a terminal + // event, the same client connection can send the next + // response.create. Do not close here; only fatal transport/protocol + // errors initiate a close handshake. + log("info", "ws_turn_completed", { terminalEventType }); } - resolve(); + settleResponse(); }); res.on("error", (err) => { - emitErrorEvent( - ws, + failIfUnsettled( "internal_response_error", - String(err && err.message ? err.message : err) + String(err && err.message ? err.message : err), + "internal_response_error" + ); + }); + res.on("close", () => { + failIfUnsettled( + "internal_response_closed", + "Internal response closed before emitting a terminal response event", + "internal_response_closed" ); - initiateClose(1011, "internal_response_error"); - resolve(); }); } ); @@ -668,6 +726,7 @@ async function main() { // Exposed for tests; not part of the long-lived server entrypoint. module.exports = { sanitizedRequestPath, + isNextDevMode, handleWebSocketConnection, forwardToInternalHttp, WS_MAX_PAYLOAD_BYTES, diff --git a/src/actions/providers.ts b/src/actions/providers.ts index 3110462a7..cdca252cc 100644 --- a/src/actions/providers.ts +++ b/src/actions/providers.ts @@ -4309,7 +4309,7 @@ export async function testProviderOpenAIChatCompletions( ): Promise { return executeProviderApiTest(data, { path: "/v1/chat/completions", - defaultModel: "gpt-5.3-codex", + defaultModel: "gpt-5.4", headers: (apiKey, context) => { void context; return { @@ -4343,7 +4343,7 @@ export async function testProviderOpenAIResponses( ): Promise { return executeProviderApiTest(data, { path: "/v1/responses", - defaultModel: "gpt-5.3-codex", + defaultModel: "gpt-5.4", headers: (apiKey, context) => { void context; return { diff --git a/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client-actions.test.tsx b/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client-actions.test.tsx index c476c168f..2b40f8de6 100644 --- a/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client-actions.test.tsx +++ b/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client-actions.test.tsx @@ -123,7 +123,7 @@ function createSnapshots(): SessionDetailSnapshots { defaultView: DEFAULT_SESSION_DETAIL_VIEW_MODE, request: { before: { - body: { model: "gpt-5.2", input: "before" }, + body: { model: "gpt-5.4", input: "before" }, messages: { role: "user", content: "before" }, headers: { "x-before": "1" }, meta: { @@ -133,7 +133,7 @@ function createSnapshots(): SessionDetailSnapshots { }, }, after: { - body: { model: "gpt-5.2", input: "after" }, + body: { model: "gpt-5.4", input: "after" }, messages: { role: "user", content: "after" }, headers: { "x-after": "1" }, meta: { @@ -174,7 +174,7 @@ function buildDetailsData( }> = {} ) { return { - requestBody: { model: "gpt-5.2", input: "legacy" }, + requestBody: { model: "gpt-5.4", input: "legacy" }, messages: { role: "user", content: "legacy" }, response: '{"legacy":true}', requestHeaders: { "x-legacy": "1" }, @@ -322,7 +322,7 @@ describe("SessionMessagesClient (request export actions)", () => { lastRequestAt: "2026-01-01T00:01:00.000Z", totalDurationMs: 1500, providers: [{ id: 1, name: "p1" }], - models: ["gpt-5.2"], + models: ["gpt-5.4"], totalInputTokens: 10, totalOutputTokens: 20, totalCacheCreationTokens: 30, @@ -589,7 +589,7 @@ describe("SessionMessagesClient (request export actions)", () => { lastRequestAt: "2026-01-01T00:01:00.000Z", totalDurationMs: 1500, providers: [{ id: 1, name: "p1" }], - models: ["gpt-5.2"], + models: ["gpt-5.4"], totalInputTokens: 10, totalOutputTokens: 20, totalCacheCreationTokens: 30, diff --git a/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client.test.tsx b/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client.test.tsx index 1342544e1..edbda27c7 100644 --- a/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client.test.tsx +++ b/src/app/[locale]/dashboard/sessions/[sessionId]/messages/_components/session-messages-client.test.tsx @@ -55,7 +55,7 @@ function createSnapshots(): SessionDetailSnapshots { defaultView: DEFAULT_SESSION_DETAIL_VIEW_MODE, request: { before: { - body: { model: "gpt-5.2", instructions: "before body" }, + body: { model: "gpt-5.4", instructions: "before body" }, messages: { role: "user", content: "before hi" }, headers: { "x-before-request": "1" }, meta: { @@ -65,7 +65,7 @@ function createSnapshots(): SessionDetailSnapshots { }, }, after: { - body: { model: "gpt-5.2", instructions: "after body" }, + body: { model: "gpt-5.4", instructions: "after body" }, messages: { role: "user", content: "after hi" }, headers: { "x-after-request": "1" }, meta: { @@ -196,7 +196,10 @@ describe("SessionMessagesDetailsTabs", () => { if (!snapshots.request.after) { throw new Error("after snapshot missing"); } - snapshots.request.after.body = { model: "gpt-5.2", input: [{ role: "user", content: "hi" }] }; + snapshots.request.after.body = { + model: "gpt-5.4", + input: [{ role: "user", content: "hi" }], + }; snapshots.request.after.messages = null; const { container, unmount } = renderWithIntl( diff --git a/src/app/[locale]/settings/providers/_components/forms/api-test-button.tsx b/src/app/[locale]/settings/providers/_components/forms/api-test-button.tsx index 973e82014..7bbc578eb 100644 --- a/src/app/[locale]/settings/providers/_components/forms/api-test-button.tsx +++ b/src/app/[locale]/settings/providers/_components/forms/api-test-button.tsx @@ -33,7 +33,7 @@ const API_TEST_UI_CONFIG = { const DEFAULT_MODELS: Record = { claude: "claude-haiku-4-5-20251001", "claude-auth": "claude-haiku-4-5-20251001", - codex: "gpt-5.3-codex", + codex: "gpt-5.4", "openai-compatible": "gpt-4.1-mini", gemini: "gemini-2.5-flash", "gemini-cli": "gemini-2.5-flash", diff --git a/src/app/[locale]/usage-doc/page.tsx b/src/app/[locale]/usage-doc/page.tsx index 5ca162b78..73262ae0b 100644 --- a/src/app/[locale]/usage-doc/page.tsx +++ b/src/app/[locale]/usage-doc/page.tsx @@ -663,7 +663,7 @@ sk_xxxxxxxxxxxxxxxxxx`} { }); }); }); + +describe("getResponsesWsSessionId", () => { + it("extracts a bounded trusted session marker from Headers or plain records", () => { + const h = new Headers({ [RESPONSES_WS_SESSION_HEADER]: " ws-session_1.2-3 " }); + expect(getResponsesWsSessionId(h)).toBe("ws-session_1.2-3"); + expect(getResponsesWsSessionId({ "X-Cch-Responses-Ws-Session": "abc.DEF-123" })).toBe( + "abc.DEF-123" + ); + }); + + it("rejects empty, overlong, or unsafe session marker values", () => { + expect(getResponsesWsSessionId(new Headers())).toBeNull(); + expect(getResponsesWsSessionId({ [RESPONSES_WS_SESSION_HEADER]: " " })).toBeNull(); + expect(getResponsesWsSessionId({ [RESPONSES_WS_SESSION_HEADER]: "x".repeat(129) })).toBeNull(); + expect(getResponsesWsSessionId({ [RESPONSES_WS_SESSION_HEADER]: "abc/def" })).toBeNull(); + }); +}); diff --git a/src/app/v1/_lib/responses-ws/__tests__/internal-secret.test.ts b/src/app/v1/_lib/responses-ws/__tests__/internal-secret.test.ts index d078c889b..40d418371 100644 --- a/src/app/v1/_lib/responses-ws/__tests__/internal-secret.test.ts +++ b/src/app/v1/_lib/responses-ws/__tests__/internal-secret.test.ts @@ -4,6 +4,7 @@ import { getInternalSecret, INTERNAL_SECRET_HEADER, RESERVED_INTERNAL_HEADERS, + RESPONSES_WS_SESSION_HEADER, verifyInternalRequest, WS_FORWARD_FLAG_HEADER, } from "../internal-secret"; @@ -97,6 +98,7 @@ describe("internal-secret", () => { it("RESERVED_INTERNAL_HEADERS lists the secret + forward flag + transport markers", () => { expect(RESERVED_INTERNAL_HEADERS).toContain(INTERNAL_SECRET_HEADER); expect(RESERVED_INTERNAL_HEADERS).toContain(WS_FORWARD_FLAG_HEADER); + expect(RESERVED_INTERNAL_HEADERS).toContain(RESPONSES_WS_SESSION_HEADER); expect(RESERVED_INTERNAL_HEADERS).toContain("x-cch-client-transport"); }); }); diff --git a/src/app/v1/_lib/responses-ws/__tests__/server-helpers.test.ts b/src/app/v1/_lib/responses-ws/__tests__/server-helpers.test.ts index f245e05fa..106a2c01a 100644 --- a/src/app/v1/_lib/responses-ws/__tests__/server-helpers.test.ts +++ b/src/app/v1/_lib/responses-ws/__tests__/server-helpers.test.ts @@ -4,8 +4,9 @@ import { describe, expect, it } from "vitest"; // eslint-disable-next-line @typescript-eslint/no-require-imports -const { sanitizedRequestPath } = require("../../../../../../server.js") as { +const { sanitizedRequestPath, isNextDevMode } = require("../../../../../../server.js") as { sanitizedRequestPath: (rawUrl: string) => string; + isNextDevMode: (nodeEnv: string | undefined) => boolean; }; describe("server.js sanitizedRequestPath", () => { @@ -14,7 +15,7 @@ describe("server.js sanitizedRequestPath", () => { }); it("preserves the model query parameter (allow-listed)", () => { - expect(sanitizedRequestPath("/v1/responses?model=gpt-5")).toBe("/v1/responses?model=gpt-5"); + expect(sanitizedRequestPath("/v1/responses?model=gpt-5.4")).toBe("/v1/responses?model=gpt-5.4"); }); it("masks unknown / sensitive query parameters", () => { @@ -32,3 +33,13 @@ describe("server.js sanitizedRequestPath", () => { expect(sanitizedRequestPath(undefined as unknown as string)).toBe("/"); }); }); + +describe("server.js isNextDevMode", () => { + it("preserves the existing non-production dev-mode default", () => { + expect(isNextDevMode("development")).toBe(true); + expect(isNextDevMode(undefined)).toBe(true); + expect(isNextDevMode("test")).toBe(true); + expect(isNextDevMode("staging")).toBe(true); + expect(isNextDevMode("production")).toBe(false); + }); +}); diff --git a/src/app/v1/_lib/responses-ws/__tests__/upstream-adapter.test.ts b/src/app/v1/_lib/responses-ws/__tests__/upstream-adapter.test.ts index 7683679cb..97e3951ed 100644 --- a/src/app/v1/_lib/responses-ws/__tests__/upstream-adapter.test.ts +++ b/src/app/v1/_lib/responses-ws/__tests__/upstream-adapter.test.ts @@ -2,7 +2,18 @@ import type { AddressInfo } from "node:net"; import { afterEach, describe, expect, it } from "vitest"; import { WebSocketServer } from "ws"; import type { Provider } from "@/types/provider"; -import { tryResponsesWebsocketUpstream } from "../upstream-adapter"; +import { + clearResponsesWsSessionsForTests, + cleanupResponsesWsSession, + getResponsesWsSessionCountForTests, + setResponsesWsSessionMaxEntriesForTests, + tryResponsesWebsocketUpstream, +} from "../upstream-adapter"; +import { + INTERNAL_SECRET_HEADER, + RESPONSES_WS_SESSION_HEADER, + WS_FORWARD_FLAG_HEADER, +} from "../internal-secret"; type ServerHandle = { wss: WebSocketServer; @@ -61,10 +72,25 @@ async function collectSseBody(response: Response): Promise { return out; } +async function withTimeout(promise: Promise, timeoutMs: number, message: string): Promise { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_resolve, reject) => { + timer = setTimeout(() => reject(new Error(message)), timeoutMs); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } +} + describe("tryResponsesWebsocketUpstream", () => { let server: ServerHandle | null = null; afterEach(async () => { + clearResponsesWsSessionsForTests(); if (server) { await server.close(); server = null; @@ -89,7 +115,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: [{ role: "user", content: "hi" }] }, + body: { model: "gpt-5.4", input: [{ role: "user", content: "hi" }] }, }); expect("response" in result).toBe(true); @@ -120,7 +146,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${addr.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("failed" in result).toBe(true); @@ -142,7 +168,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("failed" in result).toBe(true); @@ -176,7 +202,7 @@ describe("tryResponsesWebsocketUpstream", () => { upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), body: { - model: "gpt-5", + model: "gpt-5.4", input: "hi", stream: true, background: false, @@ -213,6 +239,10 @@ describe("tryResponsesWebsocketUpstream", () => { "transfer-encoding": "chunked", accept: "application/json", "content-type": "application/json", + "x-cch-client-transport": "websocket", + [WS_FORWARD_FLAG_HEADER]: "1", + [RESPONSES_WS_SESSION_HEADER]: "client-session-1", + [INTERNAL_SECRET_HEADER]: "loopback-secret-should-stay-local", // Custom header should pass through: "x-cch-tenant": "tenant-a", }; @@ -221,7 +251,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: plainHeaders, - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("response" in result).toBe(true); @@ -230,6 +260,10 @@ describe("tryResponsesWebsocketUpstream", () => { expect(receivedHeaders.authorization).toBe("Bearer sk-mock"); expect(receivedHeaders["x-cch-tenant"]).toBe("tenant-a"); + expect(receivedHeaders["x-cch-client-transport"]).toBeUndefined(); + expect(receivedHeaders[WS_FORWARD_FLAG_HEADER]).toBeUndefined(); + expect(receivedHeaders[RESPONSES_WS_SESSION_HEADER]).toBeUndefined(); + expect(receivedHeaders[INTERNAL_SECRET_HEADER]).toBeUndefined(); // The host the upstream observed must come from the actual TCP target, // never the value we passed in the plain Record (which we filter): expect(receivedHeaders.host).not.toBe("evil.example.com"); @@ -269,7 +303,7 @@ describe("tryResponsesWebsocketUpstream", () => { upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), body: { - model: "gpt-5", + model: "gpt-5.4", store: false, prompt_cache_key: "tenantA:s1", input: [{ role: "user", content: "hello" }], @@ -287,7 +321,7 @@ describe("tryResponsesWebsocketUpstream", () => { upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), body: { - model: "gpt-5", + model: "gpt-5.4", store: false, prompt_cache_key: "tenantA:s1", previous_response_id: "resp_1", @@ -319,6 +353,527 @@ describe("tryResponsesWebsocketUpstream", () => { expect(Array.isArray(second.input)).toBe(true); }); + it("reuses one upstream WS when the client WebSocket session id is stable", async () => { + const receivedFrames: Array> = []; + let connectionCount = 0; + server = await startMockServer((socket) => { + connectionCount += 1; + let turn = 0; + socket.on("message", (data) => { + const frame = JSON.parse(data.toString("utf8")) as Record; + receivedFrames.push(frame); + const responseId = `resp_${++turn}`; + socket.send(JSON.stringify({ type: "response.created", response: { id: responseId } })); + socket.send( + JSON.stringify({ + type: "response.completed", + response: { id: responseId }, + }) + ); + }); + }); + + const common = { + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-1", + }; + + const turn1 = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", store: false, input: "first" }, + }); + expect("response" in turn1).toBe(true); + if (!("response" in turn1)) return; + expect(turn1.reused).toBe(false); + await collectSseBody(turn1.response); + + const turn2 = await tryResponsesWebsocketUpstream({ + ...common, + body: { + model: "gpt-5.4", + store: false, + previous_response_id: "resp_1", + input: [{ type: "function_call_output", call_id: "call_1", output: "ok" }], + }, + }); + expect("response" in turn2).toBe(true); + if (!("response" in turn2)) return; + expect(turn2.reused).toBe(true); + await collectSseBody(turn2.response); + + expect(connectionCount).toBe(1); + expect(receivedFrames).toHaveLength(2); + expect(receivedFrames[0]?.generate).toBeUndefined(); + expect(receivedFrames[1]?.previous_response_id).toBe("resp_1"); + }); + + it("keeps generate=false warmup on the same upstream WS for the generated turn", async () => { + const receivedFrames: Array> = []; + let connectionCount = 0; + server = await startMockServer((socket) => { + connectionCount += 1; + socket.on("message", (data) => { + const frame = JSON.parse(data.toString("utf8")) as Record; + receivedFrames.push(frame); + const responseId = receivedFrames.length === 1 ? "resp_warmup" : "resp_generated"; + socket.send(JSON.stringify({ type: "response.created", response: { id: responseId } })); + socket.send(JSON.stringify({ type: "response.completed", response: { id: responseId } })); + }); + }); + + const common = { + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-generate-false", + }; + + const warmup = await tryResponsesWebsocketUpstream({ + ...common, + body: { + model: "gpt-5.4", + store: false, + generate: false, + input: "warm up", + }, + }); + expect("response" in warmup).toBe(true); + if (!("response" in warmup)) return; + await collectSseBody(warmup.response); + + const generated = await tryResponsesWebsocketUpstream({ + ...common, + body: { + model: "gpt-5.4", + store: false, + previous_response_id: "resp_warmup", + input: "continue", + }, + }); + expect("response" in generated).toBe(true); + if (!("response" in generated)) return; + expect(generated.reused).toBe(true); + await collectSseBody(generated.response); + + expect(connectionCount).toBe(1); + expect(receivedFrames[0]).toMatchObject({ generate: false, store: false }); + expect(receivedFrames[1]).toMatchObject({ previous_response_id: "resp_warmup" }); + }); + + it("forgets the upstream WS after websocket_connection_limit_reached", async () => { + let connectionCount = 0; + server = await startMockServer((socket) => { + connectionCount += 1; + socket.on("message", () => { + if (connectionCount === 1) { + socket.send( + JSON.stringify({ + type: "error", + status: 400, + error: { + type: "invalid_request_error", + code: "websocket_connection_limit_reached", + message: "Responses websocket connection limit reached (60 minutes).", + }, + }) + ); + return; + } + socket.send(JSON.stringify({ type: "response.completed", response: { id: "resp_new" } })); + }); + }); + + const common = { + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-limit", + }; + + const first = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", input: "first" }, + }); + expect("response" in first).toBe(true); + if (!("response" in first)) return; + expect(await collectSseBody(first.response)).toContain("websocket_connection_limit_reached"); + + const second = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", input: "after reconnect" }, + }); + expect("response" in second).toBe(true); + if (!("response" in second)) return; + expect(second.reused).toBe(false); + await collectSseBody(second.response); + + expect(connectionCount).toBe(2); + }); + + it("cleanupResponsesWsSession closes the retained upstream WS for a client disconnect", async () => { + let upstreamCloseCode: number | null = null; + let resolveClosed: (() => void) | null = null; + const closed = new Promise((resolve) => { + resolveClosed = resolve; + }); + server = await startMockServer((socket) => { + socket.on("close", (code) => { + upstreamCloseCode = code; + resolveClosed?.(); + }); + socket.on("message", () => { + socket.send(JSON.stringify({ type: "response.completed", response: { id: "resp_1" } })); + }); + }); + + const result = await tryResponsesWebsocketUpstream({ + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-cleanup", + body: { model: "gpt-5.4", input: "hi" }, + }); + expect("response" in result).toBe(true); + if (!("response" in result)) return; + await collectSseBody(result.response); + + cleanupResponsesWsSession("client-ws-session-cleanup"); + await closed; + + expect(upstreamCloseCode).toBe(1000); + }); + + it("global cleanup hook closes retained upstream WS sessions from shared state", async () => { + let upstreamCloseCode: number | null = null; + let resolveClosed: (() => void) | null = null; + const closed = new Promise((resolve) => { + resolveClosed = resolve; + }); + server = await startMockServer((socket) => { + socket.on("close", (code) => { + upstreamCloseCode = code; + resolveClosed?.(); + }); + socket.on("message", () => { + socket.send(JSON.stringify({ type: "response.completed", response: { id: "resp_1" } })); + }); + }); + + const sessionId = "client-ws-session-global-cleanup"; + const result = await tryResponsesWebsocketUpstream({ + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId, + body: { model: "gpt-5.4", input: "hi" }, + }); + expect("response" in result).toBe(true); + if (!("response" in result)) return; + await collectSseBody(result.response); + + const globalState = globalThis as unknown as { + __cchCleanupResponsesWsSession?: (sessionId: string) => void; + __cchResponsesWsPersistentState?: { sessions: Map }; + }; + expect(globalState.__cchResponsesWsPersistentState?.sessions.has(sessionId)).toBe(true); + + globalState.__cchCleanupResponsesWsSession?.(sessionId); + await closed; + + expect(upstreamCloseCode).toBe(1000); + expect(globalState.__cchResponsesWsPersistentState?.sessions.has(sessionId)).toBe(false); + }); + + it("does not close an active retained session when a concurrent same-session request opens a fresh upstream WS", async () => { + let connectionCount = 0; + let firstUpstreamClosed = false; + let firstUpstreamCloseCode: number | null = null; + let resolveFirstClosed: (() => void) | null = null; + const firstClosed = new Promise((resolve) => { + resolveFirstClosed = resolve; + }); + let releaseFirstTerminal!: () => void; + const firstTerminalReleased = new Promise((resolve) => { + releaseFirstTerminal = resolve; + }); + + server = await startMockServer((socket) => { + connectionCount += 1; + const connectionIndex = connectionCount; + socket.on("close", (code) => { + if (connectionIndex === 1) { + firstUpstreamClosed = true; + firstUpstreamCloseCode = code; + resolveFirstClosed?.(); + } + }); + socket.on("message", () => { + if (connectionIndex === 1) { + socket.send( + JSON.stringify({ type: "response.created", response: { id: "resp_active" } }) + ); + firstTerminalReleased.then(() => { + if (socket.readyState === 1) { + socket.send( + JSON.stringify({ type: "response.completed", response: { id: "resp_active" } }) + ); + } + }); + return; + } + + socket.send(JSON.stringify({ type: "response.created", response: { id: "resp_fresh" } })); + socket.send(JSON.stringify({ type: "response.completed", response: { id: "resp_fresh" } })); + }); + }); + + const common = { + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-active-race", + }; + + const first = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", input: "first" }, + }); + expect("response" in first).toBe(true); + if (!("response" in first)) return; + expect(first.reused).toBe(false); + + const second = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", input: "second" }, + }); + expect("response" in second).toBe(true); + if (!("response" in second)) return; + expect(second.reused).toBe(false); + + expect(await collectSseBody(second.response)).toContain("resp_fresh"); + expect(connectionCount).toBe(2); + expect(firstUpstreamClosed).toBe(false); + expect(getResponsesWsSessionCountForTests()).toBe(1); + + releaseFirstTerminal(); + expect(await collectSseBody(first.response)).toContain("resp_active"); + expect(firstUpstreamClosed).toBe(false); + + cleanupResponsesWsSession(common.sessionId); + await withTimeout( + firstClosed, + 1_000, + "retained active upstream WS did not close after cleanup" + ); + expect(firstUpstreamCloseCode).toBe(1000); + }); + + it("keeps the busy retained session addressable for cleanup while a fresh same-session request runs", async () => { + let connectionCount = 0; + const socketRefs: { + first: import("ws").WebSocket | null; + second: import("ws").WebSocket | null; + } = { + first: null, + second: null, + }; + let firstUpstreamCloseCode: number | null = null; + let secondUpstreamCloseCode: number | null = null; + let resolveFirstClosed: (() => void) | null = null; + let resolveSecondClosed: (() => void) | null = null; + const firstClosed = new Promise((resolve) => { + resolveFirstClosed = resolve; + }); + const secondClosed = new Promise((resolve) => { + resolveSecondClosed = resolve; + }); + + server = await startMockServer((socket) => { + connectionCount += 1; + const connectionIndex = connectionCount; + if (connectionIndex === 1) { + socketRefs.first = socket; + } else if (connectionIndex === 2) { + socketRefs.second = socket; + } + socket.on("close", (code) => { + if (connectionIndex === 1) { + firstUpstreamCloseCode = code; + resolveFirstClosed?.(); + } else if (connectionIndex === 2) { + secondUpstreamCloseCode = code; + resolveSecondClosed?.(); + } + }); + socket.on("message", () => { + if (connectionIndex === 1) { + socket.send( + JSON.stringify({ type: "response.created", response: { id: "resp_busy_active" } }) + ); + return; + } + + socket.send( + JSON.stringify({ type: "response.created", response: { id: "resp_busy_fresh" } }) + ); + socket.send( + JSON.stringify({ type: "response.completed", response: { id: "resp_busy_fresh" } }) + ); + }); + }); + + try { + const sessionId = "client-ws-session-busy-cleanup"; + const common = { + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId, + }; + + const first = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", input: "first" }, + }); + expect("response" in first).toBe(true); + if (!("response" in first)) return; + + const second = await tryResponsesWebsocketUpstream({ + ...common, + body: { model: "gpt-5.4", input: "second" }, + }); + expect("response" in second).toBe(true); + if (!("response" in second)) return; + + expect(await collectSseBody(second.response)).toContain("resp_busy_fresh"); + await withTimeout( + secondClosed, + 1_000, + "busy-session fresh upstream WS did not close after terminal" + ); + expect(secondUpstreamCloseCode).toBe(1000); + expect(getResponsesWsSessionCountForTests()).toBe(1); + + cleanupResponsesWsSession(sessionId); + await withTimeout( + firstClosed, + 1_000, + "cleanup hook did not close the original busy upstream WS session" + ); + expect(firstUpstreamCloseCode).toBe(1000); + + const firstBody = await collectSseBody(first.response); + expect(firstBody).toContain("resp_busy_active"); + expect(firstBody).toContain('"type":"error"'); + } finally { + if (socketRefs.first?.readyState === 1) socketRefs.first.close(1000); + if (socketRefs.second?.readyState === 1) socketRefs.second.close(1000); + } + }); + + it("resolves and closes upstream when aborted before the first WS event", async () => { + let resolveMessageReceived: (() => void) | null = null; + const messageReceived = new Promise((resolve) => { + resolveMessageReceived = resolve; + }); + let upstreamCloseCode: number | null = null; + let resolveClosed: (() => void) | null = null; + const upstreamClosed = new Promise((resolve) => { + resolveClosed = resolve; + }); + server = await startMockServer((socket) => { + socket.on("message", () => { + resolveMessageReceived?.(); + }); + socket.on("close", (code) => { + upstreamCloseCode = code; + resolveClosed?.(); + }); + }); + + const abortController = new AbortController(); + const resultPromise = tryResponsesWebsocketUpstream({ + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-aborted-before-first-event", + abortSignal: abortController.signal, + body: { model: "gpt-5.4", input: "hi" }, + }); + + await withTimeout(messageReceived, 1_000, "upstream did not receive the WS request frame"); + abortController.abort(); + + const result = await withTimeout( + resultPromise, + 1_000, + "upstream WS attempt hung after abort before first event" + ); + await withTimeout(upstreamClosed, 1_000, "upstream WS did not close after abort"); + + expect("failed" in result).toBe(true); + if (!("failed" in result)) return; + expect(result.reason).toBe("ws_error_pre_first_event"); + expect(result.message).toContain("aborted before first upstream WebSocket event"); + expect(result.cacheableAsUnsupported).toBe(false); + expect(upstreamCloseCode).toBe(1000); + }); + + it("keeps the persistent session map bounded when every retained session is active", async () => { + setResponsesWsSessionMaxEntriesForTests(1); + let connectionCount = 0; + let secondUpstreamCloseCode: number | null = null; + let resolveSecondClosed: (() => void) | null = null; + const secondClosed = new Promise((resolve) => { + resolveSecondClosed = resolve; + }); + server = await startMockServer((socket) => { + connectionCount += 1; + const connectionIndex = connectionCount; + socket.on("close", (code) => { + if (connectionIndex === 2) { + secondUpstreamCloseCode = code; + resolveSecondClosed?.(); + } + }); + socket.on("message", () => { + const responseId = `resp_cap_${connectionIndex}`; + socket.send(JSON.stringify({ type: "response.created", response: { id: responseId } })); + if (connectionIndex > 1) { + socket.send(JSON.stringify({ type: "response.completed", response: { id: responseId } })); + } + }); + }); + + const first = await tryResponsesWebsocketUpstream({ + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-cap-active-1", + body: { model: "gpt-5.4", input: "first" }, + }); + expect("response" in first).toBe(true); + if (!("response" in first)) return; + expect(getResponsesWsSessionCountForTests()).toBe(1); + + const second = await tryResponsesWebsocketUpstream({ + provider: codexProvider(), + upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, + upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), + sessionId: "client-ws-session-cap-active-2", + body: { model: "gpt-5.4", input: "second" }, + }); + expect("response" in second).toBe(true); + if (!("response" in second)) return; + await collectSseBody(second.response); + await withTimeout(secondClosed, 1_000, "unretained upstream WS did not close after terminal"); + + expect(getResponsesWsSessionCountForTests()).toBe(1); + expect(secondUpstreamCloseCode).toBe(1000); + }); + it("classifies HTTP 426 / 404 / 501 upgrade failures as cacheable-unsupported", async () => { const http = await import("node:http"); for (const status of [426, 404, 501]) { @@ -333,7 +888,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${addr.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("failed" in result).toBe(true); if (!("failed" in result)) continue; @@ -358,7 +913,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${addr.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("failed" in result).toBe(true); if (!("failed" in result)) continue; @@ -388,7 +943,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("response" in result).toBe(true); @@ -430,7 +985,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("response" in result).toBe(true); @@ -463,7 +1018,7 @@ describe("tryResponsesWebsocketUpstream", () => { provider: codexProvider(), upstreamUrl: `http://127.0.0.1:${server.port}/v1/responses`, upstreamHeaders: new Headers({ authorization: "Bearer sk-mock" }), - body: { model: "gpt-5", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, }); expect("response" in result).toBe(true); diff --git a/src/app/v1/_lib/responses-ws/eligibility.ts b/src/app/v1/_lib/responses-ws/eligibility.ts index a14a9ad6b..2602e6b90 100644 --- a/src/app/v1/_lib/responses-ws/eligibility.ts +++ b/src/app/v1/_lib/responses-ws/eligibility.ts @@ -15,7 +15,7 @@ import { isOpenaiResponsesWebsocketEnabled } from "@/lib/config/system-settings-cache"; import type { Provider } from "@/types/provider"; -import { verifyInternalRequest } from "./internal-secret"; +import { RESPONSES_WS_SESSION_HEADER, verifyInternalRequest } from "./internal-secret"; import { isResponsesWsUnsupported } from "./unsupported-cache"; export const CLIENT_TRANSPORT_HEADER = "x-cch-client-transport"; @@ -63,6 +63,25 @@ export function isWebsocketClientRequest(headers: Headers | Record): string | null { + let value: string | null | undefined; + if (headers instanceof Headers) { + value = headers.get(RESPONSES_WS_SESSION_HEADER); + } else { + for (const [k, v] of Object.entries(headers)) { + if (k.toLowerCase() === RESPONSES_WS_SESSION_HEADER) { + value = v; + break; + } + } + } + + if (typeof value !== "string") return null; + const trimmed = value.trim(); + if (!trimmed || trimmed.length > 128) return null; + return /^[\w.-]+$/.test(trimmed) ? trimmed : null; +} + export async function evaluateResponsesWsEligibility(options: { headers: Headers | Record; provider: Provider; diff --git a/src/app/v1/_lib/responses-ws/internal-secret.ts b/src/app/v1/_lib/responses-ws/internal-secret.ts index 0d193ae7b..6081ea70f 100644 --- a/src/app/v1/_lib/responses-ws/internal-secret.ts +++ b/src/app/v1/_lib/responses-ws/internal-secret.ts @@ -32,6 +32,7 @@ import { randomUUID } from "node:crypto"; export const INTERNAL_SECRET_HEADER = "x-cch-internal-secret"; export const WS_FORWARD_FLAG_HEADER = "x-cch-responses-ws-forward"; +export const RESPONSES_WS_SESSION_HEADER = "x-cch-responses-ws-session"; const ENV_VAR = "CCH_RESPONSES_WS_INTERNAL_SECRET"; /** @@ -42,6 +43,7 @@ const ENV_VAR = "CCH_RESPONSES_WS_INTERNAL_SECRET"; export const RESERVED_INTERNAL_HEADERS = [ "x-cch-client-transport", WS_FORWARD_FLAG_HEADER, + RESPONSES_WS_SESSION_HEADER, INTERNAL_SECRET_HEADER, ]; diff --git a/src/app/v1/_lib/responses-ws/upstream-adapter.ts b/src/app/v1/_lib/responses-ws/upstream-adapter.ts index 5214fa0d4..bd379fc3a 100644 --- a/src/app/v1/_lib/responses-ws/upstream-adapter.ts +++ b/src/app/v1/_lib/responses-ws/upstream-adapter.ts @@ -7,23 +7,31 @@ * prompt_cache_key extraction, usage aggregation, finalization) treats the * response exactly like an HTTP Responses SSE stream. * - * On handshake rejection, close-before-first-event, or other fallback-safe - * errors, returns null so the caller can fall back to the HTTP path. No - * circuit-breaker accounting happens here — the fallback is purely informational. - * - * Scope: this adapter only handles the pre-flight connection attempt. It does - * NOT re-use connections across requests (first pass); each call opens and - * closes its own WebSocket. A future revision can add per-socket pooling and - * previous_response_id delta frames. + * When the request came from one client WebSocket connection, server.js passes + * a per-client `x-cch-responses-ws-session` marker. We reuse one upstream + * WebSocket for that marker so Codex's `store=false` + `previous_response_id` + * continuation can hit the upstream connection-local cache, matching OpenAI's + * WebSocket mode semantics. */ +import { createHash } from "node:crypto"; import type WebSocketType from "ws"; import { logger } from "@/lib/logger"; import type { Provider } from "@/types/provider"; +import { RESERVED_INTERNAL_HEADERS } from "./internal-secret"; + +declare global { + // server.js is CommonJS and cannot import this TS module directly. The + // adapter registers a tiny cleanup hook on globalThis so the custom server + // can close the matching upstream WS as soon as the client WS disconnects. + // eslint-disable-next-line no-var + var __cchCleanupResponsesWsSession: ((sessionId: string) => void) | undefined; +} export interface UpstreamWsOutcome { response: Response; connected: boolean; + reused: boolean; } export type UpstreamWsFallbackReason = @@ -69,6 +77,12 @@ const FIRST_EVENT_TIMEOUT_MS = 20_000; // than spilling into the heap unboundedly. const MAX_BUFFERED_QUEUE_BYTES = 8 * 1024 * 1024; // 8 MiB +// Keep idle upstream sessions long enough for normal Codex interactive use. +// server.js calls cleanup immediately on client WS close; this timer is only a +// leak backstop if a process-level close notification is missed. +const PERSISTENT_SESSION_IDLE_TIMEOUT_MS = 65 * 60 * 1000; +const DEFAULT_PERSISTENT_SESSION_MAX_ENTRIES = 512; + // HTTP statuses on the upgrade handshake that we treat as a definitive // "this endpoint does not speak WebSocket" signal and cache as unsupported. // 401 / 403 are NOT in this list because they reflect auth state, not @@ -91,8 +105,38 @@ const FORBIDDEN_UPSTREAM_WS_HEADERS = new Set([ "transfer-encoding", "accept", "content-type", + ...RESERVED_INTERNAL_HEADERS, ]); +type PersistentWsEntry = { + sessionId: string; + fingerprint: string; + ws: WebSocketType; + active: boolean; + createdAt: number; + lastUsedAt: number; + idleTimer: ReturnType | null; +}; + +type PersistentWsState = { + sessions: Map; + maxEntries: number; +}; + +declare global { + // Keep retained upstream WS state stable across Next.js dev/test module + // reloads. server.js calls the latest cleanup hook, so the hook must still + // see sessions created by an older module instance. + // eslint-disable-next-line no-var + var __cchResponsesWsPersistentState: PersistentWsState | undefined; +} + +const persistentState = (globalThis.__cchResponsesWsPersistentState ??= { + sessions: new Map(), + maxEntries: DEFAULT_PERSISTENT_SESSION_MAX_ENTRIES, +}); +const persistentSessions = persistentState.sessions; + function toWsUrl(httpUrl: string): string { const url = new URL(httpUrl); url.protocol = url.protocol === "https:" ? "wss:" : "ws:"; @@ -120,6 +164,28 @@ function buildUpstreamWsHeaders(source: Headers | Record): Recor return out; } +function buildConnectionFingerprint(options: { + provider: Provider; + endpointId?: number | null; + upstreamUrl: string; + headers: Record; +}): string { + const normalizedHeaders = Object.entries(options.headers) + .map(([key, value]) => [key.toLowerCase(), value] as const) + .sort(([a], [b]) => a.localeCompare(b)); + + return createHash("sha256") + .update( + JSON.stringify({ + providerId: options.provider.id, + endpointId: options.endpointId ?? null, + upstreamUrl: options.upstreamUrl, + headers: normalizedHeaders, + }) + ) + .digest("hex"); +} + async function loadWsModule(): Promise { try { const mod = await import("ws"); @@ -132,11 +198,146 @@ async function loadWsModule(): Promise { } } +function isWsOpen(ws: WebSocketType): boolean { + return ws.readyState === 1; +} + +function isWsClosingOrClosed(ws: WebSocketType): boolean { + return ws.readyState >= 2; +} + +function closeWs(ws: WebSocketType, code: number): void { + try { + ws.close(code); + } catch { + // ignore + } +} + +function terminateWs(ws: WebSocketType): void { + try { + ws.terminate?.(); + } catch { + // ignore + } +} + +function forgetPersistentSession(sessionId: string, ws?: WebSocketType): void { + const entry = persistentSessions.get(sessionId); + if (!entry) return; + if (ws && entry.ws !== ws) return; + if (entry.idleTimer) { + clearTimeout(entry.idleTimer); + entry.idleTimer = null; + } + persistentSessions.delete(sessionId); +} + +function closePersistentEntry(entry: PersistentWsEntry, code: number): void { + forgetPersistentSession(entry.sessionId, entry.ws); + closeWs(entry.ws, code); +} + +function armPersistentIdleTimer(entry: PersistentWsEntry): void { + if (entry.idleTimer) clearTimeout(entry.idleTimer); + entry.idleTimer = setTimeout(() => { + const current = persistentSessions.get(entry.sessionId); + if (current !== entry || current.active) return; + logger.info("[ResponsesWsAdapter] closing idle upstream WS session", { + sessionId: entry.sessionId, + idleMs: Date.now() - entry.lastUsedAt, + }); + closePersistentEntry(entry, 1000); + }, PERSISTENT_SESSION_IDLE_TIMEOUT_MS); + if (typeof entry.idleTimer === "object" && "unref" in entry.idleTimer) { + entry.idleTimer.unref(); + } +} + +function prunePersistentSessions(): void { + const maxEntries = persistentState.maxEntries; + if (persistentSessions.size < maxEntries) return; + + const idleEntries = [...persistentSessions.values()] + .filter((entry) => !entry.active) + .sort((a, b) => a.lastUsedAt - b.lastUsedAt); + const overflow = persistentSessions.size - maxEntries + 1; + for (const entry of idleEntries.slice(0, overflow)) { + logger.warn("[ResponsesWsAdapter] pruning idle upstream WS session", { + sessionId: entry.sessionId, + }); + closePersistentEntry(entry, 1000); + } +} + +function registerPersistentSession( + sessionId: string, + fingerprint: string, + ws: WebSocketType +): PersistentWsEntry | null { + prunePersistentSessions(); + if (persistentSessions.size >= persistentState.maxEntries) { + logger.warn("[ResponsesWsAdapter] upstream WS session cap reached; not retaining session", { + sessionId, + maxEntries: persistentState.maxEntries, + }); + return null; + } + + const entry: PersistentWsEntry = { + sessionId, + fingerprint, + ws, + active: true, + createdAt: Date.now(), + lastUsedAt: Date.now(), + idleTimer: null, + }; + + ws.on("close", () => { + forgetPersistentSession(sessionId, ws); + }); + ws.on("error", () => { + forgetPersistentSession(sessionId, ws); + }); + + persistentSessions.set(sessionId, entry); + return entry; +} + +export function cleanupResponsesWsSession(sessionId: string): void { + const entry = persistentSessions.get(sessionId); + if (!entry) return; + logger.info("[ResponsesWsAdapter] cleaning upstream WS session", { sessionId }); + closePersistentEntry(entry, 1000); +} + +export function clearResponsesWsSessionsForTests(): void { + for (const entry of persistentSessions.values()) { + closePersistentEntry(entry, 1000); + } + persistentSessions.clear(); + persistentState.maxEntries = DEFAULT_PERSISTENT_SESSION_MAX_ENTRIES; +} + +export function setResponsesWsSessionMaxEntriesForTests(maxEntries: number): void { + const normalized = Math.floor(maxEntries); + persistentState.maxEntries = Number.isFinite(normalized) ? Math.max(0, normalized) : 0; +} + +export function getResponsesWsSessionCountForTests(): number { + return persistentSessions.size; +} + +globalThis.__cchCleanupResponsesWsSession = cleanupResponsesWsSession; + export async function tryResponsesWebsocketUpstream(options: { provider: Provider; upstreamUrl: string; upstreamHeaders: Headers | Record; body: Record; + sessionId?: string | null; + endpointId?: number | null; abortSignal?: AbortSignal; }): Promise { const WsCtor = (await loadWsModule()) as @@ -148,27 +349,72 @@ export async function tryResponsesWebsocketUpstream(options: { const wssUrl = toWsUrl(options.upstreamUrl); const headers = buildUpstreamWsHeaders(options.upstreamHeaders); + const sessionId = options.sessionId ?? null; + const fingerprint = buildConnectionFingerprint({ + provider: options.provider, + endpointId: options.endpointId, + upstreamUrl: wssUrl, + headers, + }); const frame = { type: "response.create", ...stripTransportOnlyFields(options.body), }; + let persistentEntry: PersistentWsEntry | null = null; + let reused = false; + let canRetainFreshSession = Boolean(sessionId); let ws: WebSocketType; - try { - ws = new (WsCtor as unknown as new (url: string, opts?: unknown) => WebSocketType)(wssUrl, { - headers, - handshakeTimeout: HANDSHAKE_TIMEOUT_MS, - }); - } catch (err) { - return { - failed: true, - reason: "ws_upgrade_rejected", - message: String(err && (err as Error).message ? (err as Error).message : err), - // Constructor throws are typically URL parsing / TLS configuration — - // not a server-side protocol negative signal — so don't cache. - cacheableAsUnsupported: false, - }; + + if (sessionId) { + const existing = persistentSessions.get(sessionId) ?? null; + if (existing) { + if (existing.active && !isWsClosingOrClosed(existing.ws)) { + logger.warn( + "[ResponsesWsAdapter] active upstream WS session is busy; opening a fresh one", + { + sessionId, + } + ); + // Keep the active retained entry addressable by cleanupResponsesWsSession(). + // The concurrent fresh socket is request-scoped and must close after its + // terminal event instead of replacing the in-flight session in the map. + canRetainFreshSession = false; + } else if (existing.fingerprint === fingerprint && !isWsClosingOrClosed(existing.ws)) { + persistentEntry = existing; + persistentEntry.active = true; + persistentEntry.lastUsedAt = Date.now(); + if (persistentEntry.idleTimer) { + clearTimeout(persistentEntry.idleTimer); + persistentEntry.idleTimer = null; + } + ws = existing.ws; + reused = true; + } else { + closePersistentEntry(existing, 1000); + } + } + } + + if (!reused) { + try { + ws = new (WsCtor as unknown as new (url: string, opts?: unknown) => WebSocketType)(wssUrl, { + headers, + handshakeTimeout: HANDSHAKE_TIMEOUT_MS, + }); + } catch (err) { + return { + failed: true, + reason: "ws_upgrade_rejected", + message: String(err && (err as Error).message ? (err as Error).message : err), + // Constructor throws are typically URL parsing / TLS configuration — + // not a server-side protocol negative signal — so don't cache. + cacheableAsUnsupported: false, + }; + } + } else { + ws = persistentEntry!.ws; } type OpenResult = @@ -193,53 +439,14 @@ export async function tryResponsesWebsocketUpstream(options: { openPromiseResolve(result); }; - // Idempotent close helper. Centralizes the try/catch wrapping so every exit - // path can call closeUpstream() without leaking an upstream socket. Calling - // close() on an already-CLOSING/CLOSED ws is a no-op in `ws`. - const closeUpstream = (code: number) => { - try { - ws.close(code); - } catch { - // ignore - } + const closeAndForget = (code: number) => { + if (sessionId) forgetPersistentSession(sessionId, ws); + closeWs(ws, code); }; - ws.on("open", () => { - try { - ws.send(JSON.stringify(frame)); - } catch (err) { - finishOpen({ - ok: false, - reason: "ws_error_pre_first_event", - message: String(err && (err as Error).message ? (err as Error).message : err), - // Local send failure (closed underlying socket, etc.) is transient. - cacheableAsUnsupported: false, - }); - closeUpstream(1011); - } - }); - - ws.on( - "unexpected-response", - (_req: unknown, res: { statusCode?: number; statusMessage?: string }) => { - const status = typeof res.statusCode === "number" ? res.statusCode : undefined; - const cacheable = - typeof status === "number" && PROTOCOL_UNSUPPORTED_HTTP_STATUSES.has(status); - finishOpen({ - ok: false, - reason: "ws_upgrade_rejected", - message: `HTTP ${status ?? "?"} ${res.statusMessage ?? ""}`.trim(), - // Only definitive protocol negatives (4xx/501 on the upgrade path) - // are cacheable. 401/403/5xx/etc. are auth or transient state. - cacheableAsUnsupported: cacheable, - }); - closeUpstream(1011); - } - ); - const messageQueue: string[] = []; let queueResolver: ((value: string | null) => void) | null = null; - let closed = false; + let socketClosed = isWsClosingOrClosed(ws); let queuedBytes = 0; // Marks an upstream failure observed AFTER the first event was emitted. // The downstream pipeline must see this as an error rather than a clean @@ -249,12 +456,68 @@ export async function tryResponsesWebsocketUpstream(options: { // ReadableStream's start()). The `ws.on("close")` handler runs in this // outer scope and would otherwise have no way to tell whether a terminal // event was already forwarded — without this flag a clean post-terminal - // close (e.g. our own `closeUpstream(1000)`) would be misclassified as a - // mid-stream error. + // close would be misclassified as a mid-stream error. let terminalEventSeen = false; + let terminalEventShouldClosePersistent = false; let firstEventTimer: ReturnType | null = null; - ws.on("message", (data: Buffer | string) => { + const sendFrame = () => { + if (!isWsOpen(ws)) { + finishOpen({ + ok: false, + reason: "ws_error_pre_first_event", + message: "websocket is not open", + cacheableAsUnsupported: false, + }); + closeAndForget(1011); + return; + } + + try { + ws.send(JSON.stringify(frame), (err?: Error) => { + if (!err) return; + finishOpen({ + ok: false, + reason: "ws_error_pre_first_event", + message: String(err.message ? err.message : err), + // Local send failure (closed underlying socket, etc.) is transient. + cacheableAsUnsupported: false, + }); + closeAndForget(1011); + }); + } catch (err) { + finishOpen({ + ok: false, + reason: "ws_error_pre_first_event", + message: String(err && (err as Error).message ? (err as Error).message : err), + cacheableAsUnsupported: false, + }); + closeAndForget(1011); + } + }; + + const onOpen = () => { + sendFrame(); + }; + + const onUnexpectedResponse = ( + _req: unknown, + res: { statusCode?: number; statusMessage?: string } + ) => { + const status = typeof res.statusCode === "number" ? res.statusCode : undefined; + const cacheable = typeof status === "number" && PROTOCOL_UNSUPPORTED_HTTP_STATUSES.has(status); + finishOpen({ + ok: false, + reason: "ws_upgrade_rejected", + message: `HTTP ${status ?? "?"} ${res.statusMessage ?? ""}`.trim(), + // Only definitive protocol negatives (4xx/501 on the upgrade path) + // are cacheable. 401/403/5xx/etc. are auth or transient state. + cacheableAsUnsupported: cacheable, + }); + closeAndForget(1011); + }; + + const onMessage = (data: Buffer | string) => { const text = typeof data === "string" ? data : data.toString("utf8"); const size = Buffer.byteLength(text, "utf8"); if (!firstEventSeen) { @@ -282,18 +545,19 @@ export async function tryResponsesWebsocketUpstream(options: { code: "upstream_ws_queue_overflow", message: `buffered upstream payload exceeded ${MAX_BUFFERED_QUEUE_BYTES} bytes`, }; - closed = true; - closeUpstream(1011); + socketClosed = true; + closeAndForget(1011); return; } messageQueue.push(text); queuedBytes += size; - }); + }; - ws.on("error", (err: Error) => { + const onError = (err: Error) => { logger.warn("[ResponsesWsAdapter] upstream ws error", { error: String(err?.message ? err.message : err), firstEventSeen, + reused, }); if (!firstEventSeen) { finishOpen({ @@ -310,16 +574,18 @@ export async function tryResponsesWebsocketUpstream(options: { message: String(err?.message ? err.message : err), }; } - closed = true; + socketClosed = true; + if (sessionId) forgetPersistentSession(sessionId, ws); if (queueResolver) { const resolve = queueResolver; queueResolver = null; resolve(null); } - }); + }; - ws.on("close", (code: number, reason: Buffer | string) => { - closed = true; + const onClose = (code: number, reason: Buffer | string) => { + socketClosed = true; + if (sessionId) forgetPersistentSession(sessionId, ws); if (!firstEventSeen) { finishOpen({ ok: false, @@ -335,10 +601,6 @@ export async function tryResponsesWebsocketUpstream(options: { // Record this as an error so the synthesized error frame downstream // carries the actual close code instead of a generic message — and so // the forwarder doesn't bill the truncated stream as a clean success. - // We must also gate on `terminalEventSeen` because our own clean - // closeUpstream(1000) after a terminal event triggers this same - // handler — without the flag we'd inject a spurious error frame after - // a successful response. const reasonText = reason?.length ? typeof reason === "string" ? reason @@ -356,16 +618,82 @@ export async function tryResponsesWebsocketUpstream(options: { queueResolver = null; resolve(null); } - }); + }; + + const resolveMessageWaiter = () => { + if (!queueResolver) return; + const resolve = queueResolver; + queueResolver = null; + resolve(null); + }; + + const cleanupRequestListeners = () => { + ws.off("message", onMessage); + ws.off("error", onError); + ws.off("close", onClose); + ws.off("open", onOpen); + ws.off("unexpected-response", onUnexpectedResponse); + if (options.abortSignal) { + options.abortSignal.removeEventListener("abort", onAbort); + } + if (firstEventTimer) { + clearTimeout(firstEventTimer); + firstEventTimer = null; + } + }; + + const finishRequest = (options?: { closeCode?: number; forgetSession?: boolean }) => { + cleanupRequestListeners(); + let closeDetachedEntry = false; + if (persistentEntry) { + persistentEntry.active = false; + persistentEntry.lastUsedAt = Date.now(); + const retainedForReuse = sessionId + ? persistentSessions.get(sessionId) === persistentEntry + : false; + if (!retainedForReuse) { + closeDetachedEntry = !options?.closeCode; + } else if (!isWsClosingOrClosed(persistentEntry.ws)) { + armPersistentIdleTimer(persistentEntry); + } + } + if (options?.forgetSession && sessionId) { + forgetPersistentSession(sessionId, ws); + } + if (options?.closeCode) { + closeAndForget(options.closeCode); + } else if (closeDetachedEntry) { + closeWs(ws, 1000); + } + }; + + function onAbort() { + socketClosed = true; + if (!firstEventSeen) { + finishOpen({ + ok: false, + reason: "ws_error_pre_first_event", + message: "aborted before first upstream WebSocket event", + cacheableAsUnsupported: false, + }); + } + resolveMessageWaiter(); + finishRequest({ closeCode: 1000, forgetSession: true }); + } + + ws.on("message", onMessage); + ws.on("error", onError); + ws.on("close", onClose); + if (!reused) { + ws.on("open", onOpen); + ws.on("unexpected-response", onUnexpectedResponse); + } if (options.abortSignal) { - options.abortSignal.addEventListener( - "abort", - () => { - closeUpstream(1000); - }, - { once: true } - ); + options.abortSignal.addEventListener("abort", onAbort, { once: true }); + if (options.abortSignal.aborted) { + onAbort(); + } } // Bound the wait for the first event so a silent upstream cannot pin a @@ -381,20 +709,22 @@ export async function tryResponsesWebsocketUpstream(options: { // next request should re-probe rather than skip the WS path. cacheableAsUnsupported: false, }); - closeUpstream(1011); + closeAndForget(1011); }, FIRST_EVENT_TIMEOUT_MS); + if (reused) { + sendFrame(); + } + const openResult = await openPromise; if (firstEventTimer) { clearTimeout(firstEventTimer); firstEventTimer = null; } if (!openResult.ok) { - try { - ws.terminate?.(); - } catch { - // ignore - } + cleanupRequestListeners(); + if (sessionId) forgetPersistentSession(sessionId, ws); + terminateWs(ws); return { failed: true, reason: openResult.reason, @@ -403,6 +733,10 @@ export async function tryResponsesWebsocketUpstream(options: { }; } + if (sessionId && canRetainFreshSession && !persistentEntry && !socketClosed) { + persistentEntry = registerPersistentSession(sessionId, fingerprint, ws); + } + // Upstream WS is open and at least one event was received. Build an SSE // ReadableStream that replays queued messages and streams future ones until // a terminal event arrives or the connection closes. @@ -424,6 +758,9 @@ export async function tryResponsesWebsocketUpstream(options: { // Hoisted twin so the outer `ws.on("close")` handler can tell // a clean post-terminal close apart from a real mid-stream drop. terminalEventSeen = true; + terminalEventShouldClosePersistent = + parsed.type === "error" || + parsed.error?.code === "websocket_connection_limit_reached"; return true; } } catch { @@ -441,18 +778,26 @@ export async function tryResponsesWebsocketUpstream(options: { return msg; }; + const completeTerminal = () => { + controller.close(); + if (sessionId && persistentEntry && !terminalEventShouldClosePersistent) { + finishRequest(); + } else { + finishRequest({ closeCode: 1000, forgetSession: true }); + } + }; + // Drain queued first-event(s) while (messageQueue.length > 0) { const msg = popMessage(); if (msg === undefined) break; if (processText(msg)) { - controller.close(); - closeUpstream(1000); + completeTerminal(); return; } } - while (!closed) { + while (!socketClosed) { const next = await new Promise((resolve) => { if (messageQueue.length > 0) { resolve(popMessage() ?? null); @@ -462,20 +807,18 @@ export async function tryResponsesWebsocketUpstream(options: { }); if (next === null) break; if (processText(next)) { - controller.close(); - closeUpstream(1000); + completeTerminal(); return; } } // Drain any messages enqueued after the loop's last `await` resolved - // with `null` (race between shift() and `closed` becoming true). + // with `null` (race between shift() and `socketClosed` becoming true). while (messageQueue.length > 0) { const msg = popMessage(); if (msg === undefined) break; if (processText(msg)) { - controller.close(); - closeUpstream(1000); + completeTerminal(); return; } } @@ -497,13 +840,10 @@ export async function tryResponsesWebsocketUpstream(options: { } controller.close(); - // Belt-and-suspenders: ensure the upstream socket is closed even if the - // earlier paths above didn't run (e.g. closed=true before we entered - // the loop). Idempotent. - closeUpstream(sawTerminalEvent ? 1000 : 1011); + finishRequest({ closeCode: sawTerminalEvent ? 1000 : 1011, forgetSession: true }); }, cancel() { - closeUpstream(1000); + finishRequest({ closeCode: 1000, forgetSession: true }); }, }); @@ -517,5 +857,6 @@ export async function tryResponsesWebsocketUpstream(options: { }, }), connected: true, + reused, }; } diff --git a/src/lib/model-vendor-icons.test.ts b/src/lib/model-vendor-icons.test.ts index 632a17ff1..327f32997 100644 --- a/src/lib/model-vendor-icons.test.ts +++ b/src/lib/model-vendor-icons.test.ts @@ -8,7 +8,7 @@ describe("getModelVendor", () => { { modelId: "claude-3-opus-20240229", expectedKey: "anthropic" }, // OpenAI - gpt prefix { modelId: "gpt-4o-mini", expectedKey: "openai" }, - { modelId: "gpt-5.2-codex", expectedKey: "openai" }, + { modelId: "gpt-5.4", expectedKey: "openai" }, // OpenAI - chatgpt prefix { modelId: "chatgpt-4o-latest", expectedKey: "openai" }, // OpenAI - o1/o3/o4 prefix diff --git a/src/lib/provider-testing/data/cx_base.json b/src/lib/provider-testing/data/cx_base.json index bb327e307..87bf27867 100644 --- a/src/lib/provider-testing/data/cx_base.json +++ b/src/lib/provider-testing/data/cx_base.json @@ -1,5 +1,5 @@ { - "model": "gpt-5-codex", + "model": "gpt-5.4", "instructions": "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n\n## General\n\n- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with [\"bash\", \"-lc\"].\n- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.\n- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n## Editing constraints\n\n- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).\n- You may be in a dirty git worktree.\n * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n * If the changes are in unrelated files, just ignore them and don't revert them.\n- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.", "input": [ { "role": "system", "content": "You are a echo bot. Always say 'pong'." }, diff --git a/src/lib/provider-testing/data/cx_codex_basic.json b/src/lib/provider-testing/data/cx_codex_basic.json index ed98a4e54..3a1d3043b 100644 --- a/src/lib/provider-testing/data/cx_codex_basic.json +++ b/src/lib/provider-testing/data/cx_codex_basic.json @@ -1,5 +1,5 @@ { - "model": "gpt-5.3-codex", + "model": "gpt-5.4", "instructions": "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.", "input": [ { diff --git a/src/lib/provider-testing/presets.ts b/src/lib/provider-testing/presets.ts index d3719e3dd..0866d4a5f 100644 --- a/src/lib/provider-testing/presets.ts +++ b/src/lib/provider-testing/presets.ts @@ -96,7 +96,7 @@ export const PRESETS: Record = { providerTypes: ["codex"], payload: cxCodexBasic, defaultSuccessContains: "pong", - defaultModel: "gpt-5.3-codex", + defaultModel: "gpt-5.4", path: "/v1/responses", userAgent: "Codex-CLI/1.0", extraHeaders: { diff --git a/src/lib/provider-testing/test-service.test.ts b/src/lib/provider-testing/test-service.test.ts index 5659044cd..11861fd89 100644 --- a/src/lib/provider-testing/test-service.test.ts +++ b/src/lib/provider-testing/test-service.test.ts @@ -95,7 +95,7 @@ describe("executeProviderTest", () => { const assistantText = `pong-${"x".repeat(7000)}`; const responseBody = mockJsonResponse({ id: "resp_test", - model: "gpt-5-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -114,7 +114,7 @@ describe("executeProviderTest", () => { providerUrl: "https://api.example.com", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); @@ -148,7 +148,7 @@ describe("executeProviderTest", () => { test("codex full-path baseUrl 不应重复拼接 /v1/responses", async () => { mockJsonResponse({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -162,7 +162,7 @@ describe("executeProviderTest", () => { providerUrl: "https://relay.example.com/openai/v1/responses", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); @@ -175,7 +175,7 @@ describe("executeProviderTest", () => { ])("codex bare /openai base preserves absolute versioned request url: %s", async (providerUrl) => { mockJsonResponse({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -189,7 +189,7 @@ describe("executeProviderTest", () => { providerUrl, apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); @@ -328,7 +328,7 @@ describe("executeProviderTest", () => { test("无版本 endpoint 根路径在 provider testing 中应与 runtime URL 语义一致", async () => { mockJsonResponse({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -342,7 +342,7 @@ describe("executeProviderTest", () => { providerUrl: "https://relay.example.com/openai/responses", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); @@ -352,7 +352,7 @@ describe("executeProviderTest", () => { test("非标准相似路径在 provider testing 中不应被错误折叠", async () => { mockJsonResponse({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -366,7 +366,7 @@ describe("executeProviderTest", () => { providerUrl: "https://relay.example.com/openai/responses-archive", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); @@ -437,7 +437,7 @@ describe("executeProviderTest", () => { }); const okBody = JSON.stringify({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -461,7 +461,7 @@ describe("executeProviderTest", () => { providerUrl: "https://api.gptclubapi.xyz/openai", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", preset: "cx_codex_basic", }); @@ -481,7 +481,7 @@ describe("executeProviderTest", () => { }); const okBody = JSON.stringify({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -505,7 +505,7 @@ describe("executeProviderTest", () => { providerUrl: "https://api.gptclubapi.xyz/openai", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(fetchMock).toHaveBeenCalledTimes(2); @@ -530,7 +530,7 @@ describe("executeProviderTest", () => { }); const okBody = JSON.stringify({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -561,7 +561,7 @@ describe("executeProviderTest", () => { providerUrl: "https://api.gptclubapi.xyz/openai", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(fetchMock).toHaveBeenCalledTimes(3); @@ -683,7 +683,7 @@ describe("executeProviderTest", () => { }); const okBody = JSON.stringify({ id: "resp_test", - model: "gpt-5.3-codex", + model: "gpt-5.4", output: [ { type: "message", @@ -708,7 +708,7 @@ describe("executeProviderTest", () => { providerUrl: "https://api.gptclubapi.xyz/openai", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", preset: "cx_codex_basic", }); @@ -740,7 +740,7 @@ describe("executeProviderTest", () => { providerUrl: "https://api.gptclubapi.xyz/openai", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", preset: "cx_codex_basic", }); @@ -752,13 +752,13 @@ describe("executeProviderTest", () => { test("codex 新版 SSE 事件流应正确提取 output_text delta,避免误判为内容不匹配", async () => { const responseBody = `event: response.created -data: {"type":"response.created","response":{"model":"gpt-5.3-codex","usage":null},"sequence_number":0} +data: {"type":"response.created","response":{"model":"gpt-5.4","usage":null},"sequence_number":0} event: response.output_text.delta data: {"type":"response.output_text.delta","delta":"pong","item_id":"msg_123","output_index":0,"sequence_number":1} event: response.completed -data: {"type":"response.completed","response":{"model":"gpt-5.3-codex","usage":{"input_tokens":39,"output_tokens":5,"total_tokens":44}},"sequence_number":2} +data: {"type":"response.completed","response":{"model":"gpt-5.4","usage":{"input_tokens":39,"output_tokens":5,"total_tokens":44}},"sequence_number":2} `; mockSseResponse(responseBody); @@ -767,13 +767,13 @@ data: {"type":"response.completed","response":{"model":"gpt-5.3-codex","usage":{ providerUrl: "https://sub.fkcodex.com", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); expect(result.subStatus).toBe("success"); expect(result.content).toBe("pong"); - expect(result.model).toBe("gpt-5.3-codex"); + expect(result.model).toBe("gpt-5.4"); expect(result.usage).toEqual({ inputTokens: 39, outputTokens: 5, @@ -785,7 +785,7 @@ data: {"type":"response.completed","response":{"model":"gpt-5.3-codex","usage":{ data: {"type":"response.output_text.done","text":"pong","item_id":"msg_123","output_index":0,"content_index":0,"sequence_number":1} event: response.completed -data: {"type":"response.completed","response":{"model":"gpt-5.3-codex","usage":{"input_tokens":39,"output_tokens":5,"total_tokens":44},"output":[{"type":"message","content":[{"type":"output_text","text":"pong"}]}]},"sequence_number":2} +data: {"type":"response.completed","response":{"model":"gpt-5.4","usage":{"input_tokens":39,"output_tokens":5,"total_tokens":44},"output":[{"type":"message","content":[{"type":"output_text","text":"pong"}]}]},"sequence_number":2} `; mockSseResponse(responseBody); @@ -794,12 +794,12 @@ data: {"type":"response.completed","response":{"model":"gpt-5.3-codex","usage":{ providerUrl: "https://sub.fkcodex.com", apiKey: "sk-test-codex", providerType: "codex", - model: "gpt-5.3-codex", + model: "gpt-5.4", }); expect(result.success).toBe(true); expect(result.content).toBe("pong"); - expect(result.model).toBe("gpt-5.3-codex"); + expect(result.model).toBe("gpt-5.4"); }); test("内容校验应优先使用解析后的文本,不能被原始 JSON 字段名误判为成功", async () => { diff --git a/src/lib/provider-testing/utils/test-prompts.ts b/src/lib/provider-testing/utils/test-prompts.ts index 05c3226ac..0be2c49e2 100644 --- a/src/lib/provider-testing/utils/test-prompts.ts +++ b/src/lib/provider-testing/utils/test-prompts.ts @@ -44,7 +44,7 @@ export const CLAUDE_TEST_BODY: ClaudeTestBody = { }; export const CODEX_TEST_BODY: CodexTestBody = { - model: "gpt-5.3-codex", + model: "gpt-5.4", instructions: "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.", input: [ @@ -110,7 +110,7 @@ export const GEMINI_TEST_HEADERS = { export const DEFAULT_MODELS: Record = { claude: "claude-haiku-4-5-20251001", "claude-auth": "claude-haiku-4-5-20251001", - codex: "gpt-5.3-codex", + codex: "gpt-5.4", "openai-compatible": "gpt-4.1-mini", gemini: "gemini-2.5-flash", "gemini-cli": "gemini-2.5-flash", diff --git a/src/lib/session-manager-detail-snapshots.test.ts b/src/lib/session-manager-detail-snapshots.test.ts index 828b5a00d..0691581fa 100644 --- a/src/lib/session-manager-detail-snapshots.test.ts +++ b/src/lib/session-manager-detail-snapshots.test.ts @@ -76,7 +76,7 @@ describe("SessionManager detail snapshots", () => { "before", { body: { - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "top secret request" }], }, messages: [{ role: "user", content: "top secret request" }], @@ -98,7 +98,7 @@ describe("SessionManager detail snapshots", () => { "after", { body: JSON.stringify({ - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "processed request body" }], }), headers: new Headers({ @@ -175,7 +175,7 @@ describe("SessionManager detail snapshots", () => { expect(requestBefore).toEqual({ body: { - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "[REDACTED]" }], }, messages: [{ role: "user", content: "[REDACTED]" }], @@ -192,7 +192,7 @@ describe("SessionManager detail snapshots", () => { expect(requestAfter).toEqual({ body: { - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "[REDACTED]" }], }, messages: null, @@ -312,7 +312,7 @@ describe("SessionManager detail snapshots", () => { "sess_empty_headers", "after", { - body: { model: "gpt-5.2" }, + body: { model: "gpt-5.4" }, headers: new Headers(), meta: { clientUrl: null, @@ -326,7 +326,7 @@ describe("SessionManager detail snapshots", () => { expect( await SessionManager.getSessionRequestPhaseSnapshot("sess_empty_headers", "after", 1) ).toEqual({ - body: { model: "gpt-5.2" }, + body: { model: "gpt-5.4" }, messages: null, headers: null, meta: { diff --git a/tests/api/v1/providers/providers.read.test.ts b/tests/api/v1/providers/providers.read.test.ts index 53260f012..ffe266796 100644 --- a/tests/api/v1/providers/providers.read.test.ts +++ b/tests/api/v1/providers/providers.read.test.ts @@ -241,13 +241,13 @@ describe("v1 providers read endpoints", () => { id: "cc_base", description: "Codex", defaultSuccessContains: "Hello", - defaultModel: "gpt-5.3-codex", + defaultModel: "gpt-5.4", }, ], }); fetchUpstreamModelsMock.mockResolvedValue({ ok: true, - data: { models: ["gpt-5.3-codex"], source: "upstream" }, + data: { models: ["gpt-5.4"], source: "upstream" }, }); getModelSuggestionsByProviderGroupMock.mockResolvedValue({ ok: true, diff --git a/tests/e2e/responses-ws-codex-cli-transport.test.ts b/tests/e2e/responses-ws-codex-cli-transport.test.ts new file mode 100644 index 000000000..ba15323f6 --- /dev/null +++ b/tests/e2e/responses-ws-codex-cli-transport.test.ts @@ -0,0 +1,1624 @@ +import { execFileSync, spawn } from "node:child_process"; +import { existsSync } from "node:fs"; +import { createRequire } from "node:module"; +import http from "node:http"; +import { dirname, isAbsolute, join } from "node:path"; +import process from "node:process"; +import { afterAll, beforeAll, describe, expect, test } from "vitest"; +import WebSocket, { type RawData, WebSocketServer } from "ws"; + +/** + * Opt-in Codex CLI transport probe for `/v1/responses`. + * + * Default Vitest/E2E runs skip this file's body. To run it locally: + * PowerShell: + * $env:CCH_CODEX_E2E="1"; $env:CCH_CODEX_E2E_EXPECT_TRANSPORT="websocket"; npx vitest run --config tests/configs/e2e.config.ts tests/e2e/responses-ws-codex-cli-transport.test.ts + * POSIX: + * CCH_CODEX_E2E=1 CCH_CODEX_E2E_EXPECT_TRANSPORT=websocket npx vitest run --config tests/configs/e2e.config.ts tests/e2e/responses-ws-codex-cli-transport.test.ts + * + * `CCH_CODEX_E2E_EXPECT_TRANSPORT=any|http|websocket` controls how strict the + * assertion is. Use `websocket` when validating a Codex build that should speak + * Responses WebSocket; use `any` to record the actual transport without making + * the test version-sensitive. + * + * Fault-injection probes are also opt-in: + * PowerShell: + * $env:CCH_CODEX_E2E="1"; $env:CCH_CODEX_E2E_FAULTS="1"; npx vitest run --config tests/configs/e2e.config.ts tests/e2e/responses-ws-codex-cli-transport.test.ts + */ + +type ProbeEvent = + | { type: "server_started"; port: number } + | { type: "http_models" } + | { type: "http_responses"; bytes: number } + | { type: "http_unknown"; method: string | undefined; path: string } + | { type: "ws_upgrade"; path: string } + | { type: "ws_connection"; path: string | undefined } + | { + type: "ws_message"; + bytes: number; + frameType: string | null; + generate: boolean | null; + previousResponseId: string | null; + isBinary: boolean; + } + | { type: "ws_close"; code: number; reason: string }; + +type CodexResult = { + code: number | null; + stdout: string; + stderr: string; +}; + +type CodexRunOptions = { + prompt?: string; + timeoutMs?: number; + extraConfig?: string[]; +}; + +type RunningCodexProcess = { + child: ReturnType; + result: Promise; + stdout: () => string; + stderr: () => string; +}; + +type ProbeServer = { + port: number; + events: ProbeEvent[]; + close: () => Promise; +}; + +type CodexInvocation = { + command: string; + argsPrefix: string[]; + display: string; +}; + +const shouldRunCodexE2e = process.env.CCH_CODEX_E2E === "1"; +const run = shouldRunCodexE2e ? describe : describe.skip; +const shouldRunFaultE2e = shouldRunCodexE2e && process.env.CCH_CODEX_E2E_FAULTS === "1"; +const faultRun = shouldRunFaultE2e ? describe : describe.skip; +const providerName = "local-cch-ws-e2e"; +const model = process.env.CCH_CODEX_E2E_MODEL || "gpt-5.4"; +const responseText = "E2E_TRANSPORT_OK"; +const defaultFeatures = "responses_websockets,responses_websockets_v2"; +const requireFromHere = createRequire(import.meta.url); + +function responseEnvelope(responseId: string, includeOutput: boolean) { + return { + id: responseId, + object: "response", + created_at: Math.floor(Date.now() / 1000), + model, + status: "completed", + output: includeOutput + ? [ + { + id: `msg_${responseId}`, + type: "message", + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: responseText }], + }, + ] + : [], + usage: { + input_tokens: 8, + output_tokens: includeOutput ? 4 : 0, + total_tokens: includeOutput ? 12 : 8, + }, + }; +} + +function responseEvents(responseId: string, includeOutput: boolean) { + const response = responseEnvelope(responseId, includeOutput); + if (!includeOutput) { + return [ + { type: "response.created", response: { ...response, output: [] } }, + { type: "response.completed", response }, + ]; + } + + const item = response.output[0]!; + const content = item.content[0]!; + return [ + { type: "response.created", response: { ...response, output: [] } }, + { type: "response.output_item.added", output_index: 0, item }, + { + type: "response.output_text.delta", + output_index: 0, + content_index: 0, + delta: content.text, + }, + { + type: "response.output_text.done", + output_index: 0, + content_index: 0, + text: content.text, + }, + { type: "response.output_item.done", output_index: 0, item }, + { type: "response.completed", response }, + ]; +} + +function writeSse(res: http.ServerResponse) { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.setHeader("cache-control", "no-cache, no-transform"); + for (const event of responseEvents("resp_cch_ws_e2e_http", true)) { + res.write(`event: ${event.type}\n`); + res.write(`data: ${JSON.stringify(event)}\n\n`); + } + res.end(); +} + +async function readBody(req: http.IncomingMessage): Promise { + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(Buffer.from(chunk)); + } + return Buffer.concat(chunks).toString("utf8"); +} + +async function startProbeServer(): Promise { + const events: ProbeEvent[] = []; + const record = (event: ProbeEvent) => { + events.push(event); + }; + + const server = http.createServer(async (req, res) => { + const url = new URL(req.url || "/", "http://127.0.0.1"); + if (req.method === "GET" && url.pathname === "/v1/models") { + record({ type: "http_models" }); + res.setHeader("content-type", "application/json"); + res.end( + JSON.stringify({ + object: "list", + data: [{ id: model, object: "model", owned_by: "cch-ws-e2e" }], + }) + ); + return; + } + + if (req.method === "POST" && url.pathname === "/v1/responses") { + const body = await readBody(req); + record({ type: "http_responses", bytes: Buffer.byteLength(body, "utf8") }); + writeSse(res); + return; + } + + record({ type: "http_unknown", method: req.method, path: url.pathname }); + res.statusCode = 404; + res.end("not found"); + }); + + const wss = new WebSocketServer({ noServer: true, maxPayload: 32 * 1024 * 1024 }); + const sockets = new Set(); + let responseSeq = 0; + wss.on("connection", (ws, req) => { + sockets.add(ws); + record({ type: "ws_connection", path: req.url }); + ws.on("message", (raw, isBinary) => { + const text = isBinary ? raw.toString("base64") : raw.toString("utf8"); + let frameType: string | null = null; + let generate: boolean | null = null; + let previousResponseId: string | null = null; + try { + const frame = JSON.parse(text); + frameType = frame.type || null; + generate = typeof frame.generate === "boolean" ? frame.generate : null; + previousResponseId = + typeof frame.previous_response_id === "string" ? frame.previous_response_id : null; + } catch { + frameType = "invalid_json"; + } + record({ + type: "ws_message", + bytes: Buffer.byteLength(text, "utf8"), + frameType, + generate, + previousResponseId, + isBinary, + }); + if (frameType !== "response.create") { + return; + } + responseSeq += 1; + const includeOutput = generate !== false; + for (const event of responseEvents(`resp_cch_ws_e2e_${responseSeq}`, includeOutput)) { + ws.send(JSON.stringify(event)); + } + }); + ws.on("close", (code, reason) => { + sockets.delete(ws); + record({ type: "ws_close", code, reason: reason.toString("utf8") }); + }); + }); + + server.on("upgrade", (req, socket, head) => { + const url = new URL(req.url || "/", "http://127.0.0.1"); + record({ type: "ws_upgrade", path: url.pathname }); + if (url.pathname !== "/v1/responses") { + socket.destroy(); + return; + } + wss.handleUpgrade(req, socket, head, (ws) => wss.emit("connection", ws, req)); + }); + + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", resolve); + }); + const address = server.address(); + if (!address || typeof address !== "object") { + throw new Error("failed to allocate local port"); + } + record({ type: "server_started", port: address.port }); + + return { + port: address.port, + events, + close: async () => { + for (const socket of sockets) { + if (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING) { + socket.close(1000, "test_done"); + } + } + const forceClose = setTimeout(() => { + for (const socket of sockets) { + socket.terminate(); + } + }, 250); + await new Promise((resolve) => + wss.close(() => { + clearTimeout(forceClose); + resolve(); + }) + ); + await new Promise((resolve) => server.close(() => resolve())); + }, + }; +} + +function nodeInvocationForCodexScript(scriptPath: string, display = scriptPath): CodexInvocation { + return { + command: process.execPath, + argsPrefix: [scriptPath], + display, + }; +} + +function isPathLikeCommand(command: string) { + return isAbsolute(command) || command.includes("/") || command.includes("\\"); +} + +function locateWindowsCmdOnPath(cmdName: string) { + try { + return ( + execFileSync("where.exe", [cmdName], { encoding: "utf8" }) + .split(/\r?\n/) + .map((line) => line.trim()) + .find(Boolean) ?? null + ); + } catch { + return null; + } +} + +function resolveWindowsCmdPath( + cmdPath: string, + lookupOnPath: (cmdName: string) => string | null = locateWindowsCmdOnPath +) { + const trimmed = cmdPath.trim(); + if (!trimmed) { + throw new Error("Codex CLI .cmd path is empty."); + } + if (isPathLikeCommand(trimmed) || existsSync(trimmed)) { + return trimmed; + } + const resolved = lookupOnPath(trimmed); + if (!resolved) { + throw new Error(`Cannot resolve Codex CLI .cmd on PATH: ${cmdPath}`); + } + return resolved; +} + +function nodeInvocationForWindowsCmd(cmdPath: string): CodexInvocation { + const resolvedCmdPath = resolveWindowsCmdPath(cmdPath); + const scriptPath = join( + dirname(resolvedCmdPath), + "node_modules", + "@openai", + "codex", + "bin", + "codex.js" + ); + if (!existsSync(scriptPath)) { + throw new Error( + `Cannot locate Codex CLI JS entrypoint next to ${resolvedCmdPath}: ${scriptPath}` + ); + } + const bundledNode = join(dirname(resolvedCmdPath), "node.exe"); + return { + command: existsSync(bundledNode) ? bundledNode : process.execPath, + argsPrefix: [scriptPath], + display: cmdPath, + }; +} + +function resolveCodexInvocation(): CodexInvocation { + const configuredBin = process.env.CCH_CODEX_E2E_BIN; + if (configuredBin) { + if (/\.cmd$/i.test(configuredBin)) { + return nodeInvocationForWindowsCmd(configuredBin); + } + if (/\.js$/i.test(configuredBin)) { + return nodeInvocationForCodexScript(configuredBin); + } + return { command: configuredBin, argsPrefix: [], display: configuredBin }; + } + + if (process.platform === "win32") { + const cmdPath = locateWindowsCmdOnPath("codex.cmd"); + if (!cmdPath) { + throw new Error("Cannot find codex.cmd on PATH. Install Codex CLI or set CCH_CODEX_E2E_BIN."); + } + return nodeInvocationForWindowsCmd(cmdPath); + } + + return { command: "codex", argsPrefix: [], display: "codex" }; +} + +describe("Codex CLI invocation helpers", () => { + test("resolves PATH-only Windows .cmd shims before deriving sibling paths", () => { + const resolved = resolveWindowsCmdPath("codex.cmd", (cmdName) => + cmdName === "codex.cmd" ? "C:/Program Files/nodejs/codex.cmd" : null + ); + + expect(dirname(resolved)).toBe("C:/Program Files/nodejs"); + }); + + test("keeps explicit Windows .cmd filesystem paths without PATH lookup", () => { + const explicitPath = "C:/tools/codex.cmd"; + const resolved = resolveWindowsCmdPath(explicitPath, () => { + throw new Error("PATH lookup should not be used for explicit paths"); + }); + + expect(resolved).toBe(explicitPath); + }); +}); + +function featureArgs() { + const features = (process.env.CCH_CODEX_E2E_FEATURES ?? defaultFeatures) + .split(",") + .map((feature) => feature.trim()) + .filter(Boolean); + return features.flatMap((feature) => ["--enable", feature]); +} + +function spawnCodex( + port: number, + invocation: CodexInvocation, + options: CodexRunOptions = {} +): RunningCodexProcess { + const baseUrl = `http://127.0.0.1:${port}/v1`; + const args = [ + ...invocation.argsPrefix, + "exec", + "--ignore-user-config", + "--ignore-rules", + "--ephemeral", + "--skip-git-repo-check", + "--json", + ...featureArgs(), + "-m", + model, + "-c", + `model_provider="${providerName}"`, + "-c", + 'preferred_auth_method="apikey"', + "-c", + 'approval_policy="never"', + "-c", + 'sandbox_mode="read-only"', + "-c", + `model_providers.${providerName}.name="${providerName}"`, + "-c", + `model_providers.${providerName}.base_url="${baseUrl}"`, + "-c", + `model_providers.${providerName}.wire_api="responses"`, + "-c", + `model_providers.${providerName}.supports_websockets=true`, + "-c", + `model_providers.${providerName}.requires_openai_auth=true`, + ...(options.extraConfig ?? []).flatMap((config) => ["-c", config]), + "-C", + process.cwd(), + options.prompt ?? `Reply exactly ${responseText} and do not run tools.`, + ]; + + const child = spawn(invocation.command, args, { + cwd: process.cwd(), + env: { + ...process.env, + OPENAI_API_KEY: process.env.OPENAI_API_KEY || "sk-cch-ws-e2e-placeholder", + NO_COLOR: "1", + }, + stdio: ["ignore", "pipe", "pipe"], + windowsHide: true, + }); + + let stdout = ""; + let stderr = ""; + const result = new Promise((resolve) => { + let settled = false; + const finish = (finished: CodexResult) => { + if (settled) return; + settled = true; + clearTimeout(timeout); + resolve(finished); + }; + const timeout = setTimeout(() => { + stderr += "codex exec timed out"; + child.kill(); + finish({ code: -2, stdout, stderr }); + }, options.timeoutMs ?? 60_000); + + child.stdout.on("data", (chunk) => { + stdout += chunk.toString("utf8"); + }); + child.stderr.on("data", (chunk) => { + stderr += chunk.toString("utf8"); + }); + child.on("error", (err) => { + stderr += err instanceof Error ? err.message : String(err); + finish({ code: -1, stdout, stderr }); + }); + child.on("close", (code) => finish({ code, stdout, stderr })); + }); + + return { + child, + result, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function runCodex( + port: number, + invocation: CodexInvocation, + options: CodexRunOptions = {} +): Promise { + return spawnCodex(port, invocation, options).result; +} + +function isResponsesPath(path: string | undefined) { + if (!path) return false; + try { + return new URL(path, "http://probe.local").pathname === "/v1/responses"; + } catch { + return path.split("?")[0] === "/v1/responses"; + } +} + +function observedTransport(events: ProbeEvent[]) { + const sawResponsesWs = events.some( + (event) => + (event.type === "ws_connection" || event.type === "ws_upgrade") && isResponsesPath(event.path) + ); + if (sawResponsesWs) return "websocket"; + if (events.some((event) => event.type === "http_responses")) return "http"; + return "none"; +} + +type ProbeWsMessageEvent = Extract; + +function isResponseCreateWsMessage(event: ProbeEvent): event is ProbeWsMessageEvent { + return event.type === "ws_message" && event.frameType === "response.create"; +} + +type ServerJsModule = { + handleWebSocketConnection: (ws: WebSocket, req: http.IncomingMessage) => Promise; +}; + +type ServerJsModuleLoader = (port: number) => ServerJsModule; + +type CchEdgeEvent = + | { type: "server_started"; port: number } + | { type: "http_models" } + | { type: "ws_upgrade"; path: string } + | { type: "ws_connection"; path: string | undefined } + | { type: "ws_close"; code: number; reason: string } + | { + type: "internal_http_responses"; + bytes: number; + generate: boolean | null; + previousResponseId: string | null; + sessionId: string | null; + clientTransport: string | null; + } + | { type: "internal_response_close"; sessionId: string | null } + | { type: "internal_request_aborted"; sessionId: string | null } + | { type: "handler_error"; message: string }; + +type CchRequestContext = { + req: http.IncomingMessage; + res: http.ServerResponse; + bodyText: string; + body: Record; + sessionId: string | null; + responseClosed: Promise; + requestAborted: Promise; +}; + +type CchEdgeHarness = { + port: number; + events: CchEdgeEvent[]; + setResponseHandler: (handler: (context: CchRequestContext) => void | Promise) => void; + nextInternalRequest: () => Promise; + close: () => Promise; +}; + +type EnvSnapshot = { + PORT: string | undefined; + HOSTNAME: string | undefined; + NODE_ENV: string | undefined; + CCH_RESPONSES_WS_INTERNAL_SECRET: string | undefined; +}; + +type WsClientMessage = Record | string; + +type RawWsClient = { + ws: WebSocket; + opened: Promise; + closeEvent: Promise<{ code: number; reason: string }>; + messages: WsClientMessage[]; + nextMessage: ( + predicate: (message: WsClientMessage) => boolean, + timeoutMs: number, + message: string + ) => Promise; +}; + +let cchFaultHarness: CchEdgeHarness | null = null; +let cchFaultEnv: EnvSnapshot | null = null; + +function deferred() { + let resolve!: (value: T | PromiseLike) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +async function withTimeout(promise: Promise, timeoutMs: number, message: string): Promise { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_resolve, reject) => { + timer = setTimeout(() => reject(new Error(message)), timeoutMs); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } +} + +function restoreEnvVar(name: keyof EnvSnapshot, value: string | undefined) { + if (value === undefined) { + delete process.env[name]; + } else { + process.env[name] = value; + } +} + +function captureEnv(): EnvSnapshot { + return { + PORT: process.env.PORT, + HOSTNAME: process.env.HOSTNAME, + NODE_ENV: process.env.NODE_ENV, + CCH_RESPONSES_WS_INTERNAL_SECRET: process.env.CCH_RESPONSES_WS_INTERNAL_SECRET, + }; +} + +function restoreEnv(snapshot: EnvSnapshot) { + restoreEnvVar("PORT", snapshot.PORT); + restoreEnvVar("HOSTNAME", snapshot.HOSTNAME); + restoreEnvVar("NODE_ENV", snapshot.NODE_ENV); + restoreEnvVar("CCH_RESPONSES_WS_INTERNAL_SECRET", snapshot.CCH_RESPONSES_WS_INTERNAL_SECRET); +} + +function parseJsonObject(text: string): Record { + try { + const parsed = JSON.parse(text); + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { + return parsed as Record; + } + } catch { + // ignore + } + return {}; +} + +function eventChunk(event: unknown): string { + return `data: ${JSON.stringify(event)}\n\n`; +} + +function crlfEventChunk(event: unknown): string { + return `event: ${(event as { type?: string }).type || "message"}\r\ndata: ${JSON.stringify(event)}\r\n\r\n`; +} + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === "object" && !Array.isArray(value); +} + +function parseWsPayload(raw: RawData): WsClientMessage { + const text = Array.isArray(raw) + ? Buffer.concat(raw).toString("utf8") + : Buffer.from(raw).toString("utf8"); + try { + const parsed = JSON.parse(text); + return isRecord(parsed) ? parsed : text; + } catch { + return text; + } +} + +function wsMessageType(message: WsClientMessage): string | null { + return isRecord(message) && typeof message.type === "string" ? message.type : null; +} + +function wsResponseId(message: WsClientMessage): string | null { + if (!isRecord(message) || !isRecord(message.response)) return null; + return typeof message.response.id === "string" ? message.response.id : null; +} + +function wsErrorCode(message: WsClientMessage): string | null { + if (!isRecord(message) || !isRecord(message.error)) return null; + return typeof message.error.code === "string" ? message.error.code : null; +} + +function completedResponse(responseId: string) { + return (message: WsClientMessage) => + wsMessageType(message) === "response.completed" && wsResponseId(message) === responseId; +} + +function errorEvent(code: string) { + return (message: WsClientMessage) => + wsMessageType(message) === "error" && wsErrorCode(message) === code; +} + +function connectRawWsClient( + port: number, + options: { path?: string; headers?: Record } = {} +): RawWsClient { + const messages: WsClientMessage[] = []; + const waiters: Array<{ + predicate: (message: WsClientMessage) => boolean; + resolve: (message: WsClientMessage) => void; + }> = []; + const ws = new WebSocket(`ws://127.0.0.1:${port}${options.path ?? "/v1/responses"}`, { + headers: options.headers, + }); + + const opened = new Promise((resolve, reject) => { + ws.once("open", () => resolve()); + ws.once("error", reject); + }); + const closeEvent = new Promise<{ code: number; reason: string }>((resolve) => { + ws.once("close", (code, reason) => resolve({ code, reason: reason.toString("utf8") })); + }); + + ws.on("message", (raw) => { + const parsed = parseWsPayload(raw); + messages.push(parsed); + for (let i = waiters.length - 1; i >= 0; i -= 1) { + const waiter = waiters[i]!; + if (waiter.predicate(parsed)) { + waiters.splice(i, 1); + waiter.resolve(parsed); + } + } + }); + + return { + ws, + opened, + closeEvent, + messages, + nextMessage: (predicate, timeoutMs, message) => { + const existing = messages.find(predicate); + if (existing) return Promise.resolve(existing); + return withTimeout( + new Promise((resolve) => waiters.push({ predicate, resolve })), + timeoutMs, + message + ); + }, + }; +} + +function sendResponseCreate(client: RawWsClient, body: Record) { + client.ws.send(JSON.stringify({ type: "response.create", ...body })); +} + +async function writeFragmentedSse(res: http.ServerResponse, events: unknown[], delayMs: number) { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.setHeader("cache-control", "no-cache, no-transform"); + for (const event of events) { + const chunk = eventChunk(event); + const splitAt = Math.max(1, Math.floor(chunk.length / 2)); + res.write(chunk.slice(0, splitAt)); + await sleep(delayMs); + res.write(chunk.slice(splitAt)); + await sleep(delayMs); + } + res.end(); +} + +async function startIsolatedCchEdgeHarness(secret?: string) { + const env = captureEnv(); + const serverPath = requireFromHere.resolve("../../server.js"); + try { + const harness = await startCchEdgeHarness((port) => { + process.env.PORT = String(port); + process.env.HOSTNAME = "127.0.0.1"; + process.env.NODE_ENV = "test"; + process.env.CCH_RESPONSES_WS_INTERNAL_SECRET = secret ?? `cch-ws-e2e-secret-${port}`; + + delete requireFromHere.cache[serverPath]; + return requireFromHere("../../server.js") as ServerJsModule; + }); + return { + harness, + close: async () => { + try { + await harness.close(); + } finally { + delete requireFromHere.cache[serverPath]; + restoreEnv(env); + } + }, + }; + } catch (err) { + delete requireFromHere.cache[serverPath]; + restoreEnv(env); + throw err; + } +} + +function retryDisabledConfig() { + return [ + `model_providers.${providerName}.request_max_retries=0`, + `model_providers.${providerName}.stream_max_retries=0`, + ]; +} + +function assertNoResetWithoutClosingHandshake(result: CodexResult) { + const combined = `${result.stdout}\n${result.stderr}`; + expect(combined).not.toContain("Connection reset without closing handshake"); + expect(combined).not.toContain("reset without closing handshake"); +} + +async function startCchEdgeHarness(loadServerModule: ServerJsModuleLoader) { + const events: CchEdgeEvent[] = []; + const sockets = new Set(); + const arrivedInternalRequests: CchRequestContext[] = []; + const internalRequestWaiters: Array<(context: CchRequestContext) => void> = []; + let responseHandler: ((context: CchRequestContext) => void | Promise) | null = null; + let serverModule: ServerJsModule | null = null; + + const record = (event: CchEdgeEvent) => { + events.push(event); + }; + + const server = http.createServer(async (req, res) => { + const url = new URL(req.url || "/", "http://127.0.0.1"); + if (req.method === "GET" && url.pathname === "/v1/models") { + record({ type: "http_models" }); + res.setHeader("content-type", "application/json"); + res.end( + JSON.stringify({ + object: "list", + data: [{ id: model, object: "model", owned_by: "cch-ws-fault-e2e" }], + }) + ); + return; + } + + if (req.method === "POST" && url.pathname === "/v1/responses") { + const sessionHeader = req.headers["x-cch-responses-ws-session"]; + const sessionId = Array.isArray(sessionHeader) ? sessionHeader[0] : sessionHeader || null; + const responseClosed = deferred(); + const requestAborted = deferred(); + res.once("close", () => { + record({ type: "internal_response_close", sessionId }); + responseClosed.resolve(); + }); + req.once("aborted", () => { + record({ type: "internal_request_aborted", sessionId }); + requestAborted.resolve(); + }); + + const bodyText = await readBody(req); + const body = parseJsonObject(bodyText); + const context: CchRequestContext = { + req, + res, + bodyText, + body, + sessionId, + responseClosed: responseClosed.promise, + requestAborted: requestAborted.promise, + }; + record({ + type: "internal_http_responses", + bytes: Buffer.byteLength(bodyText, "utf8"), + generate: typeof body.generate === "boolean" ? body.generate : null, + previousResponseId: + typeof body.previous_response_id === "string" ? body.previous_response_id : null, + sessionId, + clientTransport: + typeof req.headers["x-cch-client-transport"] === "string" + ? req.headers["x-cch-client-transport"] + : null, + }); + const waiter = internalRequestWaiters.shift(); + if (waiter) { + waiter(context); + } else { + arrivedInternalRequests.push(context); + } + + if (!responseHandler) { + res.statusCode = 503; + res.end("no response handler configured"); + return; + } + + try { + await responseHandler(context); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + record({ type: "handler_error", message }); + if (!res.headersSent) res.statusCode = 500; + if (!res.writableEnded) res.end(message); + } + return; + } + + res.statusCode = 404; + res.end("not found"); + }); + + const wss = new WebSocketServer({ noServer: true, maxPayload: 32 * 1024 * 1024 }); + server.on("upgrade", (req, socket, head) => { + const url = new URL(req.url || "/", "http://127.0.0.1"); + record({ type: "ws_upgrade", path: url.pathname }); + if (url.pathname !== "/v1/responses") { + socket.destroy(); + return; + } + wss.handleUpgrade(req, socket, head, (ws) => { + if (!serverModule) { + ws.close(1011, "server_module_not_ready"); + return; + } + sockets.add(ws); + record({ type: "ws_connection", path: req.url }); + ws.once("close", (code, reason) => { + sockets.delete(ws); + record({ type: "ws_close", code, reason: reason.toString("utf8") }); + }); + serverModule.handleWebSocketConnection(ws, req).catch((err) => { + record({ + type: "handler_error", + message: err instanceof Error ? err.message : String(err), + }); + try { + ws.close(1011, "internal_error"); + } catch { + ws.terminate(); + } + }); + }); + }); + + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", resolve); + }); + const address = server.address(); + if (!address || typeof address !== "object") { + await new Promise((resolve) => server.close(() => resolve())); + throw new Error("failed to allocate local port"); + } + const port = address.port; + try { + serverModule = loadServerModule(port); + } catch (err) { + await new Promise((resolve) => server.close(() => resolve())); + throw err; + } + record({ type: "server_started", port }); + + return { + port, + events, + setResponseHandler: (handler) => { + arrivedInternalRequests.length = 0; + internalRequestWaiters.length = 0; + responseHandler = handler; + }, + nextInternalRequest: () => { + const arrived = arrivedInternalRequests.shift(); + if (arrived) return Promise.resolve(arrived); + return new Promise((resolve) => { + internalRequestWaiters.push(resolve); + }); + }, + close: async () => { + for (const socket of sockets) { + if (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING) { + socket.close(1000, "test_done"); + } + } + const forceClose = setTimeout(() => { + for (const socket of sockets) socket.terminate(); + }, 250); + await new Promise((resolve) => + wss.close(() => { + clearTimeout(forceClose); + resolve(); + }) + ); + await new Promise((resolve) => server.close(() => resolve())); + }, + } satisfies CchEdgeHarness; +} + +function cchInternalRequests(events: CchEdgeEvent[]) { + return events.filter( + (event): event is Extract => + event.type === "internal_http_responses" + ); +} + +function cchWsCloses(events: CchEdgeEvent[]) { + return events.filter( + (event): event is Extract => event.type === "ws_close" + ); +} + +describe("CCH Responses WebSocket edge E2E", () => { + test("serializes pipelined response.create frames and keeps the socket reusable", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + let activeRequests = 0; + let maxActiveRequests = 0; + harness.setResponseHandler(async ({ res, body }) => { + activeRequests += 1; + maxActiveRequests = Math.max(maxActiveRequests, activeRequests); + const input = typeof body.input === "string" ? body.input : "unknown"; + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write(eventChunk({ type: "response.created", response: { id: `resp_${input}` } })); + if (input === "first") await sleep(75); + res.write( + eventChunk({ + type: "response.completed", + response: responseEnvelope(`resp_${input}`, true), + }) + ); + res.end(); + activeRequests -= 1; + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + sendResponseCreate(client, { model, input: "first" }); + sendResponseCreate(client, { model, input: "second", previous_response_id: "resp_first" }); + + await client.nextMessage(completedResponse("resp_first"), 3000, "first turn did not finish"); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + await client.nextMessage( + completedResponse("resp_second"), + 3000, + "second turn did not finish" + ); + + const internalRequests = cchInternalRequests(harness.events); + expect(internalRequests).toHaveLength(2); + expect(new Set(internalRequests.map((event) => event.sessionId)).size).toBe(1); + expect(internalRequests[1]?.previousResponseId).toBe("resp_first"); + expect(maxActiveRequests).toBe(1); + client.ws.close(1000, "test_done"); + await client.closeEvent; + } finally { + await close(); + } + }); + + test("turns non-SSE JSON success and error responses into visible WS events", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + harness.setResponseHandler(({ res, body }) => { + res.setHeader("content-type", "application/json"); + if (body.input === "json-error") { + res.statusCode = 429; + res.end( + JSON.stringify({ + error: { code: "rate_limit_exceeded", message: "synthetic rate limit" }, + }) + ); + return; + } + res.statusCode = 200; + res.end(JSON.stringify(responseEnvelope("resp_json_ok", true))); + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + sendResponseCreate(client, { model, input: "json-ok" }); + await client.nextMessage( + completedResponse("resp_json_ok"), + 3000, + "JSON success was not translated to response.completed" + ); + + sendResponseCreate(client, { model, input: "json-error" }); + const error = await client.nextMessage( + errorEvent("rate_limit_exceeded"), + 3000, + "JSON error was not translated to an error event" + ); + expect(isRecord(error) ? error.status : null).toBe(429); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + client.ws.close(1000, "test_done"); + await client.closeEvent; + } finally { + await close(); + } + }); + + test("handles CRLF fragmented SSE and [DONE] without poisoning the connection", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + harness.setResponseHandler(async ({ res, body }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.setHeader("cache-control", "no-cache, no-transform"); + if (body.input === "done-only") { + res.write("data: [DONE]\r\n\r\n"); + res.end(); + return; + } + for (const event of responseEvents("resp_crlf_fragmented", true)) { + const chunk = crlfEventChunk(event); + res.write(chunk.slice(0, 7)); + await sleep(2); + res.write(chunk.slice(7)); + } + res.end(); + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + sendResponseCreate(client, { model, input: "crlf-fragmented" }); + await client.nextMessage( + completedResponse("resp_crlf_fragmented"), + 3000, + "CRLF fragmented SSE did not complete" + ); + + sendResponseCreate(client, { model, input: "done-only" }); + const doneOnly = await client.nextMessage( + (message) => + wsMessageType(message) === "response.completed" && + isRecord(message) && + message.response === null, + 3000, + "[DONE] fallback did not synthesize response.completed" + ); + expect(doneOnly).toMatchObject({ type: "response.completed", response: null }); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + client.ws.close(1000, "test_done"); + await client.closeEvent; + } finally { + await close(); + } + }); + + test("sends a diagnostic error and close handshake when SSE ends without a terminal event", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + harness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write(eventChunk({ type: "response.created", response: { id: "resp_no_terminal" } })); + res.end(); + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + sendResponseCreate(client, { model, input: "no-terminal" }); + await client.nextMessage( + errorEvent("stream_ended_without_terminal"), + 3000, + "missing terminal event did not surface as error" + ); + const closeEvent = await client.closeEvent; + expect(closeEvent).toEqual({ code: 1011, reason: "stream_ended_without_terminal" }); + } finally { + await close(); + } + }); + + test("sends an error frame before closing when the internal response hard-drops", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + harness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write(eventChunk({ type: "response.created", response: { id: "resp_hard_drop" } })); + setTimeout(() => res.socket?.destroy(), 10); + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + sendResponseCreate(client, { model, input: "hard-drop" }); + await client.nextMessage( + (message) => + wsMessageType(message) === "error" && + ["internal_response_closed", "internal_response_error"].includes( + wsErrorCode(message) ?? "" + ), + 3000, + "hard-dropped response did not surface as a diagnostic error" + ); + const closeEvent = await client.closeEvent; + expect(closeEvent.code).toBe(1011); + expect(["internal_response_closed", "internal_response_error"]).toContain(closeEvent.reason); + } finally { + await close(); + } + }); + + test("aborts the in-flight internal request and drops queued frames when a client vanishes", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + const firstResponseClosed = deferred(); + harness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write(":\n\n"); + res.once("close", () => firstResponseClosed.resolve()); + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + sendResponseCreate(client, { model, input: "in-flight" }); + await withTimeout( + harness.nextInternalRequest(), + 3000, + "first internal request did not start" + ); + for (let i = 0; i < 8; i += 1) { + sendResponseCreate(client, { model, input: `queued-${i}` }); + } + client.ws.terminate(); + + await withTimeout( + firstResponseClosed.promise, + 3000, + "client disappearance did not close the in-flight internal response" + ); + await client.closeEvent; + expect(cchInternalRequests(harness.events)).toHaveLength(1); + } finally { + await close(); + } + }); + + test("strips forged x-cch headers and injects only trusted tunnel markers", async () => { + const secret = "trusted-cch-ws-e2e-secret"; + const { harness, close } = await startIsolatedCchEdgeHarness(secret); + try { + harness.setResponseHandler(({ req, res, sessionId }) => { + expect(req.headers["x-cch-client-transport"]).toBe("websocket"); + expect(req.headers["x-cch-responses-ws-forward"]).toBe("1"); + expect(req.headers["x-cch-internal-secret"]).toBe(secret); + expect(sessionId).not.toBe("forged-session"); + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write( + eventChunk({ + type: "response.completed", + response: responseEnvelope("resp_header_strip", true), + }) + ); + res.end(); + }); + + const client = connectRawWsClient(harness.port, { + headers: { + "x-cch-client-transport": "http", + "x-cch-internal-secret": "forged-secret", + "x-cch-responses-ws-forward": "forged-forward", + "x-cch-responses-ws-session": "forged-session", + }, + }); + await client.opened; + sendResponseCreate(client, { model, input: "headers" }); + await client.nextMessage( + completedResponse("resp_header_strip"), + 3000, + "trusted tunnel header test did not complete" + ); + client.ws.close(1000, "test_done"); + await client.closeEvent; + } finally { + await close(); + } + }); + + test("keeps large requests under the payload cap and removes transport-only fields", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + const largeInput = "x".repeat(512 * 1024); + harness.setResponseHandler(({ body, res }) => { + expect(body.model).toBe(model); + expect(body.stream).toBe(true); + expect(body.background).toBeUndefined(); + expect(typeof body.input === "string" ? body.input.length : 0).toBe(largeInput.length); + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write( + eventChunk({ + type: "response.completed", + response: responseEnvelope("resp_large_payload", true), + }) + ); + res.end(); + }); + + const client = connectRawWsClient(harness.port, { + path: `/v1/responses?model=${encodeURIComponent(model)}&api_key=should_not_matter`, + }); + await client.opened; + sendResponseCreate(client, { input: largeInput, background: true }); + await client.nextMessage( + completedResponse("resp_large_payload"), + 5000, + "large request did not complete" + ); + client.ws.close(1000, "test_done"); + await client.closeEvent; + } finally { + await close(); + } + }); + + test("reports recoverable client protocol mistakes without poisoning later turns", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + harness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write( + eventChunk({ + type: "response.completed", + response: responseEnvelope("resp_after_bad_frame", true), + }) + ); + res.end(); + }); + + const client = connectRawWsClient(harness.port); + await client.opened; + client.ws.send("{not-json"); + await client.nextMessage(errorEvent("invalid_json"), 3000, "invalid JSON was not reported"); + client.ws.send(JSON.stringify({ type: "session.update" })); + await client.nextMessage( + errorEvent("unsupported_event_type"), + 3000, + "unsupported event type was not reported" + ); + sendResponseCreate(client, { model, input: "after-bad-frame" }); + await client.nextMessage( + completedResponse("resp_after_bad_frame"), + 3000, + "valid turn after recoverable protocol mistakes did not complete" + ); + client.ws.close(1000, "test_done"); + await client.closeEvent; + } finally { + await close(); + } + }); + + test("closes with policy diagnostics on binary frames and queue overflow", async () => { + const { harness, close } = await startIsolatedCchEdgeHarness(); + try { + harness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write(":\n\n"); + }); + + const binaryClient = connectRawWsClient(harness.port); + await binaryClient.opened; + binaryClient.ws.send(Buffer.from("binary"), { binary: true }); + await binaryClient.nextMessage( + errorEvent("invalid_frame_type"), + 3000, + "binary frame was not reported" + ); + expect(await binaryClient.closeEvent).toEqual({ + code: 1003, + reason: "binary_not_supported", + }); + + const overflowClient = connectRawWsClient(harness.port); + await overflowClient.opened; + sendResponseCreate(overflowClient, { model, input: "first" }); + await withTimeout( + harness.nextInternalRequest(), + 3000, + "overflow baseline request did not start" + ); + for (let i = 0; i < 70; i += 1) { + sendResponseCreate(overflowClient, { model, input: `overflow-${i}` }); + } + await overflowClient.nextMessage( + errorEvent("too_many_requests"), + 3000, + "queue overflow was not reported" + ); + expect(await overflowClient.closeEvent).toEqual({ + code: 1008, + reason: "too_many_requests", + }); + } finally { + await close(); + } + }); +}); + +run("Codex CLI Responses transport probe", () => { + test("records whether Codex reaches /v1/responses over HTTP or WebSocket", async () => { + const expectedTransport = (process.env.CCH_CODEX_E2E_EXPECT_TRANSPORT || "any").toLowerCase(); + expect(["any", "http", "websocket"]).toContain(expectedTransport); + + const probe = await startProbeServer(); + try { + const invocation = resolveCodexInvocation(); + const result = await runCodex(probe.port, invocation); + const transport = observedTransport(probe.events); + const sawFinalText = + result.stdout.includes(responseText) || result.stderr.includes(responseText); + const wsResponseCreates = probe.events.filter(isResponseCreateWsMessage); + const wsConnections = probe.events.filter((event) => event.type === "ws_connection"); + const sawWarmup = wsResponseCreates.some((event) => event.generate === false); + + console.info( + JSON.stringify({ + probe: "codex_responses_transport", + codexCommand: invocation.display, + codexLauncher: invocation.command, + expectedTransport, + observedTransport: transport, + events: probe.events, + exitCode: result.code, + }) + ); + + if (result.code !== 0 || !sawFinalText || transport === "none") { + throw new Error( + JSON.stringify( + { + error: "codex_transport_probe_failed", + exitCode: result.code, + sawFinalText, + observedTransport: transport, + events: probe.events, + stderrTail: result.stderr.slice(-2000), + }, + null, + 2 + ) + ); + } + + if (expectedTransport !== "any") { + expect(transport).toBe(expectedTransport); + } + if (transport === "websocket") { + expect(wsResponseCreates.length).toBeGreaterThan(0); + if (sawWarmup && wsResponseCreates.length >= 2) { + expect(wsConnections).toHaveLength(1); + expect(wsResponseCreates[1]?.previousResponseId).toBeTruthy(); + } + } + } finally { + await probe.close(); + } + }, 70_000); +}); + +faultRun("Codex CLI through CCH WebSocket fault injection", () => { + let invocation: CodexInvocation; + let cchFaultServerPath: string | null = null; + + beforeAll(async () => { + invocation = resolveCodexInvocation(); + cchFaultEnv = captureEnv(); + cchFaultServerPath = requireFromHere.resolve("../../server.js"); + cchFaultHarness = await startCchEdgeHarness((port) => { + process.env.PORT = String(port); + process.env.HOSTNAME = "127.0.0.1"; + process.env.NODE_ENV = "test"; + process.env.CCH_RESPONSES_WS_INTERNAL_SECRET = `cch-ws-fault-secret-${port}`; + + delete requireFromHere.cache[cchFaultServerPath!]; + return requireFromHere("../../server.js") as ServerJsModule; + }); + }); + + afterAll(async () => { + try { + if (cchFaultHarness) { + await cchFaultHarness.close(); + } + } finally { + cchFaultHarness = null; + if (cchFaultServerPath) { + delete requireFromHere.cache[cchFaultServerPath]; + cchFaultServerPath = null; + } + if (cchFaultEnv) { + restoreEnv(cchFaultEnv); + cchFaultEnv = null; + } + } + }); + + test("survives fragmented and delayed upstream SSE chunks through the CCH tunnel", async () => { + if (!cchFaultHarness) throw new Error("CCH fault harness is not initialized"); + cchFaultHarness.events.length = 0; + let responseSeq = 0; + cchFaultHarness.setResponseHandler(async ({ res, body }) => { + responseSeq += 1; + const includeOutput = body.generate !== false; + await writeFragmentedSse( + res, + responseEvents(`resp_cch_fault_fragmented_${responseSeq}`, includeOutput), + 8 + ); + }); + + const result = await runCodex(cchFaultHarness.port, invocation, { timeoutMs: 90_000 }); + const internalRequests = cchInternalRequests(cchFaultHarness.events); + const sawFinalText = + result.stdout.includes(responseText) || result.stderr.includes(responseText); + const sawWarmup = internalRequests.some((event) => event.generate === false); + const generatedAfterWarmup = internalRequests.find((event) => event.generate !== false); + + console.info( + JSON.stringify({ + probe: "codex_cch_ws_fragmented_delayed_sse", + exitCode: result.code, + internalRequests, + wsCloses: cchWsCloses(cchFaultHarness.events), + }) + ); + + expect(result.code).toBe(0); + expect(sawFinalText).toBe(true); + assertNoResetWithoutClosingHandshake(result); + expect(internalRequests.some((event) => event.clientTransport === "websocket")).toBe(true); + if (sawWarmup) { + expect(generatedAfterWarmup?.previousResponseId).toBeTruthy(); + } + }, 90_000); + + test("surfaces abrupt upstream response destruction to Codex without reset noise", async () => { + if (!cchFaultHarness) throw new Error("CCH fault harness is not initialized"); + cchFaultHarness.events.length = 0; + cchFaultHarness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write( + eventChunk({ + type: "response.created", + response: { id: "resp_cch_fault_destroyed" }, + }) + ); + setTimeout(() => { + res.socket?.destroy(); + }, 10); + }); + + const result = await runCodex(cchFaultHarness.port, invocation, { + timeoutMs: 90_000, + extraConfig: retryDisabledConfig(), + }); + const closes = cchWsCloses(cchFaultHarness.events); + + console.info( + JSON.stringify({ + probe: "codex_cch_ws_upstream_hard_disconnect", + exitCode: result.code, + internalRequests: cchInternalRequests(cchFaultHarness.events), + wsCloses: closes, + stderrTail: result.stderr.slice(-1200), + }) + ); + + expect(result.code).not.toBe(0); + assertNoResetWithoutClosingHandshake(result); + expect(closes.length).toBeGreaterThan(0); + expect(cchFaultHarness.events.some((event) => event.type === "internal_response_close")).toBe( + true + ); + }, 90_000); + + test("aborts the internal response when the real Codex client process disappears", async () => { + if (!cchFaultHarness) throw new Error("CCH fault harness is not initialized"); + cchFaultHarness.events.length = 0; + cchFaultHarness.setResponseHandler(({ res }) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write(":\n\n"); + }); + + const internalRequestPromise = cchFaultHarness.nextInternalRequest(); + const running = spawnCodex(cchFaultHarness.port, invocation, { + timeoutMs: 60_000, + extraConfig: retryDisabledConfig(), + prompt: `Reply exactly ${responseText} after waiting for the stream.`, + }); + const internalRequest = await withTimeout( + internalRequestPromise, + 15_000, + "Codex did not open an internal CCH tunnel request before client-drop simulation" + ); + await sleep(50); + running.child.kill(); + + await withTimeout( + internalRequest.responseClosed, + 15_000, + "CCH did not abort the in-flight internal response after client process exit" + ); + const result = await running.result; + + console.info( + JSON.stringify({ + probe: "codex_cch_ws_client_process_disappears", + exitCode: result.code, + internalRequest: { + generate: + typeof internalRequest.body.generate === "boolean" + ? internalRequest.body.generate + : null, + sessionId: internalRequest.sessionId, + }, + wsCloses: cchWsCloses(cchFaultHarness.events), + }) + ); + + expect(result.code).not.toBe(0); + expect(cchInternalRequests(cchFaultHarness.events).length).toBeGreaterThan(0); + expect(cchWsCloses(cchFaultHarness.events).length).toBeGreaterThan(0); + }, 90_000); +}); diff --git a/tests/helpers/bash.ts b/tests/helpers/bash.ts new file mode 100644 index 000000000..01ebb03ed --- /dev/null +++ b/tests/helpers/bash.ts @@ -0,0 +1,159 @@ +import { execFileSync } from "node:child_process"; + +type BashCommand = { + command: string; + argsPrefix: string[]; +}; + +type RunBashOptions = { + env?: NodeJS.ProcessEnv; + label?: string; + requiredFunctions?: string[]; + setup?: string; + timeoutMs?: number; +}; + +let cachedBashCommand: BashCommand | null = null; + +function splitLines(value: string): string[] { + return value + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean); +} + +function isLegacyWindowsBash(path: string): boolean { + const normalized = path.replace(/\//g, "\\").toLowerCase(); + return ( + normalized.endsWith("\\windows\\system32\\bash.exe") || + normalized.endsWith("\\windows\\sysnative\\bash.exe") || + normalized.endsWith("\\microsoft\\windowsapps\\bash.exe") + ); +} + +function windowsCommandExists(command: string): boolean { + try { + execFileSync("where.exe", [command], { stdio: "ignore" }); + return true; + } catch { + return false; + } +} + +function resolveWindowsBashCommand(): BashCommand { + const candidates = (() => { + try { + return splitLines(execFileSync("where.exe", ["bash"], { encoding: "utf8" })); + } catch { + return []; + } + })(); + const nativeBash = candidates.find((candidate) => !isLegacyWindowsBash(candidate)); + + if (nativeBash) { + return { + command: nativeBash, + argsPrefix: ["--noprofile", "--norc", "-c"], + }; + } + + // Windows 的旧 bash.exe / WindowsApps alias 会提前改写 -c 脚本里的 $1/$@。 + // 通过 wsl.exe --exec 直接启动 Linux bash,保留 shell 函数体原文。 + if (windowsCommandExists("wsl.exe")) { + return { + command: "wsl.exe", + argsPrefix: ["--exec", "bash", "--noprofile", "--norc", "-c"], + }; + } + + return { + command: "bash", + argsPrefix: ["--noprofile", "--norc", "-c"], + }; +} + +function resolveBashCommand(): BashCommand { + if (cachedBashCommand) return cachedBashCommand; + + cachedBashCommand = + process.platform === "win32" + ? resolveWindowsBashCommand() + : { + command: "bash", + argsPrefix: ["--noprofile", "--norc", "-c"], + }; + return cachedBashCommand; +} + +function makeShellEnv(extraEnv?: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = {}; + for (const [key, value] of Object.entries(process.env)) { + if (value === undefined) continue; + if (key === "BASH_ENV" || key === "ENV" || key === "SHELLOPTS") continue; + if (key.startsWith("BASH_FUNC_")) continue; + env[key] = value; + } + + return { + ...env, + ...extraEnv, + NO_COLOR: "1", + }; +} + +function buildFunctionAssertions(requiredFunctions: string[] | undefined): string { + if (!requiredFunctions?.length) return ""; + + const quotedFunctions = requiredFunctions.map((name) => `"${name}"`).join(" "); + return ` +for __cch_required_function in ${quotedFunctions}; do + if ! declare -F "$__cch_required_function" >/dev/null; then + printf 'CCH shell helper failed: required function %s was not loaded\\n' "$__cch_required_function" >&2 + printf ' bash: %s\\n' "$BASH_VERSION" >&2 + printf ' pwd: %s\\n' "$PWD" >&2 + printf ' script root listing:\\n' >&2 + ls -la scripts >&2 || true + exit 127 + fi +done +unset __cch_required_function +`; +} + +export function runBashScript(scriptBody: string, options: RunBashOptions = {}): string { + const { command, argsPrefix } = resolveBashCommand(); + const script = ` +set -euo pipefail +${options.setup ?? ""} +${buildFunctionAssertions(options.requiredFunctions)} +${scriptBody} +`; + + try { + return execFileSync(command, [...argsPrefix, script], { + cwd: process.cwd(), + encoding: "utf8", + env: makeShellEnv(options.env), + timeout: options.timeoutMs ?? 20_000, + }).trim(); + } catch (error) { + const shellError = error as Error & { + status?: number; + stderr?: Buffer | string; + stdout?: Buffer | string; + }; + const stdout = shellError.stdout?.toString() ?? ""; + const stderr = shellError.stderr?.toString() ?? ""; + const details = [ + `CCH shell helper failed${options.label ? ` (${options.label})` : ""}`, + `command: ${[command, ...argsPrefix].join(" ")}`, + `cwd: ${process.cwd()}`, + `status: ${shellError.status ?? "unknown"}`, + stdout ? `stdout:\n${stdout.trimEnd()}` : "", + stderr ? `stderr:\n${stderr.trimEnd()}` : "", + ] + .filter(Boolean) + .join("\n"); + throw new Error(details, { cause: error }); + } +} diff --git a/tests/integration/non-chat-endpoint-fallback-observability.test.ts b/tests/integration/non-chat-endpoint-fallback-observability.test.ts index 55feb8783..70370b5df 100644 --- a/tests/integration/non-chat-endpoint-fallback-observability.test.ts +++ b/tests/integration/non-chat-endpoint-fallback-observability.test.ts @@ -164,7 +164,7 @@ run("non-chat endpoint fallback observability", () => { key: key.key, endpoint: "/v1/responses/compact", sessionId: `${KEY_PREFIX}-session-compact`, - model: "gpt-5", + model: "gpt-5.4", providerChain: [ { id: 21, name: "provider-c", reason: "retry_failed", statusCode: 500 }, { id: 22, name: "provider-d", reason: "retry_success", statusCode: 200 }, diff --git a/tests/unit/actions/active-sessions-detail-snapshots.test.ts b/tests/unit/actions/active-sessions-detail-snapshots.test.ts index 60ad79532..cf02705f7 100644 --- a/tests/unit/actions/active-sessions-detail-snapshots.test.ts +++ b/tests/unit/actions/active-sessions-detail-snapshots.test.ts @@ -103,7 +103,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { findMessageRequestAuditBySessionIdAndSequenceMock.mockResolvedValue(null); getSessionRequestCountMock.mockResolvedValue(1); - getSessionRequestBodyMock.mockResolvedValue({ model: "gpt-5.2", input: "hi" }); + getSessionRequestBodyMock.mockResolvedValue({ model: "gpt-5.4", input: "hi" }); getSessionMessagesMock.mockResolvedValue([{ role: "user", content: "hi" }]); getSessionResponseMock.mockResolvedValue('{"ok":true}'); getSessionRequestHeadersMock.mockResolvedValue({ "content-type": "application/json" }); @@ -132,7 +132,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { expect(result.ok).toBe(true); if (!result.ok) return; - expect(result.data.requestBody).toEqual({ model: "gpt-5.2", input: "hi" }); + expect(result.data.requestBody).toEqual({ model: "gpt-5.4", input: "hi" }); expect(result.data.messages).toEqual([{ role: "user", content: "hi" }]); expect(result.data.response).toBe('{"ok":true}'); expect(result.data.requestHeaders).toEqual({ "content-type": "application/json" }); @@ -152,7 +152,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { request: { before: null, after: { - body: { model: "gpt-5.2", input: "hi" }, + body: { model: "gpt-5.4", input: "hi" }, messages: [{ role: "user", content: "hi" }], headers: { "content-type": "application/json" }, meta: { @@ -179,7 +179,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { test("builds before-after snapshots from new snapshot getters", async () => { getSessionRequestPhaseSnapshotMock .mockResolvedValueOnce({ - body: { model: "gpt-5.2", messages: [{ role: "user", content: "before body" }] }, + body: { model: "gpt-5.4", messages: [{ role: "user", content: "before body" }] }, messages: [{ role: "user", content: "before messages" }], headers: { "x-before": "1" }, meta: { @@ -190,7 +190,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { }) .mockResolvedValueOnce({ body: JSON.stringify({ - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "after body messages" }], }), messages: null, @@ -229,7 +229,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { defaultView: DEFAULT_SESSION_DETAIL_VIEW_MODE, request: { before: { - body: { model: "gpt-5.2", messages: [{ role: "user", content: "before body" }] }, + body: { model: "gpt-5.4", messages: [{ role: "user", content: "before body" }] }, messages: [{ role: "user", content: "before messages" }], headers: { "x-before": "1" }, meta: { @@ -240,7 +240,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { }, after: { body: { - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "after body messages" }], }, messages: [{ role: "user", content: "after body messages" }], @@ -276,7 +276,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { test("returns null after request messages when processed body has no messages field", async () => { getSessionRequestPhaseSnapshotMock.mockResolvedValueOnce(null).mockResolvedValueOnce({ body: JSON.stringify({ - model: "gpt-5.2", + model: "gpt-5.4", input: [{ role: "user", content: "no messages field here" }], }), messages: null, @@ -297,7 +297,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { expect(result.data.snapshots.request.after).toEqual({ body: { - model: "gpt-5.2", + model: "gpt-5.4", input: [{ role: "user", content: "no messages field here" }], }, messages: null, @@ -314,7 +314,7 @@ describe("getSessionDetails - additive detail snapshots contract", () => { getSessionRequestCountMock.mockResolvedValue(3); findAdjacentRequestSequencesMock.mockResolvedValue({ prevSequence: 2, nextSequence: null }); getSessionRequestPhaseSnapshotMock.mockResolvedValueOnce(null).mockResolvedValueOnce({ - body: JSON.stringify({ model: "gpt-5.2", messages: [] }), + body: JSON.stringify({ model: "gpt-5.4", messages: [] }), messages: null, headers: { "x-after": "3" }, meta: { diff --git a/tests/unit/actions/model-prices.test.ts b/tests/unit/actions/model-prices.test.ts index ad3c9b722..9bc3f7a54 100644 --- a/tests/unit/actions/model-prices.test.ts +++ b/tests/unit/actions/model-prices.test.ts @@ -136,7 +136,7 @@ describe("Model Price Actions", () => { describe("upsertSingleModelPrice", () => { it("should create a new model price for admin", async () => { - const mockResult = makeMockPrice("gpt-5.2-codex", { + const mockResult = makeMockPrice("gpt-5.4", { mode: "chat", input_cost_per_token: 0.000015, output_cost_per_token: 0.00006, @@ -145,7 +145,7 @@ describe("Model Price Actions", () => { const { upsertSingleModelPrice } = await import("@/actions/model-prices"); const result = await upsertSingleModelPrice({ - modelName: "gpt-5.2-codex", + modelName: "gpt-5.4", mode: "chat", litellmProvider: "openai", inputCostPerToken: 0.000015, @@ -153,9 +153,9 @@ describe("Model Price Actions", () => { }); expect(result.ok).toBe(true); - expect(result.data?.modelName).toBe("gpt-5.2-codex"); + expect(result.data?.modelName).toBe("gpt-5.4"); expect(upsertModelPriceMock).toHaveBeenCalledWith( - "gpt-5.2-codex", + "gpt-5.4", expect.objectContaining({ mode: "chat", litellm_provider: "openai", @@ -309,10 +309,10 @@ describe("Model Price Actions", () => { deleteModelPriceByNameMock.mockResolvedValue(undefined); const { deleteSingleModelPrice } = await import("@/actions/model-prices"); - const result = await deleteSingleModelPrice("gpt-5.2-codex"); + const result = await deleteSingleModelPrice("gpt-5.4"); expect(result.ok).toBe(true); - expect(deleteModelPriceByNameMock).toHaveBeenCalledWith("gpt-5.2-codex"); + expect(deleteModelPriceByNameMock).toHaveBeenCalledWith("gpt-5.4"); }); it("should reject empty model name", async () => { diff --git a/tests/unit/codex/session-completer.test.ts b/tests/unit/codex/session-completer.test.ts index 311c81bc2..e82c110b3 100644 --- a/tests/unit/codex/session-completer.test.ts +++ b/tests/unit/codex/session-completer.test.ts @@ -22,7 +22,7 @@ vi.mock("@/lib/redis", () => ({ function makeCodexRequestBody(overrides?: Record): Record { return { - model: "gpt-5-codex", + model: "gpt-5.4", input: [ { type: "message", diff --git a/tests/unit/k8s-cch-update-flow.test.ts b/tests/unit/k8s-cch-update-flow.test.ts index 290c22534..2c5bcb30f 100644 --- a/tests/unit/k8s-cch-update-flow.test.ts +++ b/tests/unit/k8s-cch-update-flow.test.ts @@ -1,27 +1,24 @@ -import { execFileSync } from "node:child_process"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; +import { runBashScript } from "../helpers/bash"; + +vi.setConfig({ testTimeout: 25_000 }); function runCchHelper(scriptBody: string) { - return execFileSync( - "bash", - [ - "-lc", - ` -set -euo pipefail + return runBashScript(scriptBody, { + label: "scripts/cch", + requiredFunctions: [ + "build_image_ref_with_digest", + "cmd_doctor", + "detect_runtime", + "update_k3s_image_by_digest_or_restart", + ], + setup: ` export CCH_SOURCE_ONLY=1 +export CCH_CONFIG_FILE=/dev/null +unset CCH_NAMESPACE CCH_IMAGE CCH_DEPLOY_DIR CCH_RUNTIME CCH_INGRESS_HOST CCH_INGRESS_VARIANT CCH_BACKUP_DIR CCH_BACKUP_KEEP source scripts/cch -${scriptBody} - `, - ], - { - encoding: "utf8", - cwd: process.cwd(), - env: { - ...process.env, - NO_COLOR: "1", - }, - } - ).trim(); +`, + }); } function runK3sUpdateHarness(options: { k3sBody: string; kubectlBody?: string; tail?: string }) { diff --git a/tests/unit/k8s-deploy-shell-helpers.test.ts b/tests/unit/k8s-deploy-shell-helpers.test.ts index d5a1cec41..6d2d18013 100644 --- a/tests/unit/k8s-deploy-shell-helpers.test.ts +++ b/tests/unit/k8s-deploy-shell-helpers.test.ts @@ -1,27 +1,23 @@ -import { execFileSync } from "node:child_process"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; +import { runBashScript } from "../helpers/bash"; + +vi.setConfig({ testTimeout: 25_000 }); function runDeployHelper(scriptBody: string) { - return execFileSync( - "bash", - [ - "-lc", - ` -set -euo pipefail + return runBashScript(scriptBody, { + label: "scripts/deploy-k8s.sh", + requiredFunctions: [ + "detect_ingress_variant", + "detect_runtime", + "detect_storage_class", + "generate_random", + ], + setup: ` export DEPLOY_K8S_SOURCE_ONLY=1 +unset RUNTIME_OVERRIDE source scripts/deploy-k8s.sh -${scriptBody} - `, - ], - { - cwd: process.cwd(), - encoding: "utf8", - env: { - ...process.env, - NO_COLOR: "1", - }, - } - ).trim(); +`, + }); } describe("scripts/deploy-k8s.sh shell helpers", () => { diff --git a/tests/unit/proxy/actual-response-model.test.ts b/tests/unit/proxy/actual-response-model.test.ts index e7c1e383b..c3625bc7d 100644 --- a/tests/unit/proxy/actual-response-model.test.ts +++ b/tests/unit/proxy/actual-response-model.test.ts @@ -45,10 +45,10 @@ describe("extractActualResponseModel - 8 happy-path cases", () => { id: "chatcmpl-abc", object: "chat.completion", created: 1710000000, - model: "gpt-5-2025-08-07", + model: "gpt-5.4", choices: [{ index: 0, message: { role: "assistant", content: "Hi" }, finish_reason: "stop" }], }); - expect(extractActualResponseModel("openai-chat/non-stream", body)).toBe("gpt-5-2025-08-07"); + expect(extractActualResponseModel("openai-chat/non-stream", body)).toBe("gpt-5.4"); }); it("openai-chat/stream: reads first chunk $.model", () => { diff --git a/tests/unit/proxy/codex-provider-overrides.test.ts b/tests/unit/proxy/codex-provider-overrides.test.ts index 072919c0a..646e16057 100644 --- a/tests/unit/proxy/codex-provider-overrides.test.ts +++ b/tests/unit/proxy/codex-provider-overrides.test.ts @@ -13,7 +13,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: true, reasoning: { effort: "low", summary: "auto" }, @@ -34,7 +34,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: false, reasoning: { effort: "low", summary: "auto" }, @@ -58,7 +58,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: false, reasoning: { effort: "low", summary: "auto" }, @@ -79,7 +79,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: true, }; @@ -97,7 +97,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], reasoning: { effort: "low", summary: "auto", extra: "keep" }, }; @@ -116,7 +116,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], }; @@ -132,7 +132,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], service_tier: "default", }; @@ -152,7 +152,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: true, }; @@ -172,7 +172,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: false, reasoning: { effort: "low", summary: "auto" }, @@ -193,7 +193,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: false, }; @@ -226,7 +226,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], parallel_tool_calls: false, reasoning: { effort: "low", summary: "auto" }, @@ -260,7 +260,7 @@ describe("Codex 供应商级参数覆写", () => { }; const input: Record = { - model: "gpt-5-codex", + model: "gpt-5.4", input: [], service_tier: "priority", }; diff --git a/tests/unit/proxy/non-chat-endpoint-fallback.test.ts b/tests/unit/proxy/non-chat-endpoint-fallback.test.ts index 9bf325d0b..4f0f3f00b 100644 --- a/tests/unit/proxy/non-chat-endpoint-fallback.test.ts +++ b/tests/unit/proxy/non-chat-endpoint-fallback.test.ts @@ -303,7 +303,7 @@ describe("non-chat endpoint fallback", () => { session.originalFormat = "response"; session.setRawCrossProviderFallbackEnabled(false); session.request.message = { - model: "gpt-5", + model: "gpt-5.4", input: [{ role: "user", content: "compact me" }], }; session.setProvider(providerA); @@ -329,7 +329,7 @@ describe("non-chat endpoint fallback", () => { session.originalFormat = "response"; session.setRawCrossProviderFallbackEnabled(false); session.request.message = { - model: "gpt-5", + model: "gpt-5.4", input: [{ role: "user", content: "compact me" }], }; session.setProvider(providerA); @@ -356,7 +356,7 @@ describe("non-chat endpoint fallback", () => { session.originalFormat = "response"; session.setRawCrossProviderFallbackEnabled(false); session.request.message = { - model: "gpt-5", + model: "gpt-5.4", input: [{ role: "user", content: "compact me" }], }; session.setProvider(providerA); @@ -383,7 +383,7 @@ describe("non-chat endpoint fallback", () => { session.originalFormat = "response"; session.setRawCrossProviderFallbackEnabled(false); session.request.message = { - model: "gpt-5", + model: "gpt-5.4", input: [{ role: "user", content: "compact me" }], }; session.setProvider(providerA); diff --git a/tests/unit/proxy/non-chat-endpoint-session-context.test.ts b/tests/unit/proxy/non-chat-endpoint-session-context.test.ts index 6786bee6b..f2ffb9d6f 100644 --- a/tests/unit/proxy/non-chat-endpoint-session-context.test.ts +++ b/tests/unit/proxy/non-chat-endpoint-session-context.test.ts @@ -348,7 +348,7 @@ describe("non-chat endpoint session context", () => { const rawCompactSession = createProxySession(V1_ENDPOINT_PATHS.RESPONSES_COMPACT); rawCompactSession.originalFormat = "response"; rawCompactSession.request.message = { - model: "gpt-5", + model: "gpt-5.4", input: [{ role: "user", content: "compact me" }], }; rawCompactSession.sessionId = "sess_compact"; @@ -391,7 +391,7 @@ describe("non-chat endpoint session context", () => { const compactSession = createProxySession(V1_ENDPOINT_PATHS.RESPONSES_COMPACT); compactSession.originalFormat = "response"; compactSession.request.message = { - model: "gpt-5", + model: "gpt-5.4", input: [{ role: "user", content: "compact me" }], }; const compactBefore = structuredClone(compactSession.request.message); diff --git a/tests/unit/proxy/proxy-forwarder-large-chunked-response.test.ts b/tests/unit/proxy/proxy-forwarder-large-chunked-response.test.ts index 4e9cfd9c7..2ec83c17d 100644 --- a/tests/unit/proxy/proxy-forwarder-large-chunked-response.test.ts +++ b/tests/unit/proxy/proxy-forwarder-large-chunked-response.test.ts @@ -103,10 +103,10 @@ function createSession(params?: { clientAbortSignal?: AbortSignal | null }): Pro originalHeaders: new Headers(headers), headerLog: JSON.stringify(Object.fromEntries(headers.entries())), request: { - model: "gpt-5.2", + model: "gpt-5.4", log: "(test)", message: { - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "hi" }], }, }, diff --git a/tests/unit/proxy/proxy-forwarder-nonok-body-hang.test.ts b/tests/unit/proxy/proxy-forwarder-nonok-body-hang.test.ts index 95eee1076..01be0b640 100644 --- a/tests/unit/proxy/proxy-forwarder-nonok-body-hang.test.ts +++ b/tests/unit/proxy/proxy-forwarder-nonok-body-hang.test.ts @@ -103,10 +103,10 @@ function createSession(params?: { clientAbortSignal?: AbortSignal | null }): Pro originalHeaders: new Headers(headers), headerLog: JSON.stringify(Object.fromEntries(headers.entries())), request: { - model: "gpt-5.2", + model: "gpt-5.4", log: "(test)", message: { - model: "gpt-5.2", + model: "gpt-5.4", messages: [{ role: "user", content: "hi" }], }, }, diff --git a/tests/unit/proxy/proxy-forwarder-raw-passthrough-regression.test.ts b/tests/unit/proxy/proxy-forwarder-raw-passthrough-regression.test.ts index 4dc1287cb..8bb55808b 100644 --- a/tests/unit/proxy/proxy-forwarder-raw-passthrough-regression.test.ts +++ b/tests/unit/proxy/proxy-forwarder-raw-passthrough-regression.test.ts @@ -65,7 +65,7 @@ function createRawPassthroughSession(bodyText: string, extraHeaders?: HeadersIni originalHeaders, headerLog: JSON.stringify(Object.fromEntries(headers.entries())), request: { - model: "gpt-5", + model: "gpt-5.4", log: bodyText, message: JSON.parse(bodyText) as Record, buffer: new TextEncoder().encode(bodyText).buffer, @@ -92,7 +92,7 @@ function createRawPassthroughSession(bodyText: string, extraHeaders?: HeadersIni endpointPolicy: resolveEndpointPolicy("/v1/responses/compact"), setCacheTtlResolved: vi.fn(), getCacheTtlResolved: vi.fn(() => null), - getCurrentModel: vi.fn(() => "gpt-5"), + getCurrentModel: vi.fn(() => "gpt-5.4"), clientRequestsContext1m: vi.fn(() => false), setContext1mApplied: vi.fn(), getContext1mApplied: vi.fn(() => false), @@ -126,7 +126,7 @@ describe("ProxyForwarder raw passthrough regression", () => { }); it("raw passthrough 应优先保留原始请求体字节,而不是重新 JSON.stringify", async () => { - const originalBody = '{\n "model": "gpt-5",\n "input": [1, 2, 3]\n}\n'; + const originalBody = '{\n "model": "gpt-5.4",\n "input": [1, 2, 3]\n}\n'; const session = createRawPassthroughSession(originalBody); const provider = createProvider(); @@ -150,7 +150,7 @@ describe("ProxyForwarder raw passthrough regression", () => { }); it("raw passthrough 出站请求不得继续携带 transfer-encoding 这类 hop-by-hop 头", async () => { - const body = '{"model":"gpt-5","input":[]}'; + const body = '{"model":"gpt-5.4","input":[]}'; const session = createRawPassthroughSession(body, { connection: "keep-alive", "transfer-encoding": "chunked", diff --git a/tests/unit/proxy/proxy-forwarder.test.ts b/tests/unit/proxy/proxy-forwarder.test.ts index f1b500fb2..e0524413a 100644 --- a/tests/unit/proxy/proxy-forwarder.test.ts +++ b/tests/unit/proxy/proxy-forwarder.test.ts @@ -2,6 +2,11 @@ import { describe, expect, it } from "vitest"; import type { Provider } from "@/types/provider"; import { DEFAULT_CODEX_USER_AGENT, ProxyForwarder } from "@/app/v1/_lib/proxy/forwarder"; import { ProxySession } from "@/app/v1/_lib/proxy/session"; +import { + INTERNAL_SECRET_HEADER, + RESPONSES_WS_SESSION_HEADER, + WS_FORWARD_FLAG_HEADER, +} from "@/app/v1/_lib/responses-ws/internal-secret"; function createSession({ userAgent, @@ -203,6 +208,27 @@ describe("ProxyForwarder - buildHeaders User-Agent resolution", () => { expect(resultHeaders.get("transfer-encoding")).toBeNull(); expect(resultHeaders.get("content-length")).toBeNull(); }); + + it("应该剥离 WS 内部隧道 header,避免把 loopback secret 透传给上游", () => { + const session = createSession({ + userAgent: "Original-UA/1.0", + headers: new Headers([ + ["user-agent", "Original-UA/1.0"], + ["x-cch-client-transport", "websocket"], + [WS_FORWARD_FLAG_HEADER, "1"], + [RESPONSES_WS_SESSION_HEADER, "client-session-1"], + [INTERNAL_SECRET_HEADER, "loopback-secret-should-stay-local"], + ]), + }); + + const provider = createCodexProvider(); + const resultHeaders = buildHeaders(session, provider); + + expect(resultHeaders.get("x-cch-client-transport")).toBeNull(); + expect(resultHeaders.get(WS_FORWARD_FLAG_HEADER)).toBeNull(); + expect(resultHeaders.get(RESPONSES_WS_SESSION_HEADER)).toBeNull(); + expect(resultHeaders.get(INTERNAL_SECRET_HEADER)).toBeNull(); + }); }); describe("ProxyForwarder - buildHeaders auth minimization", () => { diff --git a/tests/unit/server-ws-close-handshake.test.ts b/tests/unit/server-ws-close-handshake.test.ts index 2927396ba..3462c9bfe 100644 --- a/tests/unit/server-ws-close-handshake.test.ts +++ b/tests/unit/server-ws-close-handshake.test.ts @@ -1,10 +1,11 @@ /** * server.js WebSocket close-handshake regression for issue #1150. * - * Verifies that after the SSE→WS bridge delivers a terminal event (or runs - * into an error), the client WebSocket receives a proper close frame instead - * of being abruptly torn down — which clients like Codex (tungstenite-rs) - * surface as "Connection reset without closing handshake". + * Verifies that normal terminal events keep the persistent client WebSocket + * usable for the next response.create, while fatal protocol/transport paths + * still receive a proper close frame instead of being abruptly torn down — + * which clients like Codex (tungstenite-rs) surface as + * "Connection reset without closing handshake". */ import { createRequire } from "node:module"; @@ -19,6 +20,10 @@ type ServerHarness = { server: http.Server; wss: WebSocketServer; setSseHandler: (handler: (req: http.IncomingMessage, res: http.ServerResponse) => void) => void; + nextServerConnection: () => Promise<{ + close: Promise; + waitForMessageCount: (count: number) => Promise; + }>; close: () => Promise; }; @@ -28,6 +33,74 @@ type ServerJsModule = { let serverModule: ServerJsModule; let harness: ServerHarness | null = null; +let originalEnv: { + PORT: string | undefined; + HOSTNAME: string | undefined; + NODE_ENV: string | undefined; +} = { + PORT: process.env.PORT, + HOSTNAME: process.env.HOSTNAME, + NODE_ENV: process.env.NODE_ENV, +}; + +function restoreEnvVar(name: "PORT" | "HOSTNAME" | "NODE_ENV", value: string | undefined) { + if (value === undefined) { + delete process.env[name]; + } else { + process.env[name] = value; + } +} + +function deferred() { + let resolve!: (value: T | PromiseLike) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + +async function withTimeout(promise: Promise, timeoutMs: number, message: string): Promise { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_resolve, reject) => { + timer = setTimeout(() => reject(new Error(message)), timeoutMs); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } +} + +async function waitForMessageCount( + messages: unknown[], + count: number, + timeoutMs: number, + message: string +): Promise { + await new Promise((resolve, reject) => { + let timer: ReturnType; + const timeout = setTimeout(() => { + clearInterval(timer); + reject(new Error(message)); + }, timeoutMs); + timer = setInterval(() => { + if (messages.length >= count) { + clearTimeout(timeout); + clearInterval(timer); + resolve(); + } + }, 5); + if (messages.length >= count) { + clearTimeout(timeout); + clearInterval(timer); + resolve(); + } + }); +} async function pickFreePort(): Promise { return new Promise((resolve, reject) => { @@ -47,6 +120,12 @@ async function pickFreePort(): Promise { async function startHarness(port: number): Promise { let sseHandler: ((req: http.IncomingMessage, res: http.ServerResponse) => void) | null = null; + const connectionWaiters: Array< + (signal: { + close: Promise; + waitForMessageCount: (count: number) => Promise; + }) => void + > = []; const server = http.createServer((req, res) => { if (req.method === "POST" && req.url === "/v1/responses") { @@ -74,7 +153,49 @@ async function startHarness(port: number): Promise { return; } wss.handleUpgrade(req, socket, head, (ws) => { - void serverModule.handleWebSocketConnection(ws as unknown as WebSocket, req); + const closeSignal = deferred(); + const messageWaiters: Array<{ count: number; resolve: () => void }> = []; + let messageCount = 0; + const notifyMessageWaiters = () => { + for (let i = messageWaiters.length - 1; i >= 0; i -= 1) { + const waiter = messageWaiters[i]!; + if (messageCount >= waiter.count) { + messageWaiters.splice(i, 1); + waiter.resolve(); + } + } + }; + ws.once("close", () => closeSignal.resolve()); + serverModule.handleWebSocketConnection(ws as unknown as WebSocket, req).catch((err) => { + process.stderr.write( + `[server-ws-close-handshake] handleWebSocketConnection failed: ${ + err instanceof Error ? err.stack || err.message : String(err) + }\n` + ); + try { + ws.close(1011, "internal_error"); + } catch { + ws.terminate(); + } + }); + // Register after the bridge handler so waitForMessageCount() resolves only + // after the production message listener has accepted or rejected the frame. + ws.on("message", () => { + messageCount += 1; + notifyMessageWaiters(); + }); + const waiter = connectionWaiters.shift(); + if (waiter) { + waiter({ + close: closeSignal.promise, + waitForMessageCount: (count) => { + if (messageCount >= count) return Promise.resolve(); + return new Promise((resolve) => { + messageWaiters.push({ count, resolve }); + }); + }, + }); + } }); }); @@ -87,6 +208,10 @@ async function startHarness(port: number): Promise { setSseHandler: (handler) => { sseHandler = handler; }, + nextServerConnection: () => + new Promise((resolve) => { + connectionWaiters.push(resolve); + }), close: () => new Promise((resolve) => { wss.close(() => { @@ -119,6 +244,11 @@ function connectClient(port: number) { describe("server.js WebSocket close-handshake (issue #1150)", () => { beforeAll(async () => { const port = await pickFreePort(); + originalEnv = { + PORT: process.env.PORT, + HOSTNAME: process.env.HOSTNAME, + NODE_ENV: process.env.NODE_ENV, + }; process.env.PORT = String(port); process.env.HOSTNAME = "127.0.0.1"; process.env.NODE_ENV = "test"; @@ -133,9 +263,12 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { await harness.close(); harness = null; } + restoreEnvVar("PORT", originalEnv.PORT); + restoreEnvVar("HOSTNAME", originalEnv.HOSTNAME); + restoreEnvVar("NODE_ENV", originalEnv.NODE_ENV); }); - it("sends close(1000) after delivering response.completed", async () => { + it("keeps the client WebSocket open after delivering response.completed", async () => { if (!harness) throw new Error("harness not initialized"); harness.setSseHandler((_req, res) => { res.statusCode = 200; @@ -154,11 +287,18 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { const client = connectClient(harness.port); await client.opened; - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: "hi" })); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "hi" })); + await waitForMessageCount( + client.messages, + 2, + 3000, + "response.completed was not forwarded to the client" + ); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + client.ws.close(1000, "test_done"); const close = await client.closeEvent; expect(close.code).toBe(1000); - expect(close.reason).toBe("response_completed"); const types = client.messages .filter((m): m is { type: string } => typeof m === "object" && m !== null) .map((m) => m.type); @@ -180,7 +320,7 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { const client = connectClient(harness.port); await client.opened; - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: "hi" })); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "hi" })); const close = await client.closeEvent; expect(close.code).toBe(1011); @@ -193,7 +333,37 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { expect(errorEvent?.error.code).toBe("stream_ended_without_terminal"); }); - it("sends close(1011) when the upstream returns a non-stream HTTP error", async () => { + it("sends close(1011) when the internal HTTP response is destroyed mid-stream", async () => { + if (!harness) throw new Error("harness not initialized"); + harness.setSseHandler((_req, res) => { + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write( + `data: ${JSON.stringify({ type: "response.created", response: { id: "r_destroy" } })}\n\n` + ); + setTimeout(() => { + res.socket?.destroy(); + }, 10); + }); + + const client = connectClient(harness.port); + await client.opened; + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "hi" })); + + const close = await client.closeEvent; + expect(close.code).toBe(1011); + expect(["internal_response_closed", "internal_response_error"]).toContain(close.reason); + const errorEvent = client.messages.find( + (m): m is { type: string; error: { code: string } } => + typeof m === "object" && m !== null && (m as { type?: unknown }).type === "error" + ); + expect(errorEvent).toBeTruthy(); + expect(["internal_response_closed", "internal_response_error"]).toContain( + errorEvent?.error.code + ); + }); + + it("forwards a non-stream HTTP error without closing the persistent client socket", async () => { if (!harness) throw new Error("harness not initialized"); harness.setSseHandler((_req, res) => { res.statusCode = 502; @@ -203,14 +373,21 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { const client = connectClient(harness.port); await client.opened; - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: "hi" })); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "hi" })); - const close = await client.closeEvent; - expect(close.code).toBe(1011); - expect(close.reason).toBe("http_502"); + await waitForMessageCount(client.messages, 1, 3000, "HTTP error was not forwarded"); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + const errorEvent = client.messages.find( + (m): m is { type: string; status: number; error: { code: string } } => + typeof m === "object" && m !== null && (m as { type?: unknown }).type === "error" + ); + expect(errorEvent?.status).toBe(502); + expect(errorEvent?.error.code).toBe("bad_gateway"); + client.ws.close(1000, "test_done"); + await client.closeEvent; }); - it("sends close(1011) labelled upstream_error when terminal type is 'error'", async () => { + it("forwards terminal type 'error' without closing the persistent client socket", async () => { if (!harness) throw new Error("harness not initialized"); harness.setSseHandler((_req, res) => { res.statusCode = 200; @@ -226,11 +403,12 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { const client = connectClient(harness.port); await client.opened; - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: "hi" })); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "hi" })); - const close = await client.closeEvent; - expect(close.code).toBe(1011); - expect(close.reason).toBe("upstream_error"); + await waitForMessageCount(client.messages, 1, 3000, "terminal error was not forwarded"); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + client.ws.close(1000, "test_done"); + await client.closeEvent; }); it("accepts response.create bodies up to 4 MiB without a maxPayload teardown", async () => { @@ -253,27 +431,30 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { // caused tungstenite to surface "Connection reset without closing // handshake". const bigInput = "x".repeat(4 * 1024 * 1024); - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: bigInput })); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: bigInput })); + await waitForMessageCount(client.messages, 1, 3000, "large response was not forwarded"); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + client.ws.close(1000, "test_done"); const close = await client.closeEvent; expect(close.code).toBe(1000); - expect(close.reason).toBe("response_completed"); }, 20000); - it("drops queued frames once a terminal close is initiated (no extra upstream calls)", async () => { + it("processes queued response.create frames sequentially after a terminal event", async () => { if (!harness) throw new Error("harness not initialized"); let upstreamCalls = 0; harness.setSseHandler((_req, res) => { upstreamCalls += 1; + const callNo = upstreamCalls; res.statusCode = 200; res.setHeader("content-type", "text/event-stream"); - // Stagger the response so the second frame, if not dropped, has time - // to be dequeued and dispatched while we're closing the first. + // Stagger the response so the second frame remains queued until the + // first turn's terminal event has been fully forwarded. setTimeout(() => { res.write( `data: ${JSON.stringify({ type: "response.completed", - response: { id: `r_${upstreamCalls}` }, + response: { id: `r_${callNo}` }, })}\n\n` ); res.end(); @@ -282,18 +463,129 @@ describe("server.js WebSocket close-handshake (issue #1150)", () => { const client = connectClient(harness.port); await client.opened; - // Pipeline two frames before the first response completes. With the race - // present, drain() pops the second after closeClient() initiates the - // close handshake but before ws.on("close") fires, hitting the upstream - // a second time and burning provider quota. - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: "first" })); - client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5", input: "second" })); - + // Pipeline two frames before the first response completes. A compliant + // Responses WS bridge keeps the client socket open and drains them + // sequentially. + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "first" })); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "second" })); + + await waitForMessageCount(client.messages, 2, 3000, "both queued responses were not forwarded"); + expect(client.ws.readyState).toBe(WebSocket.OPEN); + client.ws.close(1000, "test_done"); const close = await client.closeEvent; expect(close.code).toBe(1000); - expect(close.reason).toBe("response_completed"); - // Exactly one upstream call must have happened — the second frame is - // dropped synchronously when we initiate the close. - expect(upstreamCalls).toBe(1); + expect(upstreamCalls).toBe(2); + }); + + it("drops any pipelined frame after a binary protocol close", async () => { + if (!harness) throw new Error("harness not initialized"); + let upstreamCalls = 0; + harness.setSseHandler((_req, res) => { + upstreamCalls += 1; + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + res.write( + `data: ${JSON.stringify({ + type: "response.completed", + response: { id: "should_not_run" }, + })}\n\n` + ); + res.end(); + }); + + const serverConnectionPromise = harness.nextServerConnection(); + const client = connectClient(harness.port); + const serverConnection = await withTimeout( + serverConnectionPromise, + 3000, + "server WebSocket did not accept the binary-close test connection" + ); + await client.opened; + const queuedFrameObserved = serverConnection.waitForMessageCount(2); + client.ws.send(Buffer.from("not a text frame"), { binary: true }); + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "queued" })); + + const close = await client.closeEvent; + expect(close.code).toBe(1003); + expect(close.reason).toBe("binary_not_supported"); + await withTimeout( + serverConnection.close, + 3000, + "server WebSocket did not close after binary protocol close" + ); + await withTimeout( + queuedFrameObserved, + 3000, + "server WebSocket did not observe the queued text frame after binary close" + ); + expect(upstreamCalls).toBe(0); + }); + + it("aborts the in-flight request and clears the pending queue on overflow close", async () => { + if (!harness) throw new Error("harness not initialized"); + let upstreamCalls = 0; + let firstResponse: http.ServerResponse | null = null; + const firstRequestStarted = deferred(); + const firstResponseClosed = deferred(); + harness.setSseHandler((_req, res) => { + upstreamCalls += 1; + res.statusCode = 200; + res.setHeader("content-type", "text/event-stream"); + if (upstreamCalls === 1) { + firstResponse = res; + res.on("close", () => firstResponseClosed.resolve()); + res.write(":\n\n"); + firstRequestStarted.resolve(); + return; + } + res.write( + `data: ${JSON.stringify({ + type: "response.completed", + response: { id: `unexpected_${upstreamCalls}` }, + })}\n\n` + ); + res.end(); + }); + + try { + const serverConnectionPromise = harness.nextServerConnection(); + const client = connectClient(harness.port); + const serverConnection = await withTimeout( + serverConnectionPromise, + 3000, + "server WebSocket did not accept the overflow test connection" + ); + await client.opened; + client.ws.send(JSON.stringify({ type: "response.create", model: "gpt-5.4", input: "first" })); + await withTimeout( + firstRequestStarted.promise, + 3000, + "first upstream request did not start before overflow test" + ); + for (let i = 0; i < 70; i += 1) { + client.ws.send( + JSON.stringify({ type: "response.create", model: "gpt-5.4", input: `queued-${i}` }) + ); + } + + const close = await client.closeEvent; + expect(close.code).toBe(1008); + expect(close.reason).toBe("too_many_requests"); + await withTimeout( + serverConnection.close, + 3000, + "server WebSocket did not close after overflow protocol close" + ); + await withTimeout( + firstResponseClosed.promise, + 3000, + "overflow close did not abort the in-flight internal request" + ); + expect(upstreamCalls).toBe(1); + } finally { + if (firstResponse && !firstResponse.destroyed && !firstResponse.writableEnded) { + firstResponse.end(); + } + } }); }); diff --git a/tests/unit/settings/providers/api-test-button.test.tsx b/tests/unit/settings/providers/api-test-button.test.tsx index 809d1926d..4920185a7 100644 --- a/tests/unit/settings/providers/api-test-button.test.tsx +++ b/tests/unit/settings/providers/api-test-button.test.tsx @@ -90,7 +90,7 @@ describe("ApiTestButton", () => { id: "cx_base", description: "legacy preset", defaultSuccessContains: "pong", - defaultModel: "gpt-5.1-codex", + defaultModel: "gpt-5.4", }, ], }); diff --git a/tests/unit/usage-doc/opencode-usage-doc.test.tsx b/tests/unit/usage-doc/opencode-usage-doc.test.tsx index 7c09f40cc..ebff173ff 100644 --- a/tests/unit/usage-doc/opencode-usage-doc.test.tsx +++ b/tests/unit/usage-doc/opencode-usage-doc.test.tsx @@ -77,11 +77,11 @@ describe("UsageDoc - OpenCode 配置教程", () => { expect(text).toContain("claude-sonnet-4-5-20250929"); expect(text).toContain("claude-opus-4-5-20251101"); - expect(text).toContain('"model": "openai/gpt-5.2"'); - expect(text).toContain('"small_model": "openai/gpt-5.2-small"'); + expect(text).toContain('"model": "openai/gpt-5.4"'); + expect(text).toContain('"small_model": "openai/gpt-5.4-small"'); - expect(text).toContain("gpt-5.2"); - expect(text).toContain("gpt-5.2-small"); + expect(text).toContain("gpt-5.4"); + expect(text).toContain("gpt-5.4-small"); expect(text).toContain('"reasoningEffort": "xhigh"'); expect(text).toContain('"reasoningEffort": "medium"'); expect(text).toContain('"store": false');