From 0b3c27064d3755e1d4a019153c386096bc5b822e Mon Sep 17 00:00:00 2001 From: gulang91-svg <273951465+gulang91-svg@users.noreply.github.com> Date: Mon, 22 Jun 2026 16:56:35 +0800 Subject: [PATCH] Disable client fast mode requests --- .env.example | 4 +++ .env.sqlite.example | 4 +++ docs/CONFIGURATION.md | 1 + main.go | 3 +- proxy/anthropic.go | 3 ++ proxy/anthropic_test.go | 25 ++++++++++++++++ proxy/handler.go | 10 ++++++- proxy/runtime_config.go | 23 ++++++++++++++- proxy/translator.go | 21 ++++++++++++++ proxy/translator_test.go | 61 ++++++++++++++++++++++++++++++++++++++++ 10 files changed, 152 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index 81355590..54d5f9ef 100644 --- a/.env.example +++ b/.env.example @@ -52,6 +52,10 @@ CODEX_PORT=8080 # 快速调度器(可选,也可在管理后台运行时开启) # FAST_SCHEDULER_ENABLED=true +# Ignore client fast/priority service tier requests; service_tier=fast and +# Anthropic speed=fast will be treated as normal requests. +# CODEX_DISABLE_FAST_MODE=false + # 生图工作台图库目录;Docker 标准版 compose 已将 /data 挂载为持久化卷 IMAGE_ASSET_DIR=/data/images diff --git a/.env.sqlite.example b/.env.sqlite.example index 7c816c49..9b339c49 100644 --- a/.env.sqlite.example +++ b/.env.sqlite.example @@ -62,6 +62,10 @@ DATABASE_PATH=/data/codex2api.db # 快速调度器(可选,也可在管理后台运行时开启) # FAST_SCHEDULER_ENABLED=true +# Ignore client fast/priority service tier requests; service_tier=fast and +# Anthropic speed=fast will be treated as normal requests. 
+# CODEX_DISABLE_FAST_MODE=false + # 生图工作台图库目录;SQLite 版与数据库共用 /data 持久化卷 IMAGE_ASSET_DIR=/data/images diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index af8e8c19..db358e97 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -48,6 +48,7 @@ Codex2API 采用三层配置架构: | `ADMIN_SECRET` | 否 | - | 管理后台登录密钥 | | `CODEX_ALLOW_ANONYMOUS` | 否 | `false` | 设为 `true` 时,未配置任何对外 API Key 也允许 `/v1/*` 直接调用(仅限内网测试场景) | | `FAST_SCHEDULER_ENABLED` | 否 | `false` | 通过环境变量启用快速调度器(也可在管理后台运行时开启) | +| `CODEX_DISABLE_FAST_MODE` | 否 | `false` | 设为 `true` 时忽略客户端 fast/priority 档位请求;OpenAI `service_tier=fast` / `service_tier=priority` 和 Anthropic `speed=fast` 都按普通请求处理 | | `TZ` | 否 | UTC | 时区,如 `Asia/Shanghai` | ### Codex 上游稳定性配置 diff --git a/main.go b/main.go index 2cb7633b..b574de96 100644 --- a/main.go +++ b/main.go @@ -196,7 +196,7 @@ func main() { } db.SetUsageLogConfig(settings.UsageLogMode, settings.UsageLogBatchSize, settings.UsageLogFlushIntervalSeconds) runtimeSettings := proxy.ApplyRuntimeSettingsFromSystem(settings) - log.Printf("运行时优化配置: client_compat=%s min_cli=%s usage_log=%s batch=%d flush=%ds stream_flush=%s/%dms first_token_mode=%s first_token_timeout=%ds billing_tier_policy=%s", + log.Printf("运行时优化配置: client_compat=%s min_cli=%s usage_log=%s batch=%d flush=%ds stream_flush=%s/%dms first_token_mode=%s first_token_timeout=%ds billing_tier_policy=%s disable_fast_mode=%t", runtimeSettings.ClientCompatMode, runtimeSettings.CodexMinCLIVersion, db.GetUsageLogMode(), @@ -207,6 +207,7 @@ func main() { runtimeSettings.FirstTokenMode, runtimeSettings.FirstTokenTimeoutSec, runtimeSettings.BillingTierPolicy, + runtimeSettings.DisableFastMode, ) // 4b'. 
应用图片存储后端配置 diff --git a/proxy/anthropic.go b/proxy/anthropic.go index 02e33b2d..7018d356 100644 --- a/proxy/anthropic.go +++ b/proxy/anthropic.go @@ -237,6 +237,9 @@ func TranslateAnthropicToCodex(rawJSON []byte, modelMappingJSON string) ([]byte, } func shouldUseCodexPriorityForAnthropicSpeed(speed string) bool { + if disableFastMode() { + return false + } return strings.ToLower(strings.TrimSpace(speed)) == "fast" } diff --git a/proxy/anthropic_test.go b/proxy/anthropic_test.go index 0131f034..8c8df8d6 100644 --- a/proxy/anthropic_test.go +++ b/proxy/anthropic_test.go @@ -161,6 +161,31 @@ func TestTranslateAnthropicToCodex_SpeedFastMapsToCodexPriority(t *testing.T) { } } +func TestTranslateAnthropicToCodex_DisableFastModeDropsSpeedFastTier(t *testing.T) { + previous := CurrentRuntimeSettings() + t.Cleanup(func() { ApplyRuntimeSettings(previous) }) + next := previous + next.DisableFastMode = true + ApplyRuntimeSettings(next) + + raw := []byte(`{ + "model":"claude-sonnet-4-5", + "messages":[{"role":"user","content":"hello"}], + "speed":"fast" + }`) + + got, _, err := TranslateAnthropicToCodexWithModels(raw, "", []string{"gpt-5.4"}) + if err != nil { + t.Fatalf("TranslateAnthropicToCodexWithModels returned error: %v", err) + } + if gjson.GetBytes(got, "service_tier").Exists() { + t.Fatalf("service_tier should be omitted when fast mode is disabled; body=%s", got) + } + if speed := gjson.GetBytes(got, "speed"); speed.Exists() { + t.Fatalf("speed should not be forwarded to Codex body; body=%s", got) + } +} + func TestAnthropicUsageServiceTierResolution(t *testing.T) { cases := []struct { name string diff --git a/proxy/handler.go b/proxy/handler.go index a067611d..3ce308bc 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -705,9 +705,17 @@ func extractReasoningEffort(body []byte) string { // extractServiceTier 从请求体提取服务等级 func extractServiceTier(body []byte) string { if tier := gjson.GetBytes(body, "service_tier").String(); tier != "" { + tier = 
strings.TrimSpace(tier) + if disableFastMode() && isFastServiceTier(tier) { + return "" + } return tier } - return gjson.GetBytes(body, "serviceTier").String() + tier := strings.TrimSpace(gjson.GetBytes(body, "serviceTier").String()) + if disableFastMode() && isFastServiceTier(tier) { + return "" + } + return tier } const upstreamErrorKindMessageTooBig = "message_too_big" diff --git a/proxy/runtime_config.go b/proxy/runtime_config.go index 856ebb43..45437c89 100644 --- a/proxy/runtime_config.go +++ b/proxy/runtime_config.go @@ -28,7 +28,7 @@ const ( // per-api-key —— 无显式会话的请求按下游 API Key 共享上游身份(恢复 v2 旧行为, // 保留隐式 prompt cache 命中)。 // 用环境变量 CODEX_REQUEST_ISOLATION_MODE 覆盖默认值。 - RequestIsolationModeIsolated = "isolated" + RequestIsolationModeIsolated = "isolated" RequestIsolationModePerAPIKey = "per-api-key" defaultClientCompatMode = ClientCompatModePreserve @@ -41,6 +41,7 @@ const ( defaultFirstTokenTimeoutSec = 0 maxFirstTokenTimeoutSec = 600 defaultBillingTierPolicy = BillingTierPolicyActual + defaultDisableFastMode = false defaultCodexWSHideErrors = true defaultCodexWSSilentRetry = true defaultCodexWSSilentRetries = 2 @@ -55,6 +56,7 @@ type RuntimeSettings struct { FirstTokenMode string FirstTokenTimeoutSec int BillingTierPolicy string + DisableFastMode bool // Ignore client fast/priority service tier requests when true. 
CodexForceWebsocket bool // 强制 Codex 上游走 WebSocket(默认 false) CodexWSHideErrors bool // 隐藏 Codex WS 上游原始错误(默认 true) CodexWSSilentRetry bool // 首包前 Codex WS 上游错误静默换号重试(默认 true) @@ -84,6 +86,7 @@ func DefaultRuntimeSettings() RuntimeSettings { FirstTokenMode: defaultFirstTokenMode, FirstTokenTimeoutSec: defaultFirstTokenTimeoutSec, BillingTierPolicy: defaultBillingTierPolicy, + DisableFastMode: defaultDisableFastModeFromEnv(), CodexWSHideErrors: defaultCodexWSHideErrors, CodexWSSilentRetry: defaultCodexWSSilentRetry, CodexWSSilentRetries: defaultCodexWSSilentRetries, @@ -98,6 +101,24 @@ func defaultRequestIsolationMode() string { return NormalizeRequestIsolationMode(os.Getenv("CODEX_REQUEST_ISOLATION_MODE")) } +func defaultDisableFastModeFromEnv() bool { + for _, key := range []string{"CODEX_DISABLE_FAST_MODE", "DISABLE_FAST_MODE"} { + if truthyRuntimeEnv(os.Getenv(key)) { + return true + } + } + return defaultDisableFastMode +} + +func truthyRuntimeEnv(value string) bool { + switch strings.ToLower(strings.TrimSpace(value)) { + case "1", "true", "yes", "y", "on", "enable", "enabled": + return true + default: + return false + } +} + // NormalizeRequestIsolationMode 归一化隔离模式,空/未知值回落到 isolated。 func NormalizeRequestIsolationMode(mode string) string { switch strings.ToLower(strings.TrimSpace(mode)) { diff --git a/proxy/translator.go b/proxy/translator.go index e66d9d81..e6ca651a 100644 --- a/proxy/translator.go +++ b/proxy/translator.go @@ -1783,9 +1783,25 @@ func isAllowedServiceTier(tier string) bool { } } +func disableFastMode() bool { + return CurrentRuntimeSettings().DisableFastMode +} + +func isFastServiceTier(tier string) bool { + switch tier { + case "fast", "priority": + return true + default: + return false + } +} + // upstreamServiceTier 将客户端 service_tier 映射为上游接受的值。 // Codex 上游当前只接受 priority;auto/default/flex/scale 都不应显式转发。 func upstreamServiceTier(tier string) (string, bool) { + if disableFastMode() && isFastServiceTier(tier) { + return "", false + } switch 
tier { case "fast", "priority": return "priority", true @@ -2015,6 +2031,11 @@ func sanitizeServiceTierForUpstream(body []byte) []byte { body, _ = sjson.DeleteBytes(body, "serviceTier") return body } + if disableFastMode() && isFastServiceTier(tier) { + body, _ = sjson.DeleteBytes(body, "service_tier") + body, _ = sjson.DeleteBytes(body, "serviceTier") + return body + } switch tier { case "auto", "default", "flex", "priority", "scale", "fast": body, _ = sjson.DeleteBytes(body, "serviceTier") diff --git a/proxy/translator_test.go b/proxy/translator_test.go index c92bc497..5dffd8bf 100644 --- a/proxy/translator_test.go +++ b/proxy/translator_test.go @@ -125,6 +125,14 @@ func TestResolveUsageServiceTiersSplitsRequestedActualBilling(t *testing.T) { } } +func TestDefaultRuntimeSettingsReadsDisableFastModeEnv(t *testing.T) { + t.Setenv("CODEX_DISABLE_FAST_MODE", "true") + + if !DefaultRuntimeSettings().DisableFastMode { + t.Fatal("DisableFastMode should be true when CODEX_DISABLE_FAST_MODE=true") + } +} + func TestSanitizeServiceTierForUpstream_FastToPriority(t *testing.T) { raw := []byte(`{ "model":"gpt-5.4", @@ -159,6 +167,33 @@ func TestSanitizeServiceTierForUpstream_DropsUnsupportedClientTiers(t *testing.T } } +func TestSanitizeServiceTierForUpstream_DisableFastModeDropsFastTiers(t *testing.T) { + previous := CurrentRuntimeSettings() + t.Cleanup(func() { ApplyRuntimeSettings(previous) }) + next := previous + next.DisableFastMode = true + ApplyRuntimeSettings(next) + + for _, tier := range []string{"fast", "priority"} { + t.Run(tier, func(t *testing.T) { + raw := []byte(fmt.Sprintf(`{ + "model":"gpt-5.4", + "service_tier":%q, + "serviceTier":%q + }`, tier, tier)) + + got := sanitizeServiceTierForUpstream(raw) + + if gjson.GetBytes(got, "service_tier").Exists() { + t.Fatalf("%s service_tier should be omitted when fast mode is disabled, got body=%s", tier, got) + } + if gjson.GetBytes(got, "serviceTier").Exists() { + t.Fatalf("serviceTier should be removed when fast 
mode is disabled, got body=%s", got) + } + }) + } +} + func TestTranslateRequest_PreservesSupportedServiceTier(t *testing.T) { raw := []byte(`{ "model":"gpt-5.4", @@ -183,6 +218,32 @@ func TestTranslateRequest_PreservesSupportedServiceTier(t *testing.T) { } } +func TestTranslateRequest_DisableFastModeDropsClientFastTier(t *testing.T) { + previous := CurrentRuntimeSettings() + t.Cleanup(func() { ApplyRuntimeSettings(previous) }) + next := previous + next.DisableFastMode = true + ApplyRuntimeSettings(next) + + raw := []byte(`{ + "model":"gpt-5.4", + "messages":[{"role":"user","content":"hello"}], + "serviceTier":"priority" + }`) + + got, err := TranslateRequest(raw) + if err != nil { + t.Fatalf("TranslateRequest returned error: %v", err) + } + + if gjson.GetBytes(got, "service_tier").Exists() { + t.Fatalf("service_tier should be omitted when fast mode is disabled, got body=%s", got) + } + if gotTier := extractServiceTier(raw); gotTier != "" { + t.Fatalf("extractServiceTier should ignore fast tier when disabled, got %q", gotTier) + } +} + func TestTranslateRequest_DropsUnsupportedClientServiceTier(t *testing.T) { raw := []byte(`{ "model":"gpt-5.4",