From 7a85779a70c31e491d7081fa2b76105386ea7e1a Mon Sep 17 00:00:00 2001 From: yuxingfei Date: Thu, 9 Apr 2026 16:59:04 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BA=86=EF=BC=8C=E8=B0=83?= =?UTF-8?q?=E7=94=A8gpt-5.4=E6=8A=A5max=5Ftokens=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/routes/chat-completions/handler.ts | 20 +++-- src/routes/messages/anthropic-types.ts | 1 + src/routes/messages/non-stream-translation.ts | 4 +- .../copilot/create-chat-completions.ts | 46 +++++++++++- tests/anthropic-request.test.ts | 37 ++++++++- tests/create-chat-completions.test.ts | 75 ++++++++++++++++--- 6 files changed, 159 insertions(+), 24 deletions(-) diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..113520e51 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -39,13 +39,21 @@ export async function handleCompletion(c: Context) { if (state.manualApprove) await awaitApproval() - if (isNullish(payload.max_tokens)) { - payload = { - ...payload, - max_tokens: selectedModel?.capabilities.limits.max_output_tokens, - } - consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) + const resolvedMaxTokens = !isNullish(payload.max_tokens) ? + payload.max_tokens + : !isNullish(payload.max_completion_tokens) ? + payload.max_completion_tokens + : selectedModel?.capabilities.limits.max_output_tokens + + payload = { + ...payload, + max_tokens: undefined, + max_completion_tokens: resolvedMaxTokens, } + consola.debug( + "Set max_completion_tokens to:", + JSON.stringify(payload.max_completion_tokens), + ) const response = await createChatCompletions(payload) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..e75a20252 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -4,6 +4,7 @@ export interface AnthropicMessagesPayload { model: string messages: Array max_tokens: number + max_completion_tokens?: number system?: string | Array metadata?: { user_id?: string diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..52b49833b 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -29,13 +29,15 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" export function translateToOpenAI( payload: AnthropicMessagesPayload, ): ChatCompletionsPayload { + const maxTokens = payload.max_tokens ?? payload.max_completion_tokens + return { model: translateModelName(payload.model), messages: translateAnthropicMessagesToOpenAI( payload.messages, payload.system, ), - max_tokens: payload.max_tokens, + max_completion_tokens: maxTokens, stop: payload.stop_sequences, stream: payload.stream, temperature: payload.temperature, diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..30c849d26 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -5,12 +5,29 @@ import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" +const GPT_5_4_MODEL_PATTERN = /(^|[^a-z0-9])gpt[-_.]?5[-_.]?4(?:$|[^a-z0-9])/i + export const createChatCompletions = async ( payload: ChatCompletionsPayload, ) => { if (!state.copilotToken) throw new Error("Copilot token not found") - const enableVision = payload.messages.some( + const normalizedPayload = normalizeChatCompletionsPayload(payload) + consola.debug("Upstream token parameter routing:", { + model: payload.model, + inputMaxTokens: payload.max_tokens, + inputMaxCompletionTokens: payload.max_completion_tokens, + upstreamTokenField: + normalizedPayload.max_completion_tokens !== undefined ? + "max_completion_tokens" + : normalizedPayload.max_tokens !== undefined ? + "max_tokens" + : null, + upstreamMaxTokens: normalizedPayload.max_tokens, + upstreamMaxCompletionTokens: normalizedPayload.max_completion_tokens, + }) + + const enableVision = normalizedPayload.messages.some( (x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"), @@ -18,7 +35,7 @@ export const createChatCompletions = async ( // Agent/user check for X-Initiator header // Determine if any message is from an agent ("assistant" or "tool") - const isAgentCall = payload.messages.some((msg) => + const isAgentCall = normalizedPayload.messages.some((msg) => ["assistant", "tool"].includes(msg.role), ) @@ -31,7 +48,7 @@ export const createChatCompletions = async ( const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, { method: "POST", headers, - body: JSON.stringify(payload), + body: JSON.stringify(normalizedPayload), }) if (!response.ok) { @@ -39,13 +56,33 @@ export const createChatCompletions = async ( throw new HTTPError("Failed to create chat completions", response) } - if (payload.stream) { + if (normalizedPayload.stream) { return events(response) } return (await response.json()) as ChatCompletionResponse } +function normalizeChatCompletionsPayload( + payload: ChatCompletionsPayload, +): ChatCompletionsPayload { + const resolvedMaxTokens = payload.max_tokens ?? payload.max_completion_tokens + const useMaxCompletionTokens = shouldUseMaxCompletionTokens(payload.model) + + return { + ...payload, + max_tokens: useMaxCompletionTokens ? undefined : resolvedMaxTokens, + max_completion_tokens: useMaxCompletionTokens ? resolvedMaxTokens : undefined, + } +} + +function shouldUseMaxCompletionTokens(modelId: string): boolean { + const resolvedModelId = + state.models?.data.find((model) => model.id === modelId)?.id ?? modelId + + return GPT_5_4_MODEL_PATTERN.test(resolvedModelId) +} + // Streaming types export interface ChatCompletionChunk { @@ -130,6 +167,7 @@ export interface ChatCompletionsPayload { temperature?: number | null top_p?: number | null max_tokens?: number | null + max_completion_tokens?: number | null stop?: string | Array | null n?: number | null stream?: boolean | null diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 06c663778..a80ef04ba 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -2,8 +2,7 @@ import { describe, test, expect } from "bun:test" import { z } from "zod" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" - -import { translateToOpenAI } from "../src/routes/messages/non-stream-translation" +import { translateToOpenAI } from "~/routes/messages/non-stream-translation" // Zod schema for a single message in the chat completion request. const messageSchema = z.object({ @@ -31,6 +30,7 @@ const chatCompletionRequestSchema = z.object({ logprobs: z.boolean().optional().nullable(), top_logprobs: z.number().int().min(0).max(20).optional().nullable(), max_tokens: z.number().int().optional().nullable(), + max_completion_tokens: z.number().int().optional().nullable(), n: z.number().int().min(1).max(128).optional().nullable(), presence_penalty: z.number().min(-2).max(2).optional().nullable(), response_format: z @@ -72,6 +72,8 @@ describe("Anthropic to OpenAI translation logic", () => { const openAIPayload = translateToOpenAI(anthropicPayload) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + expect(openAIPayload.max_tokens).toBeUndefined() + expect(openAIPayload.max_completion_tokens).toBe(0) }) test("should translate comprehensive Anthropic payload to valid OpenAI payload", () => { @@ -101,6 +103,8 @@ describe("Anthropic to OpenAI translation logic", () => { } const openAIPayload = translateToOpenAI(anthropicPayload) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + expect(openAIPayload.max_tokens).toBeUndefined() + expect(openAIPayload.max_completion_tokens).toBe(150) }) test("should handle missing fields gracefully", () => { @@ -111,6 +115,35 @@ describe("Anthropic to OpenAI translation logic", () => { } const openAIPayload = translateToOpenAI(anthropicPayload) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + expect(openAIPayload.max_tokens).toBeUndefined() + expect(openAIPayload.max_completion_tokens).toBe(0) + }) + + test("should map max_tokens into max_completion_tokens", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "gpt-4o", + messages: [{ role: "user", content: "Hello!" }], + max_tokens: 128, + } + + const openAIPayload = translateToOpenAI(anthropicPayload) + + expect(openAIPayload.max_tokens).toBeUndefined() + expect(openAIPayload.max_completion_tokens).toBe(128) + }) + + test("should prefer max_tokens when both token fields are present", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "gpt-4o", + messages: [{ role: "user", content: "Hello!" }], + max_tokens: 128, + max_completion_tokens: 64, + } + + const openAIPayload = translateToOpenAI(anthropicPayload) + + expect(openAIPayload.max_tokens).toBeUndefined() + expect(openAIPayload.max_completion_tokens).toBe(128) }) test("should handle invalid types in Anthropic payload", () => { diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts index d18e741aa..1948f0b25 100644 --- a/tests/create-chat-completions.test.ts +++ b/tests/create-chat-completions.test.ts @@ -1,9 +1,10 @@ import { test, expect, mock } from "bun:test" -import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" - -import { state } from "../src/lib/state" -import { createChatCompletions } from "../src/services/copilot/create-chat-completions" +import { state } from "~/lib/state" +import { + createChatCompletions, + type ChatCompletionsPayload, +} from "~/services/copilot/create-chat-completions" // Mock state state.copilotToken = "test-token" @@ -12,11 +13,12 @@ state.accountType = "individual" // Helper to mock fetch const fetchMock = mock( - (_url: string, opts: { headers: Record }) => { + (_url: string, opts: { headers: Record; body?: string }) => { return { ok: true, json: () => ({ id: "123", object: "chat.completion", choices: [] }), headers: opts.headers, + body: opts.body, } }, ) @@ -33,9 +35,8 @@ test("sets X-Initiator to agent if tool/assistant present", async () => { } await createChatCompletions(payload) expect(fetchMock).toHaveBeenCalled() - const headers = ( - fetchMock.mock.calls[0][1] as { headers: Record } - ).headers + const lastCall = fetchMock.mock.calls.at(-1) + const headers = (lastCall?.[1] as { headers: Record }).headers expect(headers["X-Initiator"]).toBe("agent") }) @@ -49,8 +50,60 @@ test("sets X-Initiator to user if only user present", async () => { } await createChatCompletions(payload) expect(fetchMock).toHaveBeenCalled() - const headers = ( - fetchMock.mock.calls[1][1] as { headers: Record } - ).headers + const lastCall = fetchMock.mock.calls.at(-1) + const headers = (lastCall?.[1] as { headers: Record }).headers expect(headers["X-Initiator"]).toBe("user") }) + +test("forwards max_completion_tokens for gpt-5.4 models", async () => { + const payload: ChatCompletionsPayload = { + messages: [{ role: "user", content: "hi" }], + model: "gpt-5.4", + max_tokens: 128, + max_completion_tokens: 128, + } + + await createChatCompletions(payload) + + const body = JSON.parse( + (fetchMock.mock.calls.at(-1)?.[1] as { body?: string }).body ?? "{}", + ) as ChatCompletionsPayload + + expect(body.max_tokens).toBeUndefined() + expect(body.max_completion_tokens).toBe(128) +}) + +test("maps legacy max_tokens to max_completion_tokens for gpt-5.4 models", async () => { + const payload: ChatCompletionsPayload = { + messages: [{ role: "user", content: "hi" }], + model: "gpt-5.4-mini", + max_tokens: 256, + } + + await createChatCompletions(payload) + + const body = JSON.parse( + (fetchMock.mock.calls.at(-1)?.[1] as { body?: string }).body ?? "{}", + ) as ChatCompletionsPayload + + expect(body.max_tokens).toBeUndefined() + expect(body.max_completion_tokens).toBe(256) +}) + +test("keeps max_tokens for gpt-5.2 models", async () => { + const payload: ChatCompletionsPayload = { + messages: [{ role: "user", content: "hi" }], + model: "gpt-5.2", + max_completion_tokens: 512, + } + + await createChatCompletions(payload) + + const body = JSON.parse( + (fetchMock.mock.calls.at(-1)?.[1] as { body?: string }).body ?? "{}", + ) as ChatCompletionsPayload + + expect(body.max_tokens).toBe(512) + expect(body.max_completion_tokens).toBeUndefined() +}) +