Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions src/routes/chat-completions/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,21 @@ export async function handleCompletion(c: Context) {

if (state.manualApprove) await awaitApproval()

if (isNullish(payload.max_tokens)) {
payload = {
...payload,
max_tokens: selectedModel?.capabilities.limits.max_output_tokens,
}
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
const resolvedMaxTokens = !isNullish(payload.max_tokens) ?
payload.max_tokens
: !isNullish(payload.max_completion_tokens) ?
payload.max_completion_tokens
: selectedModel?.capabilities.limits.max_output_tokens

payload = {
...payload,
max_tokens: undefined,
max_completion_tokens: resolvedMaxTokens,
}
consola.debug(
"Set max_completion_tokens to:",
JSON.stringify(payload.max_completion_tokens),
)

const response = await createChatCompletions(payload)

Expand Down
1 change: 1 addition & 0 deletions src/routes/messages/anthropic-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export interface AnthropicMessagesPayload {
model: string
messages: Array<AnthropicMessage>
max_tokens: number
max_completion_tokens?: number
system?: string | Array<AnthropicTextBlock>
metadata?: {
user_id?: string
Expand Down
4 changes: 3 additions & 1 deletion src/routes/messages/non-stream-translation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
export function translateToOpenAI(
payload: AnthropicMessagesPayload,
): ChatCompletionsPayload {
const maxTokens = payload.max_tokens ?? payload.max_completion_tokens

return {
model: translateModelName(payload.model),
messages: translateAnthropicMessagesToOpenAI(
payload.messages,
payload.system,
),
max_tokens: payload.max_tokens,
max_completion_tokens: maxTokens,
stop: payload.stop_sequences,
stream: payload.stream,
temperature: payload.temperature,
Expand Down
46 changes: 42 additions & 4 deletions src/services/copilot/create-chat-completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,37 @@ import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
import { state } from "~/lib/state"

// Matches model ids in the GPT-5.4 family: "gpt", "5" and "4" joined by
// optional "-", "_" or "." separators (e.g. "gpt-5.4", "gpt_5_4", "gpt54"),
// bounded on both sides by a non-alphanumeric character or the string edge so
// ids like "gpt-5.40" or "xgpt-5.4" do not match. Case-insensitive.
const GPT_5_4_MODEL_PATTERN = /(^|[^a-z0-9])gpt[-_.]?5[-_.]?4(?:$|[^a-z0-9])/i

export const createChatCompletions = async (
payload: ChatCompletionsPayload,
) => {
if (!state.copilotToken) throw new Error("Copilot token not found")

const enableVision = payload.messages.some(
const normalizedPayload = normalizeChatCompletionsPayload(payload)
consola.debug("Upstream token parameter routing:", {
model: payload.model,
inputMaxTokens: payload.max_tokens,
inputMaxCompletionTokens: payload.max_completion_tokens,
upstreamTokenField:
normalizedPayload.max_completion_tokens !== undefined ?
"max_completion_tokens"
: normalizedPayload.max_tokens !== undefined ?
"max_tokens"
: null,
upstreamMaxTokens: normalizedPayload.max_tokens,
upstreamMaxCompletionTokens: normalizedPayload.max_completion_tokens,
})

const enableVision = normalizedPayload.messages.some(
(x) =>
typeof x.content !== "string"
&& x.content?.some((x) => x.type === "image_url"),
)

// Agent/user check for X-Initiator header
// Determine if any message is from an agent ("assistant" or "tool")
const isAgentCall = payload.messages.some((msg) =>
const isAgentCall = normalizedPayload.messages.some((msg) =>
["assistant", "tool"].includes(msg.role),
)

Expand All @@ -31,21 +48,41 @@ export const createChatCompletions = async (
const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
method: "POST",
headers,
body: JSON.stringify(payload),
body: JSON.stringify(normalizedPayload),
})

if (!response.ok) {
consola.error("Failed to create chat completions", response)
throw new HTTPError("Failed to create chat completions", response)
}

if (payload.stream) {
if (normalizedPayload.stream) {
return events(response)
}

return (await response.json()) as ChatCompletionResponse
}

function normalizeChatCompletionsPayload(
payload: ChatCompletionsPayload,
): ChatCompletionsPayload {
const resolvedMaxTokens = payload.max_tokens ?? payload.max_completion_tokens
const useMaxCompletionTokens = shouldUseMaxCompletionTokens(payload.model)

return {
...payload,
max_tokens: useMaxCompletionTokens ? undefined : resolvedMaxTokens,
max_completion_tokens: useMaxCompletionTokens ? resolvedMaxTokens : undefined,
}
}

/**
 * Returns true when the given model id belongs to the GPT-5.4 family, which
 * accepts `max_completion_tokens` instead of the legacy `max_tokens` field.
 *
 * NOTE(review): the previous implementation looked the model up in
 * `state.models` by id and then read back `.id` — a no-op, since the find
 * predicate already guarantees that id equals `modelId`. The dead lookup has
 * been removed; if alias-to-canonical-id resolution was intended, the lookup
 * must match on a different field than `id`.
 */
function shouldUseMaxCompletionTokens(modelId: string): boolean {
  return GPT_5_4_MODEL_PATTERN.test(modelId)
}

// Streaming types

export interface ChatCompletionChunk {
Expand Down Expand Up @@ -130,6 +167,7 @@ export interface ChatCompletionsPayload {
temperature?: number | null
top_p?: number | null
max_tokens?: number | null
max_completion_tokens?: number | null
stop?: string | Array<string> | null
n?: number | null
stream?: boolean | null
Expand Down
37 changes: 35 additions & 2 deletions tests/anthropic-request.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ import { describe, test, expect } from "bun:test"
import { z } from "zod"

import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"

import { translateToOpenAI } from "../src/routes/messages/non-stream-translation"
import { translateToOpenAI } from "~/routes/messages/non-stream-translation"

// Zod schema for a single message in the chat completion request.
const messageSchema = z.object({
Expand Down Expand Up @@ -31,6 +30,7 @@ const chatCompletionRequestSchema = z.object({
logprobs: z.boolean().optional().nullable(),
top_logprobs: z.number().int().min(0).max(20).optional().nullable(),
max_tokens: z.number().int().optional().nullable(),
max_completion_tokens: z.number().int().optional().nullable(),
n: z.number().int().min(1).max(128).optional().nullable(),
presence_penalty: z.number().min(-2).max(2).optional().nullable(),
response_format: z
Expand Down Expand Up @@ -72,6 +72,8 @@ describe("Anthropic to OpenAI translation logic", () => {

const openAIPayload = translateToOpenAI(anthropicPayload)
expect(isValidChatCompletionRequest(openAIPayload)).toBe(true)
expect(openAIPayload.max_tokens).toBeUndefined()
expect(openAIPayload.max_completion_tokens).toBe(0)
})

test("should translate comprehensive Anthropic payload to valid OpenAI payload", () => {
Expand Down Expand Up @@ -101,6 +103,8 @@ describe("Anthropic to OpenAI translation logic", () => {
}
const openAIPayload = translateToOpenAI(anthropicPayload)
expect(isValidChatCompletionRequest(openAIPayload)).toBe(true)
expect(openAIPayload.max_tokens).toBeUndefined()
expect(openAIPayload.max_completion_tokens).toBe(150)
})

test("should handle missing fields gracefully", () => {
Expand All @@ -111,6 +115,35 @@ describe("Anthropic to OpenAI translation logic", () => {
}
const openAIPayload = translateToOpenAI(anthropicPayload)
expect(isValidChatCompletionRequest(openAIPayload)).toBe(true)
expect(openAIPayload.max_tokens).toBeUndefined()
expect(openAIPayload.max_completion_tokens).toBe(0)
})

// The Anthropic API's required `max_tokens` must be carried over to the
// OpenAI payload as `max_completion_tokens`, leaving `max_tokens` unset.
test("should map max_tokens into max_completion_tokens", () => {
  const anthropicPayload: AnthropicMessagesPayload = {
    model: "gpt-4o",
    messages: [{ role: "user", content: "Hello!" }],
    max_tokens: 128,
  }

  const openAIPayload = translateToOpenAI(anthropicPayload)

  // Only the modern field should survive translation.
  expect(openAIPayload.max_tokens).toBeUndefined()
  expect(openAIPayload.max_completion_tokens).toBe(128)
})

// When a caller supplies both token-limit fields, `max_tokens` wins as the
// resolved limit (128 here, not the competing 64).
test("should prefer max_tokens when both token fields are present", () => {
  const anthropicPayload: AnthropicMessagesPayload = {
    model: "gpt-4o",
    messages: [{ role: "user", content: "Hello!" }],
    max_tokens: 128,
    max_completion_tokens: 64,
  }

  const openAIPayload = translateToOpenAI(anthropicPayload)

  expect(openAIPayload.max_tokens).toBeUndefined()
  expect(openAIPayload.max_completion_tokens).toBe(128)
})

test("should handle invalid types in Anthropic payload", () => {
Expand Down
75 changes: 64 additions & 11 deletions tests/create-chat-completions.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { test, expect, mock } from "bun:test"

import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions"

import { state } from "../src/lib/state"
import { createChatCompletions } from "../src/services/copilot/create-chat-completions"
import { state } from "~/lib/state"
import {
createChatCompletions,
type ChatCompletionsPayload,
} from "~/services/copilot/create-chat-completions"

// Mock state
state.copilotToken = "test-token"
Expand All @@ -12,11 +13,12 @@ state.accountType = "individual"

// Helper to mock fetch
const fetchMock = mock(
(_url: string, opts: { headers: Record<string, string> }) => {
(_url: string, opts: { headers: Record<string, string>; body?: string }) => {
return {
ok: true,
json: () => ({ id: "123", object: "chat.completion", choices: [] }),
headers: opts.headers,
body: opts.body,
}
},
)
Expand All @@ -33,9 +35,8 @@ test("sets X-Initiator to agent if tool/assistant present", async () => {
}
await createChatCompletions(payload)
expect(fetchMock).toHaveBeenCalled()
const headers = (
fetchMock.mock.calls[0][1] as { headers: Record<string, string> }
).headers
const lastCall = fetchMock.mock.calls.at(-1)
const headers = (lastCall?.[1] as { headers: Record<string, string> }).headers
expect(headers["X-Initiator"]).toBe("agent")
})

Expand All @@ -49,8 +50,60 @@ test("sets X-Initiator to user if only user present", async () => {
}
await createChatCompletions(payload)
expect(fetchMock).toHaveBeenCalled()
const headers = (
fetchMock.mock.calls[1][1] as { headers: Record<string, string> }
).headers
const lastCall = fetchMock.mock.calls.at(-1)
const headers = (lastCall?.[1] as { headers: Record<string, string> }).headers
expect(headers["X-Initiator"]).toBe("user")
})

// GPT-5.4 models take `max_completion_tokens` upstream: the request body sent
// to fetch must carry only that field, with `max_tokens` stripped.
test("forwards max_completion_tokens for gpt-5.4 models", async () => {
  const payload: ChatCompletionsPayload = {
    messages: [{ role: "user", content: "hi" }],
    model: "gpt-5.4",
    max_tokens: 128,
    max_completion_tokens: 128,
  }

  await createChatCompletions(payload)

  // Inspect the JSON body of the most recent upstream call.
  const body = JSON.parse(
    (fetchMock.mock.calls.at(-1)?.[1] as { body?: string }).body ?? "{}",
  ) as ChatCompletionsPayload

  expect(body.max_tokens).toBeUndefined()
  expect(body.max_completion_tokens).toBe(128)
})

// A caller that only knows the legacy `max_tokens` field still works against
// a GPT-5.4 model: normalization moves the value into `max_completion_tokens`.
test("maps legacy max_tokens to max_completion_tokens for gpt-5.4 models", async () => {
  const payload: ChatCompletionsPayload = {
    messages: [{ role: "user", content: "hi" }],
    model: "gpt-5.4-mini",
    max_tokens: 256,
  }

  await createChatCompletions(payload)

  // Inspect the JSON body of the most recent upstream call.
  const body = JSON.parse(
    (fetchMock.mock.calls.at(-1)?.[1] as { body?: string }).body ?? "{}",
  ) as ChatCompletionsPayload

  expect(body.max_tokens).toBeUndefined()
  expect(body.max_completion_tokens).toBe(256)
})

// Non-GPT-5.4 models keep the legacy field: even when the caller supplies
// `max_completion_tokens`, the upstream body must carry it as `max_tokens`.
test("keeps max_tokens for gpt-5.2 models", async () => {
  const payload: ChatCompletionsPayload = {
    messages: [{ role: "user", content: "hi" }],
    model: "gpt-5.2",
    max_completion_tokens: 512,
  }

  await createChatCompletions(payload)

  // Inspect the JSON body of the most recent upstream call.
  const body = JSON.parse(
    (fetchMock.mock.calls.at(-1)?.[1] as { body?: string }).body ?? "{}",
  ) as ChatCompletionsPayload

  expect(body.max_tokens).toBe(512)
  expect(body.max_completion_tokens).toBeUndefined()
})