From 8be4517ad3c15b4da06d520dacb114f37ec1c8dc Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 19:59:13 +0800 Subject: [PATCH 1/8] chore: ignore local worktrees --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 577a4f199..d26414de8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,7 @@ node_modules/ .eslintcache # build output -dist/ \ No newline at end of file +dist/ + +# local worktrees +.worktrees/ \ No newline at end of file From 7ecb1618e2a296040db2c448d9420e1013f2e9b2 Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 21:47:14 +0800 Subject: [PATCH 2/8] feat: translate Copilot reasoning for OpenAI and Anthropic Normalize reasoning effort, thinking budget, and Anthropic reasoning streams so both proxy surfaces stay aligned with Copilot model capabilities. Co-Authored-By: Claude Opus 4.6 --- src/routes/chat-completions/handler.ts | 16 + src/routes/messages/anthropic-types.ts | 7 +- src/routes/messages/count-tokens-handler.ts | 7 +- src/routes/messages/handler.ts | 22 +- src/routes/messages/non-stream-translation.ts | 64 ++- src/routes/messages/stream-translation.ts | 93 ++-- src/routes/reasoning-context.ts | 40 ++ .../copilot/create-chat-completions.ts | 7 + src/services/copilot/get-models.ts | 2 + tests/anthropic-request.test.ts | 209 ++++++++- tests/anthropic-response.test.ts | 442 +++++++++++++++++- tests/chat-completions-handler.test.ts | 203 ++++++++ tests/create-chat-completions.test.ts | 86 +++- 13 files changed, 1099 insertions(+), 99 deletions(-) create mode 100644 src/routes/reasoning-context.ts create mode 100644 tests/chat-completions-handler.test.ts diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..c4a0fc4fd 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -8,6 +8,7 @@ import { checkRateLimit } from "~/lib/rate-limit" import { state } from 
"~/lib/state" import { getTokenCount } from "~/lib/tokenizer" import { isNullish } from "~/lib/utils" +import { buildOpenAIReasoningContext } from "~/routes/reasoning-context" import { createChatCompletions, type ChatCompletionResponse, @@ -47,6 +48,21 @@ export async function handleCompletion(c: Context) { consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } + const reasoningContext = buildOpenAIReasoningContext(payload, selectedModel) + + if (payload.thinking_budget && !reasoningContext.thinkingBudget) { + consola.debug( + "Dropping unsupported OpenAI thinking_budget for model:", + payload.model, + ) + } + + payload = { + ...payload, + reasoning_effort: reasoningContext.reasoningEffort, + thinking_budget: reasoningContext.thinkingBudget, + } + const response = await createChatCompletions(payload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..ca851dc18 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -19,7 +19,7 @@ export interface AnthropicMessagesPayload { name?: string } thinking?: { - type: "enabled" + type: "enabled" | "disabled" budget_tokens?: number } service_tier?: "auto" | "standard_only" @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature?: string } export type AnthropicUserContentBlock = @@ -92,6 +93,7 @@ export interface AnthropicResponse { role: "assistant" content: Array model: string + reasoning_opaque?: string stop_reason: | "end_turn" | "max_tokens" @@ -195,7 +197,8 @@ export type AnthropicStreamEventData = export interface AnthropicStreamState { messageStartSent: boolean contentBlockIndex: number - contentBlockOpen: boolean + currentBlockType?: "text" | "thinking" | "tool_use" + reasoningOpaque?: string toolCalls: { [openAIToolIndex: number]: { id: string diff --git 
a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts index 2ec849cb8..60e0a417d 100644 --- a/src/routes/messages/count-tokens-handler.ts +++ b/src/routes/messages/count-tokens-handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" +import { buildAnthropicReasoningContext } from "~/routes/reasoning-context" import { type AnthropicMessagesPayload } from "./anthropic-types" import { translateToOpenAI } from "./non-stream-translation" @@ -17,11 +18,13 @@ export async function handleCountTokens(c: Context) { const anthropicPayload = await c.req.json() - const openAIPayload = translateToOpenAI(anthropicPayload) - const selectedModel = state.models?.data.find( (model) => model.id === anthropicPayload.model, ) + const openAIPayload = translateToOpenAI( + anthropicPayload, + buildAnthropicReasoningContext(anthropicPayload, selectedModel), + ) if (!selectedModel) { consola.warn("Model not found, returning default token count") diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..3e8f236cb 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -6,6 +6,7 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { buildAnthropicReasoningContext } from "~/routes/reasoning-context" import { createChatCompletions, type ChatCompletionChunk, @@ -28,7 +29,25 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) - const openAIPayload = translateToOpenAI(anthropicPayload) + const selectedModel = state.models?.data.find( + (model) => model.id === anthropicPayload.model, + ) + const reasoningContext = buildAnthropicReasoningContext( + 
anthropicPayload, + selectedModel, + ) + + if ( + anthropicPayload.thinking?.type === "enabled" + && selectedModel?.capabilities.adaptive_thinking !== true + ) { + consola.debug( + "Stripping unsupported Anthropic thinking config for model:", + anthropicPayload.model, + ) + } + + const openAIPayload = translateToOpenAI(anthropicPayload, reasoningContext) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), @@ -58,7 +77,6 @@ export async function handleCompletion(c: Context) { const streamState: AnthropicStreamState = { messageStartSent: false, contentBlockIndex: 0, - contentBlockOpen: false, toolCalls: {}, } diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..31ec1ec48 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import type { ReasoningContext } from "~/routes/reasoning-context" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -28,6 +30,7 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" export function translateToOpenAI( payload: AnthropicMessagesPayload, + context: ReasoningContext, ): ChatCompletionsPayload { return { model: translateModelName(payload.model), @@ -43,6 +46,8 @@ export function translateToOpenAI( user: payload.metadata?.user_id, tools: translateAnthropicToolsToOpenAI(payload.tools), tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), + reasoning_effort: context.reasoningEffort, + thinking_budget: context.thinkingBudget, } } @@ -281,35 +286,23 @@ function translateAnthropicToolChoiceToOpenAI( export function translateToAnthropic( response: ChatCompletionResponse, ): AnthropicResponse { - // Merge content from all choices - const allTextBlocks: Array = [] - const allToolUseBlocks: Array = [] - let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null = - null // default - stopReason = 
response.choices[0]?.finish_reason ?? stopReason - - // Process all choices to extract text and tool use blocks - for (const choice of response.choices) { - const textBlocks = getAnthropicTextBlocks(choice.message.content) - const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) - - allTextBlocks.push(...textBlocks) - allToolUseBlocks.push(...toolUseBlocks) - - // Use the finish_reason from the first choice, or prioritize tool_calls - if (choice.finish_reason === "tool_calls" || stopReason === "stop") { - stopReason = choice.finish_reason - } - } - - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + const content = response.choices.flatMap((choice) => [ + ...getAnthropicThinkingBlocks(choice.message.reasoning_text), + ...getAnthropicTextBlocks(choice.message.content), + ...getAnthropicToolUseBlocks(choice.message.tool_calls), + ]) + const reasoningOpaque = response.choices.find( + (choice) => choice.message.reasoning_opaque, + )?.message.reasoning_opaque + const stopReason = getAnthropicStopReason(response.choices) return { id: response.id, type: "message", role: "assistant", model: response.model, - content: [...allTextBlocks, ...allToolUseBlocks], + reasoning_opaque: reasoningOpaque, + content, stop_reason: mapOpenAIStopReasonToAnthropic(stopReason), stop_sequence: null, usage: { @@ -326,6 +319,31 @@ export function translateToAnthropic( } } +function getAnthropicStopReason( + choices: ChatCompletionResponse["choices"], +): "stop" | "length" | "tool_calls" | "content_filter" | null { + let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null = + choices[0]?.finish_reason ?? 
null + + for (const choice of choices) { + if (choice.finish_reason === "tool_calls" || stopReason === "stop") { + stopReason = choice.finish_reason + } + } + + return stopReason +} + +function getAnthropicThinkingBlocks( + reasoningText: string | null | undefined, +): Array { + if (!reasoningText) { + return [] + } + + return [{ type: "thinking", thinking: reasoningText }] +} + function getAnthropicTextBlocks( messageContent: Message["content"], ): Array { diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index 55094448f..884d78234 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -6,14 +6,32 @@ import { } from "./anthropic-types" import { mapOpenAIStopReasonToAnthropic } from "./utils" -function isToolBlockOpen(state: AnthropicStreamState): boolean { - if (!state.contentBlockOpen) { - return false +function closeOpenBlock( + events: Array, + state: AnthropicStreamState, +): void { + if (!state.currentBlockType) { + return + } + + if (state.currentBlockType === "thinking" && state.reasoningOpaque) { + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: state.reasoningOpaque, + }, + }) + state.reasoningOpaque = undefined } - // Check if the current block index corresponds to any known tool call - return Object.values(state.toolCalls).some( - (tc) => tc.anthropicBlockIndex === state.contentBlockIndex, - ) + + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.currentBlockType = undefined } // eslint-disable-next-line max-lines-per-function, complexity @@ -57,18 +75,43 @@ export function translateChunkToAnthropicEvents( state.messageStartSent = true } - if (delta.content) { - if (isToolBlockOpen(state)) { - // A tool block was open, so close it before starting a text block. 
+ if (delta.reasoning_opaque) { + state.reasoningOpaque = delta.reasoning_opaque + } + + if (delta.reasoning_text) { + if (state.currentBlockType && state.currentBlockType !== "thinking") { + closeOpenBlock(events, state) + } + + if (!state.currentBlockType) { events.push({ - type: "content_block_stop", + type: "content_block_start", index: state.contentBlockIndex, + content_block: { + type: "thinking", + thinking: "", + }, }) - state.contentBlockIndex++ - state.contentBlockOpen = false + state.currentBlockType = "thinking" } - if (!state.contentBlockOpen) { + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "thinking_delta", + thinking: delta.reasoning_text, + }, + }) + } + + if (delta.content) { + if (state.currentBlockType && state.currentBlockType !== "text") { + closeOpenBlock(events, state) + } + + if (!state.currentBlockType) { events.push({ type: "content_block_start", index: state.contentBlockIndex, @@ -77,7 +120,7 @@ export function translateChunkToAnthropicEvents( text: "", }, }) - state.contentBlockOpen = true + state.currentBlockType = "text" } events.push({ @@ -94,14 +137,8 @@ export function translateChunkToAnthropicEvents( for (const toolCall of delta.tool_calls) { if (toolCall.id && toolCall.function?.name) { // New tool call starting. - if (state.contentBlockOpen) { - // Close any previously open block. 
- events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockIndex++ - state.contentBlockOpen = false + if (state.currentBlockType) { + closeOpenBlock(events, state) } const anthropicBlockIndex = state.contentBlockIndex @@ -121,7 +158,7 @@ export function translateChunkToAnthropicEvents( input: {}, }, }) - state.contentBlockOpen = true + state.currentBlockType = "tool_use" } if (toolCall.function?.arguments) { @@ -143,13 +180,7 @@ export function translateChunkToAnthropicEvents( } if (choice.finish_reason) { - if (state.contentBlockOpen) { - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockOpen = false - } + closeOpenBlock(events, state) events.push( { diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts new file mode 100644 index 000000000..b41b742e8 --- /dev/null +++ b/src/routes/reasoning-context.ts @@ -0,0 +1,40 @@ +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" +import type { ChatCompletionsPayload } from "~/services/copilot/create-chat-completions" +import type { Model } from "~/services/copilot/get-models" + +export interface ReasoningContext { + reasoningEffort?: "low" | "medium" | "high" + thinkingBudget?: number +} + +export function buildAnthropicReasoningContext( + payload: AnthropicMessagesPayload, + model: Model | undefined, +): ReasoningContext { + const adaptiveThinkingSupported = + model?.capabilities.adaptive_thinking === true + const thinkingEnabled = payload.thinking?.type === "enabled" + return { + reasoningEffort: + thinkingEnabled && adaptiveThinkingSupported ? "high" : undefined, + thinkingBudget: + thinkingEnabled && adaptiveThinkingSupported ? 
+ payload.thinking?.budget_tokens + : undefined, + } +} + +export function buildOpenAIReasoningContext( + payload: ChatCompletionsPayload, + model: Model | undefined, +): ReasoningContext { + const adaptiveThinkingSupported = + model?.capabilities.adaptive_thinking === true + return { + reasoningEffort: payload.reasoning_effort ?? undefined, + thinkingBudget: + adaptiveThinkingSupported ? + (payload.thinking_budget ?? undefined) + : undefined, + } +} diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..15f3e0a21 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -71,6 +71,8 @@ export interface ChatCompletionChunk { interface Delta { content?: string | null + reasoning_opaque?: string | null + reasoning_text?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ index: number @@ -112,6 +114,8 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_opaque?: string | null + reasoning_text?: string | null tool_calls?: Array } @@ -139,7 +143,10 @@ export interface ChatCompletionsPayload { logit_bias?: Record | null logprobs?: boolean | null response_format?: { type: "json_object" } | null + reasoning_effort?: "low" | "medium" | "high" | null seed?: number | null + stream_options?: { include_usage?: boolean } | null + thinking_budget?: number | null tools?: Array | null tool_choice?: | "none" diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..8d5fdb1d3 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -28,9 +28,11 @@ interface ModelSupports { tool_calls?: boolean parallel_tool_calls?: boolean dimensions?: boolean + adaptive_thinking?: boolean } interface ModelCapabilities { + adaptive_thinking?: boolean family: string limits: ModelLimits 
object: string diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 06c663778..0bca2c7a7 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -4,6 +4,12 @@ import { z } from "zod" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" import { translateToOpenAI } from "../src/routes/messages/non-stream-translation" +import { buildAnthropicReasoningContext } from "../src/routes/reasoning-context" + +const disabledReasoningContext = { + reasoningEffort: undefined, + thinkingBudget: undefined, +} // Zod schema for a single message in the chat completion request. const messageSchema = z.object({ @@ -50,6 +56,8 @@ const chatCompletionRequestSchema = z.object({ tools: z.array(z.any()).optional(), tool_choice: z.union([z.string(), z.object({})]).optional(), user: z.string().optional(), + reasoning_effort: z.enum(["low", "medium", "high"]).optional(), + thinking_budget: z.number().int().optional(), }) /** @@ -62,6 +70,7 @@ function isValidChatCompletionRequest(payload: unknown): boolean { return result.success } +// eslint-disable-next-line max-lines-per-function describe("Anthropic to OpenAI translation logic", () => { test("should translate minimal Anthropic payload to valid OpenAI payload", () => { const anthropicPayload: AnthropicMessagesPayload = { @@ -70,7 +79,10 @@ describe("Anthropic to OpenAI translation logic", () => { max_tokens: 0, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) }) @@ -99,7 +111,10 @@ describe("Anthropic to OpenAI translation logic", () => { ], tool_choice: { type: "auto" }, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) 
expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) }) @@ -109,7 +124,10 @@ describe("Anthropic to OpenAI translation logic", () => { messages: [{ role: "user", content: "Hello!" }], max_tokens: 0, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) }) @@ -120,7 +138,10 @@ describe("Anthropic to OpenAI translation logic", () => { temperature: "hot", // Should be a number } // @ts-expect-error intended to be invalid - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) // Should fail validation expect(isValidChatCompletionRequest(openAIPayload)).toBe(false) }) @@ -143,7 +164,10 @@ describe("Anthropic to OpenAI translation logic", () => { ], max_tokens: 100, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) // Check that thinking content is combined with text content @@ -181,7 +205,10 @@ describe("Anthropic to OpenAI translation logic", () => { ], max_tokens: 100, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) // Check that thinking content is included in the message content @@ -197,8 +224,178 @@ describe("Anthropic to OpenAI translation logic", () => { expect(assistantMessage?.tool_calls).toHaveLength(1) expect(assistantMessage?.tool_calls?.[0].function.name).toBe("get_weather") }) + + test("enabled thinking maps to reasoning effort and thinking budget", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4-20250514", + 
messages: [{ role: "user", content: "Think carefully." }], + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + } + + const openAIPayload = translateToOpenAI(anthropicPayload, { + reasoningEffort: "high", + thinkingBudget: 2048, + adaptiveThinkingSupported: true, + }) + + expect(openAIPayload.reasoning_effort).toBe("high") + expect(openAIPayload.thinking_budget).toBe(2048) + expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + }) + + test("disabled thinking omits reasoning fields", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4-20250514", + messages: [{ role: "user", content: "Answer directly." }], + max_tokens: 256, + thinking: { type: "disabled" }, + } + + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) + + expect(openAIPayload.reasoning_effort).toBeUndefined() + expect(openAIPayload.thinking_budget).toBeUndefined() + expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + }) + + test("emits tool results before remaining user content from mixed user content arrays", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4-20250514", + messages: [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_123", + name: "lookup_weather", + input: { location: "Boston" }, + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_123", + content: "72 and sunny", + }, + { + type: "text", + text: "Please summarize that for me.", + }, + ], + }, + ], + max_tokens: 256, + } + + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) + + expect(openAIPayload.messages).toEqual([ + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "toolu_123", + type: "function", + function: { + name: "lookup_weather", + arguments: JSON.stringify({ location: "Boston" }), + }, + }, + ], + }, + { + role: "tool", + tool_call_id: 
"toolu_123", + content: "72 and sunny", + }, + { + role: "user", + content: "Please summarize that for me.", + }, + ]) + }) }) +describe("reasoning context helpers", () => { + test("adaptive Claude model returns the expected Anthropic reasoning context", () => { + expect( + buildAnthropicReasoningContext( + { + model: "claude-sonnet-4-20250514", + messages: [], + max_tokens: 1024, + thinking: { type: "enabled", budget_tokens: 2048 }, + }, + { + id: "claude-sonnet-4-20250514", + model_picker_enabled: true, + name: "Claude Sonnet 4", + object: "model", + preview: false, + vendor: "anthropic", + version: "20250514", + capabilities: { + adaptive_thinking: true, + family: "claude", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "claude", + type: "chat", + }, + }, + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: 2048, + }) + }) + + test("unsupported model does not expose Anthropic adaptive thinking fields", () => { + expect( + buildAnthropicReasoningContext( + { + model: "mistral-large", + messages: [], + max_tokens: 1024, + thinking: { type: "enabled", budget_tokens: 2048 }, + }, + { + id: "mistral-large", + model_picker_enabled: true, + name: "Mistral Large", + object: "model", + preview: false, + vendor: "mistral", + version: "latest", + capabilities: { + family: "mistral", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "mistral", + type: "chat", + }, + }, + ), + ).toEqual({ + reasoningEffort: undefined, + thinkingBudget: undefined, + }) + }) +}) describe("OpenAI Chat Completion v1 Request Payload Validation with Zod", () => { test("should return true for a minimal valid request payload", () => { const validPayload = { diff --git a/tests/anthropic-response.test.ts b/tests/anthropic-response.test.ts index ecd71aacc..3fc48c129 100644 --- a/tests/anthropic-response.test.ts +++ b/tests/anthropic-response.test.ts @@ -6,7 +6,10 @@ import type { ChatCompletionResponse, } from 
"~/services/copilot/create-chat-completions" -import { type AnthropicStreamState } from "~/routes/messages/anthropic-types" +import { + type AnthropicResponse, + type AnthropicStreamState, +} from "~/routes/messages/anthropic-types" import { translateToAnthropic } from "~/routes/messages/non-stream-translation" import { translateChunkToAnthropicEvents } from "~/routes/messages/stream-translation" @@ -20,6 +23,11 @@ const anthropicContentBlockTextSchema = z.object({ text: z.string(), }) +const anthropicContentBlockThinkingSchema = z.object({ + type: z.literal("thinking"), + thinking: z.string(), +}) + const anthropicContentBlockToolUseSchema = z.object({ type: z.literal("tool_use"), id: z.string(), @@ -27,6 +35,19 @@ const anthropicContentBlockToolUseSchema = z.object({ input: z.record(z.string(), z.any()), }) +const anthropicStopReasonSchema = z.custom( + (value) => + value === null + || [ + "end_turn", + "max_tokens", + "pause_turn", + "refusal", + "stop_sequence", + "tool_use", + ].includes(value as string), +) + const anthropicMessageResponseSchema = z.object({ id: z.string(), type: z.literal("message"), @@ -34,11 +55,13 @@ const anthropicMessageResponseSchema = z.object({ content: z.array( z.union([ anthropicContentBlockTextSchema, + anthropicContentBlockThinkingSchema, anthropicContentBlockToolUseSchema, ]), ), model: z.string(), - stop_reason: z.enum(["end_turn", "max_tokens", "stop_sequence", "tool_use"]), + reasoning_opaque: z.string().optional(), + stop_reason: anthropicStopReasonSchema, stop_sequence: z.string().nullable(), usage: anthropicUsageSchema, }) @@ -63,12 +86,20 @@ const anthropicStreamEventSchema = z.looseObject({ ]), }) +function createInitialStreamState(): AnthropicStreamState { + return { + messageStartSent: false, + contentBlockIndex: 0, + toolCalls: {}, + } +} + function isValidAnthropicStreamEvent(payload: unknown): boolean { return anthropicStreamEventSchema.safeParse(payload).success } describe("OpenAI to Anthropic Non-Streaming 
Response Translation", () => { - test("should translate a simple text response correctly", () => { + test("should translate reasoning_text into a thinking block and preserve reasoning_opaque", () => { const openAIResponse: ChatCompletionResponse = { id: "chatcmpl-123", object: "chat.completion", @@ -80,6 +111,8 @@ describe("OpenAI to Anthropic Non-Streaming Response Translation", () => { message: { role: "assistant", content: "Hello! How can I help you today?", + reasoning_text: "Need to explain available help clearly.", + reasoning_opaque: "opaque-token-123", }, finish_reason: "stop", logprobs: null, @@ -98,15 +131,53 @@ describe("OpenAI to Anthropic Non-Streaming Response Translation", () => { expect(anthropicResponse.id).toBe("chatcmpl-123") expect(anthropicResponse.stop_reason).toBe("end_turn") + expect(anthropicResponse.reasoning_opaque).toBe("opaque-token-123") expect(anthropicResponse.usage.input_tokens).toBe(9) - expect(anthropicResponse.content[0].type).toBe("text") - if (anthropicResponse.content[0].type === "text") { - expect(anthropicResponse.content[0].text).toBe( - "Hello! How can I help you today?", - ) - } else { - throw new Error("Expected text block") + expect(anthropicResponse.content).toHaveLength(2) + expect(anthropicResponse.content[0]).toEqual({ + type: "thinking", + thinking: "Need to explain available help clearly.", + }) + expect(anthropicResponse.content[1]).toEqual({ + type: "text", + text: "Hello! How can I help you today?", + }) + }) + + test("should keep a plain text response unchanged when reasoning is absent", () => { + const openAIResponse: ChatCompletionResponse = { + id: "chatcmpl-124", + object: "chat.completion", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "Hello! 
How can I help you today?", + }, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 9, + completion_tokens: 12, + total_tokens: 21, + }, } + + const anthropicResponse = translateToAnthropic(openAIResponse) + + expect(isValidAnthropicResponse(anthropicResponse)).toBe(true) + expect(anthropicResponse.reasoning_opaque).toBeUndefined() + expect(anthropicResponse.content).toEqual([ + { + type: "text", + text: "Hello! How can I help you today?", + }, + ]) }) test("should translate a response with tool calls", () => { @@ -191,6 +262,7 @@ describe("OpenAI to Anthropic Non-Streaming Response Translation", () => { }) }) +// eslint-disable-next-line max-lines-per-function describe("OpenAI to Anthropic Streaming Response Translation", () => { test("should translate a simple text stream correctly", () => { const openAIStream: Array = [ @@ -247,12 +319,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { }, ] - const streamState: AnthropicStreamState = { - messageStartSent: false, - contentBlockIndex: 0, - contentBlockOpen: false, - toolCalls: {}, - } + const streamState = createInitialStreamState() const translatedStream = openAIStream.flatMap((chunk) => translateChunkToAnthropicEvents(chunk, streamState), ) @@ -262,6 +329,342 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { } }) + test("should emit thinking transitions before text transitions when reasoning chunk precedes text chunk", () => { + const openAIStream: Array = [ + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { + reasoning_text: "Need to answer carefully.", + reasoning_opaque: "sig-123", + }, + 
finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { content: "Hello" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 20, + completion_tokens: 7, + total_tokens: 27, + prompt_tokens_details: { + cached_tokens: 5, + }, + }, + }, + ] + + const streamState = createInitialStreamState() + const translatedStream = openAIStream.flatMap((chunk) => + translateChunkToAnthropicEvents(chunk, streamState), + ) + + expect(translatedStream).toEqual([ + { + type: "message_start", + message: { + id: "cmpl-reasoning", + type: "message", + role: "assistant", + content: [], + model: "gpt-4o-2024-05-13", + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 0, + output_tokens: 0, + }, + }, + }, + { + type: "content_block_start", + index: 0, + content_block: { + type: "thinking", + thinking: "", + }, + }, + { + type: "content_block_delta", + index: 0, + delta: { + type: "thinking_delta", + thinking: "Need to answer carefully.", + }, + }, + { + type: "content_block_delta", + index: 0, + delta: { + type: "signature_delta", + signature: "sig-123", + }, + }, + { + type: "content_block_stop", + index: 0, + }, + { + type: "content_block_start", + index: 1, + content_block: { + type: "text", + text: "", + }, + }, + { + type: "content_block_delta", + index: 1, + delta: { + type: "text_delta", + text: "Hello", + }, + }, + { + type: "content_block_stop", + index: 1, + }, + { + type: "message_delta", + delta: { + stop_reason: "end_turn", + stop_sequence: null, + }, + usage: { + input_tokens: 15, + output_tokens: 7, + cache_read_input_tokens: 5, + }, + }, + { + type: "message_stop", 
+ }, + ]) + }) + + test("should emit each thinking signature only for its own thinking block", () => { + const openAIStream: Array = [ + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { + reasoning_text: "First thought.", + reasoning_opaque: "sig-first", + }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { content: "Answer" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { reasoning_text: "Second thought." 
}, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "stop", + logprobs: null, + }, + ], + }, + ] + + const streamState = createInitialStreamState() + const translatedStream = openAIStream.flatMap((chunk) => + translateChunkToAnthropicEvents(chunk, streamState), + ) + + const signatureEvents = translatedStream.filter( + (event) => + event.type === "content_block_delta" + && event.delta.type === "signature_delta", + ) + + expect(signatureEvents).toEqual([ + { + type: "content_block_delta", + index: 0, + delta: { + type: "signature_delta", + signature: "sig-first", + }, + }, + ]) + }) + + test("should preserve final usage on message_delta including cache_read_input_tokens", () => { + const openAIStream: Array = [ + { + id: "cmpl-usage", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-usage", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { content: "Hello" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-usage", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 14, + completion_tokens: 6, + total_tokens: 20, + prompt_tokens_details: { + cached_tokens: 4, + }, + }, + }, + ] + + const streamState = createInitialStreamState() + const translatedStream = openAIStream.flatMap((chunk) => + translateChunkToAnthropicEvents(chunk, streamState), + ) + + const messageDeltaEvent = translatedStream.find( + (event) => event.type === "message_delta", + ) + + 
expect(messageDeltaEvent).toEqual({ + type: "message_delta", + delta: { + stop_reason: "end_turn", + stop_sequence: null, + }, + usage: { + input_tokens: 10, + output_tokens: 6, + cache_read_input_tokens: 4, + }, + }) + }) + test("should translate a stream with tool calls", () => { const openAIStream: Array = [ { @@ -347,12 +750,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { ] // Streaming translation requires state - const streamState: AnthropicStreamState = { - messageStartSent: false, - contentBlockIndex: 0, - contentBlockOpen: false, - toolCalls: {}, - } + const streamState = createInitialStreamState() const translatedStream = openAIStream.flatMap((chunk) => translateChunkToAnthropicEvents(chunk, streamState), ) diff --git a/tests/chat-completions-handler.test.ts b/tests/chat-completions-handler.test.ts new file mode 100644 index 000000000..e128b268e --- /dev/null +++ b/tests/chat-completions-handler.test.ts @@ -0,0 +1,203 @@ +import type { Context } from "hono" + +import { beforeEach, describe, expect, mock, test } from "bun:test" + +import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" + +import { state } from "../src/lib/state" +import { handleCompletion } from "../src/routes/chat-completions/handler" + +const fetchMock = mock( + (_url: string, opts: { body?: string | ReadableStream | null }) => { + return Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + id: "chatcmpl-test", + object: "chat.completion" as const, + created: 1, + model: "mock-model", + choices: [], + }), + body: opts.body, + }) + }, +) + +const awaitApprovalMock = mock(() => Promise.resolve()) +const checkRateLimitMock = mock(() => Promise.resolve()) +const getTokenCountMock = mock(() => Promise.resolve(123)) +const streamSSEMock = mock(() => Promise.resolve(new Response("stream"))) +const debugMock = mock(() => {}) +const infoMock = mock(() => {}) +const warnMock = mock(() => {}) + +// @ts-expect-error - 
Mock fetch doesn't implement all fetch properties +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +void mock.module("../src/lib/approval", () => ({ + awaitApproval: awaitApprovalMock, +})) + +void mock.module("../src/lib/rate-limit", () => ({ + checkRateLimit: checkRateLimitMock, +})) + +void mock.module("../src/lib/tokenizer", () => ({ + getTokenCount: getTokenCountMock, +})) + +void mock.module("hono/streaming", () => ({ + streamSSE: streamSSEMock, +})) + +void mock.module("consola", () => ({ + default: { + debug: debugMock, + info: infoMock, + warn: warnMock, + }, +})) + +function createContext(payload: ChatCompletionsPayload): Context { + return { + req: { + json: () => Promise.resolve(payload), + }, + json: (body: unknown) => body, + } as unknown as Context +} + +function getLastRequestBody() { + const lastCall = fetchMock.mock.calls.at(-1) + expect(lastCall).toBeDefined() + + if (!lastCall) { + throw new Error("Expected fetch to be called") + } + + const options = lastCall[1] as { body: string } + return JSON.parse(options.body) as ChatCompletionsPayload +} + +describe("handleCompletion reasoning normalization", () => { + beforeEach(() => { + fetchMock.mockClear() + awaitApprovalMock.mockClear() + checkRateLimitMock.mockClear() + getTokenCountMock.mockClear() + streamSSEMock.mockClear() + debugMock.mockClear() + infoMock.mockClear() + warnMock.mockClear() + + state.manualApprove = false + state.copilotToken = "test-token" + state.vsCodeVersion = "1.0.0" + state.accountType = "individual" + state.models = { + object: "list", + data: [], + } + }) + + test("adaptive Claude model keeps reasoning_effort, thinking_budget, stream_options", async () => { + state.models = { + object: "list", + data: [ + { + id: "claude-adaptive", + name: "Claude Adaptive", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "anthropic", + version: "1", + capabilities: { + family: "claude", + object: "model_capabilities", + tokenizer: 
"claude", + type: "chat", + adaptive_thinking: true, + supports: { + adaptive_thinking: true, + }, + limits: { + max_output_tokens: 8192, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "claude-adaptive", + reasoning_effort: "high", + thinking_budget: 2048, + stream_options: { include_usage: true }, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(getLastRequestBody()).toMatchObject({ + reasoning_effort: "high", + thinking_budget: 2048, + stream_options: { include_usage: true }, + max_tokens: 8192, + }) + }) + + test("non-Claude adaptive model keeps thinking_budget", async () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-adaptive", + name: "GPT Adaptive", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "openai", + version: "1", + capabilities: { + family: "gpt", + object: "model_capabilities", + tokenizer: "gpt", + type: "chat", + adaptive_thinking: true, + supports: { + adaptive_thinking: true, + }, + limits: { + max_output_tokens: 4096, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "gpt-adaptive", + reasoning_effort: "medium", + thinking_budget: 1024, + stream_options: { include_usage: true }, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(getLastRequestBody()).toMatchObject({ + reasoning_effort: "medium", + thinking_budget: 1024, + stream_options: { include_usage: true }, + max_tokens: 4096, + }) + expect(debugMock).not.toHaveBeenCalledWith( + "Dropping unsupported OpenAI thinking_budget for model:", + "gpt-adaptive", + ) + }) +}) diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts index d18e741aa..1b7f15e9d 100644 --- a/tests/create-chat-completions.test.ts +++ 
b/tests/create-chat-completions.test.ts @@ -1,9 +1,11 @@ -import { test, expect, mock } from "bun:test" +import { beforeEach, test, expect, mock } from "bun:test" -import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" +import type { + ChatCompletionChunk, + ChatCompletionsPayload, +} from "../src/services/copilot/create-chat-completions" import { state } from "../src/lib/state" -import { createChatCompletions } from "../src/services/copilot/create-chat-completions" // Mock state state.copilotToken = "test-token" @@ -23,7 +25,29 @@ const fetchMock = mock( // @ts-expect-error - Mock fetch doesn't implement all fetch properties ;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock +function getLastFetchCallOptions() { + const lastCall = fetchMock.mock.calls.at(-1) + expect(lastCall).toBeDefined() + + if (!lastCall) { + throw new Error("Expected fetch to be called") + } + + return lastCall[1] as { headers: Record; body: string } +} + +async function loadCreateChatCompletions() { + const mod = await import("../src/services/copilot/create-chat-completions") + return mod.createChatCompletions +} + +beforeEach(() => { + fetchMock.mockClear() +}) + test("sets X-Initiator to agent if tool/assistant present", async () => { + const createChatCompletions = await loadCreateChatCompletions() + const payload: ChatCompletionsPayload = { messages: [ { role: "user", content: "hi" }, @@ -32,14 +56,14 @@ test("sets X-Initiator to agent if tool/assistant present", async () => { model: "gpt-test", } await createChatCompletions(payload) - expect(fetchMock).toHaveBeenCalled() - const headers = ( - fetchMock.mock.calls[0][1] as { headers: Record } - ).headers + expect(fetchMock).toHaveBeenCalledTimes(1) + const { headers } = getLastFetchCallOptions() expect(headers["X-Initiator"]).toBe("agent") }) test("sets X-Initiator to user if only user present", async () => { + const createChatCompletions = await loadCreateChatCompletions() + 
const payload: ChatCompletionsPayload = { messages: [ { role: "user", content: "hi" }, @@ -48,9 +72,49 @@ test("sets X-Initiator to user if only user present", async () => { model: "gpt-test", } await createChatCompletions(payload) - expect(fetchMock).toHaveBeenCalled() - const headers = ( - fetchMock.mock.calls[1][1] as { headers: Record } - ).headers + expect(fetchMock).toHaveBeenCalledTimes(1) + const { headers } = getLastFetchCallOptions() expect(headers["X-Initiator"]).toBe("user") }) + +test("forwards reasoning and stream options upstream unchanged", async () => { + const createChatCompletions = await loadCreateChatCompletions() + + const payload = { + messages: [{ role: "user", content: "reason" }], + model: "gpt-test", + reasoning_effort: "high", + thinking_budget: 2048, + stream_options: { include_usage: true }, + } satisfies ChatCompletionsPayload + + await createChatCompletions(payload) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const { body } = getLastFetchCallOptions() + expect(JSON.parse(body)).toEqual(payload) +}) + +test("ChatCompletionChunk typing accepts reasoning fields", () => { + const chunk = { + id: "chunk-1", + object: "chat.completion.chunk", + created: 123, + model: "gpt-test", + choices: [ + { + index: 0, + delta: { + role: "assistant", + reasoning_text: "thinking", + reasoning_opaque: "opaque-token", + }, + finish_reason: null, + logprobs: null, + }, + ], + } satisfies ChatCompletionChunk + + expect(chunk.choices[0]?.delta.reasoning_text).toBe("thinking") + expect(chunk.choices[0]?.delta.reasoning_opaque).toBe("opaque-token") +}) From 3a659461f1469d1e8c06f95a52d2d445cdc091e6 Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 22:53:09 +0800 Subject: [PATCH 3/8] fix: read reasoning capabilities from capabilities.supports Copilot advertises adaptive_thinking and reasoning_effort under `capabilities.supports`, not at the top level of `capabilities`. 
The previous gate looked at the wrong field, so Anthropic `thinking` was always stripped and reasoning never reached upstream for /v1/messages. Read the correct fields and gate each surface on what the model actually supports. Co-Authored-By: Claude Opus 4.6 --- src/routes/messages/handler.ts | 3 ++- src/routes/messages/non-stream-translation.ts | 2 +- src/routes/reasoning-context.ts | 27 ++++++++++++------- src/services/copilot/get-models.ts | 2 +- tests/anthropic-request.test.ts | 11 ++++---- tests/chat-completions-handler.test.ts | 4 +-- 6 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 3e8f236cb..b1e579148 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -39,7 +39,8 @@ export async function handleCompletion(c: Context) { if ( anthropicPayload.thinking?.type === "enabled" - && selectedModel?.capabilities.adaptive_thinking !== true + && reasoningContext.reasoningEffort === undefined + && reasoningContext.thinkingBudget === undefined ) { consola.debug( "Stripping unsupported Anthropic thinking config for model:", diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index 31ec1ec48..4b299c726 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -301,7 +301,7 @@ export function translateToAnthropic( type: "message", role: "assistant", model: response.model, - reasoning_opaque: reasoningOpaque, + reasoning_opaque: reasoningOpaque ?? 
undefined, content, stop_reason: mapOpenAIStopReasonToAnthropic(stopReason), stop_sequence: null, diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts index b41b742e8..03f851c31 100644 --- a/src/routes/reasoning-context.ts +++ b/src/routes/reasoning-context.ts @@ -7,18 +7,26 @@ export interface ReasoningContext { thinkingBudget?: number } +function supportsReasoningEffort(model: Model | undefined): boolean { + const levels = model?.capabilities.supports.reasoning_effort + return Array.isArray(levels) && levels.length > 0 +} + +function supportsAdaptiveThinking(model: Model | undefined): boolean { + return model?.capabilities.supports.adaptive_thinking === true +} + export function buildAnthropicReasoningContext( payload: AnthropicMessagesPayload, model: Model | undefined, ): ReasoningContext { - const adaptiveThinkingSupported = - model?.capabilities.adaptive_thinking === true const thinkingEnabled = payload.thinking?.type === "enabled" + if (!thinkingEnabled) return {} + return { - reasoningEffort: - thinkingEnabled && adaptiveThinkingSupported ? "high" : undefined, + reasoningEffort: supportsReasoningEffort(model) ? "high" : undefined, thinkingBudget: - thinkingEnabled && adaptiveThinkingSupported ? + supportsAdaptiveThinking(model) ? payload.thinking?.budget_tokens : undefined, } @@ -28,12 +36,13 @@ export function buildOpenAIReasoningContext( payload: ChatCompletionsPayload, model: Model | undefined, ): ReasoningContext { - const adaptiveThinkingSupported = - model?.capabilities.adaptive_thinking === true return { - reasoningEffort: payload.reasoning_effort ?? undefined, + reasoningEffort: + supportsReasoningEffort(model) ? + (payload.reasoning_effort ?? undefined) + : undefined, thinkingBudget: - adaptiveThinkingSupported ? + supportsAdaptiveThinking(model) ? (payload.thinking_budget ?? 
undefined) : undefined, } diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 8d5fdb1d3..92d84674e 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -29,10 +29,10 @@ interface ModelSupports { parallel_tool_calls?: boolean dimensions?: boolean adaptive_thinking?: boolean + reasoning_effort?: Array } interface ModelCapabilities { - adaptive_thinking?: boolean family: string limits: ModelLimits object: string diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 0bca2c7a7..480cce767 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -135,9 +135,9 @@ describe("Anthropic to OpenAI translation logic", () => { const anthropicPayload = { model: "gpt-4o", messages: [{ role: "user", content: "Hello!" }], + max_tokens: 0, temperature: "hot", // Should be a number - } - // @ts-expect-error intended to be invalid + } as unknown as AnthropicMessagesPayload const openAIPayload = translateToOpenAI( anthropicPayload, disabledReasoningContext, @@ -236,7 +236,6 @@ describe("Anthropic to OpenAI translation logic", () => { const openAIPayload = translateToOpenAI(anthropicPayload, { reasoningEffort: "high", thinkingBudget: 2048, - adaptiveThinkingSupported: true, }) expect(openAIPayload.reasoning_effort).toBe("high") @@ -347,11 +346,13 @@ describe("reasoning context helpers", () => { vendor: "anthropic", version: "20250514", capabilities: { - adaptive_thinking: true, family: "claude", limits: {}, object: "model_capabilities", - supports: {}, + supports: { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }, tokenizer: "claude", type: "chat", }, diff --git a/tests/chat-completions-handler.test.ts b/tests/chat-completions-handler.test.ts index e128b268e..6e3d09682 100644 --- a/tests/chat-completions-handler.test.ts +++ b/tests/chat-completions-handler.test.ts @@ -118,9 +118,9 @@ describe("handleCompletion reasoning 
normalization", () => { object: "model_capabilities", tokenizer: "claude", type: "chat", - adaptive_thinking: true, supports: { adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], }, limits: { max_output_tokens: 8192, @@ -166,9 +166,9 @@ describe("handleCompletion reasoning normalization", () => { object: "model_capabilities", tokenizer: "gpt", type: "chat", - adaptive_thinking: true, supports: { adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], }, limits: { max_output_tokens: 4096, From 34927088a601ca141f67068ec4e22bdc946294bf Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 23:57:40 +0800 Subject: [PATCH 4/8] docs: document reasoning and thinking translation, add handler tests Add a Reasoning & Extended Thinking section to the README, highlight the feature in the intro and features list, and cover the capability gating with new handler tests for the Anthropic /v1/messages surface and additional cases for /v1/chat/completions. Co-Authored-By: Claude Opus 4.6 --- README.md | 53 ++++++ tests/anthropic-request.test.ts | 121 +++++++++++++- tests/chat-completions-handler.test.ts | 88 ++++++++++ tests/messages-handler.test.ts | 213 +++++++++++++++++++++++++ 4 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 tests/messages-handler.test.ts diff --git a/README.md b/README.md index 0d36c13c9..92f0df518 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Copilot API Proxy +**One Copilot subscription. Every frontier reasoning model. OpenAI and Anthropic shaped.** Point Claude Code, Cline, or your own scripts at a single localhost URL and unlock Claude Sonnet 4.6, GPT-5, Gemini, and friends — with real reasoning traces and thinking budgets routed to whichever knob the upstream model actually supports. + > [!WARNING] > This is a reverse-engineered proxy of GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. 
@@ -32,6 +34,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open ## Features - **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API. +- **Reasoning & Extended Thinking**: Capability-aware translation of `reasoning_effort` and Anthropic `thinking` blocks. Thinking traces, signatures, and `reasoning_opaque` tokens flow through both non-streaming and streaming responses without you having to know which upstream flag each model wants. - **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`). - **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics. - **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests. @@ -278,6 +281,56 @@ The dashboard provides a user-friendly interface to view your Copilot usage data - **URL-based Configuration**: You can also specify the API endpoint directly in the URL using a query parameter. This is useful for bookmarks or sharing links. For example: `https://ericc-ch.github.io/copilot-api?endpoint=http://your-api-server/usage` +## Reasoning & Extended Thinking + +Each Copilot model advertises its own reasoning knobs under `capabilities.supports`. The proxy reads them at startup and translates requests accordingly, so the same client call works across Claude, GPT, Gemini, and friends. + +### OpenAI-shaped requests (`/v1/chat/completions`) + +- `reasoning_effort` (`low` | `medium` | `high`, plus `minimal` for GPT-5 family) is passed through to any model whose `supports.reasoning_effort` is non-empty. Other models get it stripped. 
+- `thinking_budget` is passed through only when the model advertises `supports.adaptive_thinking` (currently Claude Sonnet 4.5+/4.6, Opus 4.6). For other models the proxy silently drops it.
+- Claude reasoning responses surface as `reasoning_text` and `reasoning_opaque` on the assistant message.
+
+```sh
+# GPT-5 mini with heavy reasoning
+curl http://localhost:4141/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5-mini",
+    "reasoning_effort": "high",
+    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
+  }'
+
+# Claude Sonnet 4.6 with an explicit thinking budget
+curl http://localhost:4141/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-sonnet-4.6",
+    "reasoning_effort": "high",
+    "thinking_budget": 2048,
+    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
+  }'
+```
+
+### Anthropic-shaped requests (`/v1/messages`)
+
+- `thinking: {"type": "enabled", "budget_tokens": N}` is translated into `reasoning_effort: "high"` for any reasoning-capable model, plus `thinking_budget` for adaptive-thinking models.
+- `thinking: {"type": "disabled"}` suppresses both fields upstream.
+- If the selected model supports neither knob, the thinking config is stripped before forwarding (a debug-level message is logged) — the request still succeeds.
+- Claude thinking streams emit `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events before the text block, so Claude Code and similar clients see native thinking UIs.
+ +```sh +# Extended thinking via the Anthropic surface +curl http://localhost:4141/v1/messages \ + -H "Content-Type: application/json" \ + -d '{ + "model": "claude-sonnet-4.6", + "max_tokens": 1024, + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}] + }' +``` + ## Using with Claude Code This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic. diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 480cce767..b0c9d44d2 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -3,8 +3,36 @@ import { z } from "zod" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" +import type { Model } from "../src/services/copilot/get-models" + import { translateToOpenAI } from "../src/routes/messages/non-stream-translation" -import { buildAnthropicReasoningContext } from "../src/routes/reasoning-context" +import { + buildAnthropicReasoningContext, + buildOpenAIReasoningContext, +} from "../src/routes/reasoning-context" + +function makeModel( + id: string, + supports: Model["capabilities"]["supports"], +): Model { + return { + id, + model_picker_enabled: true, + name: id, + object: "model", + preview: false, + vendor: "test", + version: "1", + capabilities: { + family: id, + limits: {}, + object: "model_capabilities", + supports, + tokenizer: "test", + type: "chat", + }, + } +} const disabledReasoningContext = { reasoningEffort: undefined, @@ -364,6 +392,97 @@ describe("reasoning context helpers", () => { }) }) + test("reasoning_effort-only model gets reasoning_effort but no thinking_budget", () => { + expect( + buildAnthropicReasoningContext( + { + model: "gpt-5-mini", + messages: [], + max_tokens: 1024, + thinking: { type: "enabled", budget_tokens: 2048 }, + }, + makeModel("gpt-5-mini", { + 
reasoning_effort: ["low", "medium", "high"], + }), + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: undefined, + }) + }) + + test("disabled thinking returns an empty context regardless of capability", () => { + expect( + buildAnthropicReasoningContext( + { + model: "claude-sonnet-4.6", + messages: [], + max_tokens: 1024, + thinking: { type: "disabled" }, + }, + makeModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }), + ), + ).toEqual({}) + }) + + test("buildOpenAIReasoningContext keeps supported fields and drops unsupported ones", () => { + const claudeModel = makeModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }) + expect( + buildOpenAIReasoningContext( + { + model: "claude-sonnet-4.6", + messages: [], + reasoning_effort: "high", + thinking_budget: 2048, + }, + claudeModel, + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: 2048, + }) + + const gptModel = makeModel("gpt-5-mini", { + reasoning_effort: ["low", "medium", "high"], + }) + expect( + buildOpenAIReasoningContext( + { + model: "gpt-5-mini", + messages: [], + reasoning_effort: "high", + thinking_budget: 2048, + }, + gptModel, + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: undefined, + }) + + const plainModel = makeModel("gpt-4o", {}) + expect( + buildOpenAIReasoningContext( + { + model: "gpt-4o", + messages: [], + reasoning_effort: "high", + thinking_budget: 2048, + }, + plainModel, + ), + ).toEqual({ + reasoningEffort: undefined, + thinkingBudget: undefined, + }) + }) + test("unsupported model does not expose Anthropic adaptive thinking fields", () => { expect( buildAnthropicReasoningContext( diff --git a/tests/chat-completions-handler.test.ts b/tests/chat-completions-handler.test.ts index 6e3d09682..2167cba43 100644 --- a/tests/chat-completions-handler.test.ts +++ b/tests/chat-completions-handler.test.ts @@ -200,4 +200,92 @@ 
describe("handleCompletion reasoning normalization", () => { "gpt-adaptive", ) }) + + test("reasoning_effort-only model keeps reasoning_effort and drops thinking_budget", async () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-reasoning", + name: "GPT Reasoning", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "openai", + version: "1", + capabilities: { + family: "gpt", + object: "model_capabilities", + tokenizer: "gpt", + type: "chat", + supports: { + reasoning_effort: ["low", "medium", "high"], + }, + limits: { + max_output_tokens: 4096, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "gpt-reasoning", + reasoning_effort: "high", + thinking_budget: 2048, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBe("high") + expect(body.thinking_budget).toBeUndefined() + expect(debugMock).toHaveBeenCalledWith( + "Dropping unsupported OpenAI thinking_budget for model:", + "gpt-reasoning", + ) + }) + + test("plain model without reasoning capabilities drops both fields", async () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + name: "GPT-4o", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "openai", + version: "1", + capabilities: { + family: "gpt", + object: "model_capabilities", + tokenizer: "gpt", + type: "chat", + supports: {}, + limits: { + max_output_tokens: 4096, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "gpt-4o", + reasoning_effort: "high", + thinking_budget: 2048, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBeUndefined() + 
expect(body.thinking_budget).toBeUndefined() + }) }) diff --git a/tests/messages-handler.test.ts b/tests/messages-handler.test.ts new file mode 100644 index 000000000..9878b1a97 --- /dev/null +++ b/tests/messages-handler.test.ts @@ -0,0 +1,213 @@ +import type { Context } from "hono" + +import { beforeEach, describe, expect, mock, test } from "bun:test" + +import type { AnthropicMessagesPayload } from "../src/routes/messages/anthropic-types" +import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" + +import { state } from "../src/lib/state" +import { handleCompletion } from "../src/routes/messages/handler" + +const fetchMock = mock( + (_url: string, opts: { body?: string | ReadableStream | null }) => { + return Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + id: "chatcmpl-test", + object: "chat.completion" as const, + created: 1, + model: "mock-model", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "ok", + }, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 2, + total_tokens: 12, + }, + }), + body: opts.body, + }) + }, +) + +const awaitApprovalMock = mock(() => Promise.resolve()) +const checkRateLimitMock = mock(() => Promise.resolve()) +const debugMock = mock(() => {}) +const infoMock = mock(() => {}) +const warnMock = mock(() => {}) + +// @ts-expect-error - Mock fetch doesn't implement all fetch properties +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +void mock.module("../src/lib/approval", () => ({ + awaitApproval: awaitApprovalMock, +})) + +void mock.module("../src/lib/rate-limit", () => ({ + checkRateLimit: checkRateLimitMock, +})) + +void mock.module("consola", () => ({ + default: { + debug: debugMock, + info: infoMock, + warn: warnMock, + }, +})) + +function createContext(payload: AnthropicMessagesPayload): Context { + return { + req: { + json: () => Promise.resolve(payload), + }, + json: 
(body: unknown) => body, + } as unknown as Context +} + +function getLastRequestBody(): ChatCompletionsPayload { + const lastCall = fetchMock.mock.calls.at(-1) + expect(lastCall).toBeDefined() + if (!lastCall) throw new Error("Expected fetch to be called") + const options = lastCall[1] as { body: string } + return JSON.parse(options.body) as ChatCompletionsPayload +} + +function setModel( + id: string, + supports: { + adaptive_thinking?: boolean + reasoning_effort?: Array + }, +) { + state.models = { + object: "list", + data: [ + { + id, + name: id, + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "test", + version: "1", + capabilities: { + family: id, + object: "model_capabilities", + tokenizer: "test", + type: "chat", + supports, + limits: {}, + }, + }, + ], + } +} + +describe("Anthropic messages handler reasoning translation", () => { + beforeEach(() => { + fetchMock.mockClear() + awaitApprovalMock.mockClear() + checkRateLimitMock.mockClear() + debugMock.mockClear() + infoMock.mockClear() + warnMock.mockClear() + state.manualApprove = false + state.copilotToken = "test-token" + state.vsCodeVersion = "1.0.0" + state.accountType = "individual" + }) + + test("claude-style model forwards reasoning_effort and thinking_budget", async () => { + setModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }) + + await handleCompletion( + createContext({ + model: "claude-sonnet-4.6", + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + messages: [{ role: "user", content: "Think carefully." 
}], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBe("high") + expect(body.thinking_budget).toBe(2048) + }) + + test("reasoning_effort-only model forwards reasoning_effort and drops thinking_budget", async () => { + setModel("gpt-5-mini", { + reasoning_effort: ["low", "medium", "high"], + }) + + await handleCompletion( + createContext({ + model: "gpt-5-mini", + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + messages: [{ role: "user", content: "Think carefully." }], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBe("high") + expect(body.thinking_budget).toBeUndefined() + }) + + test("disabled thinking never forwards reasoning fields", async () => { + setModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }) + + await handleCompletion( + createContext({ + model: "claude-sonnet-4.6", + max_tokens: 256, + thinking: { type: "disabled" }, + messages: [{ role: "user", content: "Answer directly." }], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBeUndefined() + expect(body.thinking_budget).toBeUndefined() + }) + + test("unsupported model strips thinking config and logs debug", async () => { + setModel("gpt-4o", {}) + + await handleCompletion( + createContext({ + model: "gpt-4o", + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + messages: [{ role: "user", content: "Think carefully." 
}], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBeUndefined() + expect(body.thinking_budget).toBeUndefined() + expect(debugMock).toHaveBeenCalledWith( + "Stripping unsupported Anthropic thinking config for model:", + "gpt-4o", + ) + }) +}) From 0e2e8124cc1f07c0423e1970e3232f49417304c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:52:07 +0000 Subject: [PATCH 5/8] Initial plan From caf33deb8236ee9ef3fd09aacd939a4c99840d15 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:53:09 +0000 Subject: [PATCH 6/8] feat: allow arbitrary reasoning_effort values for forward compatibility Agent-Logs-Url: https://github.com/lyzgeorge/copilot-api/sessions/10f76b26-728d-41d8-8fb3-432974af4318 Co-authored-by: lyzgeorge <8285196+lyzgeorge@users.noreply.github.com> --- src/routes/reasoning-context.ts | 2 +- src/services/copilot/create-chat-completions.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts index 03f851c31..7bef76e73 100644 --- a/src/routes/reasoning-context.ts +++ b/src/routes/reasoning-context.ts @@ -3,7 +3,7 @@ import type { ChatCompletionsPayload } from "~/services/copilot/create-chat-comp import type { Model } from "~/services/copilot/get-models" export interface ReasoningContext { - reasoningEffort?: "low" | "medium" | "high" + reasoningEffort?: "low" | "medium" | "high" | string thinkingBudget?: number } diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 15f3e0a21..8da439c24 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -143,7 +143,7 @@ export interface ChatCompletionsPayload { logit_bias?: Record | null 
logprobs?: boolean | null response_format?: { type: "json_object" } | null - reasoning_effort?: "low" | "medium" | "high" | null + reasoning_effort?: "low" | "medium" | "high" | (string & {}) | null seed?: number | null stream_options?: { include_usage?: boolean } | null thinking_budget?: number | null From dcfb3690c60ac4c09832f8dec3ea1f60afafd947 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:54:01 +0000 Subject: [PATCH 7/8] fix: use (string & {}) in ReasoningContext for consistent IDE autocomplete Agent-Logs-Url: https://github.com/lyzgeorge/copilot-api/sessions/10f76b26-728d-41d8-8fb3-432974af4318 Co-authored-by: lyzgeorge <8285196+lyzgeorge@users.noreply.github.com> --- src/routes/reasoning-context.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts index 7bef76e73..ee542e273 100644 --- a/src/routes/reasoning-context.ts +++ b/src/routes/reasoning-context.ts @@ -3,7 +3,7 @@ import type { ChatCompletionsPayload } from "~/services/copilot/create-chat-comp import type { Model } from "~/services/copilot/get-models" export interface ReasoningContext { - reasoningEffort?: "low" | "medium" | "high" | string + reasoningEffort?: "low" | "medium" | "high" | (string & {}) thinkingBudget?: number } From 4f0d9cafafd0b7306972beb801e3b8599c5c9c64 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:57:16 +0000 Subject: [PATCH 8/8] docs: add fork improvements hook at top of README Agent-Logs-Url: https://github.com/lyzgeorge/copilot-api/sessions/83622886-a808-4ba6-820c-75ed8300f3ab Co-authored-by: lyzgeorge <8285196+lyzgeorge@users.noreply.github.com> --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 92f0df518..b0b3a55aa 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,18 @@ # Copilot API 
Proxy +> **This is a fork of [ericc-ch/copilot-api](https://github.com/ericc-ch/copilot-api) with full reasoning / extended-thinking support added.** +> +> **What this fork adds:** +> +> - **Capability-aware reasoning routing** — reads each model's `capabilities.supports` at startup and routes `reasoning_effort` / `thinking_budget` only to models that actually support them; unsupported models silently strip the fields. +> - **Anthropic ↔ OpenAI thinking translation** — `thinking: { type: "enabled", budget_tokens: N }` on the `/v1/messages` surface is automatically translated into `reasoning_effort: "high"` + `thinking_budget` for the upstream call, and vice versa. +> - **Streaming thinking traces** — Claude thinking streams emit proper `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events so Claude Code and similar clients see native thinking UIs. +> - **Forward-compatible `reasoning_effort`** — type accepts any string (not just `"low" | "medium" | "high"`), so new model-specific values like `"xhigh"` or `"minimal"` are transparently passed through without code changes. +> +> Everything else — auth, rate limiting, usage dashboard, CLI flags — is identical to the upstream project. + +--- + **One Copilot subscription. Every frontier reasoning model. OpenAI and Anthropic shaped.** Point Claude Code, Cline, or your own scripts at a single localhost URL and unlock Claude Sonnet 4.6, GPT-5, Gemini, and friends — with real reasoning traces and thinking budgets routed to whichever knob the upstream model actually supports. > [!WARNING]