From 8be4517ad3c15b4da06d520dacb114f37ec1c8dc Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 19:59:13 +0800 Subject: [PATCH 1/8] chore: ignore local worktrees --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 577a4f199..d26414de8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,7 @@ node_modules/ .eslintcache # build output -dist/ \ No newline at end of file +dist/ + +# local worktrees +.worktrees/ \ No newline at end of file From 7ecb1618e2a296040db2c448d9420e1013f2e9b2 Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 21:47:14 +0800 Subject: [PATCH 2/8] feat: translate Copilot reasoning for OpenAI and Anthropic Normalize reasoning effort, thinking budget, and Anthropic reasoning streams so both proxy surfaces stay aligned with Copilot model capabilities. Co-Authored-By: Claude Opus 4.6 --- src/routes/chat-completions/handler.ts | 16 + src/routes/messages/anthropic-types.ts | 7 +- src/routes/messages/count-tokens-handler.ts | 7 +- src/routes/messages/handler.ts | 22 +- src/routes/messages/non-stream-translation.ts | 64 ++- src/routes/messages/stream-translation.ts | 93 ++-- src/routes/reasoning-context.ts | 40 ++ .../copilot/create-chat-completions.ts | 7 + src/services/copilot/get-models.ts | 2 + tests/anthropic-request.test.ts | 209 ++++++++- tests/anthropic-response.test.ts | 442 +++++++++++++++++- tests/chat-completions-handler.test.ts | 203 ++++++++ tests/create-chat-completions.test.ts | 86 +++- 13 files changed, 1099 insertions(+), 99 deletions(-) create mode 100644 src/routes/reasoning-context.ts create mode 100644 tests/chat-completions-handler.test.ts diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..c4a0fc4fd 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -8,6 +8,7 @@ import { checkRateLimit } from "~/lib/rate-limit" import { state } from 
"~/lib/state" import { getTokenCount } from "~/lib/tokenizer" import { isNullish } from "~/lib/utils" +import { buildOpenAIReasoningContext } from "~/routes/reasoning-context" import { createChatCompletions, type ChatCompletionResponse, @@ -47,6 +48,21 @@ export async function handleCompletion(c: Context) { consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } + const reasoningContext = buildOpenAIReasoningContext(payload, selectedModel) + + if (payload.thinking_budget && !reasoningContext.thinkingBudget) { + consola.debug( + "Dropping unsupported OpenAI thinking_budget for model:", + payload.model, + ) + } + + payload = { + ...payload, + reasoning_effort: reasoningContext.reasoningEffort, + thinking_budget: reasoningContext.thinkingBudget, + } + const response = await createChatCompletions(payload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..ca851dc18 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -19,7 +19,7 @@ export interface AnthropicMessagesPayload { name?: string } thinking?: { - type: "enabled" + type: "enabled" | "disabled" budget_tokens?: number } service_tier?: "auto" | "standard_only" @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature?: string } export type AnthropicUserContentBlock = @@ -92,6 +93,7 @@ export interface AnthropicResponse { role: "assistant" content: Array model: string + reasoning_opaque?: string stop_reason: | "end_turn" | "max_tokens" @@ -195,7 +197,8 @@ export type AnthropicStreamEventData = export interface AnthropicStreamState { messageStartSent: boolean contentBlockIndex: number - contentBlockOpen: boolean + currentBlockType?: "text" | "thinking" | "tool_use" + reasoningOpaque?: string toolCalls: { [openAIToolIndex: number]: { id: string diff --git 
a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts index 2ec849cb8..60e0a417d 100644 --- a/src/routes/messages/count-tokens-handler.ts +++ b/src/routes/messages/count-tokens-handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" +import { buildAnthropicReasoningContext } from "~/routes/reasoning-context" import { type AnthropicMessagesPayload } from "./anthropic-types" import { translateToOpenAI } from "./non-stream-translation" @@ -17,11 +18,13 @@ export async function handleCountTokens(c: Context) { const anthropicPayload = await c.req.json() - const openAIPayload = translateToOpenAI(anthropicPayload) - const selectedModel = state.models?.data.find( (model) => model.id === anthropicPayload.model, ) + const openAIPayload = translateToOpenAI( + anthropicPayload, + buildAnthropicReasoningContext(anthropicPayload, selectedModel), + ) if (!selectedModel) { consola.warn("Model not found, returning default token count") diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..3e8f236cb 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -6,6 +6,7 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { buildAnthropicReasoningContext } from "~/routes/reasoning-context" import { createChatCompletions, type ChatCompletionChunk, @@ -28,7 +29,25 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) - const openAIPayload = translateToOpenAI(anthropicPayload) + const selectedModel = state.models?.data.find( + (model) => model.id === anthropicPayload.model, + ) + const reasoningContext = buildAnthropicReasoningContext( + 
anthropicPayload, + selectedModel, + ) + + if ( + anthropicPayload.thinking?.type === "enabled" + && selectedModel?.capabilities.adaptive_thinking !== true + ) { + consola.debug( + "Stripping unsupported Anthropic thinking config for model:", + anthropicPayload.model, + ) + } + + const openAIPayload = translateToOpenAI(anthropicPayload, reasoningContext) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), @@ -58,7 +77,6 @@ export async function handleCompletion(c: Context) { const streamState: AnthropicStreamState = { messageStartSent: false, contentBlockIndex: 0, - contentBlockOpen: false, toolCalls: {}, } diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..31ec1ec48 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import type { ReasoningContext } from "~/routes/reasoning-context" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -28,6 +30,7 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" export function translateToOpenAI( payload: AnthropicMessagesPayload, + context: ReasoningContext, ): ChatCompletionsPayload { return { model: translateModelName(payload.model), @@ -43,6 +46,8 @@ export function translateToOpenAI( user: payload.metadata?.user_id, tools: translateAnthropicToolsToOpenAI(payload.tools), tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), + reasoning_effort: context.reasoningEffort, + thinking_budget: context.thinkingBudget, } } @@ -281,35 +286,23 @@ function translateAnthropicToolChoiceToOpenAI( export function translateToAnthropic( response: ChatCompletionResponse, ): AnthropicResponse { - // Merge content from all choices - const allTextBlocks: Array = [] - const allToolUseBlocks: Array = [] - let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null = - null // default - stopReason = 
response.choices[0]?.finish_reason ?? stopReason - - // Process all choices to extract text and tool use blocks - for (const choice of response.choices) { - const textBlocks = getAnthropicTextBlocks(choice.message.content) - const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) - - allTextBlocks.push(...textBlocks) - allToolUseBlocks.push(...toolUseBlocks) - - // Use the finish_reason from the first choice, or prioritize tool_calls - if (choice.finish_reason === "tool_calls" || stopReason === "stop") { - stopReason = choice.finish_reason - } - } - - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + const content = response.choices.flatMap((choice) => [ + ...getAnthropicThinkingBlocks(choice.message.reasoning_text), + ...getAnthropicTextBlocks(choice.message.content), + ...getAnthropicToolUseBlocks(choice.message.tool_calls), + ]) + const reasoningOpaque = response.choices.find( + (choice) => choice.message.reasoning_opaque, + )?.message.reasoning_opaque + const stopReason = getAnthropicStopReason(response.choices) return { id: response.id, type: "message", role: "assistant", model: response.model, - content: [...allTextBlocks, ...allToolUseBlocks], + reasoning_opaque: reasoningOpaque, + content, stop_reason: mapOpenAIStopReasonToAnthropic(stopReason), stop_sequence: null, usage: { @@ -326,6 +319,31 @@ export function translateToAnthropic( } } +function getAnthropicStopReason( + choices: ChatCompletionResponse["choices"], +): "stop" | "length" | "tool_calls" | "content_filter" | null { + let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null = + choices[0]?.finish_reason ?? 
null + + for (const choice of choices) { + if (choice.finish_reason === "tool_calls" || stopReason === "stop") { + stopReason = choice.finish_reason + } + } + + return stopReason +} + +function getAnthropicThinkingBlocks( + reasoningText: string | null | undefined, +): Array { + if (!reasoningText) { + return [] + } + + return [{ type: "thinking", thinking: reasoningText }] +} + function getAnthropicTextBlocks( messageContent: Message["content"], ): Array { diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index 55094448f..884d78234 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -6,14 +6,32 @@ import { } from "./anthropic-types" import { mapOpenAIStopReasonToAnthropic } from "./utils" -function isToolBlockOpen(state: AnthropicStreamState): boolean { - if (!state.contentBlockOpen) { - return false +function closeOpenBlock( + events: Array, + state: AnthropicStreamState, +): void { + if (!state.currentBlockType) { + return + } + + if (state.currentBlockType === "thinking" && state.reasoningOpaque) { + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: state.reasoningOpaque, + }, + }) + state.reasoningOpaque = undefined } - // Check if the current block index corresponds to any known tool call - return Object.values(state.toolCalls).some( - (tc) => tc.anthropicBlockIndex === state.contentBlockIndex, - ) + + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.currentBlockType = undefined } // eslint-disable-next-line max-lines-per-function, complexity @@ -57,18 +75,43 @@ export function translateChunkToAnthropicEvents( state.messageStartSent = true } - if (delta.content) { - if (isToolBlockOpen(state)) { - // A tool block was open, so close it before starting a text block. 
+ if (delta.reasoning_opaque) { + state.reasoningOpaque = delta.reasoning_opaque + } + + if (delta.reasoning_text) { + if (state.currentBlockType && state.currentBlockType !== "thinking") { + closeOpenBlock(events, state) + } + + if (!state.currentBlockType) { events.push({ - type: "content_block_stop", + type: "content_block_start", index: state.contentBlockIndex, + content_block: { + type: "thinking", + thinking: "", + }, }) - state.contentBlockIndex++ - state.contentBlockOpen = false + state.currentBlockType = "thinking" } - if (!state.contentBlockOpen) { + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "thinking_delta", + thinking: delta.reasoning_text, + }, + }) + } + + if (delta.content) { + if (state.currentBlockType && state.currentBlockType !== "text") { + closeOpenBlock(events, state) + } + + if (!state.currentBlockType) { events.push({ type: "content_block_start", index: state.contentBlockIndex, @@ -77,7 +120,7 @@ export function translateChunkToAnthropicEvents( text: "", }, }) - state.contentBlockOpen = true + state.currentBlockType = "text" } events.push({ @@ -94,14 +137,8 @@ export function translateChunkToAnthropicEvents( for (const toolCall of delta.tool_calls) { if (toolCall.id && toolCall.function?.name) { // New tool call starting. - if (state.contentBlockOpen) { - // Close any previously open block. 
- events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockIndex++ - state.contentBlockOpen = false + if (state.currentBlockType) { + closeOpenBlock(events, state) } const anthropicBlockIndex = state.contentBlockIndex @@ -121,7 +158,7 @@ export function translateChunkToAnthropicEvents( input: {}, }, }) - state.contentBlockOpen = true + state.currentBlockType = "tool_use" } if (toolCall.function?.arguments) { @@ -143,13 +180,7 @@ export function translateChunkToAnthropicEvents( } if (choice.finish_reason) { - if (state.contentBlockOpen) { - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockOpen = false - } + closeOpenBlock(events, state) events.push( { diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts new file mode 100644 index 000000000..b41b742e8 --- /dev/null +++ b/src/routes/reasoning-context.ts @@ -0,0 +1,40 @@ +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" +import type { ChatCompletionsPayload } from "~/services/copilot/create-chat-completions" +import type { Model } from "~/services/copilot/get-models" + +export interface ReasoningContext { + reasoningEffort?: "low" | "medium" | "high" + thinkingBudget?: number +} + +export function buildAnthropicReasoningContext( + payload: AnthropicMessagesPayload, + model: Model | undefined, +): ReasoningContext { + const adaptiveThinkingSupported = + model?.capabilities.adaptive_thinking === true + const thinkingEnabled = payload.thinking?.type === "enabled" + return { + reasoningEffort: + thinkingEnabled && adaptiveThinkingSupported ? "high" : undefined, + thinkingBudget: + thinkingEnabled && adaptiveThinkingSupported ? 
+ payload.thinking?.budget_tokens + : undefined, + } +} + +export function buildOpenAIReasoningContext( + payload: ChatCompletionsPayload, + model: Model | undefined, +): ReasoningContext { + const adaptiveThinkingSupported = + model?.capabilities.adaptive_thinking === true + return { + reasoningEffort: payload.reasoning_effort ?? undefined, + thinkingBudget: + adaptiveThinkingSupported ? + (payload.thinking_budget ?? undefined) + : undefined, + } +} diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..15f3e0a21 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -71,6 +71,8 @@ export interface ChatCompletionChunk { interface Delta { content?: string | null + reasoning_opaque?: string | null + reasoning_text?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ index: number @@ -112,6 +114,8 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_opaque?: string | null + reasoning_text?: string | null tool_calls?: Array } @@ -139,7 +143,10 @@ export interface ChatCompletionsPayload { logit_bias?: Record | null logprobs?: boolean | null response_format?: { type: "json_object" } | null + reasoning_effort?: "low" | "medium" | "high" | null seed?: number | null + stream_options?: { include_usage?: boolean } | null + thinking_budget?: number | null tools?: Array | null tool_choice?: | "none" diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..8d5fdb1d3 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -28,9 +28,11 @@ interface ModelSupports { tool_calls?: boolean parallel_tool_calls?: boolean dimensions?: boolean + adaptive_thinking?: boolean } interface ModelCapabilities { + adaptive_thinking?: boolean family: string limits: ModelLimits 
object: string diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 06c663778..0bca2c7a7 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -4,6 +4,12 @@ import { z } from "zod" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" import { translateToOpenAI } from "../src/routes/messages/non-stream-translation" +import { buildAnthropicReasoningContext } from "../src/routes/reasoning-context" + +const disabledReasoningContext = { + reasoningEffort: undefined, + thinkingBudget: undefined, +} // Zod schema for a single message in the chat completion request. const messageSchema = z.object({ @@ -50,6 +56,8 @@ const chatCompletionRequestSchema = z.object({ tools: z.array(z.any()).optional(), tool_choice: z.union([z.string(), z.object({})]).optional(), user: z.string().optional(), + reasoning_effort: z.enum(["low", "medium", "high"]).optional(), + thinking_budget: z.number().int().optional(), }) /** @@ -62,6 +70,7 @@ function isValidChatCompletionRequest(payload: unknown): boolean { return result.success } +// eslint-disable-next-line max-lines-per-function describe("Anthropic to OpenAI translation logic", () => { test("should translate minimal Anthropic payload to valid OpenAI payload", () => { const anthropicPayload: AnthropicMessagesPayload = { @@ -70,7 +79,10 @@ describe("Anthropic to OpenAI translation logic", () => { max_tokens: 0, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) }) @@ -99,7 +111,10 @@ describe("Anthropic to OpenAI translation logic", () => { ], tool_choice: { type: "auto" }, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) 
expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) }) @@ -109,7 +124,10 @@ describe("Anthropic to OpenAI translation logic", () => { messages: [{ role: "user", content: "Hello!" }], max_tokens: 0, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) }) @@ -120,7 +138,10 @@ describe("Anthropic to OpenAI translation logic", () => { temperature: "hot", // Should be a number } // @ts-expect-error intended to be invalid - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) // Should fail validation expect(isValidChatCompletionRequest(openAIPayload)).toBe(false) }) @@ -143,7 +164,10 @@ describe("Anthropic to OpenAI translation logic", () => { ], max_tokens: 100, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) // Check that thinking content is combined with text content @@ -181,7 +205,10 @@ describe("Anthropic to OpenAI translation logic", () => { ], max_tokens: 100, } - const openAIPayload = translateToOpenAI(anthropicPayload) + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) // Check that thinking content is included in the message content @@ -197,8 +224,178 @@ describe("Anthropic to OpenAI translation logic", () => { expect(assistantMessage?.tool_calls).toHaveLength(1) expect(assistantMessage?.tool_calls?.[0].function.name).toBe("get_weather") }) + + test("enabled thinking maps to reasoning effort and thinking budget", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4-20250514", + 
messages: [{ role: "user", content: "Think carefully." }], + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + } + + const openAIPayload = translateToOpenAI(anthropicPayload, { + reasoningEffort: "high", + thinkingBudget: 2048, + adaptiveThinkingSupported: true, + }) + + expect(openAIPayload.reasoning_effort).toBe("high") + expect(openAIPayload.thinking_budget).toBe(2048) + expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + }) + + test("disabled thinking omits reasoning fields", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4-20250514", + messages: [{ role: "user", content: "Answer directly." }], + max_tokens: 256, + thinking: { type: "disabled" }, + } + + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) + + expect(openAIPayload.reasoning_effort).toBeUndefined() + expect(openAIPayload.thinking_budget).toBeUndefined() + expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) + }) + + test("emits tool results before remaining user content from mixed user content arrays", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-sonnet-4-20250514", + messages: [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_123", + name: "lookup_weather", + input: { location: "Boston" }, + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_123", + content: "72 and sunny", + }, + { + type: "text", + text: "Please summarize that for me.", + }, + ], + }, + ], + max_tokens: 256, + } + + const openAIPayload = translateToOpenAI( + anthropicPayload, + disabledReasoningContext, + ) + + expect(openAIPayload.messages).toEqual([ + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "toolu_123", + type: "function", + function: { + name: "lookup_weather", + arguments: JSON.stringify({ location: "Boston" }), + }, + }, + ], + }, + { + role: "tool", + tool_call_id: 
"toolu_123", + content: "72 and sunny", + }, + { + role: "user", + content: "Please summarize that for me.", + }, + ]) + }) }) +describe("reasoning context helpers", () => { + test("adaptive Claude model returns the expected Anthropic reasoning context", () => { + expect( + buildAnthropicReasoningContext( + { + model: "claude-sonnet-4-20250514", + messages: [], + max_tokens: 1024, + thinking: { type: "enabled", budget_tokens: 2048 }, + }, + { + id: "claude-sonnet-4-20250514", + model_picker_enabled: true, + name: "Claude Sonnet 4", + object: "model", + preview: false, + vendor: "anthropic", + version: "20250514", + capabilities: { + adaptive_thinking: true, + family: "claude", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "claude", + type: "chat", + }, + }, + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: 2048, + }) + }) + + test("unsupported model does not expose Anthropic adaptive thinking fields", () => { + expect( + buildAnthropicReasoningContext( + { + model: "mistral-large", + messages: [], + max_tokens: 1024, + thinking: { type: "enabled", budget_tokens: 2048 }, + }, + { + id: "mistral-large", + model_picker_enabled: true, + name: "Mistral Large", + object: "model", + preview: false, + vendor: "mistral", + version: "latest", + capabilities: { + family: "mistral", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "mistral", + type: "chat", + }, + }, + ), + ).toEqual({ + reasoningEffort: undefined, + thinkingBudget: undefined, + }) + }) +}) describe("OpenAI Chat Completion v1 Request Payload Validation with Zod", () => { test("should return true for a minimal valid request payload", () => { const validPayload = { diff --git a/tests/anthropic-response.test.ts b/tests/anthropic-response.test.ts index ecd71aacc..3fc48c129 100644 --- a/tests/anthropic-response.test.ts +++ b/tests/anthropic-response.test.ts @@ -6,7 +6,10 @@ import type { ChatCompletionResponse, } from 
"~/services/copilot/create-chat-completions" -import { type AnthropicStreamState } from "~/routes/messages/anthropic-types" +import { + type AnthropicResponse, + type AnthropicStreamState, +} from "~/routes/messages/anthropic-types" import { translateToAnthropic } from "~/routes/messages/non-stream-translation" import { translateChunkToAnthropicEvents } from "~/routes/messages/stream-translation" @@ -20,6 +23,11 @@ const anthropicContentBlockTextSchema = z.object({ text: z.string(), }) +const anthropicContentBlockThinkingSchema = z.object({ + type: z.literal("thinking"), + thinking: z.string(), +}) + const anthropicContentBlockToolUseSchema = z.object({ type: z.literal("tool_use"), id: z.string(), @@ -27,6 +35,19 @@ const anthropicContentBlockToolUseSchema = z.object({ input: z.record(z.string(), z.any()), }) +const anthropicStopReasonSchema = z.custom( + (value) => + value === null + || [ + "end_turn", + "max_tokens", + "pause_turn", + "refusal", + "stop_sequence", + "tool_use", + ].includes(value as string), +) + const anthropicMessageResponseSchema = z.object({ id: z.string(), type: z.literal("message"), @@ -34,11 +55,13 @@ const anthropicMessageResponseSchema = z.object({ content: z.array( z.union([ anthropicContentBlockTextSchema, + anthropicContentBlockThinkingSchema, anthropicContentBlockToolUseSchema, ]), ), model: z.string(), - stop_reason: z.enum(["end_turn", "max_tokens", "stop_sequence", "tool_use"]), + reasoning_opaque: z.string().optional(), + stop_reason: anthropicStopReasonSchema, stop_sequence: z.string().nullable(), usage: anthropicUsageSchema, }) @@ -63,12 +86,20 @@ const anthropicStreamEventSchema = z.looseObject({ ]), }) +function createInitialStreamState(): AnthropicStreamState { + return { + messageStartSent: false, + contentBlockIndex: 0, + toolCalls: {}, + } +} + function isValidAnthropicStreamEvent(payload: unknown): boolean { return anthropicStreamEventSchema.safeParse(payload).success } describe("OpenAI to Anthropic Non-Streaming 
Response Translation", () => { - test("should translate a simple text response correctly", () => { + test("should translate reasoning_text into a thinking block and preserve reasoning_opaque", () => { const openAIResponse: ChatCompletionResponse = { id: "chatcmpl-123", object: "chat.completion", @@ -80,6 +111,8 @@ describe("OpenAI to Anthropic Non-Streaming Response Translation", () => { message: { role: "assistant", content: "Hello! How can I help you today?", + reasoning_text: "Need to explain available help clearly.", + reasoning_opaque: "opaque-token-123", }, finish_reason: "stop", logprobs: null, @@ -98,15 +131,53 @@ describe("OpenAI to Anthropic Non-Streaming Response Translation", () => { expect(anthropicResponse.id).toBe("chatcmpl-123") expect(anthropicResponse.stop_reason).toBe("end_turn") + expect(anthropicResponse.reasoning_opaque).toBe("opaque-token-123") expect(anthropicResponse.usage.input_tokens).toBe(9) - expect(anthropicResponse.content[0].type).toBe("text") - if (anthropicResponse.content[0].type === "text") { - expect(anthropicResponse.content[0].text).toBe( - "Hello! How can I help you today?", - ) - } else { - throw new Error("Expected text block") + expect(anthropicResponse.content).toHaveLength(2) + expect(anthropicResponse.content[0]).toEqual({ + type: "thinking", + thinking: "Need to explain available help clearly.", + }) + expect(anthropicResponse.content[1]).toEqual({ + type: "text", + text: "Hello! How can I help you today?", + }) + }) + + test("should keep a plain text response unchanged when reasoning is absent", () => { + const openAIResponse: ChatCompletionResponse = { + id: "chatcmpl-124", + object: "chat.completion", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "Hello! 
How can I help you today?", + }, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 9, + completion_tokens: 12, + total_tokens: 21, + }, } + + const anthropicResponse = translateToAnthropic(openAIResponse) + + expect(isValidAnthropicResponse(anthropicResponse)).toBe(true) + expect(anthropicResponse.reasoning_opaque).toBeUndefined() + expect(anthropicResponse.content).toEqual([ + { + type: "text", + text: "Hello! How can I help you today?", + }, + ]) }) test("should translate a response with tool calls", () => { @@ -191,6 +262,7 @@ describe("OpenAI to Anthropic Non-Streaming Response Translation", () => { }) }) +// eslint-disable-next-line max-lines-per-function describe("OpenAI to Anthropic Streaming Response Translation", () => { test("should translate a simple text stream correctly", () => { const openAIStream: Array = [ @@ -247,12 +319,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { }, ] - const streamState: AnthropicStreamState = { - messageStartSent: false, - contentBlockIndex: 0, - contentBlockOpen: false, - toolCalls: {}, - } + const streamState = createInitialStreamState() const translatedStream = openAIStream.flatMap((chunk) => translateChunkToAnthropicEvents(chunk, streamState), ) @@ -262,6 +329,342 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { } }) + test("should emit thinking transitions before text transitions when reasoning chunk precedes text chunk", () => { + const openAIStream: Array = [ + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { + reasoning_text: "Need to answer carefully.", + reasoning_opaque: "sig-123", + }, + 
finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { content: "Hello" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-reasoning", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 20, + completion_tokens: 7, + total_tokens: 27, + prompt_tokens_details: { + cached_tokens: 5, + }, + }, + }, + ] + + const streamState = createInitialStreamState() + const translatedStream = openAIStream.flatMap((chunk) => + translateChunkToAnthropicEvents(chunk, streamState), + ) + + expect(translatedStream).toEqual([ + { + type: "message_start", + message: { + id: "cmpl-reasoning", + type: "message", + role: "assistant", + content: [], + model: "gpt-4o-2024-05-13", + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 0, + output_tokens: 0, + }, + }, + }, + { + type: "content_block_start", + index: 0, + content_block: { + type: "thinking", + thinking: "", + }, + }, + { + type: "content_block_delta", + index: 0, + delta: { + type: "thinking_delta", + thinking: "Need to answer carefully.", + }, + }, + { + type: "content_block_delta", + index: 0, + delta: { + type: "signature_delta", + signature: "sig-123", + }, + }, + { + type: "content_block_stop", + index: 0, + }, + { + type: "content_block_start", + index: 1, + content_block: { + type: "text", + text: "", + }, + }, + { + type: "content_block_delta", + index: 1, + delta: { + type: "text_delta", + text: "Hello", + }, + }, + { + type: "content_block_stop", + index: 1, + }, + { + type: "message_delta", + delta: { + stop_reason: "end_turn", + stop_sequence: null, + }, + usage: { + input_tokens: 15, + output_tokens: 7, + cache_read_input_tokens: 5, + }, + }, + { + type: "message_stop", 
+ }, + ]) + }) + + test("should emit each thinking signature only for its own thinking block", () => { + const openAIStream: Array = [ + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { + reasoning_text: "First thought.", + reasoning_opaque: "sig-first", + }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { content: "Answer" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { reasoning_text: "Second thought." 
}, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-signature-scope", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "stop", + logprobs: null, + }, + ], + }, + ] + + const streamState = createInitialStreamState() + const translatedStream = openAIStream.flatMap((chunk) => + translateChunkToAnthropicEvents(chunk, streamState), + ) + + const signatureEvents = translatedStream.filter( + (event) => + event.type === "content_block_delta" + && event.delta.type === "signature_delta", + ) + + expect(signatureEvents).toEqual([ + { + type: "content_block_delta", + index: 0, + delta: { + type: "signature_delta", + signature: "sig-first", + }, + }, + ]) + }) + + test("should preserve final usage on message_delta including cache_read_input_tokens", () => { + const openAIStream: Array = [ + { + id: "cmpl-usage", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-usage", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: { content: "Hello" }, + finish_reason: null, + logprobs: null, + }, + ], + }, + { + id: "cmpl-usage", + object: "chat.completion.chunk", + created: 1677652288, + model: "gpt-4o-2024-05-13", + choices: [ + { + index: 0, + delta: {}, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 14, + completion_tokens: 6, + total_tokens: 20, + prompt_tokens_details: { + cached_tokens: 4, + }, + }, + }, + ] + + const streamState = createInitialStreamState() + const translatedStream = openAIStream.flatMap((chunk) => + translateChunkToAnthropicEvents(chunk, streamState), + ) + + const messageDeltaEvent = translatedStream.find( + (event) => event.type === "message_delta", + ) + + 
expect(messageDeltaEvent).toEqual({ + type: "message_delta", + delta: { + stop_reason: "end_turn", + stop_sequence: null, + }, + usage: { + input_tokens: 10, + output_tokens: 6, + cache_read_input_tokens: 4, + }, + }) + }) + test("should translate a stream with tool calls", () => { const openAIStream: Array = [ { @@ -347,12 +750,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { ] // Streaming translation requires state - const streamState: AnthropicStreamState = { - messageStartSent: false, - contentBlockIndex: 0, - contentBlockOpen: false, - toolCalls: {}, - } + const streamState = createInitialStreamState() const translatedStream = openAIStream.flatMap((chunk) => translateChunkToAnthropicEvents(chunk, streamState), ) diff --git a/tests/chat-completions-handler.test.ts b/tests/chat-completions-handler.test.ts new file mode 100644 index 000000000..e128b268e --- /dev/null +++ b/tests/chat-completions-handler.test.ts @@ -0,0 +1,203 @@ +import type { Context } from "hono" + +import { beforeEach, describe, expect, mock, test } from "bun:test" + +import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" + +import { state } from "../src/lib/state" +import { handleCompletion } from "../src/routes/chat-completions/handler" + +const fetchMock = mock( + (_url: string, opts: { body?: string | ReadableStream | null }) => { + return Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + id: "chatcmpl-test", + object: "chat.completion" as const, + created: 1, + model: "mock-model", + choices: [], + }), + body: opts.body, + }) + }, +) + +const awaitApprovalMock = mock(() => Promise.resolve()) +const checkRateLimitMock = mock(() => Promise.resolve()) +const getTokenCountMock = mock(() => Promise.resolve(123)) +const streamSSEMock = mock(() => Promise.resolve(new Response("stream"))) +const debugMock = mock(() => {}) +const infoMock = mock(() => {}) +const warnMock = mock(() => {}) + +// @ts-expect-error - 
Mock fetch doesn't implement all fetch properties +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +void mock.module("../src/lib/approval", () => ({ + awaitApproval: awaitApprovalMock, +})) + +void mock.module("../src/lib/rate-limit", () => ({ + checkRateLimit: checkRateLimitMock, +})) + +void mock.module("../src/lib/tokenizer", () => ({ + getTokenCount: getTokenCountMock, +})) + +void mock.module("hono/streaming", () => ({ + streamSSE: streamSSEMock, +})) + +void mock.module("consola", () => ({ + default: { + debug: debugMock, + info: infoMock, + warn: warnMock, + }, +})) + +function createContext(payload: ChatCompletionsPayload): Context { + return { + req: { + json: () => Promise.resolve(payload), + }, + json: (body: unknown) => body, + } as unknown as Context +} + +function getLastRequestBody() { + const lastCall = fetchMock.mock.calls.at(-1) + expect(lastCall).toBeDefined() + + if (!lastCall) { + throw new Error("Expected fetch to be called") + } + + const options = lastCall[1] as { body: string } + return JSON.parse(options.body) as ChatCompletionsPayload +} + +describe("handleCompletion reasoning normalization", () => { + beforeEach(() => { + fetchMock.mockClear() + awaitApprovalMock.mockClear() + checkRateLimitMock.mockClear() + getTokenCountMock.mockClear() + streamSSEMock.mockClear() + debugMock.mockClear() + infoMock.mockClear() + warnMock.mockClear() + + state.manualApprove = false + state.copilotToken = "test-token" + state.vsCodeVersion = "1.0.0" + state.accountType = "individual" + state.models = { + object: "list", + data: [], + } + }) + + test("adaptive Claude model keeps reasoning_effort, thinking_budget, stream_options", async () => { + state.models = { + object: "list", + data: [ + { + id: "claude-adaptive", + name: "Claude Adaptive", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "anthropic", + version: "1", + capabilities: { + family: "claude", + object: "model_capabilities", + tokenizer: 
"claude", + type: "chat", + adaptive_thinking: true, + supports: { + adaptive_thinking: true, + }, + limits: { + max_output_tokens: 8192, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "claude-adaptive", + reasoning_effort: "high", + thinking_budget: 2048, + stream_options: { include_usage: true }, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(getLastRequestBody()).toMatchObject({ + reasoning_effort: "high", + thinking_budget: 2048, + stream_options: { include_usage: true }, + max_tokens: 8192, + }) + }) + + test("non-Claude adaptive model keeps thinking_budget", async () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-adaptive", + name: "GPT Adaptive", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "openai", + version: "1", + capabilities: { + family: "gpt", + object: "model_capabilities", + tokenizer: "gpt", + type: "chat", + adaptive_thinking: true, + supports: { + adaptive_thinking: true, + }, + limits: { + max_output_tokens: 4096, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "gpt-adaptive", + reasoning_effort: "medium", + thinking_budget: 1024, + stream_options: { include_usage: true }, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(getLastRequestBody()).toMatchObject({ + reasoning_effort: "medium", + thinking_budget: 1024, + stream_options: { include_usage: true }, + max_tokens: 4096, + }) + expect(debugMock).not.toHaveBeenCalledWith( + "Dropping unsupported OpenAI thinking_budget for model:", + "gpt-adaptive", + ) + }) +}) diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts index d18e741aa..1b7f15e9d 100644 --- a/tests/create-chat-completions.test.ts +++ 
b/tests/create-chat-completions.test.ts @@ -1,9 +1,11 @@ -import { test, expect, mock } from "bun:test" +import { beforeEach, test, expect, mock } from "bun:test" -import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" +import type { + ChatCompletionChunk, + ChatCompletionsPayload, +} from "../src/services/copilot/create-chat-completions" import { state } from "../src/lib/state" -import { createChatCompletions } from "../src/services/copilot/create-chat-completions" // Mock state state.copilotToken = "test-token" @@ -23,7 +25,29 @@ const fetchMock = mock( // @ts-expect-error - Mock fetch doesn't implement all fetch properties ;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock +function getLastFetchCallOptions() { + const lastCall = fetchMock.mock.calls.at(-1) + expect(lastCall).toBeDefined() + + if (!lastCall) { + throw new Error("Expected fetch to be called") + } + + return lastCall[1] as { headers: Record; body: string } +} + +async function loadCreateChatCompletions() { + const mod = await import("../src/services/copilot/create-chat-completions") + return mod.createChatCompletions +} + +beforeEach(() => { + fetchMock.mockClear() +}) + test("sets X-Initiator to agent if tool/assistant present", async () => { + const createChatCompletions = await loadCreateChatCompletions() + const payload: ChatCompletionsPayload = { messages: [ { role: "user", content: "hi" }, @@ -32,14 +56,14 @@ test("sets X-Initiator to agent if tool/assistant present", async () => { model: "gpt-test", } await createChatCompletions(payload) - expect(fetchMock).toHaveBeenCalled() - const headers = ( - fetchMock.mock.calls[0][1] as { headers: Record } - ).headers + expect(fetchMock).toHaveBeenCalledTimes(1) + const { headers } = getLastFetchCallOptions() expect(headers["X-Initiator"]).toBe("agent") }) test("sets X-Initiator to user if only user present", async () => { + const createChatCompletions = await loadCreateChatCompletions() + 
const payload: ChatCompletionsPayload = { messages: [ { role: "user", content: "hi" }, @@ -48,9 +72,49 @@ test("sets X-Initiator to user if only user present", async () => { model: "gpt-test", } await createChatCompletions(payload) - expect(fetchMock).toHaveBeenCalled() - const headers = ( - fetchMock.mock.calls[1][1] as { headers: Record } - ).headers + expect(fetchMock).toHaveBeenCalledTimes(1) + const { headers } = getLastFetchCallOptions() expect(headers["X-Initiator"]).toBe("user") }) + +test("forwards reasoning and stream options upstream unchanged", async () => { + const createChatCompletions = await loadCreateChatCompletions() + + const payload = { + messages: [{ role: "user", content: "reason" }], + model: "gpt-test", + reasoning_effort: "high", + thinking_budget: 2048, + stream_options: { include_usage: true }, + } satisfies ChatCompletionsPayload + + await createChatCompletions(payload) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const { body } = getLastFetchCallOptions() + expect(JSON.parse(body)).toEqual(payload) +}) + +test("ChatCompletionChunk typing accepts reasoning fields", () => { + const chunk = { + id: "chunk-1", + object: "chat.completion.chunk", + created: 123, + model: "gpt-test", + choices: [ + { + index: 0, + delta: { + role: "assistant", + reasoning_text: "thinking", + reasoning_opaque: "opaque-token", + }, + finish_reason: null, + logprobs: null, + }, + ], + } satisfies ChatCompletionChunk + + expect(chunk.choices[0]?.delta.reasoning_text).toBe("thinking") + expect(chunk.choices[0]?.delta.reasoning_opaque).toBe("opaque-token") +}) From 3a659461f1469d1e8c06f95a52d2d445cdc091e6 Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 22:53:09 +0800 Subject: [PATCH 3/8] fix: read reasoning capabilities from capabilities.supports Copilot advertises adaptive_thinking and reasoning_effort under `capabilities.supports`, not at the top level of `capabilities`. 
The previous gate looked at the wrong field, so Anthropic `thinking` was always stripped and reasoning never reached upstream for /v1/messages. Read the correct fields and gate each surface on what the model actually supports. Co-Authored-By: Claude Opus 4.6 --- src/routes/messages/handler.ts | 3 ++- src/routes/messages/non-stream-translation.ts | 2 +- src/routes/reasoning-context.ts | 27 ++++++++++++------- src/services/copilot/get-models.ts | 2 +- tests/anthropic-request.test.ts | 11 ++++---- tests/chat-completions-handler.test.ts | 4 +-- 6 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 3e8f236cb..b1e579148 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -39,7 +39,8 @@ export async function handleCompletion(c: Context) { if ( anthropicPayload.thinking?.type === "enabled" - && selectedModel?.capabilities.adaptive_thinking !== true + && reasoningContext.reasoningEffort === undefined + && reasoningContext.thinkingBudget === undefined ) { consola.debug( "Stripping unsupported Anthropic thinking config for model:", diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index 31ec1ec48..4b299c726 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -301,7 +301,7 @@ export function translateToAnthropic( type: "message", role: "assistant", model: response.model, - reasoning_opaque: reasoningOpaque, + reasoning_opaque: reasoningOpaque ?? 
undefined, content, stop_reason: mapOpenAIStopReasonToAnthropic(stopReason), stop_sequence: null, diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts index b41b742e8..03f851c31 100644 --- a/src/routes/reasoning-context.ts +++ b/src/routes/reasoning-context.ts @@ -7,18 +7,26 @@ export interface ReasoningContext { thinkingBudget?: number } +function supportsReasoningEffort(model: Model | undefined): boolean { + const levels = model?.capabilities.supports.reasoning_effort + return Array.isArray(levels) && levels.length > 0 +} + +function supportsAdaptiveThinking(model: Model | undefined): boolean { + return model?.capabilities.supports.adaptive_thinking === true +} + export function buildAnthropicReasoningContext( payload: AnthropicMessagesPayload, model: Model | undefined, ): ReasoningContext { - const adaptiveThinkingSupported = - model?.capabilities.adaptive_thinking === true const thinkingEnabled = payload.thinking?.type === "enabled" + if (!thinkingEnabled) return {} + return { - reasoningEffort: - thinkingEnabled && adaptiveThinkingSupported ? "high" : undefined, + reasoningEffort: supportsReasoningEffort(model) ? "high" : undefined, thinkingBudget: - thinkingEnabled && adaptiveThinkingSupported ? + supportsAdaptiveThinking(model) ? payload.thinking?.budget_tokens : undefined, } @@ -28,12 +36,13 @@ export function buildOpenAIReasoningContext( payload: ChatCompletionsPayload, model: Model | undefined, ): ReasoningContext { - const adaptiveThinkingSupported = - model?.capabilities.adaptive_thinking === true return { - reasoningEffort: payload.reasoning_effort ?? undefined, + reasoningEffort: + supportsReasoningEffort(model) ? + (payload.reasoning_effort ?? undefined) + : undefined, thinkingBudget: - adaptiveThinkingSupported ? + supportsAdaptiveThinking(model) ? (payload.thinking_budget ?? 
undefined) : undefined, } diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 8d5fdb1d3..92d84674e 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -29,10 +29,10 @@ interface ModelSupports { parallel_tool_calls?: boolean dimensions?: boolean adaptive_thinking?: boolean + reasoning_effort?: Array } interface ModelCapabilities { - adaptive_thinking?: boolean family: string limits: ModelLimits object: string diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 0bca2c7a7..480cce767 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -135,9 +135,9 @@ describe("Anthropic to OpenAI translation logic", () => { const anthropicPayload = { model: "gpt-4o", messages: [{ role: "user", content: "Hello!" }], + max_tokens: 0, temperature: "hot", // Should be a number - } - // @ts-expect-error intended to be invalid + } as unknown as AnthropicMessagesPayload const openAIPayload = translateToOpenAI( anthropicPayload, disabledReasoningContext, @@ -236,7 +236,6 @@ describe("Anthropic to OpenAI translation logic", () => { const openAIPayload = translateToOpenAI(anthropicPayload, { reasoningEffort: "high", thinkingBudget: 2048, - adaptiveThinkingSupported: true, }) expect(openAIPayload.reasoning_effort).toBe("high") @@ -347,11 +346,13 @@ describe("reasoning context helpers", () => { vendor: "anthropic", version: "20250514", capabilities: { - adaptive_thinking: true, family: "claude", limits: {}, object: "model_capabilities", - supports: {}, + supports: { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }, tokenizer: "claude", type: "chat", }, diff --git a/tests/chat-completions-handler.test.ts b/tests/chat-completions-handler.test.ts index e128b268e..6e3d09682 100644 --- a/tests/chat-completions-handler.test.ts +++ b/tests/chat-completions-handler.test.ts @@ -118,9 +118,9 @@ describe("handleCompletion reasoning 
normalization", () => { object: "model_capabilities", tokenizer: "claude", type: "chat", - adaptive_thinking: true, supports: { adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], }, limits: { max_output_tokens: 8192, @@ -166,9 +166,9 @@ describe("handleCompletion reasoning normalization", () => { object: "model_capabilities", tokenizer: "gpt", type: "chat", - adaptive_thinking: true, supports: { adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], }, limits: { max_output_tokens: 4096, From 34927088a601ca141f67068ec4e22bdc946294bf Mon Sep 17 00:00:00 2001 From: lyzgeorge Date: Mon, 13 Apr 2026 23:57:40 +0800 Subject: [PATCH 4/8] docs: document reasoning and thinking translation, add handler tests Add a Reasoning & Extended Thinking section to the README, highlight the feature in the intro and features list, and cover the capability gating with new handler tests for the Anthropic /v1/messages surface and additional cases for /v1/chat/completions. Co-Authored-By: Claude Opus 4.6 --- README.md | 53 ++++++ tests/anthropic-request.test.ts | 121 +++++++++++++- tests/chat-completions-handler.test.ts | 88 ++++++++++ tests/messages-handler.test.ts | 213 +++++++++++++++++++++++++ 4 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 tests/messages-handler.test.ts diff --git a/README.md b/README.md index 0d36c13c9..92f0df518 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Copilot API Proxy +**One Copilot subscription. Every frontier reasoning model. OpenAI and Anthropic shaped.** Point Claude Code, Cline, or your own scripts at a single localhost URL and unlock Claude Sonnet 4.6, GPT-5, Gemini, and friends — with real reasoning traces and thinking budgets routed to whichever knob the upstream model actually supports. + > [!WARNING] > This is a reverse-engineered proxy of GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. 
@@ -32,6 +34,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open ## Features - **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API. +- **Reasoning & Extended Thinking**: Capability-aware translation of `reasoning_effort` and Anthropic `thinking` blocks. Thinking traces, signatures, and `reasoning_opaque` tokens flow through both non-streaming and streaming responses without you having to know which upstream flag each model wants. - **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`). - **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics. - **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests. @@ -278,6 +281,56 @@ The dashboard provides a user-friendly interface to view your Copilot usage data - **URL-based Configuration**: You can also specify the API endpoint directly in the URL using a query parameter. This is useful for bookmarks or sharing links. For example: `https://ericc-ch.github.io/copilot-api?endpoint=http://your-api-server/usage` +## Reasoning & Extended Thinking + +Each Copilot model advertises its own reasoning knobs under `capabilities.supports`. The proxy reads them at startup and translates requests accordingly, so the same client call works across Claude, GPT, Gemini, and friends. + +### OpenAI-shaped requests (`/v1/chat/completions`) + +- `reasoning_effort` (`low` | `medium` | `high`, plus `minimal` for GPT-5 family) is passed through to any model whose `supports.reasoning_effort` is non-empty. Other models get it stripped. 
+- `thinking_budget` is passed through only when the model advertises `supports.adaptive_thinking` (currently Claude Sonnet 4.5+/4.6, Opus 4.6). For other models the proxy silently drops it.
+- Claude reasoning responses surface as `reasoning_text` and `reasoning_opaque` on the assistant message.
+
+```sh
+# GPT-5 mini with heavy reasoning
+curl http://localhost:4141/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5-mini",
+    "reasoning_effort": "high",
+    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
+  }'
+
+# Claude Sonnet 4.6 with an explicit thinking budget
+curl http://localhost:4141/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-sonnet-4.6",
+    "reasoning_effort": "high",
+    "thinking_budget": 2048,
+    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
+  }'
+```
+
+### Anthropic-shaped requests (`/v1/messages`)
+
+- `thinking: {"type": "enabled", "budget_tokens": N}` is translated into `reasoning_effort: "high"` for any reasoning-capable model, plus `thinking_budget` for adaptive-thinking models.
+- `thinking: {"type": "disabled"}` suppresses both fields upstream.
+- If the selected model supports neither knob, the thinking config is stripped before forwarding (a debug-level message is logged) — the request still succeeds.
+- Claude thinking streams emit `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events before the text block, so Claude Code and similar clients see native thinking UIs.
+ +```sh +# Extended thinking via the Anthropic surface +curl http://localhost:4141/v1/messages \ + -H "Content-Type: application/json" \ + -d '{ + "model": "claude-sonnet-4.6", + "max_tokens": 1024, + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}] + }' +``` + ## Using with Claude Code This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic. diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 480cce767..b0c9d44d2 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -3,8 +3,36 @@ import { z } from "zod" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" +import type { Model } from "../src/services/copilot/get-models" + import { translateToOpenAI } from "../src/routes/messages/non-stream-translation" -import { buildAnthropicReasoningContext } from "../src/routes/reasoning-context" +import { + buildAnthropicReasoningContext, + buildOpenAIReasoningContext, +} from "../src/routes/reasoning-context" + +function makeModel( + id: string, + supports: Model["capabilities"]["supports"], +): Model { + return { + id, + model_picker_enabled: true, + name: id, + object: "model", + preview: false, + vendor: "test", + version: "1", + capabilities: { + family: id, + limits: {}, + object: "model_capabilities", + supports, + tokenizer: "test", + type: "chat", + }, + } +} const disabledReasoningContext = { reasoningEffort: undefined, @@ -364,6 +392,97 @@ describe("reasoning context helpers", () => { }) }) + test("reasoning_effort-only model gets reasoning_effort but no thinking_budget", () => { + expect( + buildAnthropicReasoningContext( + { + model: "gpt-5-mini", + messages: [], + max_tokens: 1024, + thinking: { type: "enabled", budget_tokens: 2048 }, + }, + makeModel("gpt-5-mini", { + 
reasoning_effort: ["low", "medium", "high"], + }), + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: undefined, + }) + }) + + test("disabled thinking returns an empty context regardless of capability", () => { + expect( + buildAnthropicReasoningContext( + { + model: "claude-sonnet-4.6", + messages: [], + max_tokens: 1024, + thinking: { type: "disabled" }, + }, + makeModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }), + ), + ).toEqual({}) + }) + + test("buildOpenAIReasoningContext keeps supported fields and drops unsupported ones", () => { + const claudeModel = makeModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }) + expect( + buildOpenAIReasoningContext( + { + model: "claude-sonnet-4.6", + messages: [], + reasoning_effort: "high", + thinking_budget: 2048, + }, + claudeModel, + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: 2048, + }) + + const gptModel = makeModel("gpt-5-mini", { + reasoning_effort: ["low", "medium", "high"], + }) + expect( + buildOpenAIReasoningContext( + { + model: "gpt-5-mini", + messages: [], + reasoning_effort: "high", + thinking_budget: 2048, + }, + gptModel, + ), + ).toEqual({ + reasoningEffort: "high", + thinkingBudget: undefined, + }) + + const plainModel = makeModel("gpt-4o", {}) + expect( + buildOpenAIReasoningContext( + { + model: "gpt-4o", + messages: [], + reasoning_effort: "high", + thinking_budget: 2048, + }, + plainModel, + ), + ).toEqual({ + reasoningEffort: undefined, + thinkingBudget: undefined, + }) + }) + test("unsupported model does not expose Anthropic adaptive thinking fields", () => { expect( buildAnthropicReasoningContext( diff --git a/tests/chat-completions-handler.test.ts b/tests/chat-completions-handler.test.ts index 6e3d09682..2167cba43 100644 --- a/tests/chat-completions-handler.test.ts +++ b/tests/chat-completions-handler.test.ts @@ -200,4 +200,92 @@ 
describe("handleCompletion reasoning normalization", () => { "gpt-adaptive", ) }) + + test("reasoning_effort-only model keeps reasoning_effort and drops thinking_budget", async () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-reasoning", + name: "GPT Reasoning", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "openai", + version: "1", + capabilities: { + family: "gpt", + object: "model_capabilities", + tokenizer: "gpt", + type: "chat", + supports: { + reasoning_effort: ["low", "medium", "high"], + }, + limits: { + max_output_tokens: 4096, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "gpt-reasoning", + reasoning_effort: "high", + thinking_budget: 2048, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBe("high") + expect(body.thinking_budget).toBeUndefined() + expect(debugMock).toHaveBeenCalledWith( + "Dropping unsupported OpenAI thinking_budget for model:", + "gpt-reasoning", + ) + }) + + test("plain model without reasoning capabilities drops both fields", async () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + name: "GPT-4o", + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "openai", + version: "1", + capabilities: { + family: "gpt", + object: "model_capabilities", + tokenizer: "gpt", + type: "chat", + supports: {}, + limits: { + max_output_tokens: 4096, + }, + }, + }, + ], + } + + const payload = { + messages: [{ role: "user", content: "hello" }], + model: "gpt-4o", + reasoning_effort: "high", + thinking_budget: 2048, + } satisfies ChatCompletionsPayload + + await handleCompletion(createContext(payload)) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBeUndefined() + 
expect(body.thinking_budget).toBeUndefined() + }) }) diff --git a/tests/messages-handler.test.ts b/tests/messages-handler.test.ts new file mode 100644 index 000000000..9878b1a97 --- /dev/null +++ b/tests/messages-handler.test.ts @@ -0,0 +1,213 @@ +import type { Context } from "hono" + +import { beforeEach, describe, expect, mock, test } from "bun:test" + +import type { AnthropicMessagesPayload } from "../src/routes/messages/anthropic-types" +import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" + +import { state } from "../src/lib/state" +import { handleCompletion } from "../src/routes/messages/handler" + +const fetchMock = mock( + (_url: string, opts: { body?: string | ReadableStream | null }) => { + return Promise.resolve({ + ok: true, + json: () => + Promise.resolve({ + id: "chatcmpl-test", + object: "chat.completion" as const, + created: 1, + model: "mock-model", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "ok", + }, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 2, + total_tokens: 12, + }, + }), + body: opts.body, + }) + }, +) + +const awaitApprovalMock = mock(() => Promise.resolve()) +const checkRateLimitMock = mock(() => Promise.resolve()) +const debugMock = mock(() => {}) +const infoMock = mock(() => {}) +const warnMock = mock(() => {}) + +// @ts-expect-error - Mock fetch doesn't implement all fetch properties +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +void mock.module("../src/lib/approval", () => ({ + awaitApproval: awaitApprovalMock, +})) + +void mock.module("../src/lib/rate-limit", () => ({ + checkRateLimit: checkRateLimitMock, +})) + +void mock.module("consola", () => ({ + default: { + debug: debugMock, + info: infoMock, + warn: warnMock, + }, +})) + +function createContext(payload: AnthropicMessagesPayload): Context { + return { + req: { + json: () => Promise.resolve(payload), + }, + json: 
(body: unknown) => body, + } as unknown as Context +} + +function getLastRequestBody(): ChatCompletionsPayload { + const lastCall = fetchMock.mock.calls.at(-1) + expect(lastCall).toBeDefined() + if (!lastCall) throw new Error("Expected fetch to be called") + const options = lastCall[1] as { body: string } + return JSON.parse(options.body) as ChatCompletionsPayload +} + +function setModel( + id: string, + supports: { + adaptive_thinking?: boolean + reasoning_effort?: Array + }, +) { + state.models = { + object: "list", + data: [ + { + id, + name: id, + object: "model", + model_picker_enabled: true, + preview: false, + vendor: "test", + version: "1", + capabilities: { + family: id, + object: "model_capabilities", + tokenizer: "test", + type: "chat", + supports, + limits: {}, + }, + }, + ], + } +} + +describe("Anthropic messages handler reasoning translation", () => { + beforeEach(() => { + fetchMock.mockClear() + awaitApprovalMock.mockClear() + checkRateLimitMock.mockClear() + debugMock.mockClear() + infoMock.mockClear() + warnMock.mockClear() + state.manualApprove = false + state.copilotToken = "test-token" + state.vsCodeVersion = "1.0.0" + state.accountType = "individual" + }) + + test("claude-style model forwards reasoning_effort and thinking_budget", async () => { + setModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }) + + await handleCompletion( + createContext({ + model: "claude-sonnet-4.6", + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + messages: [{ role: "user", content: "Think carefully." 
}], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBe("high") + expect(body.thinking_budget).toBe(2048) + }) + + test("reasoning_effort-only model forwards reasoning_effort and drops thinking_budget", async () => { + setModel("gpt-5-mini", { + reasoning_effort: ["low", "medium", "high"], + }) + + await handleCompletion( + createContext({ + model: "gpt-5-mini", + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + messages: [{ role: "user", content: "Think carefully." }], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBe("high") + expect(body.thinking_budget).toBeUndefined() + }) + + test("disabled thinking never forwards reasoning fields", async () => { + setModel("claude-sonnet-4.6", { + adaptive_thinking: true, + reasoning_effort: ["low", "medium", "high"], + }) + + await handleCompletion( + createContext({ + model: "claude-sonnet-4.6", + max_tokens: 256, + thinking: { type: "disabled" }, + messages: [{ role: "user", content: "Answer directly." }], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBeUndefined() + expect(body.thinking_budget).toBeUndefined() + }) + + test("unsupported model strips thinking config and logs debug", async () => { + setModel("gpt-4o", {}) + + await handleCompletion( + createContext({ + model: "gpt-4o", + max_tokens: 256, + thinking: { type: "enabled", budget_tokens: 2048 }, + messages: [{ role: "user", content: "Think carefully." 
}], + }), + ) + + expect(fetchMock).toHaveBeenCalledTimes(1) + const body = getLastRequestBody() + expect(body.reasoning_effort).toBeUndefined() + expect(body.thinking_budget).toBeUndefined() + expect(debugMock).toHaveBeenCalledWith( + "Stripping unsupported Anthropic thinking config for model:", + "gpt-4o", + ) + }) +}) From 0e2e8124cc1f07c0423e1970e3232f49417304c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:52:07 +0000 Subject: [PATCH 5/8] Initial plan From caf33deb8236ee9ef3fd09aacd939a4c99840d15 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:53:09 +0000 Subject: [PATCH 6/8] feat: allow arbitrary reasoning_effort values for forward compatibility Agent-Logs-Url: https://github.com/lyzgeorge/copilot-api/sessions/10f76b26-728d-41d8-8fb3-432974af4318 Co-authored-by: lyzgeorge <8285196+lyzgeorge@users.noreply.github.com> --- src/routes/reasoning-context.ts | 2 +- src/services/copilot/create-chat-completions.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts index 03f851c31..7bef76e73 100644 --- a/src/routes/reasoning-context.ts +++ b/src/routes/reasoning-context.ts @@ -3,7 +3,7 @@ import type { ChatCompletionsPayload } from "~/services/copilot/create-chat-comp import type { Model } from "~/services/copilot/get-models" export interface ReasoningContext { - reasoningEffort?: "low" | "medium" | "high" + reasoningEffort?: "low" | "medium" | "high" | string thinkingBudget?: number } diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 15f3e0a21..8da439c24 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -143,7 +143,7 @@ export interface ChatCompletionsPayload { logit_bias?: Record | null 
logprobs?: boolean | null response_format?: { type: "json_object" } | null - reasoning_effort?: "low" | "medium" | "high" | null + reasoning_effort?: "low" | "medium" | "high" | (string & {}) | null seed?: number | null stream_options?: { include_usage?: boolean } | null thinking_budget?: number | null From dcfb3690c60ac4c09832f8dec3ea1f60afafd947 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:54:01 +0000 Subject: [PATCH 7/8] fix: use (string & {}) in ReasoningContext for consistent IDE autocomplete Agent-Logs-Url: https://github.com/lyzgeorge/copilot-api/sessions/10f76b26-728d-41d8-8fb3-432974af4318 Co-authored-by: lyzgeorge <8285196+lyzgeorge@users.noreply.github.com> --- src/routes/reasoning-context.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/routes/reasoning-context.ts b/src/routes/reasoning-context.ts index 7bef76e73..ee542e273 100644 --- a/src/routes/reasoning-context.ts +++ b/src/routes/reasoning-context.ts @@ -3,7 +3,7 @@ import type { ChatCompletionsPayload } from "~/services/copilot/create-chat-comp import type { Model } from "~/services/copilot/get-models" export interface ReasoningContext { - reasoningEffort?: "low" | "medium" | "high" | string + reasoningEffort?: "low" | "medium" | "high" | (string & {}) thinkingBudget?: number } From 4f0d9cafafd0b7306972beb801e3b8599c5c9c64 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:57:16 +0000 Subject: [PATCH 8/8] docs: add fork improvements hook at top of README Agent-Logs-Url: https://github.com/lyzgeorge/copilot-api/sessions/83622886-a808-4ba6-820c-75ed8300f3ab Co-authored-by: lyzgeorge <8285196+lyzgeorge@users.noreply.github.com> --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 92f0df518..b0b3a55aa 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,18 @@ # Copilot API 
Proxy +> **This is a fork of [ericc-ch/copilot-api](https://github.com/ericc-ch/copilot-api) with full reasoning / extended-thinking support added.** +> +> **What this fork adds:** +> +> - **Capability-aware reasoning routing** — reads each model's `capabilities.supports` at startup and routes `reasoning_effort` / `thinking_budget` only to models that actually support them; unsupported models silently strip the fields. +> - **Anthropic ↔ OpenAI thinking translation** — `thinking: { type: "enabled", budget_tokens: N }` on the `/v1/messages` surface is automatically translated into `reasoning_effort: "high"` + `thinking_budget` for the upstream call, and vice versa. +> - **Streaming thinking traces** — Claude thinking streams emit proper `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events so Claude Code and similar clients see native thinking UIs. +> - **Forward-compatible `reasoning_effort`** — type accepts any string (not just `"low" | "medium" | "high"`), so new model-specific values like `"xhigh"` or `"minimal"` are transparently passed through without code changes. +> +> Everything else — auth, rate limiting, usage dashboard, CLI flags — is identical to the upstream project. + +--- + **One Copilot subscription. Every frontier reasoning model. OpenAI and Anthropic shaped.** Point Claude Code, Cline, or your own scripts at a single localhost URL and unlock Claude Sonnet 4.6, GPT-5, Gemini, and friends — with real reasoning traces and thinking budgets routed to whichever knob the upstream model actually supports. > [!WARNING]