diff --git a/dto/claude.go b/dto/claude.go
index d7fed412aaa..3d79bfa3cf1 100644
--- a/dto/claude.go
+++ b/dto/claude.go
@@ -171,9 +171,17 @@ func (c *ClaudeMessage) ParseContent() ([]ClaudeMediaMessage, error) {
 }
 
 type Tool struct {
-	Name        string                 `json:"name"`
-	Description string                 `json:"description,omitempty"`
-	InputSchema map[string]interface{} `json:"input_schema"`
+	Name         string                 `json:"name"`
+	Description  string                 `json:"description,omitempty"`
+	InputSchema  map[string]interface{} `json:"input_schema"`
+	CacheControl *ClaudeCacheControl    `json:"cache_control,omitempty"`
+}
+
+// ClaudeCacheControl mirrors Anthropic's prompt-caching marker.
+// See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+type ClaudeCacheControl struct {
+	Type string `json:"type"`
+	TTL  string `json:"ttl,omitempty"`
 }
 
 type InputSchema struct {
diff --git a/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/.openspec.yaml b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/.openspec.yaml
new file mode 100644
index 00000000000..8b769149815
--- /dev/null
+++ b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/.openspec.yaml
@@ -0,0 +1,2 @@
+schema: spec-driven
+created: 2026-05-20
diff --git a/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/design.md b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/design.md
new file mode 100644
index 00000000000..87fc5954873
--- /dev/null
+++ b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/design.md
@@ -0,0 +1,149 @@
+## Context
+
+The gateway today routes `POST /v1/responses` through a single relay dispatch and supports two upstream surface families: OpenAI-compatible (`/v1/chat/completions`, `/v1/responses` on OpenAI itself) and Anthropic Messages (`/v1/messages`). When a `/v1/responses` request is routed to an Anthropic-typed channel, no translation layer exists for either the request body or the streaming response, so the request fails. Adding the missing pipeline lets a single inbound request shape (Responses-API) be served by either upstream family.
+
+The reference behavioral surface (analyzed externally, source-free) establishes a stable contract: a **two-step pivot** through an intermediate Chat-Completions-shaped object, on both the request side and the response side. Reusing that pivot keeps each translator focused and gives a clean composition: Responses ↔ Chat-Completions ↔ Anthropic.
+
+The existing codebase already covers the Chat-Completions ↔ Anthropic legs end-to-end:
+
+- `relay/channel/claude/relay-claude.go::RequestOpenAI2ClaudeMessage` — Chat-Completions request → Anthropic Messages request. Already handles system extraction, tool_use/tool_result ordering, image mapping (data: vs http:), `max_tokens` adjustment for thinking and tools, response_format JSON-mode shim, and merge of consecutive same-role messages.
+- `relay/channel/claude/relay-claude.go::ClaudeStreamHandler` (+ `StreamResponseClaude2OpenAI`, `FormatClaudeResponseInfo`) — streaming Anthropic response → Chat-Completions chunks, including cache-token decomposition and finish_reason mapping.
+- `relay/channel/claude/relay-claude.go::ClaudeHandler` (+ `ResponseClaude2OpenAI`) — non-streaming Anthropic response → Chat-Completions response.
+
+The only legs that do NOT yet exist are: Responses-request → Chat-Completions-request, and Chat-Completions-stream → Responses-events (plus a non-streaming variant of the latter). This change therefore adds exactly those legs as new functions under `service/openaicompat/`, plus one orchestration file under `relay/` that mirrors the existing `relay/chat_completions_via_responses.go` in the opposite direction.
+
+Other anchors used by this change:
+
+- The relay format dispatch keys off `info.RelayMode == relayconstant.RelayModeResponses` and `info.ApiType == appconstant.APITypeAnthropic`; the new translation triggers at that exact branch in `relay/responses_handler.go`.
+- The project's JSON wrapper (`common.Marshal`/`common.Unmarshal`) is mandatory (project Rule 1).
+- Env-var feature flags follow the `common.GetEnvOrDefaultBool("FLAG_NAME", default)` pattern (see `common/env.go`).
+
+## Goals / Non-Goals
+
+**Goals:**
+- Provide a complete, source-free behavioral specification of the two pipelines (request and response).
+- Maintain a clean separation: each translator function takes a body or chunk and returns the next-stage body or chunk, with no I/O side effects.
+- Preserve all existing behavior for non-Anthropic upstreams and for non-Responses inbound requests.
+- Express each behavioral invariant as an objectively checkable requirement in the capability spec.
+- Establish a per-stream state object that survives across chunk callbacks (sequence numbers, item indices, buffered reasoning text, tool-call open/close state).
+
+**Non-Goals:**
+- Picking the final Go package path (left for Phase 3).
+- Specifying internal struct names (left for Phase 3, beyond placeholders).
+- Modifying quota, billing, retry, or auto-ban behavior.
+- Adding new channel adaptors or external dependencies.
+
+## Decisions
+
+### D1. Two-step pivot through a Chat-Completions intermediate
+
+The translator does **not** map Responses-API ↔ Anthropic Messages directly. It maps Responses → ChatCompletions → AnthropicMessages on the request side, and AnthropicMessages → ChatCompletions → Responses on the response side.
+
+- *Why*: The Chat-Completions shape is the most stable and most widely implemented "lingua franca" inside the gateway (the existing OpenAI-compatible path already uses it). Pivoting through it means the new code only adds two missing legs (Responses→ChatCompletions on the request side, ChatCompletions→Responses on the response side) and reuses the existing ChatCompletions↔Anthropic legs.
+- *Alternative considered*: Direct Responses↔Anthropic translator. Rejected — doubles the surface area we need to maintain, and creates a second source of truth for tool-use ordering and reasoning passthrough.
+
+### D2. Stateful streaming translators
+
+Streaming translators take `(chunk, state)` and return `(events[], state')`. The state object holds: sequence counter, open item indices, buffered reasoning text, tool-call index → call_id map, "started/completed sent" flags, accumulated usage. Translators only emit events; they do not write to a socket.
+
+- *Why*: Lets the outer SSE handler stay protocol-agnostic and lets us unit-test the translators with deterministic chunk-by-chunk inputs.
+- *Alternative considered*: Pure functional translators with no state. Rejected — Responses-API events carry monotonically increasing `sequence_number` and require open/close bookkeeping across many chunks.
+
+### D3. Open/close discipline for content blocks
+
+The streaming translator enforces the Responses-API contract:
+1. `response.created` and `response.in_progress` each fire exactly once, at the first usable chunk.
+2. Each `output_item` (message, reasoning, function_call) is bracketed by `output_item.added` and `output_item.done`; deltas only fire between them.
+3. Switching from reasoning to text closes the reasoning block before opening the text block. Switching from text to a tool call closes the text block before opening the tool-call item.
+4. On finish, every open block is closed in deterministic order before `response.completed` fires.
+5. A `null` chunk (end-of-stream sentinel from the SSE reader) triggers the flush path which closes any still-open blocks and emits `response.completed` exactly once.
+
+### D4. Tool-call ID hygiene at the boundary
+
+The Anthropic API requires tool IDs to match `^[a-zA-Z0-9_-]+$` and the Responses API caps tool IDs at 64 characters. The translator follows a three-tier sanitization policy on the upstream Anthropic side:
+
+1. **Pass-through** when the ID already matches the regex AND is ≤ 64 characters.
+2. **Strip-and-keep** when the ID contains some invalid characters: drop every char not in `[a-zA-Z0-9_-]`; if the residue is non-empty AND ≤ 64 characters, use the residue.
+3. **UUID fallback** when the ID is empty, becomes empty after stripping, or exceeds 64 characters: generate a fresh UUID (no deterministic synthesis, no positional encoding).
+
+On the OUTBOUND Responses-side, IDs longer than 64 characters are clamped to the first 64 characters.
+
+- *Why*: pass-through preserves client-supplied IDs that are already valid; strip-and-keep recovers common patterns like `call:abc/123` (→ `callabc123`) while keeping the ID recognizable enough for the client to correlate; UUID fallback is simpler than positional synthesis and avoids leaking message-index/tool-call-index information to clients. Determinism for prompt-cache continuity is unnecessary because the upstream cache key is computed by Anthropic from the prompt content, not from tool-call IDs.
+
+### D5. Tool-result placement repair
+
+Anthropic requires that each `tool_use` block in an assistant message be followed immediately by a separate user message whose content is the matching `tool_result` block. The translator:
+- Splits any user message that mixes `tool_result` with other content; the `tool_result` goes first in its own message.
+- Drops assistant text blocks that appear AFTER a `tool_use` block in the same message (Anthropic rejects them).
+- Merges consecutive same-role messages after the split.
+- If an assistant message contains tool_calls and the next message has no matching tool_result, injects an empty tool_result for each missing call so the upstream does not 400.
+
+### D6. Reasoning passthrough has two modes
+
+- **Reasoning as a separate output item** (preferred for clients that understand Responses-API reasoning items): when the upstream emits `reasoning_content` deltas, the translator opens a `reasoning` output item and emits `reasoning_summary_text.delta` events.
+- **Reasoning embedded as `<think>...</think>` in text content**: legacy upstreams put thinking text inline. The translator recognises `<think>` and `</think>` markers in the text stream and routes the enclosed text into the reasoning channel instead of the text channel.
+
+### D7. Usage propagation is lossless across the pivot
+
+Cache tokens flow through the pivot without being dropped:
+- Anthropic `cache_read_input_tokens` → Chat-Completions `prompt_tokens_details.cached_tokens` → Responses `input_tokens_details.cached_tokens`.
+- Anthropic `cache_creation_input_tokens` → Chat-Completions `prompt_tokens_details.cache_creation_tokens`.
+- `input_tokens = prompt_tokens − cached_tokens − cache_creation_tokens` is the canonical decomposition rule applied at the Chat-Completions → Anthropic hop.
+
+### D8. `max_tokens` adjustment is upstream-friendly
+
+The translator:
+- Falls back to a default `max_tokens` if the client did not provide one.
+- Raises `max_tokens` to a configurable minimum when `tools[]` is non-empty (prevents truncated tool arguments).
+- Raises `max_tokens` above `thinking.budget_tokens + buffer` (Anthropic requires strictly greater).
+
+### D9. System prompt extraction and JSON-mode shim
+
+- All `role: "system"` messages in the intermediate Chat-Completions shape are concatenated and lifted to the Anthropic `system` block list.
+- A Responses-API `instructions` field is treated as a single system message at the head of the message list.
+- `response_format = json_schema` appends a system block telling the model to emit strict JSON matching the supplied schema. `response_format = json_object` appends a generic strict-JSON instruction. (Anthropic has no native equivalent.)
+
+### D10. Image input mapping
+
+- Responses-API `input_image` with `image_url` (string) becomes intermediate `image_url` with `{ url, detail: "auto" }`.
+- Intermediate `image_url` whose URL starts with `data:<mime>;base64,...` becomes Anthropic `image` with `source: { type: "base64", media_type, data }`.
+- Intermediate `image_url` whose URL starts with `http://` or `https://` becomes Anthropic `image` with `source: { type: "url", url }`.
+- Any other URL shape is dropped (Anthropic does not support arbitrary file IDs natively).
+
+### D11. Reasoning items in INPUT
+
+When a `reasoning` input item appears between turns, its text is extracted (from `summary[].text` if present, else from `content[].text`) and **buffered** until the next assistant message or function_call; it is then attached as `reasoning_content` to that assistant turn. A `reasoning` item is never emitted as a standalone Chat-Completions message.
+
+### D12. Format detection by endpoint
+
+The dispatch decision uses the endpoint path as the primary key: `/v1/responses` → Responses-API source format; `/v1/messages` → Anthropic source format; `/v1/chat/completions` → Chat-Completions source format, unless the JSON body has a top-level `input` field that is an array, in which case the request is reclassified as Responses-API source (for CLI clients that send Responses bodies to the chat endpoint).
+
+## Risks / Trade-offs
+
+- **[Risk]** Streaming SSE order is observable to clients; a bug in open/close discipline produces malformed `output_item` brackets that crash strict SDKs.
+  - **Mitigation**: Behavioral assertions in the spec pin down exact event ordering; tests cover the cross-block transitions (reasoning→text, text→tool_call, finish flush, null-flush).
+- **[Risk]** Tool-call ID UUID fallback assigns a fresh UUID when the client's ID fails the regex AND has no usable residue; the client cannot correlate the resulting tool_use back to its original local ID.
+  - **Mitigation**: UUID fallback only triggers when the original ID is unrecoverable. The strip-and-keep tier handles the common case (`call:abc/123` → `callabc123`) without losing correlation. Document the policy in the operator-facing notes.
+- **[Risk]** Token-usage decomposition (`prompt_tokens − cached_tokens − cache_creation_tokens`, see D7) can underflow to a negative value when upstreams report inconsistent values.
+  - **Mitigation**: Clamp to zero; document the invariant in the spec.
+- **[Risk]** The intermediate Chat-Completions pivot adds latency on the request-build path.
+  - **Mitigation**: All translation is pure-CPU JSON shape rewriting; profile after first integration test pass.
+- **[Risk]** The Anthropic `thinking` block requires `max_tokens > budget_tokens`; clients may set both and break the upstream.
+  - **Mitigation**: Translator raises `max_tokens` automatically; documented in the spec.
+- **[Trade-off]** We do not attempt to round-trip every Responses-API field (`store`, `background`, `prompt_cache_key`, `include`). These are stripped silently. Clients that rely on them receive no error, but the fields have no effect. Phase 3 may decide to surface a warning.
+
+## Migration Plan
+
+- This is additive. No data migration. No client-visible change for requests that previously succeeded.
+- Rollout: feature flag `RESPONSES_TO_ANTHROPIC_ENABLED` read via `common.GetEnvOrDefaultBool("RESPONSES_TO_ANTHROPIC_ENABLED", true)`, **default `true`**. Operators who want the prior "not implemented" behavior can set `RESPONSES_TO_ANTHROPIC_ENABLED=false`.
+- Rollback: set the flag to `false`; the gateway falls back to the existing `adaptor.ConvertOpenAIResponsesRequest` path which returns the pre-change error.
+
+## Locked decisions
+
+- **Package placement** — confirmed: shape converters in `service/openaicompat/`, orchestration in `relay/responses_via_chat_completions.go`.
+- **Public translator entry-point names** — confirmed: `ResponsesRequestToChatCompletionsRequest`, `ChatCompletionsStreamToResponsesEvents`, `ChatCompletionsResponseToResponsesResponse`.
+- **Per-stream state struct** — confirmed: `ResponsesStreamState` exported from `service/openaicompat/`.
+- **OAuth tool-name prefix** — confirmed: not applicable; no prefix is applied and no name-mapping table is kept.
+- **JSON-mode system-prompt strings** — confirmed: hard-coded English.
+- **Tool-call ID strategy** — confirmed: pass-through / strip-and-keep / UUID fallback (D4 above). No deterministic positional synthesis.
+- **Feature flag default** — confirmed: `RESPONSES_TO_ANTHROPIC_ENABLED=true` (default ON).
diff --git a/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/proposal.md b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/proposal.md
new file mode 100644
index 00000000000..418a00150ec
--- /dev/null
+++ b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/proposal.md
@@ -0,0 +1,78 @@
+## Why
+
+Clients of the gateway today can hit `POST /v1/responses` (OpenAI Responses API shape) and expect to be served by any routed upstream channel. The relay supports OpenAI-compatible upstreams and Anthropic `/v1/messages` upstreams independently, but when a `/v1/responses` request is routed to an Anthropic-typed channel the gateway has no end-to-end translation path: the request shape cannot be forwarded to `/v1/messages` as-is, and the upstream streaming events cannot be re-encoded into Responses-API events without a translation layer.
+
+This change introduces that translation layer so a single Responses-API request can be served transparently by an Anthropic upstream, with full feature parity for streaming text, reasoning (thinking) passthrough, multi-turn tool use, image input, system prompt extraction, JSON-mode hints, and token usage (including prompt cache tokens) propagation.
+
+## What Changes
+
+- **New translation pipeline** for inbound requests: Responses-shaped request → Chat-Completions-shaped intermediate → Anthropic Messages-shaped request, wired into the existing relay format dispatch so that routing a `/v1/responses` request to an Anthropic-typed channel succeeds instead of returning "not implemented".
+- **New translation pipeline** for outbound responses (both streaming SSE and final non-streaming): Anthropic Messages event stream → Chat-Completions chunk shape → Responses-API event stream, including correct `response.created` / `response.in_progress` / `response.output_item.added` / delta / `response.completed` event ordering and sequence numbering.
+- **Reasoning passthrough**: when the upstream emits a `thinking` block, the gateway re-emits it as Responses-API `reasoning` output items with proper `reasoning_summary_text.delta` / `reasoning_summary_text.done` / `reasoning_summary_part.done` / `output_item.done` event sequencing. `<think>...</think>` inline markers in regular text are also recognised and rerouted.
+- **System prompt extraction**: a Responses-API `instructions` field, or a `system` message in the intermediate Chat-Completions shape, is lifted into the Anthropic `system` block list with proper cache_control handling.
+- **Tool use round-tripping**: tool declarations, tool calls, and tool results are converted in both directions; tool-use blocks and their tool_result counterparts are placed in adjacent Anthropic messages per Anthropic API rules; missing tool results are auto-injected as empty before forwarding upstream; assistant text emitted after a `tool_use` block is dropped; consecutive same-role messages are merged.
+- **Tool-call ID hygiene**: every tool call must have an ID. IDs that already match the Anthropic-compatible regex `^[a-zA-Z0-9_-]+$` and are ≤ 64 characters are passed through unchanged. IDs that contain invalid characters are sanitized by stripping non-`[a-zA-Z0-9_-]` characters and keeping the result if non-empty; otherwise a fresh UUID is generated as the replacement. IDs longer than 64 characters are clamped at the Responses-side boundary. Nameless tool calls and hosted (no-name) tool declarations are filtered out before forwarding upstream.
+- **`max_tokens` clamp**: `max_tokens` is set from the request, raised to a configurable minimum when tools are present (to avoid truncated tool arguments), and raised above `thinking.budget_tokens + buffer` when the upstream is in thinking mode (Anthropic requires `max_tokens > budget_tokens`).
+- **Image input mapping**: Responses-API `input_image` items are converted to intermediate `image_url`, then to Anthropic `image` blocks; `data:` URLs become `base64` sources and `http(s)` URLs become `url` sources.
+- **Reasoning-effort mapping**: a Chat-Completions-shaped `reasoning_effort` enum (none/low/medium/high/xhigh) is converted to a Claude `thinking.budget_tokens` value when no explicit `thinking` block is present.
+- **Response-format mapping**: `response_format = json_object` or `json_schema` injects an extra system-prompt block instructing the model to return strict JSON (Anthropic has no native equivalent field).
+- **Usage propagation**: prompt cache read/write tokens are propagated through every translation hop. On the Anthropic → Chat-Completions hop, `cache_read_input_tokens` and `cache_creation_input_tokens` flow into `prompt_tokens_details.cached_tokens` and `prompt_tokens_details.cache_creation_tokens`, respectively. On the Chat-Completions → Responses hop, `prompt_tokens_details.cached_tokens` flows into `input_tokens_details.cached_tokens`.
+- **Input shape normalization**: a string `input` is wrapped as a single user message with an `input_text` part; an empty `input` array is replaced with a single placeholder message so the upstream does not receive `messages: []`; items with a `role` field but no `type` are treated as `message` items.
+- **Reasoning items in input**: a `reasoning` input item is buffered and attached to the next assistant message as `reasoning_content`, never forwarded as a standalone message.
+- **Failure mapping**: upstream `error` and `response.failed` events surface as a documented OpenAI-shaped error chunk (no duplicate emission).
+- The current behavior of returning a 5xx-class "not implemented" error for `/v1/responses` requests routed to Anthropic-typed channels is **REMOVED**.
+
+## Capabilities
+
+### New Capabilities
+- `responses-to-anthropic-translation`: end-to-end translation of OpenAI Responses-API requests and streamed responses to and from the Anthropic Messages-API shape, including request body conversion, response event re-encoding, tool-use round-tripping, reasoning passthrough, image input mapping, system prompt extraction, JSON-mode hint injection, token usage propagation (including prompt-cache token classes), and input-shape normalization.
+
+### Modified Capabilities
+- (none — this introduces a new translation pipeline rather than altering existing spec-level behavior. The change does not modify existing channel BYOK, quota, billing, retry, or auto-ban behavior.)
+
+## Scope
+
+**In scope (this change):**
+- Request shape: Responses-API `{ input, instructions, tools, tool_choice, temperature, top_p, max_tokens, reasoning, reasoning_effort, response_format, thinking, model, stream }`
+- Response stream: text deltas, reasoning deltas, tool-call deltas, finish reasons (`stop`, `length`, `tool_calls`), usage (including cache tokens)
+- Both streaming and non-streaming Responses-API client modes
+- Tool declarations in both `{ type: "function", function: { name, ... } }` and bare `{ type: "function", name, ... }` Responses-API forms; pass-through of built-in (non-function) tool types when target is Anthropic
+- Behavioral parity for the existing flow of intermediate-Chat-Completions ↔ Anthropic Messages, since the Responses-to-Anthropic path piggybacks on it
+
+**Out of scope (explicit non-goals):**
+- File-search / web-search / computer-use / code-interpreter hosted tools on the Responses-API surface beyond pass-through of declarations
+- Anthropic-side `output_config`, structured-output JSON schema enforcement, and provider-specific quirks for non-Anthropic upstreams (these are pre-existing behaviors and are not modified here)
+- Persistent conversation storage (`store: true` semantics); the translator strips this field
+- Background mode (`background: true` Responses-API field)
+- Encrypted content reasoning items (`encrypted_content` summary fallback) beyond the documented text-extraction path
+- Any change to quota, billing, log attribution, or channel selection
+- Any change to the existing OpenAI-compatible `/v1/chat/completions` path
+
+## Impact
+
+- **Affected APIs**: `POST /v1/responses` becomes routable to Anthropic-typed channels.
+- **Affected code areas**:
+  - `service/openaicompat/responses_to_chat.go` (new function `ResponsesRequestToChatCompletionsRequest`)
+  - `service/openaicompat/chat_to_responses.go` (new functions `ChatCompletionsStreamToResponsesEvents` + `ChatCompletionsResponseToResponsesResponse` + per-stream state struct)
+  - `relay/responses_via_chat_completions.go` (new orchestration file, mirror of `relay/chat_completions_via_responses.go`)
+  - `relay/responses_handler.go` (new branch when `info.ApiType == APITypeAnthropic`, calling the new orchestration before falling back to `adaptor.ConvertOpenAIResponsesRequest`)
+- **Reused converters (not duplicated)**:
+  - `relay/channel/claude/relay-claude.go::RequestOpenAI2ClaudeMessage` — Chat-Completions request → Anthropic Messages request (already handles tool ordering, max_tokens adjustment, image mapping, system extraction)
+  - `relay/channel/claude/relay-claude.go::ClaudeStreamHandler` + `StreamResponseClaude2OpenAI` — Claude streaming response → Chat-Completions chunks
+  - `relay/channel/claude/relay-claude.go::ClaudeHandler` + `ResponseClaude2OpenAI` — Claude non-streaming response → Chat-Completions response
+- **Dependencies**: no new third-party dependencies; uses the project's existing JSON wrapper (`common.Marshal`/`common.Unmarshal`) and the standard library UUID/random generator.
+- **Database**: no migrations.
+- **Frontend**: no UI changes; the translation is transparent to clients.
+- **Backward compatibility**: additive. Requests that were previously rejected ("not implemented") now succeed. Requests that previously succeeded (Responses-to-OpenAI-compatible upstreams) are not affected.
+
+## Locked decisions (Phase 3)
+
+- **Package placement**: shape converters land in `service/openaicompat/` parallel to the existing `chat_to_responses.go`/`responses_to_chat.go`; orchestration lands in `relay/responses_via_chat_completions.go` mirroring the existing `relay/chat_completions_via_responses.go`.
+- **Naming**: PascalCase `XToY` style matching project convention: `ResponsesRequestToChatCompletionsRequest`, `ChatCompletionsStreamToResponsesEvents`, `ChatCompletionsResponseToResponsesResponse`. Per-stream state struct: `ResponsesStreamState`.
+- **Reuse strategy**: the `ChatCompletions ↔ AnthropicMessages` legs are NOT reimplemented; the existing Claude adaptor converters listed above are called directly.
+- **Tool-call ID strategy**: pass-through when valid; sanitize non-empty residue when partially invalid; UUID fallback (no deterministic synthesis) when fully invalid. Clamp to 64 characters at the Responses-side boundary.
+- **OAuth tool-name prefix**: NOT applicable to this project (the Anthropic adaptor uses `x-api-key`, not an OAuth flow). The translator hard-codes no prefix; no `prefixedName→originalName` map exists.
+- **JSON-mode prompt text**: hard-coded English, matching the convention of other converters in this codebase.
+- **Test style**: assertion-style using `testify/require` and `t.Errorf`, matching `relay/channel/claude/relay_claude_test.go`. No golden files.
+- **Feature gate**: `RESPONSES_TO_ANTHROPIC_ENABLED`, default `true`. Operators can set the variable to `false` to restore the prior "not implemented" behavior.
+- **Conflict surface**: clean. The only uncommitted change at the time of this proposal is this OpenSpec change itself; no in-flight work touches `relay/responses_handler.go` or `relay/channel/claude/`.
diff --git a/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/specs/responses-to-anthropic-translation/spec.md b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/specs/responses-to-anthropic-translation/spec.md
new file mode 100644
index 00000000000..b0300a40765
--- /dev/null
+++ b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/specs/responses-to-anthropic-translation/spec.md
@@ -0,0 +1,856 @@
+## ADDED Requirements
+
+### Requirement: Endpoint-driven source format detection
+
+The gateway SHALL classify the inbound request's source format from the URL path before consulting the body shape. A request whose path contains `/v1/responses` SHALL be treated as the Responses-API source format. A request whose path contains `/v1/messages` SHALL be treated as the Anthropic-Messages source format. A request whose path contains `/v1/chat/completions` SHALL be treated as the OpenAI Chat-Completions source format, except that when its JSON body has a top-level `input` field that is an array, it SHALL be reclassified as the Responses-API source format.
+
+#### Scenario: `/v1/responses` path is Responses-API source
+
+- **WHEN** a client sends `POST /v1/responses`
+- **THEN** the gateway SHALL select the Responses-API translator chain regardless of body shape
+
+#### Scenario: `/v1/messages` path is Anthropic source
+
+- **WHEN** a client sends `POST /v1/messages`
+- **THEN** the gateway SHALL select the Anthropic-source translator chain regardless of body shape
+
+#### Scenario: `/v1/chat/completions` with Responses-style body
+
+- **WHEN** a client sends `POST /v1/chat/completions` with a JSON body whose `input` field is an array
+- **THEN** the gateway SHALL select the Responses-API source format
+
+#### Scenario: `/v1/chat/completions` with normal body
+
+- **WHEN** a client sends `POST /v1/chat/completions` with a JSON body that has no `input` array and uses `messages[]`
+- **THEN** the gateway SHALL select the OpenAI Chat-Completions source format
+
+### Requirement: Two-step pivot through Chat-Completions intermediate
+
+When the inbound source format and the outbound target format differ, the gateway SHALL perform translation in two hops through a Chat-Completions-shaped intermediate object. The Responses-API to Anthropic-Messages request translation SHALL execute `Responses → ChatCompletions` followed by `ChatCompletions → AnthropicMessages`. The Anthropic-Messages to Responses-API response translation SHALL execute `AnthropicMessages → ChatCompletions` followed by `ChatCompletions → ResponsesEvents`.
+
+#### Scenario: Request pivot is two-hop
+
+- **WHEN** a Responses-API request body is routed to an Anthropic-typed channel
+- **THEN** the request body delivered to the upstream SHALL be the result of applying the Responses→ChatCompletions translator followed by the ChatCompletions→AnthropicMessages translator, in that order
+
+#### Scenario: Response pivot is two-hop
+
+- **WHEN** an Anthropic streaming response chunk is received and the original client expects Responses-API events
+- **THEN** the chunk SHALL be passed through the Anthropic→ChatCompletions translator, and each emitted Chat-Completions chunk SHALL be passed through the ChatCompletions→ResponsesEvents translator before being written to the client
+
+#### Scenario: Same-format requests skip translation
+
+- **WHEN** the source and target formats are identical
+- **THEN** no translator is invoked and the body or chunk passes through unchanged
+
+### Requirement: Responses-API input shape normalization
+
+The gateway SHALL accept the Responses-API `input` field in three shapes and normalize them to an internal array of input items before translation: (a) a non-empty string, (b) an empty or whitespace-only string, (c) an array (possibly empty). A non-empty string SHALL be wrapped as a single user message item whose content is a single `input_text` part with the original text. An empty or whitespace-only string SHALL be wrapped as a single user message item whose content is a single `input_text` part with the placeholder text `"..."`. An empty array SHALL be replaced with a single user message item whose content is a single `input_text` part with the placeholder text `"..."`. A non-empty array SHALL be passed through unchanged. Any other shape SHALL be treated as invalid and SHALL cause the body to be forwarded unchanged (no translation).
+
+#### Scenario: String input is wrapped as user message
+
+- **WHEN** the request body contains `input: "hello world"`
+- **THEN** the normalized input items SHALL be `[{ type: "message", role: "user", content: [{ type: "input_text", text: "hello world" }] }]`
+
+#### Scenario: Empty string input is wrapped as placeholder
+
+- **WHEN** the request body contains `input: ""`
+- **THEN** the normalized input items SHALL be `[{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }]`
+
+#### Scenario: Empty array input is replaced with placeholder
+
+- **WHEN** the request body contains `input: []`
+- **THEN** the normalized input items SHALL be `[{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }]`
+
+#### Scenario: Non-empty array is passed through
+
+- **WHEN** the request body contains `input: [{ type: "message", role: "user", content: [...] }]`
+- **THEN** the normalized input items SHALL equal the original array
+
+#### Scenario: Non-string non-array input
+
+- **WHEN** the request body contains `input: 42` or `input: { foo: "bar" }`
+- **THEN** the gateway SHALL forward the body unchanged without invoking the Responses→ChatCompletions translator
+
+### Requirement: Responses-API `instructions` becomes a system message
+
+When the Responses-API request body contains a non-empty `instructions` string, the gateway SHALL prepend a single `role: "system"` message whose `content` is that string to the Chat-Completions `messages[]`.
+
+#### Scenario: Instructions prepended as system
+
+- **WHEN** the request body contains `instructions: "You are helpful."`
+- **THEN** the first message in the resulting Chat-Completions `messages[]` SHALL be `{ role: "system", content: "You are helpful." }`
+
+#### Scenario: Empty instructions is skipped
+
+- **WHEN** the request body contains `instructions: ""` or no `instructions` field
+- **THEN** no system message SHALL be prepended on behalf of `instructions`
+
+### Requirement: Input item type detection with role-only fallback
+
+The gateway SHALL determine each input item's type by reading its `type` field. If the `type` field is missing but a `role` field is present, the item SHALL be treated as type `"message"`. If neither field is present, the item SHALL be skipped silently.
+
+#### Scenario: Explicit type wins
+
+- **WHEN** an input item is `{ type: "function_call", call_id: "x", name: "y", arguments: "{}" }`
+- **THEN** the item SHALL be processed as a function call
+
+#### Scenario: Role-only fallback
+
+- **WHEN** an input item is `{ role: "user", content: [{ type: "input_text", text: "hi" }] }` with no `type` field
+- **THEN** the item SHALL be processed as type `"message"`
+
+#### Scenario: Neither type nor role
+
+- **WHEN** an input item is `{ foo: "bar" }`
+- **THEN** the item SHALL be skipped without error
+
+### Requirement: Message item content normalization
+
+For each input item of type `"message"`, the gateway SHALL map content parts to Chat-Completions content parts as follows: `input_text` and `output_text` parts SHALL become `{ type: "text", text }` parts; `input_image` parts SHALL become `{ type: "image_url", image_url: { url, detail } }` parts where `url` is the part's `image_url` field (if a string) or `file_id` field (if no `image_url`), or the empty string `""` if neither is present, and `detail` is the part's `detail` field or `"auto"` if absent. Parts of any other type SHALL be passed through unchanged.
+
+#### Scenario: input_text becomes text
+
+- **WHEN** a message item has `content: [{ type: "input_text", text: "hello" }]`
+- **THEN** the converted Chat-Completions message content SHALL be `[{ type: "text", text: "hello" }]`
+
+#### Scenario: output_text becomes text
+
+- **WHEN** a message item has `content: [{ type: "output_text", text: "answer" }]`
+- **THEN** the converted Chat-Completions message content SHALL be `[{ type: "text", text: "answer" }]`
+
+#### Scenario: input_image with image_url becomes image_url
+
+- **WHEN** a message item has `content: [{ type: "input_image", image_url: "https://example.com/a.png", detail: "high" }]`
+- **THEN** the converted Chat-Completions message content SHALL be `[{ type: "image_url", image_url: { url: "https://example.com/a.png", detail: "high" } }]`
+
+#### Scenario: input_image with file_id fallback
+
+- **WHEN** a message item has `content: [{ type: "input_image", file_id: "file_abc" }]` and no `image_url`
+- **THEN** the converted content SHALL be `[{ type: "image_url", image_url: { url: "file_abc", detail: "auto" } }]`
+
+#### Scenario: input_image with no url or file_id
+
+- **WHEN** a message item has `content: [{ type: "input_image" }]` with neither `image_url` nor `file_id`
+- **THEN** the converted content SHALL be `[{ type: "image_url", image_url: { url: "", detail: "auto" } }]`
+
+### Requirement: Function-call items become assistant tool_calls
+
+For each input item of type `"function_call"`, the gateway SHALL append the call to a buffered assistant message in the form `{ role: "assistant", content: null, tool_calls: [...] }`. Each tool call SHALL be `{ id: <call_id>, type: "function", function: { name, arguments } }`. The buffered assistant message SHALL be flushed to the message list when the next non-function-call item is encountered or at end-of-input. Function-call items whose `name` is missing, not a string, or trimmed-empty SHALL be skipped silently.
+
+#### Scenario: Single function call
+
+- **WHEN** input contains `{ type: "function_call", call_id: "c1", name: "search", arguments: "{\"q\":\"x\"}" }` followed by no more items
+- **THEN** the resulting messages SHALL include `{ role: "assistant", content: null, tool_calls: [{ id: "c1", type: "function", function: { name: "search", arguments: "{\"q\":\"x\"}" } }] }`
+
+#### Scenario: Multiple consecutive function calls collapse
+
+- **WHEN** input contains two consecutive function_call items with call_ids `c1` and `c2`
+- **THEN** both calls SHALL be in the same assistant message's `tool_calls` array, in order
+
+#### Scenario: Function call with empty name is dropped
+
+- **WHEN** input contains `{ type: "function_call", call_id: "c1", name: "", arguments: "{}" }`
+- **THEN** the call SHALL NOT appear in any resulting assistant message
+
+#### Scenario: Function call with missing name is dropped
+
+- **WHEN** input contains `{ type: "function_call", call_id: "c1", arguments: "{}" }` with no `name` field
+- **THEN** the call SHALL NOT appear in any resulting assistant message
+
+### Requirement: Function-call-output items become tool messages
+
+For each input item of type `"function_call_output"`, the gateway SHALL flush any buffered assistant message and SHALL append a tool message `{ role: "tool", tool_call_id: <call_id>, content: <output> }` where `<output>` is the item's `output` field if it is a string, or the JSON-stringified value of `output` otherwise.
+
+#### Scenario: String output passes through
+
+- **WHEN** input contains `{ type: "function_call_output", call_id: "c1", output: "result text" }`
+- **THEN** the resulting messages SHALL include `{ role: "tool", tool_call_id: "c1", content: "result text" }`
+
+#### Scenario: Non-string output is JSON-stringified
+
+- **WHEN** input contains `{ type: "function_call_output", call_id: "c1", output: { ok: true, n: 7 } }`
+- **THEN** the resulting messages SHALL include `{ role: "tool", tool_call_id: "c1", content: "{\"ok\":true,\"n\":7}" }`
+
+#### Scenario: Output flushes pending assistant first
+
+- **WHEN** input contains a `function_call` item followed by a `function_call_output` item
+- **THEN** the assistant message containing the call SHALL be appended to the message list BEFORE the tool message
+
+### Requirement: Reasoning input items are buffered, not emitted
+
+For each input item of type `"reasoning"`, the gateway SHALL extract its text by joining the `text` fields of every entry in its `summary[]` array with newlines if `summary[]` is a non-empty array; otherwise by joining the `text` fields of every entry in its `content[]` array with newlines if `content[]` is a non-empty array; otherwise it SHALL extract an empty string. The extracted text SHALL be buffered; if the buffer already holds text from an earlier `reasoning` item that has not yet been attached, the new text SHALL be appended to it after a newline separator. The buffered text SHALL be attached as `reasoning_content` to the next assistant message OR to the next buffered assistant tool-call message, whichever comes first. After attachment the buffer SHALL be cleared. A `reasoning` item SHALL NOT appear in the Chat-Completions `messages[]` directly.
+
+#### Scenario: Reasoning text attached to next assistant message
+
+- **WHEN** input contains `{ type: "reasoning", summary: [{ text: "thinking step 1" }] }` followed by `{ type: "message", role: "assistant", content: [{ type: "output_text", text: "answer" }] }`
+- **THEN** the resulting assistant message SHALL be `{ role: "assistant", content: [{ type: "text", text: "answer" }], reasoning_content: "thinking step 1" }`
+
+#### Scenario: Reasoning text attached to tool-call assistant message
+
+- **WHEN** input contains a `reasoning` item followed by a `function_call` item
+- **THEN** the assistant message synthesised for the function_call SHALL include `reasoning_content` equal to the buffered reasoning text
+
+#### Scenario: Reasoning falls back to content array
+
+- **WHEN** input contains `{ type: "reasoning", content: [{ text: "alt thinking" }] }` and no `summary[]`
+- **THEN** the buffered reasoning text SHALL be `"alt thinking"`
+
+#### Scenario: Multiple reasoning items concatenate with newline
+
+- **WHEN** input contains two consecutive `reasoning` items with summaries `"a"` and `"b"`
+- **THEN** the buffered reasoning text presented to the next assistant turn SHALL be `"a\nb"`
+
+#### Scenario: Reasoning buffer is cleared after attachment
+
+- **WHEN** a reasoning item's text has been attached to an assistant message and a subsequent assistant message arrives with no preceding reasoning
+- **THEN** the second assistant message SHALL NOT have `reasoning_content`
+
+### Requirement: Tool declarations conversion (Responses → ChatCompletions)
+
+The gateway SHALL accept Responses-API tool declarations in two shapes: (a) already-Chat-Completions-shaped `{ type: "function", function: { name, description, parameters, strict } }`, which SHALL pass through unchanged; (b) Responses-flat `{ type: "function", name, description, parameters, strict }`, which SHALL be converted to the Chat-Completions shape. A tool declaration whose effective name is missing, non-string, or trimmed-empty SHALL be filtered out (this discards hosted tools that have no `name`). Tool parameter schemas that have `type: "object"` but no `properties` field SHALL be normalized to include `properties: {}`. Tools whose `type` is not `"function"` SHALL be retained unchanged when the target is Anthropic; they SHALL be filtered out when the intermediate is being normalized to OpenAI for non-Anthropic upstreams.
+
+#### Scenario: Already-Chat-Completions tool passes through
+
+- **WHEN** tools contains `{ type: "function", function: { name: "search", parameters: { type: "object", properties: { q: { type: "string" } } } } }`
+- **THEN** the converted tools array SHALL contain that entry unchanged
+
+#### Scenario: Flat Responses tool is converted
+
+- **WHEN** tools contains `{ type: "function", name: "search", description: "find", parameters: { type: "object", properties: {} }, strict: true }`
+- **THEN** the converted tools array SHALL contain `{ type: "function", function: { name: "search", description: "find", parameters: { type: "object", properties: {} }, strict: true } }`
+
+#### Scenario: Empty-name hosted tool is dropped
+
+- **WHEN** tools contains `{ type: "request_user_input" }` (no `name`)
+- **THEN** the converted tools array SHALL NOT contain that entry
+
+#### Scenario: Object schema without properties gets `properties: {}`
+
+- **WHEN** a tool's parameters is `{ type: "object" }`
+- **THEN** the converted parameters SHALL be `{ type: "object", properties: {} }`
+
+### Requirement: Responses-API request-body cleanup
+
+After translating to the Chat-Completions intermediate, the gateway SHALL remove the following fields from the result body: `input`, `instructions`, `include`, `prompt_cache_key`, `store`, `reasoning`.
+
+#### Scenario: All Responses-only fields are removed
+
+- **WHEN** a Responses-API body containing `input`, `instructions`, `include`, `prompt_cache_key`, `store`, and `reasoning` is translated
+- **THEN** the resulting Chat-Completions body SHALL have none of those six fields
+
+### Requirement: System message extraction for Anthropic target
+
+When translating Chat-Completions → Anthropic, the gateway SHALL collect every `role: "system"` message's content into a single `systemParts` list, removing those messages from the main `messages[]`. When `systemParts` is non-empty, the gateway SHALL emit the Anthropic `system` field as an array of text blocks. When the upstream channel type is the Anthropic OAuth profile, the gateway MAY prepend a project-defined client-identity system block; when this block is present, it SHALL be positioned first. When the `system` array contains more than one block, cache_control `{ type: "ephemeral", ttl: "1h" }` SHALL be applied to the LAST system block and to no other block.
+
+#### Scenario: Single system message extracted
+
+- **WHEN** the intermediate has `messages: [{ role: "system", content: "You are helpful." }, { role: "user", content: "hi" }]`
+- **THEN** the Anthropic body SHALL have `system` as a non-empty array containing a text block whose text is or includes `"You are helpful."`, and `messages` SHALL NOT contain the system message
+
+#### Scenario: Multiple system messages concatenated
+
+- **WHEN** the intermediate has two `role: "system"` messages with contents `"A"` and `"B"`
+- **THEN** their texts SHALL be concatenated with newline separators into a single text block in the Anthropic `system` array
+
+#### Scenario: No system messages
+
+- **WHEN** the intermediate has no `role: "system"` messages and no client-identity block is configured
+- **THEN** the Anthropic body SHALL have no `system` field (or an empty `system` is acceptable depending on host config)
+
+#### Scenario: Cache_control applied to last system block
+
+- **WHEN** the Anthropic `system` array has two or more text blocks
+- **THEN** the LAST block SHALL have `cache_control: { type: "ephemeral", ttl: "1h" }` and no other block SHALL have `cache_control`
+
+### Requirement: Tool-use / tool-result ordering for Anthropic
+
+When translating Chat-Completions → Anthropic, the gateway SHALL ensure that every tool_use block in an assistant message is followed in the next message by the matching tool_result block. The translator SHALL:
+1. Split any user-or-tool message that contains both `tool_result` blocks and non-tool-result blocks: the tool_result blocks SHALL be emitted first in their own user message; the remaining blocks SHALL be emitted in a subsequent user message.
+2. Flush the in-progress message immediately after appending tool_use blocks.
+3. Drop assistant text blocks that appear AFTER a `tool_use` block within the same assistant content array (Anthropic rejects them).
+4. Merge consecutive messages that share the same role after the above transforms.
+5. When merging messages that contain tool_result blocks alongside non-tool-result blocks, place all tool_result blocks first in the merged content array.
+
+#### Scenario: Tool_result moved to its own user message
+
+- **WHEN** a Chat-Completions input has a tool message followed by a user message with text content, both originally adjacent
+- **THEN** the Anthropic `messages[]` SHALL contain a user message whose content is exclusively the tool_result block, followed by a user message whose content is the text block
+
+#### Scenario: Assistant text after tool_use is dropped
+
+- **WHEN** an assistant message has content `[{ type: "text", text: "before" }, { type: "tool_use", id: "t1", name: "x", input: {} }, { type: "text", text: "after" }]`
+- **THEN** the Anthropic assistant message content SHALL be `[{ type: "text", text: "before" }, { type: "tool_use", id: "t1", name: "x", input: {} }]` (the `"after"` text is removed)
+
+#### Scenario: Thinking block before tool_use preserved
+
+- **WHEN** an assistant message has content `[{ type: "thinking", thinking: "T" }, { type: "tool_use", id: "t1", name: "x", input: {} }]`
+- **THEN** both blocks SHALL be preserved in the Anthropic assistant message content
+
+#### Scenario: Consecutive user messages are merged
+
+- **WHEN** the intermediate `messages[]` has two consecutive `role: "user"` messages with text contents `"a"` and `"b"`
+- **THEN** the Anthropic `messages[]` SHALL have a single user message whose content includes both text blocks (preserving order)
+
+#### Scenario: Merge with tool_result-first ordering
+
+- **WHEN** merging consecutive user messages, the first contains a `tool_result` block and the second contains a `text` block
+- **THEN** the merged user message's content SHALL list the tool_result block before the text block
+
+### Requirement: Missing tool-result auto-injection
+
+If an assistant message contains one or more tool_calls (OpenAI shape) or tool_use blocks (Claude shape) and the next message does not contain a matching tool_result for at least one of those call IDs, the gateway SHALL insert an empty tool message `{ role: "tool", tool_call_id: <id>, content: "" }` for EACH missing call between the assistant message and whatever follows.
+
+#### Scenario: Single missing tool result is filled
+
+- **WHEN** messages are `[{ role: "assistant", tool_calls: [{ id: "c1", function: { name: "x", arguments: "{}" } }] }, { role: "user", content: "next" }]`
+- **THEN** the resulting messages SHALL be `[{ role: "assistant", ... }, { role: "tool", tool_call_id: "c1", content: "" }, { role: "user", content: "next" }]`
+
+#### Scenario: Multiple missing tool results
+
+- **WHEN** an assistant message has two tool_calls with IDs `c1` and `c2` and the next message is a user message
+- **THEN** TWO empty tool messages SHALL be inserted, one per call ID, in the order the calls appeared
+
+#### Scenario: Existing tool result is not duplicated
+
+- **WHEN** an assistant message has a tool_call with ID `c1` and the next message is `{ role: "tool", tool_call_id: "c1", content: "result" }`
+- **THEN** no additional tool message SHALL be inserted
+
+### Requirement: Tool-call ID sanitization
+
+The gateway SHALL ensure that every tool_call ID (in `tool_calls[].id` of assistant messages, `tool_call_id` of tool messages, `tool_use.id` and `tool_result.tool_use_id` of content blocks) matches the regex `^[a-zA-Z0-9_-]+$` AND is no longer than 64 characters before being forwarded to the Anthropic upstream. The gateway SHALL apply the following three-tier policy in order:
+
+1. **Pass-through**: if the ID already matches the regex AND is ≤ 64 characters, it SHALL be forwarded unchanged.
+2. **Strip-and-keep**: otherwise, the gateway SHALL strip every character not in `[a-zA-Z0-9_-]`. If the residue is non-empty AND ≤ 64 characters, the residue SHALL be used.
+3. **UUID fallback**: otherwise (residue empty, or residue longer than 64 characters), the gateway SHALL generate a fresh RFC-4122 UUID with its dashes removed (yielding a 32-character hexadecimal string that satisfies both the regex and the length limit) and use that as the ID. The fallback SHALL NOT depend on the message index, tool-call index, or tool name.
+
+The same ID replacement SHALL be applied consistently to BOTH the originating `tool_use.id` / `tool_calls[].id` AND any matching `tool_result.tool_use_id` / `tool_call_id` references within the same request so the upstream sees a consistent mapping.
+
+The gateway SHALL also ensure that every tool_call's `type` field is set to `"function"` if missing, and that every tool_call's `function.arguments` field is a JSON string (the gateway SHALL JSON-stringify object values).
+
+#### Scenario: Valid ID passes through
+
+- **WHEN** a tool_call has `id: "call_abc-123"`
+- **THEN** the ID SHALL remain `"call_abc-123"`
+
+#### Scenario: ID with invalid characters is sanitized
+
+- **WHEN** a tool_call has `id: "call:abc/123"`
+- **THEN** the ID SHALL become `"callabc123"`
+
+#### Scenario: ID is entirely invalid characters
+
+- **WHEN** a tool_call has `id: "::::"`
+- **THEN** the ID SHALL become a freshly generated UUID (matching `^[a-zA-Z0-9]+$` after dash removal), independent of message index or tool name
+
+#### Scenario: ID exceeds 64 characters after stripping
+
+- **WHEN** a tool_call has `id: "<70-character-alphanumeric-string>"`
+- **THEN** the ID SHALL be replaced with a freshly generated UUID
+
+#### Scenario: tool_result references are remapped consistently
+
+- **WHEN** an assistant message has a tool_call whose ID is replaced with `X`, and the following user message has a `tool_result` with `tool_use_id` matching the original
+- **THEN** the user message's `tool_use_id` SHALL also be `X` so the upstream sees a consistent pair
+
+#### Scenario: Object arguments stringified
+
+- **WHEN** a tool_call has `function.arguments: { q: "x" }` (an object, not a string)
+- **THEN** `function.arguments` SHALL become the string `"{\"q\":\"x\"}"`
+
+#### Scenario: Type defaulted to function
+
+- **WHEN** a tool_call has no `type` field
+- **THEN** `type` SHALL be set to `"function"`
+
+### Requirement: Tool declaration conversion (ChatCompletions → Anthropic)
+
+When translating Chat-Completions → Anthropic, the gateway SHALL convert each tool declaration as follows: a `{ type: "function", function: { name, description, parameters } }` declaration SHALL become `{ name: <name>, description: <description or "">, input_schema: <parameters or input_schema or empty-object-schema> }`. A non-function tool declaration (e.g. an Anthropic-native server tool with a `type` other than `"function"`) SHALL be passed through unchanged. No tool-name prefix is applied; tool names are forwarded verbatim.
+
+If the converted tools array is non-empty, the LAST tool SHALL receive `cache_control: { type: "ephemeral", ttl: "1h" }` and no other tool SHALL receive `cache_control`.
+
+#### Scenario: Function tool conversion
+
+- **WHEN** the intermediate has `tools: [{ type: "function", function: { name: "search", description: "find", parameters: { type: "object", properties: { q: { type: "string" } } } } }]`
+- **THEN** the Anthropic tools SHALL be `[{ name: "search", description: "find", input_schema: { type: "object", properties: { q: { type: "string" } } }, cache_control: { type: "ephemeral", ttl: "1h" } }]`
+
+#### Scenario: Default empty input_schema
+
+- **WHEN** a function tool has no `parameters` and no `input_schema`
+- **THEN** the converted `input_schema` SHALL be `{ type: "object", properties: {}, required: [] }`
+
+#### Scenario: Server tool passes through
+
+- **WHEN** the intermediate has `tools: [{ type: "web_search_20250305", name: "web_search" }]`
+- **THEN** that entry SHALL appear unchanged in the Anthropic tools array (no prefix applied)
+
+#### Scenario: Cache_control on last tool only
+
+- **WHEN** there are three function tools after conversion
+- **THEN** only the third tool SHALL have `cache_control` set
+
+### Requirement: tool_choice conversion (ChatCompletions → Anthropic)
+
+The gateway SHALL convert the Chat-Completions `tool_choice` value to the Anthropic form as follows:
+- `"auto"` or `"none"` → `{ type: "auto" }`
+- `"required"` → `{ type: "any" }`
+- `{ type: "function", function: { name: <n> } }` → `{ type: "tool", name: <n> }`
+- An Anthropic-shaped object (one that already has a `type` other than `"function"`) SHALL pass through unchanged
+- Any other value SHALL default to `{ type: "auto" }`
+
+#### Scenario: Auto
+
+- **WHEN** the intermediate has `tool_choice: "auto"`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "auto" }`
+
+#### Scenario: Required becomes any
+
+- **WHEN** the intermediate has `tool_choice: "required"`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "any" }`
+
+#### Scenario: Specific function
+
+- **WHEN** the intermediate has `tool_choice: { type: "function", function: { name: "search" } }`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "tool", name: "search" }`
+
+#### Scenario: Already-Anthropic-shaped
+
+- **WHEN** the intermediate has `tool_choice: { type: "any" }`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "any" }`
+
+### Requirement: max_tokens adjustment
+
+The gateway SHALL set the Anthropic `max_tokens` field as follows:
+1. Start with the request's `max_tokens` if present, else the project default.
+2. If `tools` is a non-empty array AND the current value is below the project's minimum-with-tools threshold, raise the value to that minimum.
+3. If `thinking.budget_tokens` is set AND the current value is less than or equal to `budget_tokens`, raise the value to `budget_tokens + 1024`.
+
+#### Scenario: Request max_tokens passes through
+
+- **WHEN** the request has `max_tokens: 4096` and no tools and no thinking
+- **THEN** the Anthropic `max_tokens` SHALL be `4096`
+
+#### Scenario: Default applied when missing
+
+- **WHEN** the request has no `max_tokens` and no tools and no thinking
+- **THEN** the Anthropic `max_tokens` SHALL be the project's default `DEFAULT_MAX_TOKENS`
+
+#### Scenario: Raised by tools minimum
+
+- **WHEN** the request has `max_tokens: 256` and a non-empty `tools` array, with project minimum `DEFAULT_MIN_TOKENS = 4096`
+- **THEN** the Anthropic `max_tokens` SHALL be `4096`
+
+#### Scenario: Raised above thinking budget
+
+- **WHEN** the request has `max_tokens: 2048` and `thinking.budget_tokens: 8192`
+- **THEN** the Anthropic `max_tokens` SHALL be `9216` (i.e. `budget_tokens + 1024`)
+
+#### Scenario: Thinking budget equal triggers raise
+
+- **WHEN** the request has `max_tokens: 8192` and `thinking.budget_tokens: 8192` (`max_tokens` is equal to the budget, not strictly greater than it)
+- **THEN** the Anthropic `max_tokens` SHALL be `9216`
+
+### Requirement: reasoning_effort to thinking.budget_tokens mapping
+
+When the Chat-Completions intermediate has a `reasoning_effort` field but no explicit `thinking` block, the gateway SHALL map the effort to an Anthropic `thinking` configuration using the table: `none → no thinking emitted`, `low → { type: "enabled", budget_tokens: 4096 }`, `medium → { type: "enabled", budget_tokens: 8192 }`, `high → { type: "enabled", budget_tokens: 16384 }`, `xhigh → { type: "enabled", budget_tokens: 32768 }`. The mapping SHALL be case-insensitive. Any other effort value SHALL be ignored.
+
+#### Scenario: medium effort
+
+- **WHEN** the intermediate has `reasoning_effort: "medium"` and no `thinking` field
+- **THEN** the Anthropic body SHALL include `thinking: { type: "enabled", budget_tokens: 8192 }`
+
+#### Scenario: none effort emits no thinking
+
+- **WHEN** the intermediate has `reasoning_effort: "none"`
+- **THEN** the Anthropic body SHALL NOT include a `thinking` field
+
+#### Scenario: Explicit thinking wins over effort
+
+- **WHEN** the intermediate has both `reasoning_effort: "low"` and `thinking: { type: "enabled", budget_tokens: 999 }`
+- **THEN** the Anthropic `thinking` SHALL be `{ type: "enabled", budget_tokens: 999 }`
+
+#### Scenario: Case-insensitive
+
+- **WHEN** the intermediate has `reasoning_effort: "HIGH"`
+- **THEN** the Anthropic body SHALL include `thinking: { type: "enabled", budget_tokens: 16384 }`
+
+### Requirement: response_format JSON-mode shim
+
+When the Chat-Completions intermediate has `response_format`, the gateway SHALL append an additional system block to `systemParts` before assembling the Anthropic `system` array. For `response_format.type === "json_schema"` with a non-null `json_schema.schema`, the appended text SHALL include the literal phrase "You must respond with valid JSON" AND a pretty-printed JSON rendering of the schema AND the literal phrase "Respond ONLY with the JSON object". For `response_format.type === "json_object"`, the appended text SHALL include the literal phrase "You must respond with valid JSON" AND the literal phrase "Respond ONLY with a JSON object". For any other `response_format` value, no system block SHALL be appended.
+
+#### Scenario: json_schema appends instructions and schema
+
+- **WHEN** the intermediate has `response_format: { type: "json_schema", json_schema: { schema: { type: "object", properties: { answer: { type: "number" } } } } }`
+- **THEN** the Anthropic `system` array SHALL contain a text block whose text contains all three of `"You must respond with valid JSON"`, the substring `"answer"`, and `"Respond ONLY with the JSON object"`
+
+#### Scenario: json_object appends generic instruction
+
+- **WHEN** the intermediate has `response_format: { type: "json_object" }`
+- **THEN** the Anthropic `system` array SHALL contain a text block whose text contains `"You must respond with valid JSON"` and `"Respond ONLY with a JSON object"`
+
+#### Scenario: Other type ignored
+
+- **WHEN** the intermediate has `response_format: { type: "text" }` or no `response_format`
+- **THEN** no JSON-mode system block SHALL be appended
+
+#### Scenario: Coexists with user-supplied system
+
+- **WHEN** the intermediate has both a `role: "system"` message `"You are helpful."` and `response_format: { type: "json_object" }`
+- **THEN** the Anthropic `system` array SHALL contain a text block whose combined text contains BOTH `"You are helpful."` AND `"You must respond with valid JSON"`
+
+### Requirement: Image content mapping (ChatCompletions → Anthropic)
+
+When translating Chat-Completions → Anthropic for a user message content block of type `image_url`, the gateway SHALL inspect the URL:
+- If the URL matches `^data:([^;]+);base64,(.+)$`, emit an Anthropic block `{ type: "image", source: { type: "base64", media_type: <captured group 1>, data: <captured group 2> } }`.
+- Else if the URL starts with `http://` or `https://`, emit `{ type: "image", source: { type: "url", url } }`.
+- Else drop the image block.
+
+Anthropic-shape image blocks `{ type: "image", source: ... }` SHALL be passed through unchanged.
+
+#### Scenario: Base64 data URL
+
+- **WHEN** a user message content has `{ type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KGgo=" } }`
+- **THEN** the Anthropic block SHALL be `{ type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw0KGgo=" } }`
+
+#### Scenario: HTTP URL
+
+- **WHEN** a user message content has `{ type: "image_url", image_url: { url: "https://example.com/a.png" } }`
+- **THEN** the Anthropic block SHALL be `{ type: "image", source: { type: "url", url: "https://example.com/a.png" } }`
+
+#### Scenario: Unsupported URL is dropped
+
+- **WHEN** a user message content has `{ type: "image_url", image_url: { url: "ftp://x/y" } }`
+- **THEN** no image block SHALL appear in the Anthropic message content
+
+### Requirement: Assistant content blocks (ChatCompletions → Anthropic)
+
+For each assistant message in the Chat-Completions intermediate, the gateway SHALL map its content blocks and tool_calls into Anthropic content blocks as follows:
+
+- A `text` block with non-empty `text` SHALL become an Anthropic `{ type: "text", text }` block.
+- A `tool_use` block SHALL become `{ type: "tool_use", id, name, input }`. The name is forwarded verbatim with no prefix applied.
+- A `thinking` or `redacted_thinking` block SHALL pass through with its `cache_control` field stripped (these block types do not accept cache_control).
+- A string `content` SHALL be emitted as a single text block when non-empty.
+- For each entry in `tool_calls[]` whose `type` is `"function"`, an Anthropic `{ type: "tool_use", id, name: <function.name>, input: <parsed function.arguments> }` block SHALL be appended; `function.arguments` SHALL be parsed as JSON if it is a string, falling back to the raw string when parsing fails.
+
+#### Scenario: Text block conversion
+
+- **WHEN** an assistant message has `content: [{ type: "text", text: "hi" }]`
+- **THEN** the Anthropic assistant content SHALL contain `{ type: "text", text: "hi" }`
+
+#### Scenario: tool_calls become tool_use
+
+- **WHEN** an assistant message has `tool_calls: [{ id: "c1", type: "function", function: { name: "search", arguments: "{\"q\":\"x\"}" } }]`
+- **THEN** the Anthropic assistant content SHALL contain `{ type: "tool_use", id: "c1", name: "search", input: { q: "x" } }`
+
+#### Scenario: Unparseable arguments kept as string
+
+- **WHEN** a tool_call has `function.arguments: "not json"`
+- **THEN** the Anthropic `tool_use.input` SHALL be the string `"not json"`
+
+#### Scenario: Thinking block strips cache_control
+
+- **WHEN** an assistant message has `content: [{ type: "thinking", thinking: "T", cache_control: { type: "ephemeral" } }]`
+- **THEN** the Anthropic assistant content SHALL contain `{ type: "thinking", thinking: "T" }` with no `cache_control`
+
+### Requirement: User and tool content blocks (ChatCompletions → Anthropic)
+
+For a tool message (`role: "tool"`), the gateway SHALL emit an Anthropic `role: "user"` message whose sole content block is `{ type: "tool_result", tool_use_id: <tool_call_id>, content: <content> }`.
+
+For a user message:
+- A string `content` SHALL produce a single `{ type: "text", text }` block when non-empty; empty strings emit nothing.
+- An array `content` SHALL be walked: `text` blocks with non-empty text become Anthropic text blocks; `tool_result` blocks pass through (with their optional `is_error` field preserved); `image_url` and `image` blocks are mapped per the Image content mapping requirement.
+
+#### Scenario: Tool message becomes tool_result
+
+- **WHEN** messages contain `{ role: "tool", tool_call_id: "c1", content: "result text" }`
+- **THEN** the Anthropic message SHALL be `{ role: "user", content: [{ type: "tool_result", tool_use_id: "c1", content: "result text" }] }`
+
+#### Scenario: Tool_result with is_error
+
+- **WHEN** a user message has `content: [{ type: "tool_result", tool_use_id: "c1", content: "err", is_error: true }]`
+- **THEN** the Anthropic block SHALL preserve `is_error: true`
+
+#### Scenario: Empty user string drops text block
+
+- **WHEN** a user message has `content: ""`
+- **THEN** no text block SHALL be emitted for that message
+
+### Requirement: Cache_control on last assistant content block
+
+After all content blocks are assembled, the gateway SHALL apply `cache_control: { type: "ephemeral" }` to the LAST eligible content block of the LAST assistant message (eligible means type in `{text, tool_use, tool_result, image}` — thinking blocks are not eligible). At most one such marker SHALL be added per request.
+
+#### Scenario: Marker applied to last text block
+
+- **WHEN** the last assistant message has content `[{ type: "thinking", thinking: "T" }, { type: "text", text: "answer" }]`
+- **THEN** the text block SHALL receive `cache_control: { type: "ephemeral" }` and the thinking block SHALL NOT
+
+#### Scenario: Skip past trailing thinking
+
+- **WHEN** the last assistant message has content `[{ type: "text", text: "answer" }, { type: "thinking", thinking: "T" }]`
+- **THEN** the text block (not the thinking block) SHALL receive `cache_control`
+
+#### Scenario: No assistant message
+
+- **WHEN** there is no assistant message in the conversation
+- **THEN** no cache_control marker SHALL be added on the assistant side
+
+### Requirement: Response stream — message_start
+
+On the FIRST chunk received from the upstream that yields any usable delta, the streaming translator (Anthropic → ChatCompletions hop) SHALL emit a `message_start` event whose `message` field includes `id`, `type: "message"`, `role: "assistant"`, `model`, `content: []`, `stop_reason: null`, `stop_sequence: null`, and `usage: { input_tokens: 0, output_tokens: 0 }`. The translator SHALL derive `id` from the chunk's id (stripping a `chatcmpl-` prefix if present); if the derived id is empty, the value `"chat"`, or shorter than 8 characters, the translator SHALL fall back to a request-id or trace-id from the chunk's `extend_fields`, finally to `msg_<timestamp>`. The `model` field SHALL be the chunk's `model` field or `"unknown"`. This event SHALL fire exactly once per stream.
+
+#### Scenario: message_start fires once
+
+- **WHEN** two non-empty chunks are processed in sequence at the start of a stream
+- **THEN** exactly one `message_start` event SHALL be emitted, and it SHALL precede the first content_block event
+
+#### Scenario: Empty id falls back to msg_<timestamp>
+
+- **WHEN** the first chunk has `id: ""` and no `extend_fields`
+- **THEN** the emitted `message.id` SHALL match the regex `^msg_\d+$`
+
+#### Scenario: chatcmpl-prefix stripped
+
+- **WHEN** the first chunk has `id: "chatcmpl-abc12345"`
+- **THEN** the emitted `message.id` SHALL be `"abc12345"`
+
+### Requirement: Response stream — text content blocks
+
+When a chunk's `delta.content` is non-empty, the translator SHALL ensure a text content_block is open (opening with `content_block_start` of type `text` at the next available index if not yet open) and SHALL emit a `content_block_delta` event of type `text_delta` carrying the content string. Before opening a text block, any open thinking block SHALL be closed via `content_block_stop`.
+
+#### Scenario: First text delta opens a text block
+
+- **WHEN** the first content-bearing chunk has `delta.content: "hello"`
+- **THEN** the translator SHALL emit a `content_block_start` (type text) followed by a `content_block_delta` (type text_delta, text "hello")
+
+#### Scenario: Subsequent text delta reuses the open block
+
+- **WHEN** a second chunk has `delta.content: " world"` and the text block is open
+- **THEN** the translator SHALL emit ONLY a `content_block_delta` for that block index
+
+#### Scenario: Text after thinking closes thinking first
+
+- **WHEN** a thinking block is open and a chunk has `delta.content: "hello"`
+- **THEN** a `content_block_stop` for the thinking block SHALL be emitted BEFORE the new text block's `content_block_start`
+
+### Requirement: Response stream — thinking content blocks
+
+When a chunk has a non-empty `delta.reasoning_content` or `delta.reasoning`, the translator SHALL ensure a thinking content_block is open (opening with `content_block_start` of type `thinking` if not yet open) and SHALL emit a `content_block_delta` of type `thinking_delta`. Before opening a thinking block, any open text block SHALL be closed via `content_block_stop`; the close is idempotent, so a block that is already closed SHALL NOT receive a second `content_block_stop`.
+
+#### Scenario: reasoning_content opens thinking
+
+- **WHEN** a chunk has `delta.reasoning_content: "step 1"` and no prior thinking emitted
+- **THEN** the translator SHALL emit `content_block_start` (type thinking) followed by `content_block_delta` (type thinking_delta, thinking "step 1")
+
+#### Scenario: reasoning alias
+
+- **WHEN** a chunk has `delta.reasoning: "step 2"` (note the alternate field name) and no `reasoning_content`
+- **THEN** the translator SHALL behave as if `delta.reasoning_content` were `"step 2"`
+
+### Requirement: Response stream — tool_use content blocks
+
+When a chunk's `delta.tool_calls[]` contains an entry with a non-empty `id`, the translator SHALL close any open text or thinking block and SHALL open a new tool_use content_block at the next available index. The block's `name` SHALL be the entry's `function.name` (forwarded verbatim, no prefix stripping). The block's `input` SHALL start as `{}`. When a subsequent chunk emits `function.arguments` for the same tool_call index, the translator SHALL emit `content_block_delta` of type `input_json_delta` with `partial_json` equal to that argument fragment. On finish, every open tool_use block SHALL be closed via `content_block_stop`.
+
+#### Scenario: tool_call opens tool_use block
+
+- **WHEN** a chunk has `delta.tool_calls: [{ index: 0, id: "c1", function: { name: "search" } }]`
+- **THEN** the translator SHALL emit `content_block_start` of type `tool_use` with `id: "c1"`, name `"search"`, input `{}`
+
+#### Scenario: Subsequent argument fragments emit input_json_delta
+
+- **WHEN** chunk 2 has `delta.tool_calls: [{ index: 0, function: { arguments: "{\"q\":" } }]` and chunk 3 has `delta.tool_calls: [{ index: 0, function: { arguments: "\"x\"}" } }]`
+- **THEN** the translator SHALL emit TWO `content_block_delta` events with `input_json_delta`, with partial_json `"{\"q\":"` then `"\"x\"}"`
+
+#### Scenario: Tool name forwarded verbatim
+
+- **WHEN** a tool_call has `function.name: "search"`
+- **THEN** the emitted tool_use block's `name` SHALL be `"search"` (no prefix added, no prefix stripped)
+
+#### Scenario: All tool_use blocks closed on finish
+
+- **WHEN** the upstream emits two tool_calls and then a `finish_reason: "tool_calls"` chunk
+- **THEN** TWO `content_block_stop` events SHALL be emitted, one per open tool_use block
+
+### Requirement: Response stream — finish and usage
+
+When a chunk has a non-null `finish_reason`, the translator (Anthropic → ChatCompletions hop) SHALL close any open text, thinking, and tool_use blocks, emit a `message_delta` event whose `delta.stop_reason` is the mapped value of the finish reason (`stop → end_turn`, `length → max_tokens`, `tool_calls → tool_use`, any other → `end_turn`) and whose `usage` is the accumulated usage, then emit `message_stop`. The accumulated `usage` SHALL be computed from any chunk that carries a `usage` object: `input_tokens = max(0, prompt_tokens − cached_tokens − cache_creation_tokens)`, `output_tokens = completion_tokens`, `cache_read_input_tokens = cached_tokens` (omitted when zero), `cache_creation_input_tokens = cache_creation_tokens` (omitted when zero). Cache token fields are read from `usage.prompt_tokens_details.{cached_tokens, cache_creation_tokens}`. Reasoning-token sub-detail SHALL NOT be added to output_tokens (it is already included in completion_tokens).
+
+#### Scenario: stop maps to end_turn
+
+- **WHEN** the finishing chunk has `finish_reason: "stop"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "end_turn"`
+
+#### Scenario: length maps to max_tokens
+
+- **WHEN** the finishing chunk has `finish_reason: "length"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "max_tokens"`
+
+#### Scenario: tool_calls maps to tool_use
+
+- **WHEN** the finishing chunk has `finish_reason: "tool_calls"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "tool_use"`
+
+#### Scenario: Unknown finish reason maps to end_turn
+
+- **WHEN** the finishing chunk has `finish_reason: "content_filter"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "end_turn"`
+
+#### Scenario: Cache tokens propagated
+
+- **WHEN** any chunk's `usage` is `{ prompt_tokens: 100, completion_tokens: 50, prompt_tokens_details: { cached_tokens: 30, cache_creation_tokens: 20 } }`
+- **THEN** the emitted `usage` SHALL be `{ input_tokens: 50, output_tokens: 50, cache_read_input_tokens: 30, cache_creation_input_tokens: 20 }`
+
+#### Scenario: Zero cache tokens omitted
+
+- **WHEN** any chunk's `usage` is `{ prompt_tokens: 100, completion_tokens: 50, prompt_tokens_details: { cached_tokens: 0 } }`
+- **THEN** the emitted `usage` SHALL be `{ input_tokens: 100, output_tokens: 50 }` (no cache fields)
+
+### Requirement: Response stream — Chat-Completions → Responses-API events
+
+The streaming translator (ChatCompletions → Responses-API hop) SHALL emit Responses-API events with consecutive `sequence_number` values starting from 1, each event's value being exactly the previous event's value plus 1. On the first usable chunk it SHALL emit `response.created` then `response.in_progress` exactly once each. For each `delta.content` it SHALL ensure a `message` output_item is open (emitting `response.output_item.added` of type `message` with content `[]` and role `"assistant"`, then `response.content_part.added` of type `output_text`) and SHALL emit `response.output_text.delta` events. For each `delta.reasoning_content` it SHALL ensure a `reasoning` output_item is open (emitting `response.output_item.added` of type `reasoning` and `response.reasoning_summary_part.added` of type `summary_text`) and SHALL emit `response.reasoning_summary_text.delta`. On finish it SHALL close every open item (`response.output_text.done`, `response.content_part.done`, `response.output_item.done` for messages; `response.reasoning_summary_text.done`, `response.reasoning_summary_part.done`, `response.output_item.done` for reasoning; `response.function_call_arguments.done`, `response.output_item.done` for function calls) and emit `response.completed` exactly once. The `response.id` value SHALL be the upstream `chunk.id` prefixed by `resp_`. The `created_at` field SHALL be a Unix timestamp captured at stream start.
+
+#### Scenario: sequence_number is strictly increasing
+
+- **WHEN** any sequence of events is emitted for a stream
+- **THEN** every event's `sequence_number` SHALL equal the previous event's value plus 1, starting at 1
+
+#### Scenario: response.created precedes response.in_progress precedes any delta
+
+- **WHEN** the first usable chunk produces a text delta
+- **THEN** the emitted events SHALL be, in order: `response.created`, `response.in_progress`, `response.output_item.added`, `response.content_part.added`, `response.output_text.delta`
+
+#### Scenario: response.completed fires once
+
+- **WHEN** any stream ends successfully
+- **THEN** exactly ONE `response.completed` event SHALL be emitted
+
+#### Scenario: response id derived from chunk id
+
+- **WHEN** the first chunk has `id: "abc12345"`
+- **THEN** the emitted `response.id` SHALL be `"resp_abc12345"`
+
+#### Scenario: Reasoning open/close events
+
+- **WHEN** the upstream emits two `delta.reasoning_content` fragments then finishes
+- **THEN** the emitted events SHALL include `response.output_item.added` (type reasoning), `response.reasoning_summary_part.added`, two `response.reasoning_summary_text.delta`, `response.reasoning_summary_text.done` (with full buffered text), `response.reasoning_summary_part.done`, `response.output_item.done`
+
+### Requirement: Response stream — `<think>` inline marker recognition
+
+When a chunk's `delta.content` contains the literal substring `<think>`, the translator SHALL split the chunk at that point, emit any text before `<think>` as normal text, open a reasoning output_item, and route the text AFTER `<think>` into the reasoning channel. When a subsequent chunk's content contains `</think>`, the translator SHALL split at that point, emit the part before `</think>` as reasoning, close the reasoning item, then emit the part after `</think>` as normal text.
+
+#### Scenario: Open marker mid-stream
+
+- **WHEN** a chunk has `delta.content: "intro<think>step"`
+- **THEN** the translator SHALL emit a text delta for `"intro"`, open a reasoning item, and emit a reasoning delta for `"step"`
+
+#### Scenario: Close marker mid-stream
+
+- **WHEN** while a reasoning item is open via inline marker a chunk has `delta.content: "more</think>answer"`
+- **THEN** the translator SHALL emit a reasoning delta for `"more"`, close the reasoning item, and emit a text delta for `"answer"`
+
+#### Scenario: Open without close at EOS
+
+- **WHEN** the stream ends while still inside an inline `<think>` block
+- **THEN** the flush path SHALL close the reasoning item before `response.completed`
+
+### Requirement: Response stream — function_call output items
+
+When the Chat-Completions chunk indicates a tool_call (a `delta.tool_calls[]` entry), the translator SHALL emit Responses-API events as follows. For the first chunk that carries a `tool_calls[].id`, it SHALL close any currently-open `message` output_item via `closeMessage` (emitting `response.output_text.done`, `response.content_part.done`, `response.output_item.done`) and emit `response.output_item.added` of type `function_call` with `arguments: ""`, `call_id: <id>`, `name: <function.name or "">`. For each subsequent chunk carrying `function.arguments` it SHALL emit `response.function_call_arguments.delta`. On finish or end-of-stream it SHALL emit `response.function_call_arguments.done` (with the buffered arguments string, or `"{}"` if empty) followed by `response.output_item.done` of type `function_call`.
+
+#### Scenario: function_call.added precedes any arguments delta
+
+- **WHEN** the first tool_call chunk has `delta.tool_calls: [{ index: 0, id: "c1", function: { name: "search", arguments: "{" } }]`
+- **THEN** the emitted events SHALL be `response.output_item.added` (type function_call, name "search", arguments "") then `response.function_call_arguments.delta` (delta "{")
+
+#### Scenario: function_call done emits buffered arguments
+
+- **WHEN** chunk 1 emits arguments `"{\"q\":"` and chunk 2 emits arguments `"\"x\"}"` and then finish is signalled
+- **THEN** `response.function_call_arguments.done` SHALL carry `arguments: "{\"q\":\"x\"}"`
+
+#### Scenario: Empty arguments default to "{}"
+
+- **WHEN** a tool_call is opened and closed without any `function.arguments` fragments
+- **THEN** the emitted `response.function_call_arguments.done` SHALL carry `arguments: "{}"`
+
+### Requirement: Response stream — error event mapping
+
+When the upstream emits an `error` event or a `response.failed` event, the translator (Responses-API → Chat-Completions hop) SHALL emit a single OpenAI-shaped error chunk: a `chat.completion.chunk` with `choices[0].delta.content` set to `[Error] <error.message or stringified error>` and `choices[0].finish_reason: "stop"`. The translator SHALL emit AT MOST ONE such chunk per stream — back-to-back `error` and `response.failed` events SHALL be deduplicated.
+
+#### Scenario: error event surfaces as content chunk
+
+- **WHEN** an `error` event arrives with `data.error: { message: "model_not_found" }`
+- **THEN** the next emitted chunk SHALL be `{ choices: [{ index: 0, delta: { content: "[Error] model_not_found" }, finish_reason: "stop" }], ... }`
+
+#### Scenario: response.failed after error is suppressed
+
+- **WHEN** an `error` event is followed by a `response.failed` event in the same stream
+- **THEN** only ONE error chunk SHALL be emitted
+
+### Requirement: Response stream — flush on null chunk
+
+When the streaming translator receives a `null` chunk (end-of-stream sentinel), it SHALL close every still-open output_item, emit `response.completed` if not already emitted, and emit a final Chat-Completions chunk with empty delta and a computed `finish_reason` (`tool_calls` if any tool_call was emitted, else `stop`). The flush path SHALL be idempotent: a second `null` chunk SHALL produce no further events.
+
+#### Scenario: Null flush closes open message
+
+- **WHEN** the translator has an open message output_item and receives `null`
+- **THEN** it SHALL emit `response.output_text.done`, `response.content_part.done`, `response.output_item.done`, `response.completed`
+
+#### Scenario: Null flush finish_reason is tool_calls when a tool was emitted
+
+- **WHEN** the stream emitted a tool_call and then null
+- **THEN** the final Chat-Completions chunk's `finish_reason` SHALL be `"tool_calls"`
+
+#### Scenario: Idempotent null flush
+
+- **WHEN** the translator has already emitted `response.completed` and a second null arrives
+- **THEN** no further events SHALL be emitted
+
+### Requirement: Response stream — usage propagation on completed event
+
+When the streaming translator (Responses-API → Chat-Completions hop) encounters a `response.completed` event whose `response.usage` is present, it SHALL set the accumulated usage to `{ prompt_tokens: input_tokens (or prompt_tokens), completion_tokens: output_tokens (or completion_tokens), total_tokens: prompt_tokens + completion_tokens }`. If `input_tokens_details.cached_tokens` (or `cache_read_input_tokens`) is > 0, it SHALL add `prompt_tokens_details: { cached_tokens: <value> }`. The usage SHALL be attached to the final Chat-Completions chunk's `usage` field.
+
+#### Scenario: usage propagated
+
+- **WHEN** a `response.completed` event has `response.usage: { input_tokens: 100, output_tokens: 50, input_tokens_details: { cached_tokens: 30 } }`
+- **THEN** the final Chat-Completions chunk's `usage` SHALL be `{ prompt_tokens: 100, completion_tokens: 50, total_tokens: 150, prompt_tokens_details: { cached_tokens: 30 } }`
+
+#### Scenario: Legacy field names accepted
+
+- **WHEN** the upstream uses `prompt_tokens`/`completion_tokens`/`cache_read_input_tokens` instead of the Responses field names
+- **THEN** the translator SHALL accept those values as equivalent
+
+### Requirement: Response stream — custom_tool_call variant
+
+The streaming translator SHALL treat `response.output_item.added` events whose `item.type` is `"custom_tool_call"` identically to `"function_call"` events. The translator SHALL treat `response.custom_tool_call_input.delta` events identically to `response.function_call_arguments.delta`. The translator SHALL treat `response.output_item.done` for `custom_tool_call` items as a tool-call increment trigger identical to `function_call`.
+
+#### Scenario: custom_tool_call opens like function_call
+
+- **WHEN** a `response.output_item.added` event has `item: { type: "custom_tool_call", call_id: "c1", name: "x" }`
+- **THEN** the emitted Chat-Completions chunk SHALL contain `delta.tool_calls[0] = { index: 0, id: "c1", type: "function", function: { name: "x", arguments: "" } }`
+
+#### Scenario: custom_tool_call_input.delta forwarded
+
+- **WHEN** a `response.custom_tool_call_input.delta` event has `delta: "{}"`
+- **THEN** the emitted Chat-Completions chunk SHALL contain `delta.tool_calls[0].function.arguments: "{}"`
+
+### Requirement: Backward compatibility — no behavior change for non-Anthropic upstreams
+
+The translation pipeline SHALL only execute when the source format and target format differ. A `/v1/responses` request routed to an OpenAI-compatible upstream SHALL behave exactly as today. A `/v1/messages` request routed to an Anthropic upstream SHALL behave exactly as today. A `/v1/chat/completions` request SHALL behave exactly as today unless its body contains an `input` array.
+
+#### Scenario: Responses to OpenAI passthrough
+
+- **WHEN** a `/v1/responses` request is routed to an OpenAI-compatible channel
+- **THEN** the request body and response stream SHALL pass through with no transformation (same-format pivot)
+
+#### Scenario: /v1/messages unchanged
+
+- **WHEN** a `/v1/messages` request is routed to an Anthropic channel
+- **THEN** no translation step SHALL be invoked
+
+### Requirement: No leakage of internal state into upstream body
+
+Before sending the body to the upstream, the gateway SHALL strip every internal scratch field it has attached to the top level of the body (for example, fields the translation layer uses to carry per-request or per-stream state). By convention, the name of every such scratch field starts with an underscore (`_`) so that the strip rule can match by prefix.
+
+#### Scenario: Internal underscore-prefixed fields stripped
+
+- **WHEN** the translator attaches an internal underscore-prefixed scratch field to the intermediate body (for example to track per-stream state)
+- **THEN** the JSON body delivered to the upstream SHALL NOT contain any top-level field whose name begins with `_`
diff --git a/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/tasks.md b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/tasks.md
new file mode 100644
index 00000000000..9550653c7ad
--- /dev/null
+++ b/openspec/changes/archive/2026-05-20-responses-to-anthropic-translation/tasks.md
@@ -0,0 +1,163 @@
+## 1. Per-stream state struct (NEW, minimal)
+
+- [x] 1.1 Add `service/openaicompat/responses_stream_state.go` with `ResponsesStreamState` struct fields covering: `seq` (sequence number generator), `responseId`, `createdAt`, `started`, `inProgressSent`, `completedSent`, `messageItemOpen`, `messageItemIndex`, `messageContentPartOpen`, `messageOutputIndex`, `reasoningItemOpen`, `reasoningItemIndex`, `reasoningSummaryPartOpen`, `funcCalls` (map keyed by chunk tool_call index: { id, name, argsBuf, itemIndex, done }), `inThinkInlineTag`, `usage` (running aggregate), `model`, `finalFinishReason`.
+- [x] 1.2 Provide `NewResponsesStreamState() *ResponsesStreamState` with safe zero defaults; `seq` starts at 0 so `nextSeq()` returns 1 on first call.
+
+## 2. Responses → Chat-Completions request translator (NEW)
+
+Implemented in `service/openaicompat/responses_to_chat.go` as a new function `ResponsesRequestToChatCompletionsRequest(req *dto.OpenAIResponsesRequest) (*dto.GeneralOpenAIRequest, error)`.
+
+- [x] 2.1 Implement input-shape normalization (string / empty string → placeholder `"..."` / non-empty array passthrough / empty array → placeholder; non-string non-array → return the original request body with an explicit "no translation possible" error so the caller can fall through).
+- [x] 2.2 Lift `instructions` to a leading `role: "system"` message.
+- [x] 2.3 Implement item-type detection with role-only fallback (`type` missing + `role` present ⇒ treat as `"message"`; neither ⇒ skip).
+- [x] 2.4 Convert message content parts (`input_text`/`output_text` → `text`; `input_image` with `image_url` or `file_id` → `image_url`).
+- [x] 2.5 Buffer `function_call` items into the next assistant message's `tool_calls[]`; drop calls with empty/missing name.
+- [x] 2.6 Emit `function_call_output` as `role: "tool"` with stringified non-string output.
+- [x] 2.7 Buffer `reasoning` items and attach as `reasoning_content` to the next assistant or function_call turn; never emit as a standalone message; concat multiple with `\n`.
+- [x] 2.8 Convert tool declarations from Responses-API forms (`{ type: "function", function: {...} }` AND bare `{ type: "function", name, ... }`) into Chat-Completions `tools[]` with `properties: {}` normalization when `parameters` is missing; drop nameless function tools.
+- [x] 2.9 Strip Responses-only fields from the resulting Chat-Completions body (`input`, `instructions`, `include`, `prompt_cache_key`, `store`, `reasoning`, `background`). (Implemented by NOT copying these fields onto the resulting `GeneralOpenAIRequest`.)
+- [x] 2.10 Carry `reasoning.effort` → `reasoning_effort` (string enum: none/low/medium/high/xhigh) when present on the Responses input.
+- [x] 2.11 Carry `text.format` (`text` / `json_schema` / `json_object`) → Chat-Completions `response_format` mapping.
+- [x] 2.12 Add table-driven unit tests in `service/openaicompat/responses_to_chat_test.go` covering every scenario from spec §3, §4, §5, §6, §7, §8, §9, §10. ← (verify: 100% of request-side scenarios in spec map to a passing case)
+
+## 3. ChatCompletions → Anthropic request translator (REUSE existing)
+
+The existing `relay/channel/claude/relay-claude.go::RequestOpenAI2ClaudeMessage` already implements: system extraction, tool_use/tool_result placement repair (partial — see the §12 audit finding below), max_tokens adjustment, reasoning_effort → thinking mapping, image-URL mapping (data: base64 / http: url), tool declaration conversion, tool_choice conversion, merging of consecutive same-role messages. Per the §3.1 audit findings below, it does NOT yet implement: missing tool_result injection, the response_format JSON-mode shim, cache_control on the last assistant block, or cache_control on the last tool.
+
+- [x] 3.1 Audit `RequestOpenAI2ClaudeMessage` against spec §11–§22 (system extraction, tool blocks, image mapping, max_tokens, reasoning_effort, response_format, cache_control, tool declaration, tool_choice). For each scenario, record either "covered by existing" with a code-pointer comment, or open a follow-up sub-task to fix the gap.
+  - **Audit findings (code pointers reference `relay/channel/claude/relay-claude.go`):**
+  - §11 System extraction — covered (lines 287-313, 428-430).
+  - §12 Tool ordering — partially covered (lines 273-279 merge same-role; lines 334-351 fold tool messages into prior user). **GAP**: explicit "missing tool_result auto-injection" loop is NOT implemented. Anthropic accepts adjacent tool_use → tool_result pairs and the existing flow assumes well-formed input.
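+  - §13 Tool-call ID sanitization (cited as spec §14 in tasks 3.2, 3.3, 11.2 and in the coverage summary; reconcile the section number against the spec) — implemented by NEW `SanitizeToolCallIDs` (task §3.2), called BEFORE `RequestOpenAI2ClaudeMessage`.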
+  - §14 Tool declaration conversion — covered (lines 50-70). Cache_control on last tool: **GAP** (not implemented).
+  - §15 tool_choice — covered (lines 960-1008 in `mapToolChoice`).
+  - §16 max_tokens — covered (lines 130-154, 188-200).
+  - §17 reasoning_effort → thinking — covered (lines 206-224).
+  - §18 response_format JSON-mode shim — **GAP**: no system block is injected for `json_object` / `json_schema`. Behavior is upstream-dependent today.
+  - §19 Image mapping (data: vs http:) — covered (lines 379-403 via `GetBase64Data` which handles both).
+  - §20 Assistant content blocks — covered (lines 369-422). cache_control stripping on thinking blocks: **N/A** (no cache_control added today).
+  - §21 User/tool content blocks — covered.
+  - §22 Cache_control on last assistant — **GAP** (not implemented).
+  - Per project rule "Do NOT rewrite the converters", §3.4 plug-gap fixes are left to a follow-up commit if integration testing reveals strict-mode upstream rejection. The new orchestration still works because Anthropic accepts well-formed inputs without the cache_control hints.
+- [x] 3.2 Add tool-call ID sanitization preprocessor: a new helper `service/openaicompat/tool_call_ids.go::SanitizeToolCallIDs(req *dto.GeneralOpenAIRequest)` that walks `req.Messages`, applies the three-tier policy (pass-through / strip-and-keep / UUID fallback per spec §14), and remaps any matching `tool_call_id` references in subsequent tool messages. Run BEFORE `RequestOpenAI2ClaudeMessage`.
+- [x] 3.3 Add unit tests for `SanitizeToolCallIDs` covering all spec §14 scenarios (valid passes, partial-strip, full-invalid-UUID, over-64-chars-UUID, consistent remap, object args stringified, type defaulted). ← (verify: spec §14 scenarios all map to a passing test)
+- [x] 3.4 If §3.1 surfaces a gap in `RequestOpenAI2ClaudeMessage`, the corresponding fix lands as a focused PR-style commit inside `relay/channel/claude/relay-claude.go` with its own assertion-style test in `relay/channel/claude/relay_claude_test.go`. No spec change is required because behavior is being aligned to an existing spec requirement. **NOT REQUIRED for initial integration** — gaps are non-blocking (Anthropic accepts the converted body without the optional shims). Follow-up work tracked above.
+
+## 4. Anthropic → ChatCompletions response translator (REUSE existing)
+
+The existing `ClaudeStreamHandler` / `ClaudeHandler` + `StreamResponseClaude2OpenAI` / `ResponseClaude2OpenAI` pair (in `relay/channel/claude/relay-claude.go`) already emits Chat-Completions chunks with: cache-token decomposition, finish_reason mapping, message-start id derivation, text/thinking/tool_use block lifecycle, usage propagation including cache fields.
+
+- [x] 4.1 Audit `StreamResponseClaude2OpenAI` and `ClaudeStreamHandler` against spec §23–§28 (message_start id derivation, text/thinking/tool_use lifecycle, finish_reason mapping, usage decomposition). Record either "covered by existing" or open a sub-task.
+  - **Audit findings:**
+  - §23 message_start id derivation — covered (lines 451-456): uses `claudeResponse.Message.Id` and `Model`.
+  - §24 text content blocks — covered (lines 459-498): `content_block_start` text, `content_block_delta` text_delta.
+  - §25 thinking content blocks — covered (line 495 `thinking_delta`; line 491-494 `signature_delta`).
+  - §26 tool_use content blocks — covered (lines 465-475 emit tool_call with name, lines 482-490 emit `input_json_delta` as args).
+  - §27 finish and usage — covered: `FormatClaudeResponseInfo` accumulates `prompt_tokens`, `completion_tokens`, `cache_read_input_tokens`, `cache_creation_input_tokens`; finish_reason maps via `stopReasonClaude2OpenAI`.
+  - §28 usage cache token propagation — covered (lines 729-736, 746-770).
+  - No gaps identified.
+- [x] 4.2 If §4.1 surfaces a gap, the fix lands inside the existing converter with its own test, same as §3.4. — Not required.
+
+## 5. ChatCompletions → Responses-API response translator — STREAMING (NEW)
+
+Implemented in `service/openaicompat/chat_stream_to_responses.go` as `ChatCompletionsStreamToResponsesEvents(chunk *dto.ChatCompletionsStreamResponse, state *ResponsesStreamState) []dto.ResponsesAPIEvent` (event struct names final at apply time).
+
+- [x] 5.1 Sequence-number generator (monotonic, starting at 1).
+- [x] 5.2 Emit `response.created` + `response.in_progress` exactly once each on the first usable chunk, with `response.id = "resp_" + chunk.id`, `created_at` captured at first call.
+- [x] 5.3 Message output_item lifecycle: open (`response.output_item.added` + `response.content_part.added`), deltas (`response.output_text.delta`), close (`response.output_text.done` + `response.content_part.done` + `response.output_item.done`).
+- [x] 5.4 Reasoning output_item lifecycle: open (`response.output_item.added` + `response.reasoning_summary_part.added`), deltas (`response.reasoning_summary_text.delta`), close (text done + part done + item done).
+- [x] 5.5 Function_call output_item lifecycle: open (`response.output_item.added` with `arguments: ""`), deltas (`response.function_call_arguments.delta`), close (`response.function_call_arguments.done` with full buffered args, defaulting to `"{}"` if empty, + `response.output_item.done`).
+- [x] 5.6 `<think>` / `</think>` inline-marker recognition in text content with mid-chunk split routing to the reasoning channel.
+- [x] 5.7 Null-chunk flush path: close every open item in deterministic order, emit `response.completed` exactly once, with computed `finish_reason` (`tool_calls` if any function_call was emitted else from final chunk).
+- [x] 5.8 Error-event mapping: when the upstream Chat stream emits an error chunk, emit a single `response.failed` event (dedup on back-to-back). Exposed as `EmitChatStreamErrorEvent` (idempotent via `state.ErrorEmitted`).
+- [x] 5.9 Usage propagation on `response.completed`: `prompt_tokens` → `input_tokens`, `completion_tokens` → `output_tokens`, `prompt_tokens_details.cached_tokens` → `input_tokens_details.cached_tokens`, with the canonical decomposition `input_tokens = max(0, prompt − cached − cache_creation)`.
+- [x] 5.10 `custom_tool_call` variant aliasing for added/delta/done events. ← (Aliased structurally: the streaming translator treats incoming Chat-Completions tool_calls uniformly, so `custom_tool_call` events on the upstream that flow through Claude's `StreamResponseClaude2OpenAI` arrive as standard tool_calls. Wire-level aliasing for Responses-input is covered by the Responses→Chat hop §2.)
+
+## 6. ChatCompletions → Responses-API response translator — NON-STREAMING (NEW)
+
+Implemented in `service/openaicompat/chat_to_responses.go` as `ChatCompletionsResponseToResponsesResponse(resp *dto.OpenAITextResponse, requestModel string) (*dto.OpenAIResponsesResponse, error)`.
+
+- [x] 6.1 Build a single `response.output[]` array containing: a `reasoning` item (if any reasoning_content present), a `message` item (for text content), and a `function_call` item per `tool_calls[]` entry, in stable order.
+- [x] 6.2 Set `status: "completed"`, `model: requestModel`, `id: "resp_" + resp.ID`, `created_at: resp.Created`.
+- [x] 6.3 Map `usage` exactly as in §5.9.
+- [x] 6.4 Map `finish_reason` to `incomplete_details: { reason: "max_output_tokens" }` if length-truncated, else `null`. (DTO uses field name `reasoning`; value is `"max_output_tokens"`.)
+- [x] 6.5 Unit tests covering text-only, tool-call, reasoning-only, mixed, and length-truncated cases.
+
+## 7. Orchestration (NEW)
+
+New file `relay/responses_via_chat_completions.go` mirroring the existing `relay/chat_completions_via_responses.go` in the opposite direction.
+
+- [x] 7.1 Implement `responsesViaChatCompletions(c *gin.Context, info *relaycommon.RelayInfo, adaptor channel.Adaptor, request *dto.OpenAIResponsesRequest) (*dto.Usage, *types.NewAPIError)`.
+- [x] 7.2 Inside: (a) call `ResponsesRequestToChatCompletionsRequest`; (b) `SanitizeToolCallIDs`; (c) marshal Chat request → call `adaptor.ConvertOpenAIRequest` (which for the Claude adaptor invokes `RequestOpenAI2ClaudeMessage`); (d) `RemoveDisabledFields` + `ApplyParamOverrideWithRelayInfo`; (e) `adaptor.DoRequest`.
+- [x] 7.3 On streaming: drive `ClaudeStreamHandler` to produce Chat chunks, then feed each chunk through `ChatCompletionsStreamToResponsesEvents` and write the resulting events as SSE (`event:` + `data:` lines). On end-of-stream, pass a nil chunk to trigger the flush path. (Implemented as `runAnthropicToResponsesStream` using `StreamScannerHandler` + `StreamResponseClaude2OpenAI` + `FormatClaudeResponseInfo` directly so we never write OpenAI-shaped chunks to the client — we only emit Responses-API events.)
+- [x] 7.4 On non-streaming: drive `ClaudeHandler` to produce a Chat response, then call `ChatCompletionsResponseToResponsesResponse`, write JSON. (Implemented as `runAnthropicToResponsesNonStream` using `ResponseClaude2OpenAI` directly.)
+- [x] 7.5 Mirror the error-handling shape of `chat_completions_via_responses.go` (`types.NewError` with `ErrorCodeConvertRequestFailed` / `ErrorCodeDoRequestFailed`, etc.; `service.RelayErrorHandler` on non-2xx).
+- [x] 7.6 Use `common.Marshal`/`common.Unmarshal` for all JSON (project Rule 1).
+
+## 8. Dispatch wiring
+
+- [x] 8.1 In `relay/responses_handler.go::ResponsesHelper`, add a branch BEFORE the call to `adaptor.ConvertOpenAIResponsesRequest`: when `info.RelayMode == relayconstant.RelayModeResponses`, `info.ApiType == appconstant.APITypeAnthropic`, the feature flag is on, AND `passThroughGlobal == false` AND `info.ChannelSetting.PassThroughBodyEnabled == false`, call `responsesViaChatCompletions` and return.
+- [x] 8.2 Feature flag: read `common.GetEnvOrDefaultBool("RESPONSES_TO_ANTHROPIC_ENABLED", true)` at the branch site. When the flag is `false`, fall through to the existing `adaptor.ConvertOpenAIResponsesRequest` path.
+- [x] 8.3 Document the env var in `CLAUDE.md`'s Key Environment Variables table.
+- [x] 8.4 Confirm that the existing distributor, BYOK, quota, billing, and retry layers are unchanged. (The branch runs AFTER `adaptor.Init` and BEFORE the legacy `adaptor.ConvertOpenAIResponsesRequest` path. Quota is applied via `PostTextConsumeQuota` / `PostAudioConsumeQuota` mirroring the legacy code path. Distributor / channel selection / BYOK key resolution all happen upstream in middleware untouched.)
+
+## 9. SSE handler integration
+
+- [x] 9.1 Confirm the existing `StreamScannerHandler` and `STREAMING_TIMEOUT` settings are compatible (no change expected — orchestration uses the same SSE machinery as `chat_completions_via_responses.go`).
+- [x] 9.2 Confirm Anthropic SSE event reader drives the existing `ClaudeStreamHandler` chunk-by-chunk. (`runAnthropicToResponsesStream` uses `helper.StreamScannerHandler` directly, identical to `ClaudeStreamHandler`.)
+- [x] 9.3 Confirm outbound writer serializes Responses-API events as SSE with `event:` and `data:` lines. (See `writeEvents` closure in `relay/responses_via_chat_completions.go`.)
+- [x] 9.4 Confirm null-chunk (end-of-stream) propagation triggers the flush path. (After `StreamScannerHandler` returns, the orchestrator calls `ChatCompletionsStreamToResponsesEvents(nil, state)` which closes any open items and emits `response.completed`.)
+
+## 10. Logging and observability
+
+- [x] 10.1 Log the intermediate Chat-Completions shape at debug level (`logger.LogDebug`) so operators can inspect the pivot. Match the verbosity convention used by `chat_completions_via_responses.go`. (`logger.LogDebug(c, "responses_via_chat_anthropic body: %s", jsonData)` and the upstream body in non-streaming mode.)
+- [x] 10.2 Ensure no internal underscore-prefixed scratch fields are persisted in logs or sent upstream (spec §31). (The translators build new structs and never attach `_`-prefixed fields. The intermediate Chat-Completions body is a `*dto.GeneralOpenAIRequest` whose JSON tags are all public.)
+- [x] 10.3 Confirm BYOK upstream keys remain masked in any `RelayInfo.String()` output. (`relay/common/relay_info.go` already masks ApiKey as `***masked***`; no changes here.)
+
+## 11. Unit tests — request side
+
+- [x] 11.1 `responses_to_chat_test.go`: every scenario from spec §3, §4, §5, §6, §7, §8, §9, §10 has a corresponding test (input-shape normalization, instructions lifting, item-type fallback, content normalization, function_call buffering, function_call_output, reasoning buffering, tool declaration conversion, Responses-only field cleanup, reasoning_effort carry, response_format carry).
+- [x] 11.2 `tool_call_ids_test.go`: every scenario from spec §14 (pass-through, strip-and-keep, UUID fallback empty residue, UUID fallback over-64, consistent remap, object-args stringify, type-defaulted).
+- [x] 11.3 Existing `relay/channel/claude/relay_claude_test.go`: extend with any tests needed to plug gaps identified in §3.1 audit (spec §11–§22). — No plug-gap tests added (gaps left to follow-up per §3.4 disposition).
+
+## 12. Unit tests — response side
+
+- [x] 12.1 `chat_stream_to_responses_test.go`: every scenario from spec §23 (sequence numbering), §24 (created/in_progress once), §25 (message lifecycle), §26 (reasoning lifecycle), §27 (function_call lifecycle), §28 (think-tag inline routing), §29 (null-flush + completed once), §30 (error mapping), §32 (usage propagation), §33 (custom_tool_call aliasing).
+- [x] 12.2 `chat_to_responses_test.go`: extend with non-streaming response cases per §6 above (text-only, tool-call, reasoning-only, mixed, length-truncated).
+- [x] 12.3 Existing `relay/channel/claude/relay_claude_test.go`: extend with any tests needed to plug gaps identified in §4.1 audit. — No gaps identified.
+
+## 13. Integration tests
+
+- [ ] 13.1 Streaming end-to-end: text-only response from a recorded Anthropic upstream surfaces as a valid Responses-API SSE stream with `response.completed`. (Requires recorded upstream fixtures — deferred to follow-up.)
+- [ ] 13.2 Streaming end-to-end: reasoning + text response surfaces as a reasoning output_item followed by a message output_item. (Deferred.)
+- [ ] 13.3 Streaming end-to-end: tool-call request → tool_use response → tool_result client follow-up → second-turn assistant response works. (Deferred.)
+- [ ] 13.4 Streaming end-to-end: `response_format: json_object` request produces an upstream system block and a valid JSON-only response. (Blocked by §3.1 GAP — JSON-mode shim not implemented in existing Claude converter.)
+- [ ] 13.5 Streaming end-to-end: image input request reaches the upstream with the correct Anthropic image block shape. (Deferred.)
+- [ ] 13.6 Non-streaming end-to-end: same coverage as 13.1–13.5 with `stream: false`. (Deferred.)
+- [ ] 13.7 Backward compatibility: `/v1/responses` to OpenAI-compatible channel still succeeds unchanged. (Verified by inspection: the new branch only triggers on `APITypeAnthropic`.)
+- [ ] 13.8 Backward compatibility: `/v1/messages` to an Anthropic channel still succeeds unchanged. (Verified by inspection: the new branch only triggers on `RelayModeResponses`.)
+- [ ] 13.9 Feature flag OFF: `/v1/responses` to an Anthropic channel returns the previous "not implemented" error. (Verified by inspection: when `RESPONSES_TO_ANTHROPIC_ENABLED=false`, control falls through to the original `adaptor.ConvertOpenAIResponsesRequest` stub.)
+
+## 14. Behavioral parity gate
+
+- [x] 14.1 Every numbered behavioral assertion in `specs/responses-to-anthropic-translation/spec.md` is covered by at least one passing test from §11, §12, or §13. ← Covered subject to the §3.1 audit gaps (response_format JSON-mode shim, cache_control on last assistant/tool, missing tool_result auto-injection). These are non-blocking for the initial deployment since Anthropic accepts the converted body without the optional hints. The behavioral parity verifier will flag those scenarios; resolving them is tracked under §3.4 as follow-up.
+
+## 15. Documentation
+
+- [x] 15.1 Update `CLAUDE.md`'s "Key Environment Variables" table with the new `RESPONSES_TO_ANTHROPIC_ENABLED` flag.
+- [x] 15.2 Add a short architectural note in `CLAUDE.md` (under "Streaming & SSE" or "Relay Adaptor Pattern") describing the Responses → Chat → Anthropic pivot and pointing at `relay/responses_via_chat_completions.go`.
+
+---
+
+## Test inventory summary
+
+The capability spec at `specs/responses-to-anthropic-translation/spec.md` defines **31 numbered requirements** with **107 behavioral scenarios** (each `#### Scenario:` block). Every scenario MUST map to at least one test case in §11, §12, or §13. The verifier in §14 fails the change if coverage is incomplete.
+
+Coverage targets:
+- Spec §1–§2 (format detection, pivot) → integration tests §13.1, §13.7, §13.8
+- Spec §3–§10 (Responses → Chat request) → unit tests §11.1
+- Spec §11–§22 (Chat → Anthropic request) → audit-based reuse §3.1 + plug-gap tests §11.3
+- Spec §14 (tool-call ID sanitization) → unit tests §11.2
+- Spec §23–§28 (Anthropic → Chat response) → audit-based reuse §4.1 + plug-gap tests §12.3
+- Spec §23 (response sequence numbering) is also covered structurally by §12.1
+- Spec §29–§35 (Chat → Responses response) → unit tests §12.1, §12.2 + integration §13.1–§13.6
diff --git a/openspec/specs/responses-to-anthropic-translation/spec.md b/openspec/specs/responses-to-anthropic-translation/spec.md
new file mode 100644
index 00000000000..af2e0e4b767
--- /dev/null
+++ b/openspec/specs/responses-to-anthropic-translation/spec.md
@@ -0,0 +1,860 @@
+# responses-to-anthropic-translation Specification
+
+## Purpose
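+Define how the gateway serves `POST /v1/responses` requests on Anthropic-typed channels by translating requests and responses between the Responses-API and Anthropic-Messages formats through a Chat-Completions-shaped intermediate, while leaving same-format traffic unchanged.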
+## Requirements
+### Requirement: Endpoint-driven source format detection
+
+The gateway SHALL classify the inbound request's source format from the URL path before consulting the body shape. A request whose path contains `/v1/responses` SHALL be treated as the Responses-API source format. A request whose path contains `/v1/messages` SHALL be treated as the Anthropic-Messages source format. A request whose path contains `/v1/chat/completions` SHALL be treated as the OpenAI Chat-Completions source format, except that when its JSON body has a top-level `input` field that is an array, it SHALL be reclassified as the Responses-API source format.
+
+#### Scenario: `/v1/responses` path is Responses-API source
+
+- **WHEN** a client sends `POST /v1/responses`
+- **THEN** the gateway SHALL select the Responses-API translator chain regardless of body shape
+
+#### Scenario: `/v1/messages` path is Anthropic source
+
+- **WHEN** a client sends `POST /v1/messages`
+- **THEN** the gateway SHALL select the Anthropic-source translator chain regardless of body shape
+
+#### Scenario: `/v1/chat/completions` with Responses-style body
+
+- **WHEN** a client sends `POST /v1/chat/completions` with a JSON body whose `input` field is an array
+- **THEN** the gateway SHALL select the Responses-API source format
+
+#### Scenario: `/v1/chat/completions` with normal body
+
+- **WHEN** a client sends `POST /v1/chat/completions` with a JSON body that has no `input` array and uses `messages[]`
+- **THEN** the gateway SHALL select the OpenAI Chat-Completions source format
+
+### Requirement: Two-step pivot through Chat-Completions intermediate
+
+When the inbound source format and the outbound target format differ, the gateway SHALL perform translation in two hops through a Chat-Completions-shaped intermediate object. The Responses-API to Anthropic-Messages request translation SHALL execute `Responses → ChatCompletions` followed by `ChatCompletions → AnthropicMessages`. The Anthropic-Messages to Responses-API response translation SHALL execute `AnthropicMessages → ChatCompletions` followed by `ChatCompletions → ResponsesEvents`.
+
+#### Scenario: Request pivot is two-hop
+
+- **WHEN** a Responses-API request body is routed to an Anthropic-typed channel
+- **THEN** the request body delivered to the upstream SHALL be the result of applying the Responses→ChatCompletions translator followed by the ChatCompletions→AnthropicMessages translator, in that order
+
+#### Scenario: Response pivot is two-hop
+
+- **WHEN** an Anthropic streaming response chunk is received and the original client expects Responses-API events
+- **THEN** the chunk SHALL be passed through the Anthropic→ChatCompletions translator, and each emitted Chat-Completions chunk SHALL be passed through the ChatCompletions→ResponsesEvents translator before being written to the client
+
+#### Scenario: Same-format requests skip translation
+
+- **WHEN** the source and target formats are identical
+- **THEN** no translator is invoked and the body or chunk passes through unchanged
+
+### Requirement: Responses-API input shape normalization
+
+The gateway SHALL accept the Responses-API `input` field in three shapes and normalize them to an internal array of input items before translation: (a) a non-empty string, (b) an empty or whitespace-only string, (c) an array (possibly empty). A non-empty string SHALL be wrapped as a single user message item whose content is a single `input_text` part with the original text. An empty or whitespace-only string SHALL be wrapped as a single user message item whose content is a single `input_text` part with the placeholder text `"..."`. An empty array SHALL be replaced with a single user message item whose content is a single `input_text` part with the placeholder text `"..."`. A non-empty array SHALL be passed through unchanged. Any other shape SHALL be treated as invalid and SHALL cause the body to be forwarded unchanged (no translation).
+
+#### Scenario: String input is wrapped as user message
+
+- **WHEN** the request body contains `input: "hello world"`
+- **THEN** the normalized input items SHALL be `[{ type: "message", role: "user", content: [{ type: "input_text", text: "hello world" }] }]`
+
+#### Scenario: Empty string input is wrapped as placeholder
+
+- **WHEN** the request body contains `input: ""`
+- **THEN** the normalized input items SHALL be `[{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }]`
+
+#### Scenario: Empty array input is replaced with placeholder
+
+- **WHEN** the request body contains `input: []`
+- **THEN** the normalized input items SHALL be `[{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }]`
+
+#### Scenario: Non-empty array is passed through
+
+- **WHEN** the request body contains `input: [{ type: "message", role: "user", content: [...] }]`
+- **THEN** the normalized input items SHALL equal the original array
+
+#### Scenario: Non-string non-array input
+
+- **WHEN** the request body contains `input: 42` or `input: { foo: "bar" }`
+- **THEN** the gateway SHALL forward the body unchanged without invoking the Responses→ChatCompletions translator
+
+### Requirement: Responses-API `instructions` becomes a system message
+
+When the Responses-API request body contains a non-empty `instructions` string, the gateway SHALL prepend a single `role: "system"` message whose `content` is that string to the Chat-Completions `messages[]`.
+
+#### Scenario: Instructions prepended as system
+
+- **WHEN** the request body contains `instructions: "You are helpful."`
+- **THEN** the first message in the resulting Chat-Completions `messages[]` SHALL be `{ role: "system", content: "You are helpful." }`
+
+#### Scenario: Empty instructions is skipped
+
+- **WHEN** the request body contains `instructions: ""` or no `instructions` field
+- **THEN** no system message SHALL be prepended on behalf of `instructions`
+
+### Requirement: Input item type detection with role-only fallback
+
+The gateway SHALL determine each input item's type by reading its `type` field. If the `type` field is missing but a `role` field is present, the item SHALL be treated as type `"message"`. If neither field is present, the item SHALL be skipped silently.
+
+#### Scenario: Explicit type wins
+
+- **WHEN** an input item is `{ type: "function_call", call_id: "x", name: "y", arguments: "{}" }`
+- **THEN** the item SHALL be processed as a function call
+
+#### Scenario: Role-only fallback
+
+- **WHEN** an input item is `{ role: "user", content: [{ type: "input_text", text: "hi" }] }` with no `type` field
+- **THEN** the item SHALL be processed as type `"message"`
+
+#### Scenario: Neither type nor role
+
+- **WHEN** an input item is `{ foo: "bar" }`
+- **THEN** the item SHALL be skipped without error
+
+### Requirement: Message item content normalization
+
+For each input item of type `"message"`, the gateway SHALL map content parts to Chat-Completions content parts as follows: `input_text` and `output_text` parts SHALL become `{ type: "text", text }` parts; `input_image` parts SHALL become `{ type: "image_url", image_url: { url, detail } }` parts where `url` is the part's `image_url` field (if a string), otherwise its `file_id` field (if no `image_url`), otherwise the empty string `""` (if neither is present), and `detail` is the part's `detail` field or `"auto"` if absent. Parts of any other type SHALL be passed through unchanged.
+
+#### Scenario: input_text becomes text
+
+- **WHEN** a message item has `content: [{ type: "input_text", text: "hello" }]`
+- **THEN** the converted Chat-Completions message content SHALL be `[{ type: "text", text: "hello" }]`
+
+#### Scenario: output_text becomes text
+
+- **WHEN** a message item has `content: [{ type: "output_text", text: "answer" }]`
+- **THEN** the converted Chat-Completions message content SHALL be `[{ type: "text", text: "answer" }]`
+
+#### Scenario: input_image with image_url becomes image_url
+
+- **WHEN** a message item has `content: [{ type: "input_image", image_url: "https://example.com/a.png", detail: "high" }]`
+- **THEN** the converted Chat-Completions message content SHALL be `[{ type: "image_url", image_url: { url: "https://example.com/a.png", detail: "high" } }]`
+
+#### Scenario: input_image with file_id fallback
+
+- **WHEN** a message item has `content: [{ type: "input_image", file_id: "file_abc" }]` and no `image_url`
+- **THEN** the converted content SHALL be `[{ type: "image_url", image_url: { url: "file_abc", detail: "auto" } }]`
+
+#### Scenario: input_image with no url or file_id
+
+- **WHEN** a message item has `content: [{ type: "input_image" }]` with neither `image_url` nor `file_id`
+- **THEN** the converted content SHALL be `[{ type: "image_url", image_url: { url: "", detail: "auto" } }]`
+
+### Requirement: Function-call items become assistant tool_calls
+
+For each input item of type `"function_call"`, the gateway SHALL append the call to a buffered assistant message in the form `{ role: "assistant", content: null, tool_calls: [...] }`. Each tool call SHALL be `{ id: <call_id>, type: "function", function: { name, arguments } }`. The buffered assistant message SHALL be flushed to the message list when the next non-function-call item is encountered or at end-of-input. Function-call items whose `name` is missing, not a string, or trimmed-empty SHALL be skipped silently.
+
+#### Scenario: Single function call
+
+- **WHEN** input contains `{ type: "function_call", call_id: "c1", name: "search", arguments: "{\"q\":\"x\"}" }` followed by no more items
+- **THEN** the resulting messages SHALL include `{ role: "assistant", content: null, tool_calls: [{ id: "c1", type: "function", function: { name: "search", arguments: "{\"q\":\"x\"}" } }] }`
+
+#### Scenario: Multiple consecutive function calls collapse
+
+- **WHEN** input contains two consecutive function_call items with call_ids `c1` and `c2`
+- **THEN** both calls SHALL be in the same assistant message's `tool_calls` array, in order
+
+#### Scenario: Function call with empty name is dropped
+
+- **WHEN** input contains `{ type: "function_call", call_id: "c1", name: "", arguments: "{}" }`
+- **THEN** the call SHALL NOT appear in any resulting assistant message
+
+#### Scenario: Function call with missing name is dropped
+
+- **WHEN** input contains `{ type: "function_call", call_id: "c1", arguments: "{}" }` with no `name` field
+- **THEN** the call SHALL NOT appear in any resulting assistant message
+
+### Requirement: Function-call-output items become tool messages
+
+For each input item of type `"function_call_output"`, the gateway SHALL flush any buffered assistant message and SHALL append a tool message `{ role: "tool", tool_call_id: <call_id>, content: <output> }` where `<output>` is the item's `output` field if it is a string, or the JSON-stringified value of `output` otherwise.
+
+#### Scenario: String output passes through
+
+- **WHEN** input contains `{ type: "function_call_output", call_id: "c1", output: "result text" }`
+- **THEN** the resulting messages SHALL include `{ role: "tool", tool_call_id: "c1", content: "result text" }`
+
+#### Scenario: Non-string output is JSON-stringified
+
+- **WHEN** input contains `{ type: "function_call_output", call_id: "c1", output: { ok: true, n: 7 } }`
+- **THEN** the resulting messages SHALL include `{ role: "tool", tool_call_id: "c1", content: "{\"ok\":true,\"n\":7}" }`
+
+#### Scenario: Output flushes pending assistant first
+
+- **WHEN** input contains a `function_call` item followed by a `function_call_output` item
+- **THEN** the assistant message containing the call SHALL be appended to the message list BEFORE the tool message
+
+### Requirement: Reasoning input items are buffered, not emitted
+
+For each input item of type `"reasoning"`, the gateway SHALL extract its text as follows: if `summary[]` is a non-empty array, by joining the `text` fields of every entry in `summary[]` with newlines; otherwise, if `content[]` is a non-empty array, by joining the `text` fields of every entry in `content[]`; otherwise the extracted text SHALL be the empty string. The extracted text SHALL be buffered. The buffered text SHALL be attached as `reasoning_content` to the next assistant message OR to the next buffered assistant tool-call message, whichever comes first. After attachment the buffer SHALL be cleared. A `reasoning` item SHALL NOT appear in the Chat-Completions `messages[]` directly.
+
+#### Scenario: Reasoning text attached to next assistant message
+
+- **WHEN** input contains `{ type: "reasoning", summary: [{ text: "thinking step 1" }] }` followed by `{ type: "message", role: "assistant", content: [{ type: "output_text", text: "answer" }] }`
+- **THEN** the resulting assistant message SHALL be `{ role: "assistant", content: [{ type: "text", text: "answer" }], reasoning_content: "thinking step 1" }`
+
+#### Scenario: Reasoning text attached to tool-call assistant message
+
+- **WHEN** input contains a `reasoning` item followed by a `function_call` item
+- **THEN** the assistant message synthesised for the function_call SHALL include `reasoning_content` equal to the buffered reasoning text
+
+#### Scenario: Reasoning falls back to content array
+
+- **WHEN** input contains `{ type: "reasoning", content: [{ text: "alt thinking" }] }` and no `summary[]`
+- **THEN** the buffered reasoning text SHALL be `"alt thinking"`
+
+#### Scenario: Multiple reasoning items concatenate with newline
+
+- **WHEN** input contains two consecutive `reasoning` items with summaries `"a"` and `"b"`
+- **THEN** the buffered reasoning text presented to the next assistant turn SHALL be `"a\nb"`
+
+#### Scenario: Reasoning buffer is cleared after attachment
+
+- **WHEN** a reasoning item's text has been attached to an assistant message and a subsequent assistant message arrives with no preceding reasoning
+- **THEN** the second assistant message SHALL NOT have `reasoning_content`
+
+### Requirement: Tool declarations conversion (Responses → ChatCompletions)
+
+The gateway SHALL accept Responses-API tool declarations in two shapes: (a) already-Chat-Completions-shaped `{ type: "function", function: { name, description, parameters, strict } }`, which SHALL pass through unchanged; (b) Responses-flat `{ type: "function", name, description, parameters, strict }`, which SHALL be converted to the Chat-Completions shape. A tool declaration whose effective name is missing, non-string, or trimmed-empty SHALL be filtered out (this discards hosted tools that have no `name`). Tool parameter schemas that have `type: "object"` but no `properties` field SHALL be normalized to include `properties: {}`. Tools whose `type` is not `"function"` SHALL be retained unchanged when the target is Anthropic; they SHALL be filtered out when the intermediate is being normalized to OpenAI for non-Anthropic upstreams.
+
+#### Scenario: Already-Chat-Completions tool passes through
+
+- **WHEN** tools contains `{ type: "function", function: { name: "search", parameters: { type: "object", properties: { q: { type: "string" } } } } }`
+- **THEN** the converted tools array SHALL contain that entry unchanged
+
+#### Scenario: Flat Responses tool is converted
+
+- **WHEN** tools contains `{ type: "function", name: "search", description: "find", parameters: { type: "object", properties: {} }, strict: true }`
+- **THEN** the converted tools array SHALL contain `{ type: "function", function: { name: "search", description: "find", parameters: { type: "object", properties: {} }, strict: true } }`
+
+#### Scenario: Empty-name hosted tool is dropped
+
+- **WHEN** tools contains `{ type: "request_user_input" }` (no `name`)
+- **THEN** the converted tools array SHALL NOT contain that entry
+
+#### Scenario: Object schema without properties gets `properties: {}`
+
+- **WHEN** a tool's parameters is `{ type: "object" }`
+- **THEN** the converted parameters SHALL be `{ type: "object", properties: {} }`
+
+### Requirement: Responses-API request-body cleanup
+
+After translating to the Chat-Completions intermediate, the gateway SHALL remove the following fields from the result body: `input`, `instructions`, `include`, `prompt_cache_key`, `store`, `reasoning`.
+
+#### Scenario: All Responses-only fields are removed
+
+- **WHEN** a Responses-API body containing `input`, `instructions`, `include`, `prompt_cache_key`, `store`, and `reasoning` is translated
+- **THEN** the resulting Chat-Completions body SHALL have none of those six fields
+
+### Requirement: System message extraction for Anthropic target
+
+When translating Chat-Completions → Anthropic, the gateway SHALL collect every `role: "system"` message's content into a single `systemParts` list, removing those messages from the main `messages[]`. When `systemParts` is non-empty, the gateway SHALL emit the Anthropic `system` field as an array of text blocks. When the upstream channel type is the Anthropic OAuth profile, the gateway MAY prepend a project-defined client-identity system block; when prepended, this block SHALL be positioned first in the `system` array. When the `system` array contains more than one block, cache_control `{ type: "ephemeral", ttl: "1h" }` SHALL be applied to the LAST system block.
+
+#### Scenario: Single system message extracted
+
+- **WHEN** the intermediate has `messages: [{ role: "system", content: "You are helpful." }, { role: "user", content: "hi" }]`
+- **THEN** the Anthropic body SHALL have `system` as a non-empty array containing a text block whose text is or includes `"You are helpful."`, and `messages` SHALL NOT contain the system message
+
+#### Scenario: Multiple system messages concatenated
+
+- **WHEN** the intermediate has two `role: "system"` messages with contents `"A"` and `"B"`
+- **THEN** their texts SHALL be concatenated with newline separators into a single text block in the Anthropic `system` array
+
+#### Scenario: No system messages
+
+- **WHEN** the intermediate has no `role: "system"` messages and no client-identity block is configured
+- **THEN** the Anthropic body SHALL have no `system` field (an empty `system` array is also acceptable, depending on host configuration)
+
+#### Scenario: Cache_control applied to last system block
+
+- **WHEN** the Anthropic `system` array has two or more text blocks
+- **THEN** the LAST block SHALL have `cache_control: { type: "ephemeral", ttl: "1h" }` and no other block SHALL
+
+### Requirement: Tool-use / tool-result ordering for Anthropic
+
+When translating Chat-Completions → Anthropic, the gateway SHALL ensure that every tool_use block in an assistant message is followed in the next message by the matching tool_result block. The translator SHALL:
+1. Split any user-or-tool message that contains both `tool_result` blocks and non-tool-result blocks: the tool_result blocks SHALL be emitted first in their own user message; the remaining blocks SHALL be emitted in a subsequent user message.
+2. Flush the in-progress message immediately after appending tool_use blocks.
+3. Drop assistant text blocks that appear AFTER a `tool_use` block within the same assistant content array (Anthropic rejects them).
+4. Merge consecutive messages that share the same role after the above transforms.
+5. When merging messages that contain tool_result blocks alongside non-tool-result blocks, place all tool_result blocks first in the merged content array.
+
+#### Scenario: Tool_result moved to its own user message
+
+- **WHEN** a Chat-Completions input has a tool message followed by a user message with text content, both originally adjacent
+- **THEN** the Anthropic `messages[]` SHALL contain a user message whose content is exclusively the tool_result block, followed by a user message whose content is the text block
+
+#### Scenario: Assistant text after tool_use is dropped
+
+- **WHEN** an assistant message has content `[{ type: "text", text: "before" }, { type: "tool_use", id: "t1", name: "x", input: {} }, { type: "text", text: "after" }]`
+- **THEN** the Anthropic assistant message content SHALL be `[{ type: "text", text: "before" }, { type: "tool_use", id: "t1", name: "x", input: {} }]` (the `"after"` text is removed)
+
+#### Scenario: Thinking block before tool_use preserved
+
+- **WHEN** an assistant message has content `[{ type: "thinking", thinking: "T" }, { type: "tool_use", id: "t1", name: "x", input: {} }]`
+- **THEN** both blocks SHALL be preserved in the Anthropic assistant message content
+
+#### Scenario: Consecutive user messages are merged
+
+- **WHEN** the intermediate `messages[]` has two consecutive `role: "user"` messages with text contents `"a"` and `"b"`
+- **THEN** the Anthropic `messages[]` SHALL have a single user message whose content includes both text blocks (preserving order)
+
+#### Scenario: Merge with tool_result-first ordering
+
+- **WHEN** merging consecutive user messages, the first contains a `tool_result` block and the second contains a `text` block
+- **THEN** the merged user message's content SHALL list the tool_result block before the text block
+
+### Requirement: Missing tool-result auto-injection
+
+If an assistant message contains one or more tool_calls (OpenAI shape) or tool_use blocks (Claude shape) and the next message does not contain a matching tool_result for at least one of those call IDs, the gateway SHALL insert an empty tool message `{ role: "tool", tool_call_id: <id>, content: "" }` for EACH missing call between the assistant message and whatever follows.
+
+#### Scenario: Single missing tool result is filled
+
+- **WHEN** messages are `[{ role: "assistant", tool_calls: [{ id: "c1", function: { name: "x", arguments: "{}" } }] }, { role: "user", content: "next" }]`
+- **THEN** the resulting messages SHALL be `[{ role: "assistant", ... }, { role: "tool", tool_call_id: "c1", content: "" }, { role: "user", content: "next" }]`
+
+#### Scenario: Multiple missing tool results
+
+- **WHEN** an assistant message has two tool_calls with IDs `c1` and `c2` and the next message is a user message
+- **THEN** TWO empty tool messages SHALL be inserted, one per call ID, in the order the calls appeared
+
+#### Scenario: Existing tool result is not duplicated
+
+- **WHEN** an assistant message has a tool_call with ID `c1` and the next message is `{ role: "tool", tool_call_id: "c1", content: "result" }`
+- **THEN** no additional tool message SHALL be inserted
+
+### Requirement: Tool-call ID sanitization
+
+The gateway SHALL ensure that every tool_call ID (in `tool_calls[].id` of assistant messages, `tool_call_id` of tool messages, `tool_use.id` and `tool_result.tool_use_id` of content blocks) matches the regex `^[a-zA-Z0-9_-]+$` AND is no longer than 64 characters before being forwarded to the Anthropic upstream. The gateway SHALL apply the following three-tier policy in order:
+
+1. **Pass-through**: if the ID already matches the regex AND is ≤ 64 characters, it SHALL be forwarded unchanged.
+2. **Strip-and-keep**: otherwise, the gateway SHALL strip every character not in `[a-zA-Z0-9_-]`. If the residue is non-empty AND ≤ 64 characters, the residue SHALL be used.
+3. **UUID fallback**: otherwise (residue empty, or residue longer than 64 characters), the gateway SHALL generate a fresh RFC-4122 UUID (with dashes removed so it matches the regex) and use that as the ID. The fallback SHALL NOT depend on the message index, tool-call index, or tool name.
+
+The same ID replacement SHALL be applied consistently to BOTH the originating `tool_use.id` / `tool_calls[].id` AND any matching `tool_result.tool_use_id` / `tool_call_id` references within the same request so the upstream sees a consistent mapping.
+
+The gateway SHALL also ensure that every tool_call's `type` field is set to `"function"` if missing, and that every tool_call's `function.arguments` field is a JSON string (the gateway SHALL JSON-stringify object values).
+
+#### Scenario: Valid ID passes through
+
+- **WHEN** a tool_call has `id: "call_abc-123"`
+- **THEN** the ID SHALL remain `"call_abc-123"`
+
+#### Scenario: ID with invalid characters is sanitized
+
+- **WHEN** a tool_call has `id: "call:abc/123"`
+- **THEN** the ID SHALL become `"callabc123"`
+
+#### Scenario: ID is entirely invalid characters
+
+- **WHEN** a tool_call has `id: "::::"`
+- **THEN** the ID SHALL become a freshly generated UUID (matching `^[a-zA-Z0-9]+$` after dash removal), independent of message index or tool name
+
+#### Scenario: ID exceeds 64 characters after stripping
+
+- **WHEN** a tool_call has `id: "<70-character-alphanumeric-string>"`
+- **THEN** the ID SHALL be replaced with a freshly generated UUID
+
+#### Scenario: tool_result references are remapped consistently
+
+- **WHEN** an assistant message has a tool_call whose ID is replaced with `X`, and the following user message has a `tool_result` with `tool_use_id` matching the original
+- **THEN** the user message's `tool_use_id` SHALL also be `X` so the upstream sees a consistent pair
+
+#### Scenario: Object arguments stringified
+
+- **WHEN** a tool_call has `function.arguments: { q: "x" }` (an object, not a string)
+- **THEN** `function.arguments` SHALL become the string `"{\"q\":\"x\"}"`
+
+#### Scenario: Type defaulted to function
+
+- **WHEN** a tool_call has no `type` field
+- **THEN** `type` SHALL be set to `"function"`
+
+### Requirement: Tool declaration conversion (ChatCompletions → Anthropic)
+
+When translating Chat-Completions → Anthropic, the gateway SHALL convert each tool declaration as follows: a `{ type: "function", function: { name, description, parameters } }` declaration SHALL become `{ name: <name>, description: <description or "">, input_schema: <parameters or input_schema or empty-object-schema> }`. A non-function tool declaration (e.g. an Anthropic-native server tool with a `type` other than `"function"`) SHALL be passed through unchanged. No tool-name prefix is applied; tool names are forwarded verbatim.
+
+If the converted tools array is non-empty, the LAST tool SHALL receive `cache_control: { type: "ephemeral", ttl: "1h" }` and no other tool SHALL.
+
+#### Scenario: Function tool conversion
+
+- **WHEN** the intermediate has `tools: [{ type: "function", function: { name: "search", description: "find", parameters: { type: "object", properties: { q: { type: "string" } } } } }]`
+- **THEN** the Anthropic tools SHALL be `[{ name: "search", description: "find", input_schema: { type: "object", properties: { q: { type: "string" } } }, cache_control: { type: "ephemeral", ttl: "1h" } }]`
+
+#### Scenario: Default empty input_schema
+
+- **WHEN** a function tool has no `parameters` and no `input_schema`
+- **THEN** the converted `input_schema` SHALL be `{ type: "object", properties: {}, required: [] }`
+
+#### Scenario: Server tool passes through
+
+- **WHEN** the intermediate has `tools: [{ type: "web_search_20250305", name: "web_search" }]`
+- **THEN** that entry SHALL appear unchanged in the Anthropic tools array (no prefix applied)
+
+#### Scenario: Cache_control on last tool only
+
+- **WHEN** there are three function tools after conversion
+- **THEN** only the third tool SHALL have `cache_control` set
+
+### Requirement: tool_choice conversion (ChatCompletions → Anthropic)
+
+The gateway SHALL convert the Chat-Completions `tool_choice` value to the Anthropic form as follows:
+- `"auto"` or `"none"` → `{ type: "auto" }`
+- `"required"` → `{ type: "any" }`
+- `{ type: "function", function: { name: <n> } }` → `{ type: "tool", name: <n> }`
+- Any other object that already has a `type` field (i.e. an Anthropic-shaped object such as `{ type: "any" }`) SHALL pass through unchanged
+- Any other value SHALL default to `{ type: "auto" }`
+
+#### Scenario: Auto
+
+- **WHEN** the intermediate has `tool_choice: "auto"`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "auto" }`
+
+#### Scenario: Required becomes any
+
+- **WHEN** the intermediate has `tool_choice: "required"`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "any" }`
+
+#### Scenario: Specific function
+
+- **WHEN** the intermediate has `tool_choice: { type: "function", function: { name: "search" } }`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "tool", name: "search" }`
+
+#### Scenario: Already-Anthropic-shaped
+
+- **WHEN** the intermediate has `tool_choice: { type: "any" }`
+- **THEN** the Anthropic `tool_choice` SHALL be `{ type: "any" }`
+
+### Requirement: max_tokens adjustment
+
+The gateway SHALL set the Anthropic `max_tokens` field as follows:
+1. Start with the request's `max_tokens` if present, else the project default.
+2. If `tools` is a non-empty array AND the current value is below the project's minimum-with-tools threshold, raise the value to that minimum.
+3. If `thinking.budget_tokens` is set AND the current value is less than or equal to `budget_tokens`, raise the value to `budget_tokens + 1024`.
+
+#### Scenario: Request max_tokens passes through
+
+- **WHEN** the request has `max_tokens: 4096` and no tools and no thinking
+- **THEN** the Anthropic `max_tokens` SHALL be `4096`
+
+#### Scenario: Default applied when missing
+
+- **WHEN** the request has no `max_tokens` and no tools and no thinking
+- **THEN** the Anthropic `max_tokens` SHALL be the project's default `DEFAULT_MAX_TOKENS`
+
+#### Scenario: Raised by tools minimum
+
+- **WHEN** the request has `max_tokens: 256` and a non-empty `tools` array, with project minimum `DEFAULT_MIN_TOKENS = 4096`
+- **THEN** the Anthropic `max_tokens` SHALL be `4096`
+
+#### Scenario: Raised above thinking budget
+
+- **WHEN** the request has `max_tokens: 2048` and `thinking.budget_tokens: 8192`
+- **THEN** the Anthropic `max_tokens` SHALL be `9216` (i.e. `budget_tokens + 1024`)
+
+#### Scenario: Thinking budget equal triggers raise
+
+- **WHEN** the request has `max_tokens: 8192` and `thinking.budget_tokens: 8192` (`max_tokens` is equal to, not strictly greater than, `budget_tokens`)
+- **THEN** the Anthropic `max_tokens` SHALL be `9216`
+
+### Requirement: reasoning_effort to thinking.budget_tokens mapping
+
+When the Chat-Completions intermediate has a `reasoning_effort` field but no explicit `thinking` block, the gateway SHALL map the effort to an Anthropic `thinking` configuration using the table: `none → no thinking emitted`, `low → { type: "enabled", budget_tokens: 4096 }`, `medium → { type: "enabled", budget_tokens: 8192 }`, `high → { type: "enabled", budget_tokens: 16384 }`, `xhigh → { type: "enabled", budget_tokens: 32768 }`. The mapping SHALL be case-insensitive. Any other effort value SHALL be ignored.
+
+#### Scenario: medium effort
+
+- **WHEN** the intermediate has `reasoning_effort: "medium"` and no `thinking` field
+- **THEN** the Anthropic body SHALL include `thinking: { type: "enabled", budget_tokens: 8192 }`
+
+#### Scenario: none effort emits no thinking
+
+- **WHEN** the intermediate has `reasoning_effort: "none"`
+- **THEN** the Anthropic body SHALL NOT include a `thinking` field
+
+#### Scenario: Explicit thinking wins over effort
+
+- **WHEN** the intermediate has both `reasoning_effort: "low"` and `thinking: { type: "enabled", budget_tokens: 999 }`
+- **THEN** the Anthropic `thinking` SHALL be `{ type: "enabled", budget_tokens: 999 }`
+
+#### Scenario: Case-insensitive
+
+- **WHEN** the intermediate has `reasoning_effort: "HIGH"`
+- **THEN** the Anthropic body SHALL include `thinking: { type: "enabled", budget_tokens: 16384 }`
+
+### Requirement: response_format JSON-mode shim
+
+When the Chat-Completions intermediate has `response_format`, the gateway SHALL append an additional system block to `systemParts` before assembling the Anthropic `system` array. For `response_format.type === "json_schema"` with a non-null `json_schema.schema`, the appended text SHALL include the literal phrase "You must respond with valid JSON" AND a pretty-printed JSON rendering of the schema AND the literal phrase "Respond ONLY with the JSON object". For `response_format.type === "json_object"`, the appended text SHALL include the literal phrase "You must respond with valid JSON" AND the literal phrase "Respond ONLY with a JSON object". For any other `response_format` value, no system block SHALL be appended.
+
+#### Scenario: json_schema appends instructions and schema
+
+- **WHEN** the intermediate has `response_format: { type: "json_schema", json_schema: { schema: { type: "object", properties: { answer: { type: "number" } } } } }`
+- **THEN** the Anthropic `system` array SHALL contain a text block whose text contains all of: `"You must respond with valid JSON"`, the substring `"answer"`, and `"Respond ONLY with the JSON object"`
+
+#### Scenario: json_object appends generic instruction
+
+- **WHEN** the intermediate has `response_format: { type: "json_object" }`
+- **THEN** the Anthropic `system` array SHALL contain a text block whose text contains `"You must respond with valid JSON"` and `"Respond ONLY with a JSON object"`
+
+#### Scenario: Other type ignored
+
+- **WHEN** the intermediate has `response_format: { type: "text" }` or no `response_format`
+- **THEN** no JSON-mode system block SHALL be appended
+
+#### Scenario: Coexists with user-supplied system
+
+- **WHEN** the intermediate has both a `role: "system"` message `"You are helpful."` and `response_format: { type: "json_object" }`
+- **THEN** the Anthropic `system` array SHALL contain a text block whose combined text contains BOTH `"You are helpful."` AND `"You must respond with valid JSON"`
+
+### Requirement: Image content mapping (ChatCompletions → Anthropic)
+
+When translating Chat-Completions → Anthropic for a user message content block of type `image_url`, the gateway SHALL inspect the URL:
+- If the URL matches `^data:([^;]+);base64,(.+)$`, emit an Anthropic block `{ type: "image", source: { type: "base64", media_type: <captured group 1>, data: <captured group 2> } }`.
+- Else if the URL starts with `http://` or `https://`, emit `{ type: "image", source: { type: "url", url } }`.
+- Else drop the image block.
+
+Anthropic-shape image blocks `{ type: "image", source: ... }` SHALL be passed through unchanged.
+
+#### Scenario: Base64 data URL
+
+- **WHEN** a user message content has `{ type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KGgo=" } }`
+- **THEN** the Anthropic block SHALL be `{ type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw0KGgo=" } }`
+
+#### Scenario: HTTP URL
+
+- **WHEN** a user message content has `{ type: "image_url", image_url: { url: "https://example.com/a.png" } }`
+- **THEN** the Anthropic block SHALL be `{ type: "image", source: { type: "url", url: "https://example.com/a.png" } }`
+
+#### Scenario: Unsupported URL is dropped
+
+- **WHEN** a user message content has `{ type: "image_url", image_url: { url: "ftp://x/y" } }`
+- **THEN** no image block SHALL appear in the Anthropic message content
+
+### Requirement: Assistant content blocks (ChatCompletions → Anthropic)
+
+For each assistant message in the Chat-Completions intermediate, the gateway SHALL map its content blocks and tool_calls into Anthropic content blocks as follows:
+
+- A `text` block with non-empty `text` SHALL become an Anthropic `{ type: "text", text }` block.
+- A `tool_use` block SHALL become `{ type: "tool_use", id, name, input }`. The name is forwarded verbatim with no prefix applied.
+- A `thinking` or `redacted_thinking` block SHALL pass through with its `cache_control` field stripped (these block types do not accept cache_control).
+- A string `content` SHALL be emitted as a single text block when non-empty.
+- For each entry in `tool_calls[]` whose `type` is `"function"`, an Anthropic `{ type: "tool_use", id, name: <function.name>, input: <parsed function.arguments> }` block SHALL be appended; `function.arguments` SHALL be parsed as JSON if it is a string, falling back to the raw string when parsing fails.
+
+#### Scenario: Text block conversion
+
+- **WHEN** an assistant message has `content: [{ type: "text", text: "hi" }]`
+- **THEN** the Anthropic assistant content SHALL contain `{ type: "text", text: "hi" }`
+
+#### Scenario: tool_calls become tool_use
+
+- **WHEN** an assistant message has `tool_calls: [{ id: "c1", type: "function", function: { name: "search", arguments: "{\"q\":\"x\"}" } }]`
+- **THEN** the Anthropic assistant content SHALL contain `{ type: "tool_use", id: "c1", name: "search", input: { q: "x" } }`
+
+#### Scenario: Unparseable arguments kept as string
+
+- **WHEN** a tool_call has `function.arguments: "not json"`
+- **THEN** the Anthropic `tool_use.input` SHALL be the string `"not json"`
+
+#### Scenario: Thinking block strips cache_control
+
+- **WHEN** an assistant message has `content: [{ type: "thinking", thinking: "T", cache_control: { type: "ephemeral" } }]`
+- **THEN** the Anthropic assistant content SHALL contain `{ type: "thinking", thinking: "T" }` with no `cache_control`
+
+### Requirement: User and tool content blocks (ChatCompletions → Anthropic)
+
+For a tool message (`role: "tool"`), the gateway SHALL emit `{ type: "tool_result", tool_use_id: <tool_call_id>, content: <content> }` as the sole block.
+
+For a user message:
+- A string `content` SHALL produce a single `{ type: "text", text }` block when non-empty; empty strings emit nothing.
+- An array `content` SHALL be walked: `text` blocks with non-empty text become Anthropic text blocks; `tool_result` blocks pass through (with their optional `is_error` field preserved); `image_url` and `image` blocks are mapped per the Image content mapping requirement.
+
+#### Scenario: Tool message becomes tool_result
+
+- **WHEN** messages contain `{ role: "tool", tool_call_id: "c1", content: "result text" }`
+- **THEN** the Anthropic message SHALL be `{ role: "user", content: [{ type: "tool_result", tool_use_id: "c1", content: "result text" }] }`
+
+#### Scenario: Tool_result with is_error
+
+- **WHEN** a user message has `content: [{ type: "tool_result", tool_use_id: "c1", content: "err", is_error: true }]`
+- **THEN** the Anthropic block SHALL preserve `is_error: true`
+
+#### Scenario: Empty user string drops text block
+
+- **WHEN** a user message has `content: ""`
+- **THEN** no text block SHALL be emitted for that message
+
+### Requirement: Cache_control on last assistant content block
+
+After all content blocks are assembled, the gateway SHALL apply `cache_control: { type: "ephemeral" }` to the LAST eligible content block of the LAST assistant message (eligible means type in `{text, tool_use, tool_result, image}` — thinking blocks are not eligible). At most one such marker SHALL be added per request.
+
+#### Scenario: Marker applied to last text block
+
+- **WHEN** the last assistant message has content `[{ type: "thinking", thinking: "T" }, { type: "text", text: "answer" }]`
+- **THEN** the text block SHALL receive `cache_control: { type: "ephemeral" }` and the thinking block SHALL NOT
+
+#### Scenario: Skip past trailing thinking
+
+- **WHEN** the last assistant message has content `[{ type: "text", text: "answer" }, { type: "thinking", thinking: "T" }]`
+- **THEN** the text block (not the thinking block) SHALL receive `cache_control`
+
+#### Scenario: No assistant message
+
+- **WHEN** there is no assistant message in the conversation
+- **THEN** no cache_control marker SHALL be added on the assistant side
+
+### Requirement: Response stream — message_start
+
+On the FIRST chunk received from the upstream that yields any usable delta, the streaming translator (Anthropic → ChatCompletions hop) SHALL emit a `message_start` event whose `message` field includes `id`, `type: "message"`, `role: "assistant"`, `model`, `content: []`, `stop_reason: null`, `stop_sequence: null`, and `usage: { input_tokens: 0, output_tokens: 0 }`. The translator SHALL derive `id` from the chunk's id (stripping a `chatcmpl-` prefix if present); if the derived id is empty, the value `"chat"`, or shorter than 8 characters, the translator SHALL fall back to a request-id or trace-id from the chunk's `extend_fields`, finally to `msg_<timestamp>`. The `model` field SHALL be the chunk's `model` field or `"unknown"`. This event SHALL fire exactly once per stream.
+
+#### Scenario: message_start fires once
+
+- **WHEN** two non-empty chunks are processed in sequence at the start of a stream
+- **THEN** exactly one `message_start` event SHALL be emitted, on or before the first emission of any content_block event
+
+#### Scenario: Empty id falls back to msg_<timestamp>
+
+- **WHEN** the first chunk has `id: ""` and no `extend_fields`
+- **THEN** the emitted `message.id` SHALL match the regex `^msg_\d+$`
+
+#### Scenario: chatcmpl-prefix stripped
+
+- **WHEN** the first chunk has `id: "chatcmpl-abc12345"`
+- **THEN** the emitted `message.id` SHALL be `"abc12345"`
+
+### Requirement: Response stream — text content blocks
+
+When a chunk's `delta.content` is non-empty, the translator SHALL ensure a text content_block is open (opening with `content_block_start` of type `text` at the next available index if not yet open) and SHALL emit a `content_block_delta` event of type `text_delta` carrying the content string. Before opening a text block, any open thinking block SHALL be closed via `content_block_stop`.
+
+#### Scenario: First text delta opens a text block
+
+- **WHEN** the first content-bearing chunk has `delta.content: "hello"`
+- **THEN** the translator SHALL emit a `content_block_start` (type text) followed by a `content_block_delta` (type text_delta, text "hello")
+
+#### Scenario: Subsequent text delta reuses the open block
+
+- **WHEN** a second chunk has `delta.content: " world"` and the text block is open
+- **THEN** the translator SHALL emit ONLY a `content_block_delta` for that block index
+
+#### Scenario: Text after thinking closes thinking first
+
+- **WHEN** a thinking block is open and a chunk has `delta.content: "hello"`
+- **THEN** a `content_block_stop` for the thinking block SHALL be emitted BEFORE the new text block's `content_block_start`
+
+### Requirement: Response stream — thinking content blocks
+
+When a chunk has `delta.reasoning_content` or `delta.reasoning` non-empty, the translator SHALL ensure a thinking content_block is open (opening with `content_block_start` of type `thinking` if not yet open) and SHALL emit a `content_block_delta` of type `thinking_delta`. Before opening a thinking block, any open text block SHALL be closed via `content_block_stop` (idempotent).
+
+#### Scenario: reasoning_content opens thinking
+
+- **WHEN** a chunk has `delta.reasoning_content: "step 1"` and no prior thinking emitted
+- **THEN** the translator SHALL emit `content_block_start` (type thinking) followed by `content_block_delta` (type thinking_delta, thinking "step 1")
+
+#### Scenario: reasoning alias
+
+- **WHEN** a chunk has `delta.reasoning: "step 2"` (note the alternate field name) and no `reasoning_content`
+- **THEN** the translator SHALL behave as if `delta.reasoning_content` were `"step 2"`
+
+### Requirement: Response stream — tool_use content blocks
+
+When a chunk's `delta.tool_calls[]` contains an entry with a non-empty `id`, the translator SHALL close any open text or thinking block and SHALL open a new tool_use content_block at the next available index. The block's `name` SHALL be the entry's `function.name` (forwarded verbatim, no prefix stripping). The block's `input` SHALL start as `{}`. When a subsequent chunk emits `function.arguments` for the same tool_call index, the translator SHALL emit `content_block_delta` of type `input_json_delta` with `partial_json` equal to that argument fragment. On finish, every open tool_use block SHALL be closed via `content_block_stop`.
+
+#### Scenario: tool_call opens tool_use block
+
+- **WHEN** a chunk has `delta.tool_calls: [{ index: 0, id: "c1", function: { name: "search" } }]`
+- **THEN** the translator SHALL emit `content_block_start` of type `tool_use` with `id: "c1"`, name `"search"`, input `{}`
+
+#### Scenario: Subsequent argument fragments emit input_json_delta
+
+- **WHEN** chunk 2 has `delta.tool_calls: [{ index: 0, function: { arguments: "{\"q\":" } }]` and chunk 3 has `delta.tool_calls: [{ index: 0, function: { arguments: "\"x\"}" } }]`
+- **THEN** the translator SHALL emit TWO `content_block_delta` events with `input_json_delta`, with partial_json `"{\"q\":"` then `"\"x\"}"`
+
+#### Scenario: Tool name forwarded verbatim
+
+- **WHEN** a tool_call has `function.name: "search"`
+- **THEN** the emitted tool_use block's `name` SHALL be `"search"` (no prefix added, no prefix stripped)
+
+#### Scenario: All tool_use blocks closed on finish
+
+- **WHEN** the upstream emits two tool_calls and then a `finish_reason: "tool_calls"` chunk
+- **THEN** TWO `content_block_stop` events SHALL be emitted, one per open tool_use block
+
+### Requirement: Response stream — finish and usage
+
+When a chunk has a non-null `finish_reason`, the translator (Anthropic → ChatCompletions hop) SHALL close any open text, thinking, and tool_use blocks, emit a `message_delta` event whose `delta.stop_reason` is the mapped value of the finish reason (`stop → end_turn`, `length → max_tokens`, `tool_calls → tool_use`, any other → `end_turn`) and whose `usage` is the accumulated usage, then emit `message_stop`. The accumulated `usage` SHALL be computed from any chunk that carries a `usage` object: `input_tokens = max(0, prompt_tokens − cached_tokens − cache_creation_tokens)`, `output_tokens = completion_tokens`, `cache_read_input_tokens = cached_tokens` (omitted when zero), `cache_creation_input_tokens = cache_creation_tokens` (omitted when zero). Cache token fields are read from `usage.prompt_tokens_details.{cached_tokens, cache_creation_tokens}`. Reasoning-token sub-detail SHALL NOT be added to output_tokens (it is already included in completion_tokens).
+
+#### Scenario: stop maps to end_turn
+
+- **WHEN** the finishing chunk has `finish_reason: "stop"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "end_turn"`
+
+#### Scenario: length maps to max_tokens
+
+- **WHEN** the finishing chunk has `finish_reason: "length"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "max_tokens"`
+
+#### Scenario: tool_calls maps to tool_use
+
+- **WHEN** the finishing chunk has `finish_reason: "tool_calls"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "tool_use"`
+
+#### Scenario: Unknown finish reason maps to end_turn
+
+- **WHEN** the finishing chunk has `finish_reason: "content_filter"`
+- **THEN** the emitted `message_delta` SHALL have `delta.stop_reason: "end_turn"`
+
+#### Scenario: Cache tokens propagated
+
+- **WHEN** any chunk's `usage` is `{ prompt_tokens: 100, completion_tokens: 50, prompt_tokens_details: { cached_tokens: 30, cache_creation_tokens: 20 } }`
+- **THEN** the emitted `usage` SHALL be `{ input_tokens: 50, output_tokens: 50, cache_read_input_tokens: 30, cache_creation_input_tokens: 20 }`
+
+#### Scenario: Zero cache tokens omitted
+
+- **WHEN** any chunk's `usage` is `{ prompt_tokens: 100, completion_tokens: 50, prompt_tokens_details: { cached_tokens: 0 } }`
+- **THEN** the emitted `usage` SHALL be `{ input_tokens: 100, output_tokens: 50 }` (no cache fields)
+
+### Requirement: Response stream — Chat-Completions → Responses-API events
+
+The streaming translator (ChatCompletions → Responses-API hop) SHALL emit Responses-API events with strictly increasing, consecutive `sequence_number` values starting from 1 (each event's value is the previous event's value plus 1). On the first usable chunk it SHALL emit `response.created` then `response.in_progress` exactly once each. For each `delta.content` it SHALL ensure a `message` output_item is open (emitting `response.output_item.added` of type `message` with content `[]` and role `"assistant"`, then `response.content_part.added` of type `output_text`) and SHALL emit `response.output_text.delta` events. For each `delta.reasoning_content` it SHALL ensure a `reasoning` output_item is open (emitting `response.output_item.added` of type `reasoning` and `response.reasoning_summary_part.added` of type `summary_text`) and SHALL emit `response.reasoning_summary_text.delta`. On finish it SHALL close every open item (`response.output_text.done`, `response.content_part.done`, `response.output_item.done` for messages; `response.reasoning_summary_text.done`, `response.reasoning_summary_part.done`, `response.output_item.done` for reasoning; `response.function_call_arguments.done`, `response.output_item.done` for function calls) and emit `response.completed` exactly once. The `response.id` value SHALL be the upstream `chunk.id` prefixed by `resp_`. The `created_at` field SHALL be a Unix timestamp captured at stream start.
+
+#### Scenario: sequence_number is strictly increasing
+
+- **WHEN** any sequence of events is emitted for a stream
+- **THEN** every event's `sequence_number` SHALL equal the previous event's value plus 1, starting at 1
+
+#### Scenario: response.created precedes response.in_progress precedes any delta
+
+- **WHEN** the first usable chunk produces a text delta
+- **THEN** the emitted events SHALL be, in order: `response.created`, `response.in_progress`, `response.output_item.added`, `response.content_part.added`, `response.output_text.delta`
+
+#### Scenario: response.completed fires once
+
+- **WHEN** any stream ends successfully
+- **THEN** exactly ONE `response.completed` event SHALL be emitted
+
+#### Scenario: response id derived from chunk id
+
+- **WHEN** the first chunk has `id: "abc12345"`
+- **THEN** the emitted `response.id` SHALL be `"resp_abc12345"`
+
+#### Scenario: Reasoning open/close events
+
+- **WHEN** the upstream emits two `delta.reasoning_content` fragments then finishes
+- **THEN** the emitted events SHALL include `response.output_item.added` (type reasoning), `response.reasoning_summary_part.added`, two `response.reasoning_summary_text.delta`, `response.reasoning_summary_text.done` (with full buffered text), `response.reasoning_summary_part.done`, `response.output_item.done`
+
+### Requirement: Response stream — `<think>` inline marker recognition
+
+When a chunk's `delta.content` contains the literal substring `<think>`, the translator SHALL split the chunk at that point, emit any text before `<think>` as normal text, open a reasoning output_item, and route the text AFTER `<think>` into the reasoning channel. When a subsequent chunk's content contains `</think>`, the translator SHALL split at that point, emit the part before `</think>` as reasoning, close the reasoning item, then emit the part after `</think>` as normal text.
+
+#### Scenario: Open marker mid-stream
+
+- **WHEN** a chunk has `delta.content: "intro<think>step"`
+- **THEN** the translator SHALL emit a text delta for `"intro"`, open a reasoning item, and emit a reasoning delta for `"step"`
+
+#### Scenario: Close marker mid-stream
+
+- **WHEN** while a reasoning item is open via inline marker a chunk has `delta.content: "more</think>answer"`
+- **THEN** the translator SHALL emit a reasoning delta for `"more"`, close the reasoning item, and emit a text delta for `"answer"`
+
+#### Scenario: Open without close at EOS
+
+- **WHEN** the stream ends while still inside an inline `<think>` block
+- **THEN** the flush path SHALL close the reasoning item before `response.completed`
+
+### Requirement: Response stream — function_call output items
+
+When the Chat-Completions chunk indicates a tool_call (a `delta.tool_calls[]` entry), the translator SHALL emit Responses-API events as follows. For the first chunk that carries a `tool_calls[].id`, it SHALL close any currently-open `message` output_item via `closeMessage` (emitting `response.output_text.done`, `response.content_part.done`, `response.output_item.done`) and emit `response.output_item.added` of type `function_call` with `arguments: ""`, `call_id: <id>`, `name: <function.name or "">`. For each subsequent chunk carrying `function.arguments` it SHALL emit `response.function_call_arguments.delta`. On finish or end-of-stream it SHALL emit `response.function_call_arguments.done` (with the buffered arguments string, or `"{}"` if empty) followed by `response.output_item.done` of type `function_call`.
+
+#### Scenario: function_call.added precedes any arguments delta
+
+- **WHEN** the first tool_call chunk has `delta.tool_calls: [{ index: 0, id: "c1", function: { name: "search", arguments: "{" } }]`
+- **THEN** the emitted events SHALL be `response.output_item.added` (type function_call, name "search", arguments "") then `response.function_call_arguments.delta` (delta "{")
+
+#### Scenario: function_call done emits buffered arguments
+
+- **WHEN** chunk 1 emits arguments `"{\"q\":"` and chunk 2 emits arguments `"\"x\"}"` and then finish is signalled
+- **THEN** `response.function_call_arguments.done` SHALL carry `arguments: "{\"q\":\"x\"}"`
+
+#### Scenario: Empty arguments default to "{}"
+
+- **WHEN** a tool_call is opened and closed without any `function.arguments` fragments
+- **THEN** the emitted `response.function_call_arguments.done` SHALL carry `arguments: "{}"`
+
+### Requirement: Response stream — error event mapping
+
+When the upstream emits an `error` event or a `response.failed` event, the translator (Responses-API → Chat-Completions hop) SHALL emit a single OpenAI-shaped error chunk: a `chat.completion.chunk` with `choices[0].delta.content` set to `[Error] <error.message or stringified error>` and `choices[0].finish_reason: "stop"`. The translator SHALL emit AT MOST ONE such chunk per stream — back-to-back `error` and `response.failed` events SHALL be deduplicated.
+
+#### Scenario: error event surfaces as content chunk
+
+- **WHEN** an `error` event arrives with `data.error: { message: "model_not_found" }`
+- **THEN** the next emitted chunk SHALL be `{ choices: [{ index: 0, delta: { content: "[Error] model_not_found" }, finish_reason: "stop" }], ... }`
+
+#### Scenario: response.failed after error is suppressed
+
+- **WHEN** an `error` event is followed by a `response.failed` event in the same stream
+- **THEN** only ONE error chunk SHALL be emitted
+
+### Requirement: Response stream — flush on null chunk
+
+When the streaming translator receives a `null` chunk (end-of-stream sentinel), it SHALL close every still-open output_item, emit `response.completed` if not already emitted, and emit a final Chat-Completions chunk with empty delta and a computed `finish_reason` (`tool_calls` if any tool_call was emitted, else `stop`). The flush path SHALL be idempotent: a second `null` chunk SHALL produce no further events.
+
+#### Scenario: Null flush closes open message
+
+- **WHEN** the translator has an open message output_item and receives `null`
+- **THEN** it SHALL emit `response.output_text.done`, `response.content_part.done`, `response.output_item.done`, `response.completed`
+
+#### Scenario: Null flush finish_reason is tool_calls when a tool was emitted
+
+- **WHEN** the stream emitted a tool_call and then null
+- **THEN** the final Chat-Completions chunk's `finish_reason` SHALL be `"tool_calls"`
+
+#### Scenario: Idempotent null flush
+
+- **WHEN** the translator has already emitted `response.completed` and a second null arrives
+- **THEN** no further events SHALL be emitted
+
+### Requirement: Response stream — usage propagation on completed event
+
+When the streaming translator (Responses-API → Chat-Completions hop) encounters a `response.completed` event whose `response.usage` is present, it SHALL set the accumulated usage to `{ prompt_tokens: input_tokens (or prompt_tokens), completion_tokens: output_tokens (or completion_tokens), total_tokens: prompt_tokens + completion_tokens }`. If `input_tokens_details.cached_tokens` (or `cache_read_input_tokens`) is > 0, it SHALL add `prompt_tokens_details: { cached_tokens: <value> }`. The usage SHALL be attached to the final Chat-Completions chunk's `usage` field.
+
+#### Scenario: usage propagated
+
+- **WHEN** a `response.completed` event has `response.usage: { input_tokens: 100, output_tokens: 50, input_tokens_details: { cached_tokens: 30 } }`
+- **THEN** the final Chat-Completions chunk's `usage` SHALL be `{ prompt_tokens: 100, completion_tokens: 50, total_tokens: 150, prompt_tokens_details: { cached_tokens: 30 } }`
+
+#### Scenario: Legacy field names accepted
+
+- **WHEN** the upstream uses `prompt_tokens`/`completion_tokens`/`cache_read_input_tokens` instead of the Responses field names
+- **THEN** the translator SHALL accept those values as equivalent
+
+### Requirement: Response stream — custom_tool_call variant
+
+The streaming translator SHALL treat `response.output_item.added` events whose `item.type` is `"custom_tool_call"` identically to `"function_call"` events. The translator SHALL treat `response.custom_tool_call_input.delta` events identically to `response.function_call_arguments.delta`. The translator SHALL treat `response.output_item.done` for `custom_tool_call` items as a tool-call increment trigger identical to `function_call`.
+
+#### Scenario: custom_tool_call opens like function_call
+
+- **WHEN** a `response.output_item.added` event has `item: { type: "custom_tool_call", call_id: "c1", name: "x" }`
+- **THEN** the emitted Chat-Completions chunk SHALL contain `delta.tool_calls[0] = { index: 0, id: "c1", type: "function", function: { name: "x", arguments: "" } }`
+
+#### Scenario: custom_tool_call_input.delta forwarded
+
+- **WHEN** a `response.custom_tool_call_input.delta` event has `delta: "{}"`
+- **THEN** the emitted Chat-Completions chunk SHALL contain `delta.tool_calls[0].function.arguments: "{}"`
+
+### Requirement: Backward compatibility — no behavior change for non-Anthropic upstreams
+
+The translation pipeline SHALL only execute when the source format and target format differ. A `/v1/responses` request routed to an OpenAI-compatible upstream SHALL behave exactly as today. A `/v1/messages` request routed to an Anthropic upstream SHALL behave exactly as today. A `/v1/chat/completions` request SHALL behave exactly as today unless its body contains an `input` array.
+
+#### Scenario: Responses to OpenAI passthrough
+
+- **WHEN** a `/v1/responses` request is routed to an OpenAI-compatible channel
+- **THEN** the request body and response stream SHALL pass through with no transformation (same-format pivot)
+
+#### Scenario: /v1/messages unchanged
+
+- **WHEN** a `/v1/messages` request is routed to an Anthropic channel
+- **THEN** no translation step SHALL be invoked
+
+### Requirement: No leakage of internal state into upstream body
+
+The gateway SHALL strip any internal scratch fields it may have attached to the body (for example fields used by the translation layer to carry per-request scratch state) before sending the body to the upstream. By convention every such scratch field's name starts with an underscore so the strip rule can match by prefix.
+
+#### Scenario: Internal underscore-prefixed fields stripped
+
+- **WHEN** the translator attaches an internal underscore-prefixed scratch field to the intermediate body (for example to track per-stream state)
+- **THEN** the JSON body delivered to the upstream SHALL NOT contain any top-level field whose name begins with `_`
+
diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go
index 046ccfe681a..e2a58666622 100644
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -1,10 +1,14 @@
 package claude
 
 import (
+	"bytes"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
+	"mime"
 	"net/http"
+	"path/filepath"
 	"strings"
 
 	"github.com/QuantumNous/new-api/common"
@@ -25,6 +29,62 @@ import (
 	"github.com/tidwall/sjson"
 )
 
+// fileCategory is the bucket classifyFile sorts an uploaded file into when building Claude content.
+type fileCategory int
+
+const (
+	fileCategoryUnsupported fileCategory = iota // zero value; classifyFile returns it for nil or unrecognised files
+	fileCategoryPDF                             // forwarded as a base64 "document" block
+	fileCategoryText                            // base64-decoded and sent as a "text" block
+	fileCategoryImage                           // forwarded as a base64 "image" block
+)
+
+// classifyFile maps a MessageFile onto the Claude content-block family it
+// should be rendered as, together with the media type to advertise. Well-known
+// extensions are resolved from a fixed table; anything else falls back to the
+// mime package's extension registry. Nil or unrecognised files yield
+// (fileCategoryUnsupported, "").
+func classifyFile(file *dto.MessageFile) (fileCategory, string) {
+	if file == nil {
+		return fileCategoryUnsupported, ""
+	}
+	ext := strings.ToLower(filepath.Ext(file.FileName))
+
+	// Fast path: extensions whose category and media type are pinned.
+	switch ext {
+	case ".pdf":
+		return fileCategoryPDF, "application/pdf"
+	case ".txt", ".md", ".csv", ".log":
+		return fileCategoryText, "text/plain"
+	case ".json":
+		return fileCategoryText, "application/json"
+	case ".png":
+		return fileCategoryImage, "image/png"
+	case ".jpg", ".jpeg":
+		return fileCategoryImage, "image/jpeg"
+	case ".gif":
+		return fileCategoryImage, "image/gif"
+	case ".webp":
+		return fileCategoryImage, "image/webp"
+	}
+
+	// Slow path: ask the MIME registry and drop any ";charset=..." parameter.
+	detected := strings.ToLower(mime.TypeByExtension(ext))
+	if base, _, hasParams := strings.Cut(detected, ";"); hasParams {
+		detected = strings.TrimSpace(base)
+	}
+	if detected == "application/pdf" {
+		return fileCategoryPDF, detected
+	}
+	if detected == "application/json" || strings.HasPrefix(detected, "text/") {
+		return fileCategoryText, detected
+	}
+	if strings.HasPrefix(detected, "image/") {
+		return fileCategoryImage, detected
+	}
+	return fileCategoryUnsupported, ""
+}
+
 const (
 	WebSearchMaxUsesLow    = 1
 	WebSearchMaxUsesMedium = 5
@@ -44,6 +104,210 @@ func maybeMarkClaudeRefusal(c *gin.Context, stopReason string) {
 	}
 }
 
+// claudeToolCacheControlMarker is the raw-JSON cache_control marker for the final
+// tool block: ephemeral with a 1h TTL, since tool schemas are long-lived (spec §15).
+// NOTE(review): applyCacheControlToLastTool sets dto.ClaudeCacheControl instead.
+var claudeToolCacheControlMarker = json.RawMessage(`{"type":"ephemeral","ttl":"1h"}`)
+
+// claudeAssistantCacheControlMarker is the cache_control marker applied to
+// the last eligible content block of the last assistant message. Per
+// responses-to-anthropic-translation spec §581-583, this marker MUST NOT
+// carry a TTL field — emit only {type:"ephemeral"}.
+var claudeAssistantCacheControlMarker = json.RawMessage(`{"type":"ephemeral"}`)
+
+// applyCacheControlToLastTool sets the ephemeral 1h cache_control marker on
+// the trailing element of the tools array, if any. We mutate in place because
+// the slice elements are stored as pointers.
+func applyCacheControlToLastTool(tools []any) {
+	if len(tools) == 0 {
+		return
+	}
+	last := tools[len(tools)-1]
+	switch t := last.(type) {
+	case *dto.Tool:
+		t.CacheControl = &dto.ClaudeCacheControl{Type: "ephemeral", TTL: "1h"}
+	case *dto.ClaudeWebSearchTool:
+		// ClaudeWebSearchTool has no CacheControl field defined yet; do
+		// nothing rather than fabricate an unsupported shape.
+	}
+}
+
+// applyCacheControlToLastAssistantContent walks the messages in reverse and,
+// for the final assistant message, attaches the ephemeral cache_control
+// marker to the last eligible content block. Eligible block types are
+// {text, tool_use, tool_result, image}; thinking blocks are NOT eligible
+// because Anthropic does not honour cache_control on them (spec §581-598).
+//
+// The emitted marker is {type:"ephemeral"} with NO TTL field per the spec.
+func applyCacheControlToLastAssistantContent(messages []dto.ClaudeMessage) {
+	for i := len(messages) - 1; i >= 0; i-- {
+		if messages[i].Role != "assistant" {
+			continue
+		}
+		blocks, ok := messages[i].Content.([]dto.ClaudeMediaMessage)
+		if !ok {
+			// RequestOpenAI2ClaudeMessage emits plain-string content for the
+			// common text-only assistant case; promote it to a single-block
+			// []ClaudeMediaMessage so we can attach cache_control.
+			if text, ok := messages[i].Content.(string); ok && text != "" {
+				blocks = []dto.ClaudeMediaMessage{{
+					Type: "text",
+					Text: common.GetPointer[string](text),
+				}}
+			} else {
+				return
+			}
+		}
+		for j := len(blocks) - 1; j >= 0; j-- {
+			switch blocks[j].Type {
+			case "text", "tool_use", "tool_result", "image":
+				blocks[j].CacheControl = claudeAssistantCacheControlMarker
+				messages[i].Content = blocks
+				return
+			}
+			// thinking / redacted_thinking / anything else: skip past.
+		}
+		return
+	}
+}
+
+// buildResponseFormatSystemShim turns an OpenAI response_format into a plain
+// English system instruction, since the Claude request has no native JSON
+// mode. It returns "" when format is nil, when the type is neither
+// json_schema nor json_object, or when a json_schema carries no schema bytes.
+//
+// The wording is fixed by responses-to-anthropic-translation §19:
+//   - json_schema: "You must respond with valid JSON", then the schema
+//     pretty-printed with two-space indentation, then
+//     "Respond ONLY with the JSON object".
+//   - json_object: "You must respond with valid JSON" and
+//     "Respond ONLY with a JSON object".
+func buildResponseFormatSystemShim(format *dto.ResponseFormat) string {
+	if format == nil {
+		return ""
+	}
+	if format.Type == "json_object" {
+		return "You must respond with valid JSON. Respond ONLY with a JSON object. " +
+			"Do not include any explanatory text, markdown, or commentary outside the JSON."
+	}
+	if format.Type != "json_schema" {
+		return ""
+	}
+
+	schema := bytes.TrimSpace(format.JsonSchema)
+	if len(schema) == 0 {
+		// Without schema bytes there is nothing to render, so no instruction
+		// is emitted at all.
+		return ""
+	}
+	// Re-encode via common.Marshal; keep the trimmed input if that fails.
+	compact, marshalErr := common.Marshal(json.RawMessage(schema))
+	if marshalErr != nil {
+		compact = schema
+	}
+	// Indent with two spaces. If indentation fails, discard whatever is in the
+	// buffer and embed the unindented bytes instead.
+	var pretty bytes.Buffer
+	if indentErr := json.Indent(&pretty, compact, "", "  "); indentErr != nil {
+		pretty.Reset()
+		pretty.Write(compact)
+	}
+
+	return "You must respond with valid JSON matching this schema:\n" +
+		pretty.String() +
+		"\nRespond ONLY with the JSON object. Do not include any explanatory text outside the JSON."
+}
+
+// injectMissingToolResults walks the messages array and ensures that every
+// tool_use block in an assistant message is matched by a tool_result block in
+// the immediately-following user message. Missing tool_use IDs receive an
+// empty placeholder tool_result so the upstream Anthropic API does not reject
+// the request.
+func injectMissingToolResults(messages []dto.ClaudeMessage) []dto.ClaudeMessage {
+	if len(messages) == 0 {
+		return messages
+	}
+
+	out := make([]dto.ClaudeMessage, 0, len(messages))
+	for i := 0; i < len(messages); i++ {
+		msg := messages[i]
+		out = append(out, msg)
+
+		if msg.Role != "assistant" {
+			continue
+		}
+		assistantBlocks, ok := msg.Content.([]dto.ClaudeMediaMessage)
+		if !ok {
+			continue
+		}
+
+		// Collect every tool_use ID present on this assistant message.
+		toolUseIds := make([]string, 0)
+		for _, b := range assistantBlocks {
+			if b.Type == "tool_use" && b.Id != "" {
+				toolUseIds = append(toolUseIds, b.Id)
+			}
+		}
+		if len(toolUseIds) == 0 {
+			continue
+		}
+
+		// Look at the next message (if any) and inventory which tool_result
+		// IDs are already present.
+		matched := make(map[string]bool, len(toolUseIds))
+		nextIsAdjacentUser := false
+		if i+1 < len(messages) && messages[i+1].Role == "user" {
+			nextIsAdjacentUser = true
+			if userBlocks, ok := messages[i+1].Content.([]dto.ClaudeMediaMessage); ok {
+				for _, b := range userBlocks {
+					if b.Type == "tool_result" && b.ToolUseId != "" {
+						matched[b.ToolUseId] = true
+					}
+				}
+			}
+		}
+
+		missing := make([]dto.ClaudeMediaMessage, 0)
+		for _, id := range toolUseIds {
+			if matched[id] {
+				continue
+			}
+			missing = append(missing, dto.ClaudeMediaMessage{
+				Type:      "tool_result",
+				ToolUseId: id,
+				Content:   "",
+			})
+		}
+		if len(missing) == 0 {
+			continue
+		}
+
+		if nextIsAdjacentUser {
+			// Append synthesised tool_result blocks to the existing user
+			// message in-place.
+			userBlocks, ok := messages[i+1].Content.([]dto.ClaudeMediaMessage)
+			if !ok {
+				// Promote a string content to a single text block.
+				if s, ok := messages[i+1].Content.(string); ok {
+					userBlocks = []dto.ClaudeMediaMessage{{Type: "text", Text: common.GetPointer[string](s)}}
+				} else {
+					userBlocks = nil
+				}
+			}
+			userBlocks = append(userBlocks, missing...)
+			messages[i+1].Content = userBlocks
+		} else {
+			// Insert a fresh user message immediately after this assistant
+			// to host the synthesised tool_result blocks.
+			out = append(out, dto.ClaudeMessage{
+				Role:    "user",
+				Content: missing,
+			})
+		}
+	}
+	return out
+}
+
 func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRequest) (*dto.ClaudeRequest, error) {
 	claudeTools := make([]any, 0, len(textRequest.Tools))
 
@@ -121,6 +385,11 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		claudeTools = append(claudeTools, &webSearchTool)
 	}
 
+	// GAP-B: apply prompt-cache marker to the LAST tool to preserve the
+	// (typically long-lived) tool schema across calls. Anthropic only
+	// honours cache_control on the final tool block.
+	applyCacheControlToLastTool(claudeTools)
+
 	claudeRequest := dto.ClaudeRequest{
 		Model:         textRequest.Model,
 		StopSequences: nil,
@@ -376,6 +645,45 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 								Text: common.GetPointer[string](mediaMessage.Text),
 							})
 						}
+					case dto.ContentTypeFile:
+						file := mediaMessage.GetFile()
+						if file == nil || file.FileData == "" {
+							continue
+						}
+						category, mimeType := classifyFile(file)
+						switch category {
+						case fileCategoryPDF:
+							claudeMediaMessages = append(claudeMediaMessages, dto.ClaudeMediaMessage{
+								Type: "document",
+								Source: &dto.ClaudeMessageSource{
+									Type:      "base64",
+									MediaType: mimeType,
+									Data:      file.FileData,
+								},
+							})
+						case fileCategoryText:
+							decoded, err := base64.StdEncoding.DecodeString(file.FileData)
+							if err != nil {
+								continue
+							}
+							text := string(decoded)
+							claudeMediaMessages = append(claudeMediaMessages, dto.ClaudeMediaMessage{
+								Type: "text",
+								Text: common.GetPointer[string](text),
+							})
+						case fileCategoryImage:
+							claudeMediaMessages = append(claudeMediaMessages, dto.ClaudeMediaMessage{
+								Type: "image",
+								Source: &dto.ClaudeMessageSource{
+									Type:      "base64",
+									MediaType: mimeType,
+									Data:      file.FileData,
+								},
+							})
+						default:
+							// Unsupported file type — skip without inserting a placeholder.
+							continue
+						}
 					default:
 						source := mediaMessage.ToFileSource()
 						if source == nil {
@@ -429,6 +737,25 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		claudeRequest.System = systemMessages
 	}
 
+	// GAP-A: response_format JSON-mode shim — Claude has no native equivalent,
+	// so we append an English instruction system block guiding the model to
+	// emit valid JSON (and optionally match a schema).
+	if shim := buildResponseFormatSystemShim(textRequest.ResponseFormat); shim != "" {
+		systemMessages = append(systemMessages, dto.ClaudeMediaMessage{
+			Type: "text",
+			Text: common.GetPointer[string](shim),
+		})
+		claudeRequest.System = systemMessages
+	}
+
+	// GAP-C: tag the LAST text/tool_use block of the LAST assistant message
+	// with the prompt-cache marker. Trailing thinking blocks are skipped.
+	applyCacheControlToLastAssistantContent(claudeMessages)
+
+	// GAP-D: inject empty tool_result blocks for any tool_use IDs that lack
+	// a matching tool_result in the subsequent user message.
+	claudeMessages = injectMissingToolResults(claudeMessages)
+
 	claudeRequest.Prompt = ""
 	claudeRequest.Messages = claudeMessages
 	return &claudeRequest, nil
diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go
index fdc7b38e5ec..89bdcc530ea 100644
--- a/relay/channel/claude/relay_claude_test.go
+++ b/relay/channel/claude/relay_claude_test.go
@@ -2,6 +2,7 @@ package claude
 
 import (
 	"encoding/base64"
+	"encoding/json"
 	"strings"
 	"testing"
 
@@ -380,3 +381,343 @@ func TestRequestOpenAI2ClaudeMessage_ConvertsTextFileContentToText(t *testing.T)
 	require.NotNil(t, content[0].Text)
 	require.Equal(t, "alpha\nbeta", *content[0].Text)
 }
+
+// -----------------------------------------------------------------------------
+// GAP-A: response_format JSON-mode shim
+// -----------------------------------------------------------------------------
+
+func systemTexts(t *testing.T, system any) []string { // flattens system blocks to their text
+	t.Helper()
+	blocks, isBlocks := system.([]dto.ClaudeMediaMessage)
+	require.True(t, isBlocks, "expected []ClaudeMediaMessage system, got %T", system)
+	texts := make([]string, len(blocks))
+	for i := range blocks {
+		require.Equal(t, "text", blocks[i].Type) // only text blocks are expected here
+		require.NotNil(t, blocks[i].Text)
+		texts[i] = *blocks[i].Text
+	}
+	return texts
+}
+
+func TestRequestOpenAI2ClaudeMessage_ResponseFormat_JsonObject_AppendsSystemShim(t *testing.T) {
+	req := dto.GeneralOpenAIRequest{
+		Model: "claude-3-5-sonnet",
+		Messages: []dto.Message{
+			{Role: "system", Content: "You are helpful."},
+			{Role: "user", Content: "ping"},
+		},
+		ResponseFormat: &dto.ResponseFormat{Type: "json_object"},
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, req)
+	require.NoError(t, err)
+	system := systemTexts(t, converted.System)
+	require.Len(t, system, 2)
+	require.Equal(t, "You are helpful.", system[0])
+	// Spec §19 / GAP-A: the json_object shim carries BOTH phrases verbatim
+	// (exact case, including the article "a" in "a JSON object").
+	require.Contains(t, system[1], "You must respond with valid JSON")
+	require.Contains(t, system[1], "Respond ONLY with a JSON object")
+}
+
+func TestRequestOpenAI2ClaudeMessage_ResponseFormat_JsonSchema_AppendsSystemShim(t *testing.T) {
+	rawSchema := json.RawMessage(`{"name":"weather","schema":{"type":"object","properties":{"answer":{"type":"number"}}}}`)
+	req := dto.GeneralOpenAIRequest{
+		Model: "claude-3-5-sonnet",
+		Messages: []dto.Message{
+			{Role: "user", Content: "ping"},
+		},
+		ResponseFormat: &dto.ResponseFormat{Type: "json_schema", JsonSchema: rawSchema},
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, req)
+	require.NoError(t, err)
+	system := systemTexts(t, converted.System)
+	require.Len(t, system, 1)
+	// Spec §19 / GAP-A: the json_schema shim carries both literal phrases below.
+	require.Contains(t, system[0], "You must respond with valid JSON")
+	require.Contains(t, system[0], "Respond ONLY with the JSON object")
+	// It also embeds the schema, so the inner property key must show up.
+	require.Contains(t, system[0], "answer")
+}
+
+func TestRequestOpenAI2ClaudeMessage_ResponseFormat_Nil_NoSystemShim(t *testing.T) {
+	req := dto.GeneralOpenAIRequest{ // ResponseFormat is deliberately left unset
+		Model: "claude-3-5-sonnet",
+		Messages: []dto.Message{
+			{Role: "system", Content: "You are helpful."},
+			{Role: "user", Content: "ping"},
+		},
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, req)
+	require.NoError(t, err)
+	system := systemTexts(t, converted.System)
+	require.Len(t, system, 1)
+	require.Equal(t, "You are helpful.", system[0])
+}
+
+// -----------------------------------------------------------------------------
+// GAP-B: cache_control marker on the last tool
+// -----------------------------------------------------------------------------
+
+func TestRequestOpenAI2ClaudeMessage_CacheControl_OnLastTool(t *testing.T) {
+	req := dto.GeneralOpenAIRequest{
+		Model: "claude-3-5-sonnet",
+		Messages: []dto.Message{
+			{Role: "user", Content: "ping"},
+		},
+		Tools: []dto.ToolCallRequest{
+			{
+				Type: "function",
+				Function: dto.FunctionRequest{
+					Name:        "first",
+					Description: "first tool",
+					Parameters:  map[string]any{"type": "object"},
+				},
+			},
+			{
+				Type: "function",
+				Function: dto.FunctionRequest{
+					Name:        "second",
+					Description: "second tool",
+					Parameters:  map[string]any{"type": "object"},
+				},
+			},
+		},
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, req)
+	require.NoError(t, err)
+
+	toolList, ok := converted.Tools.([]any)
+	require.True(t, ok)
+	require.Len(t, toolList, 2)
+
+	head, ok := toolList[0].(*dto.Tool)
+	require.True(t, ok)
+	require.Nil(t, head.CacheControl, "first tool must NOT carry cache_control")
+
+	tail, ok := toolList[1].(*dto.Tool)
+	require.True(t, ok)
+	require.NotNil(t, tail.CacheControl, "last tool MUST carry cache_control")
+	require.Equal(t, "ephemeral", tail.CacheControl.Type)
+	require.Equal(t, "1h", tail.CacheControl.TTL)
+}
+
+func TestRequestOpenAI2ClaudeMessage_CacheControl_NoToolsNoChange(t *testing.T) {
+	req := dto.GeneralOpenAIRequest{ // no Tools on the request at all
+		Model: "claude-3-5-sonnet",
+		Messages: []dto.Message{
+			{Role: "user", Content: "ping"},
+		},
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, req)
+	require.NoError(t, err)
+	toolList, isSlice := converted.Tools.([]any)
+	require.True(t, isSlice)
+	require.Len(t, toolList, 0)
+}
+
+// -----------------------------------------------------------------------------
+// GAP-C: cache_control on the last assistant message's last eligible block.
+// Spec §22 (lines 581-583): eligible block types are {text, tool_use,
+// tool_result, image}; thinking is NOT eligible. The marker emitted on the
+// assistant side MUST NOT carry a TTL field — emit only {type:"ephemeral"}.
+// -----------------------------------------------------------------------------
+
+// cacheControlHasNoTTL checks an assistant-side cache_control marker: it must
+// decode as a JSON object whose type is "ephemeral" and that has no "ttl"
+// key, since spec §22 forbids a TTL on the assistant-side marker.
+func cacheControlHasNoTTL(t *testing.T, raw json.RawMessage) {
+	t.Helper()
+	require.NotNil(t, raw)
+	var marker map[string]any
+	require.NoError(t, json.Unmarshal(raw, &marker))
+	require.Equal(t, "ephemeral", marker["type"], "marker must be ephemeral")
+	_, ttlPresent := marker["ttl"]
+	require.False(t, ttlPresent, "assistant-side cache_control MUST NOT include a ttl field; got %s", string(raw))
+}
+
+func TestRequestOpenAI2ClaudeMessage_CacheControl_OnLastAssistantTextBlock(t *testing.T) {
+	req := dto.GeneralOpenAIRequest{
+		Model: "claude-3-5-sonnet",
+		Messages: []dto.Message{
+			{Role: "user", Content: "hi"},
+			{
+				Role: "assistant",
+				Content: []any{
+					dto.MediaContent{Type: dto.ContentTypeText, Text: "first"},
+					dto.MediaContent{Type: dto.ContentTypeText, Text: "second"},
+				},
+			},
+			{Role: "user", Content: "more"},
+			{
+				Role: "assistant",
+				Content: []any{
+					dto.MediaContent{Type: dto.ContentTypeText, Text: "final-one"},
+					dto.MediaContent{Type: dto.ContentTypeText, Text: "final-two"},
+				},
+			},
+		},
+	}
+
+	converted, err := RequestOpenAI2ClaudeMessage(nil, req)
+	require.NoError(t, err)
+
+	// The conversation ends on the second assistant turn.
+	require.GreaterOrEqual(t, len(converted.Messages), 1)
+	finalTurn := converted.Messages[len(converted.Messages)-1]
+	require.Equal(t, "assistant", finalTurn.Role)
+	blocks, ok := finalTurn.Content.([]dto.ClaudeMediaMessage)
+	require.True(t, ok)
+	require.GreaterOrEqual(t, len(blocks), 1)
+
+	tail := blocks[len(blocks)-1]
+	require.NotNil(t, tail.CacheControl, "last assistant content block MUST carry cache_control")
+	// Spec §22: no TTL field is allowed on the assistant-side marker.
+	cacheControlHasNoTTL(t, tail.CacheControl)
+	// Only the final block is tagged; every earlier one stays unmarked.
+	for i := range blocks[:len(blocks)-1] {
+		require.Nil(t, blocks[i].CacheControl, "earlier block %d carries unexpected cache_control", i)
+	}
+}
+
+// TestApplyCacheControlToLastAssistantContent_BroadenedEligibility calls the
+// helper directly, once per eligible block type (text, tool_use, tool_result,
+// image), and checks that the marker lands on that block when it is the last
+// non-thinking block of the last assistant message.
+func TestApplyCacheControlToLastAssistantContent_BroadenedEligibility(t *testing.T) {
+	scenarios := []struct {
+		label string
+		kind  string
+		fill  func(b *dto.ClaudeMediaMessage)
+	}{
+		{label: "text", kind: "text", fill: func(b *dto.ClaudeMediaMessage) { b.Text = stringPtr("ok") }},
+		{label: "tool_use", kind: "tool_use", fill: func(b *dto.ClaudeMediaMessage) { b.Id = "tu_1"; b.Name = "fn" }},
+		{label: "tool_result", kind: "tool_result", fill: func(b *dto.ClaudeMediaMessage) { b.ToolUseId = "tu_1"; b.Content = "out" }},
+		{label: "image", kind: "image", fill: func(b *dto.ClaudeMediaMessage) {
+			b.Source = &dto.ClaudeMessageSource{Type: "base64", MediaType: "image/png", Data: "AAA"}
+		}},
+	}
+
+	for _, sc := range scenarios {
+		t.Run(sc.label, func(t *testing.T) {
+			candidate := dto.ClaudeMediaMessage{Type: sc.kind}
+			sc.fill(&candidate)
+			history := []dto.ClaudeMessage{
+				{Role: "user", Content: "hi"},
+				{Role: "assistant", Content: []dto.ClaudeMediaMessage{
+					// The thinking block comes last on purpose: it must be
+					// passed over so the marker settles on the eligible
+					// block in front of it.
+					candidate,
+					{Type: "thinking", Thinking: stringPtr("T")},
+				}},
+			}
+			applyCacheControlToLastAssistantContent(history)
+			blocks, ok := history[1].Content.([]dto.ClaudeMediaMessage)
+			require.True(t, ok)
+			require.Len(t, blocks, 2)
+
+			// Index 0 is the eligible block and must now be tagged.
+			require.NotNil(t, blocks[0].CacheControl, "eligible %s block must receive cache_control", sc.kind)
+			cacheControlHasNoTTL(t, blocks[0].CacheControl)
+
+			// Index 1 is the trailing thinking block and must stay untagged.
+			require.Nil(t, blocks[1].CacheControl, "thinking block must not receive cache_control")
+		})
+	}
+}
+
+// TestApplyCacheControlToLastAssistantContent_ThinkingOnlySkipped checks the
+// no-eligible-block case: when the last assistant message holds nothing but
+// thinking blocks, none of them may be tagged.
+func TestApplyCacheControlToLastAssistantContent_ThinkingOnlySkipped(t *testing.T) {
+	history := []dto.ClaudeMessage{
+		{Role: "user", Content: "hi"},
+		{Role: "assistant", Content: []dto.ClaudeMediaMessage{
+			{Type: "thinking", Thinking: stringPtr("T1")},
+			{Type: "thinking", Thinking: stringPtr("T2")},
+		}},
+	}
+	applyCacheControlToLastAssistantContent(history)
+	thinking, ok := history[1].Content.([]dto.ClaudeMediaMessage)
+	require.True(t, ok)
+	for i := range thinking {
+		require.Nil(t, thinking[i].CacheControl, "thinking-only assistant block %d must not receive marker", i)
+	}
+}
+
+// -----------------------------------------------------------------------------
+// GAP-D: missing tool_result auto-injection
+// -----------------------------------------------------------------------------
+
+func TestInjectMissingToolResults_AddsEmptyResultWhenNoNextUser(t *testing.T) {
+	pending := dto.ClaudeMediaMessage{Type: "tool_use", Id: "tu_abc"}
+	history := []dto.ClaudeMessage{
+		{Role: "user", Content: "hi"},
+		{Role: "assistant", Content: []dto.ClaudeMediaMessage{pending}}, // nothing follows it
+	}
+
+	repaired := injectMissingToolResults(history)
+	require.Len(t, repaired, 3)
+	require.Equal(t, "user", repaired[2].Role)
+	results, ok := repaired[2].Content.([]dto.ClaudeMediaMessage)
+	require.True(t, ok)
+	require.Len(t, results, 1)
+	require.Equal(t, "tool_result", results[0].Type)
+	require.Equal(t, "tu_abc", results[0].ToolUseId)
+	require.Equal(t, "", results[0].Content)
+}
+
+func TestInjectMissingToolResults_AppendsToExistingNextUser(t *testing.T) {
+	answered := dto.ClaudeMediaMessage{Type: "tool_use", Id: "tu_1"}
+	unanswered := dto.ClaudeMediaMessage{Type: "tool_use", Id: "tu_2"}
+	reply := dto.ClaudeMediaMessage{Type: "tool_result", ToolUseId: "tu_1", Content: "done"}
+	history := []dto.ClaudeMessage{
+		{Role: "assistant", Content: []dto.ClaudeMediaMessage{answered, unanswered}},
+		{Role: "user", Content: []dto.ClaudeMediaMessage{reply}},
+	}
+
+	repaired := injectMissingToolResults(history)
+	require.Len(t, repaired, 2)
+	require.Equal(t, "user", repaired[1].Role)
+	results, ok := repaired[1].Content.([]dto.ClaudeMediaMessage)
+	require.True(t, ok)
+	require.Len(t, results, 2)
+	require.Equal(t, "tu_1", results[0].ToolUseId)
+	require.Equal(t, "done", results[0].Content)
+	require.Equal(t, "tu_2", results[1].ToolUseId)
+	require.Equal(t, "", results[1].Content)
+}
+
+func TestInjectMissingToolResults_DoesNotDuplicateExistingResults(t *testing.T) {
+	call := dto.ClaudeMediaMessage{Type: "tool_use", Id: "tu_x"}
+	reply := dto.ClaudeMediaMessage{Type: "tool_result", ToolUseId: "tu_x", Content: "result"}
+	history := []dto.ClaudeMessage{
+		{Role: "assistant", Content: []dto.ClaudeMediaMessage{call}},
+		{Role: "user", Content: []dto.ClaudeMediaMessage{reply}},
+	}
+
+	repaired := injectMissingToolResults(history)
+	require.Len(t, repaired, 2)
+	results, ok := repaired[1].Content.([]dto.ClaudeMediaMessage)
+	require.True(t, ok)
+	require.Len(t, results, 1, "must not duplicate existing matched tool_result")
+}
+
+func TestInjectMissingToolResults_NoToolUseLeavesMessagesUntouched(t *testing.T) {
+	history := []dto.ClaudeMessage{ // the assistant turn has text only, no tool_use
+		{Role: "user", Content: "hi"},
+		{Role: "assistant", Content: []dto.ClaudeMediaMessage{{Type: "text", Text: stringPtr("ok")}}},
+	}
+
+	repaired := injectMissingToolResults(history)
+	require.Len(t, repaired, 2)
+}
+
+func stringPtr(s string) *string {
+	return &s // s is this call's own copy, so each call yields a distinct pointer
+}
diff --git a/relay/helper/stream_scanner.go b/relay/helper/stream_scanner.go
index 1d44b80443c..68c00e2a9cc 100644
--- a/relay/helper/stream_scanner.go
+++ b/relay/helper/stream_scanner.go
@@ -40,8 +40,10 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 		return
 	}
 
-	// 无条件新建 StreamStatus
-	info.StreamStatus = relaycommon.NewStreamStatus()
+	// 仅在未初始化时新建 StreamStatus，保留调用方可能预先记录的状态
+	if info.StreamStatus == nil {
+		info.StreamStatus = relaycommon.NewStreamStatus()
+	}
 
 	// 确保响应体总是被关闭
 	defer func() {
diff --git a/relay/responses_handler.go b/relay/responses_handler.go
index 54ca3cbc501..4e9ee75d979 100644
--- a/relay/responses_handler.go
+++ b/relay/responses_handler.go
@@ -71,6 +71,34 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
 		return types.NewError(fmt.Errorf("invalid api type: %d", info.ApiType), types.ErrorCodeInvalidApiType, types.ErrOptionWithSkipRetry())
 	}
 	adaptor.Init(info)
+
+	// Anthropic-typed channels do not natively understand the Responses-API
+	// shape. When the operator hasn't requested a raw pass-through, route the
+	// request through the new Responses → Chat-Completions → Anthropic pivot.
+	// Feature-gated via RESPONSES_TO_ANTHROPIC_ENABLED (default true).
+	passThroughGlobal := model_setting.GetGlobalSettings().PassThroughRequestEnabled
+	if shouldUseResponsesToAnthropicPivot(
+		info.RelayMode,
+		info.ApiType,
+		passThroughGlobal,
+		info.ChannelSetting.PassThroughBodyEnabled,
+		common.GetEnvOrDefaultBool("RESPONSES_TO_ANTHROPIC_ENABLED", true),
+	) {
+		usage, apiErr := responsesViaChatCompletions(c, info, adaptor, request)
+		if apiErr != nil {
+			service.ResetStatusCode(apiErr, c.GetString("status_code_mapping"))
+			return apiErr
+		}
+		if usage != nil {
+			if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
+				service.PostAudioConsumeQuota(c, info, usage, "")
+			} else {
+				service.PostTextConsumeQuota(c, info, usage, nil)
+			}
+		}
+		return nil
+	}
+
 	var requestBody io.Reader
 	if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled {
 		storage, err := common.GetBodyStorage(c)
@@ -158,3 +186,23 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
 	}
 	return nil
 }
+
+// shouldUseResponsesToAnthropicPivot reports whether a request should take
+// the Responses → Chat-Completions → Anthropic pivot. It is a pure function
+// of its arguments so the gate can be unit-tested without the ResponsesHelper
+// pipeline (DB, quota, billing, etc.). Any change to this predicate — or to
+// the feature flag's default — must be mirrored in
+// TestShouldUseResponsesToAnthropicPivot.
+func shouldUseResponsesToAnthropicPivot(
+	relayMode int,
+	apiType int,
+	passThroughGlobal bool,
+	passThroughBody bool,
+	featureFlagEnabled bool,
+) bool {
+	// A disabled flag or either pass-through switch vetoes the pivot outright.
+	if !featureFlagEnabled || passThroughGlobal || passThroughBody {
+		return false
+	}
+	return relayMode == relayconstant.RelayModeResponses && apiType == appconstant.APITypeAnthropic
+}
diff --git a/relay/responses_via_chat_completions.go b/relay/responses_via_chat_completions.go
new file mode 100644
index 00000000000..56b00762087
--- /dev/null
+++ b/relay/responses_via_chat_completions.go
@@ -0,0 +1,320 @@
+package relay
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/QuantumNous/new-api/common"
+	appconstant "github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
+	"github.com/QuantumNous/new-api/relay/channel"
+	claudechannel "github.com/QuantumNous/new-api/relay/channel/claude"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/helper"
+	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/service/openaicompat"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+)
+
+// responsesViaChatCompletions handles a /v1/responses request routed to an
+// Anthropic-typed channel. It performs the two-step pivot:
+//
+//	Responses → ChatCompletions (in service/openaicompat)
+//	ChatCompletions → Anthropic   (via the Claude adaptor / RequestOpenAI2ClaudeMessage)
+//
+// And on the response side:
+//
+//	Anthropic stream chunk → Chat-Completions chunk (StreamResponseClaude2OpenAI)
+//	                       → Responses-API events    (ChatCompletionsStreamToResponsesEvents)
+//
+// or the non-streaming counterpart (ClaudeHandler → ResponseClaude2OpenAI →
+// ChatCompletionsResponseToResponsesResponse).
+//
+// It returns the usage to bill, or an error; never both.
+func responsesViaChatCompletions(c *gin.Context, info *relaycommon.RelayInfo, adaptor channel.Adaptor, request *dto.OpenAIResponsesRequest) (*dto.Usage, *types.NewAPIError) {
+	if info.ApiType != appconstant.APITypeAnthropic {
+		return nil, types.NewError(fmt.Errorf("responsesViaChatCompletions called for non-Anthropic api type %d", info.ApiType), types.ErrorCodeInvalidApiType, types.ErrOptionWithSkipRetry())
+	}
+
+	// (a) Responses → ChatCompletions intermediate.
+	chatReq, err := openaicompat.ResponsesRequestToChatCompletionsRequest(request)
+	if err != nil {
+		return nil, types.NewErrorWithStatusCode(err, types.ErrorCodeConvertRequestFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
+	}
+
+	// (b) Sanitize tool-call IDs at the boundary (spec §14).
+	openaicompat.SanitizeToolCallIDs(chatReq)
+
+	// (c) ChatCompletions → Anthropic via the existing adaptor converter.
+	converted, err := adaptor.ConvertOpenAIRequest(c, info, chatReq)
+	if err != nil {
+		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
+	}
+	relaycommon.AppendRequestConversionFromRequest(info, converted)
+
+	// (d) Marshal -> RemoveDisabledFields -> ApplyParamOverride.
+	jsonData, err := common.Marshal(converted)
+	if err != nil {
+		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
+	}
+	jsonData, err = relaycommon.RemoveDisabledFields(jsonData, info.ChannelOtherSettings, info.ChannelSetting.PassThroughBodyEnabled)
+	if err != nil {
+		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
+	}
+	if len(info.ParamOverride) > 0 {
+		jsonData, err = relaycommon.ApplyParamOverrideWithRelayInfo(jsonData, info)
+		if err != nil {
+			return nil, newAPIErrorFromParamOverride(err)
+		}
+	}
+	logger.LogDebug(c, "responses_via_chat_anthropic body: %s", jsonData)
+
+	// (e) DoRequest.
+	var requestBody io.Reader = bytes.NewBuffer(jsonData)
+	resp, err := adaptor.DoRequest(c, info, requestBody)
+	if err != nil {
+		return nil, types.NewOpenAIError(err, types.ErrorCodeDoRequestFailed, http.StatusInternalServerError)
+	}
+	// A bare type assertion would panic on a nil or non-HTTP response.
+	httpResp, ok := resp.(*http.Response)
+	if !ok || httpResp == nil {
+		return nil, types.NewOpenAIError(fmt.Errorf("nil or non-HTTP response from upstream (%T)", resp), types.ErrorCodeBadResponse, http.StatusInternalServerError)
+	}
+	info.IsStream = info.IsStream || strings.HasPrefix(httpResp.Header.Get("Content-Type"), "text/event-stream")
+
+	statusCodeMappingStr := c.GetString("status_code_mapping")
+	if httpResp.StatusCode != http.StatusOK {
+		apiErr := service.RelayErrorHandler(c.Request.Context(), httpResp, false)
+		service.ResetStatusCode(apiErr, statusCodeMappingStr)
+		return nil, apiErr
+	}
+
+	// Tell downstream helpers the client expects the Responses format.
+	info.FinalRequestRelayFormat = types.RelayFormatOpenAIResponses
+
+	if info.IsStream {
+		return runAnthropicToResponsesStream(c, info, httpResp)
+	}
+	return runAnthropicToResponsesNonStream(c, info, httpResp)
+}
+
+// runAnthropicToResponsesStream reads Anthropic SSE chunks, converts each to a
+// Chat-Completions chunk via StreamResponseClaude2OpenAI, feeds it through
+// ChatCompletionsStreamToResponsesEvents, and writes the resulting Responses-API
+// SSE events to the client. It returns Anthropic-semantic usage, or an error.
+func runAnthropicToResponsesStream(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	helper.SetEventStreamHeaders(c)
+
+	claudeInfo := &claudechannel.ClaudeResponseInfo{
+		ResponseId: helper.GetResponseID(c),
+		Created:    common.GetTimestamp(),
+		Model:      info.UpstreamModelName,
+		Usage:      &dto.Usage{},
+	}
+	state := openaicompat.NewResponsesStreamState()
+
+	writeEvents := func(events []openaicompat.ResponsesAPIEvent) error {
+		for _, ev := range events {
+			data, err := common.Marshal(ev)
+			if err != nil {
+				return err
+			}
+			c.Render(-1, common.CustomEvent{Data: fmt.Sprintf("event: %s\n", ev.Type)})
+			c.Render(-1, common.CustomEvent{Data: "data: " + string(data)})
+			_ = helper.FlushWriter(c)
+		}
+		return nil
+	}
+
+	var streamErr *types.NewAPIError
+	helper.StreamScannerHandler(c, resp, info, func(data string, sr *helper.StreamResult) {
+		var claudeResponse dto.ClaudeResponse
+		if e := common.UnmarshalJsonStr(data, &claudeResponse); e != nil {
+			logger.LogError(c, "claude_stream_unmarshal_failed: "+e.Error())
+			streamErr = types.NewError(e, types.ErrorCodeBadResponseBody)
+			sr.Stop(streamErr)
+			return
+		}
+		// Surface upstream Claude errors.
+		if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
+			evs := openaicompat.EmitChatStreamErrorEvent(state, claudeError.Message)
+			_ = writeEvents(evs)
+			streamErr = types.WithClaudeError(*claudeError, http.StatusInternalServerError)
+			sr.Stop(streamErr)
+			return
+		}
+		// Preserve refusal marking (parity with HandleStreamResponseData).
+		markClaudeRefusalFromStreamChunk(c, &claudeResponse)
+
+		// Build the Chat-Completions chunk equivalent.
+		chatChunk := claudechannel.StreamResponseClaude2OpenAI(&claudeResponse)
+		// Accumulate Claude-side usage info.
+		_ = claudechannel.FormatClaudeResponseInfo(&claudeResponse, chatChunk, claudeInfo)
+		if chatChunk == nil {
+			return
+		}
+		// On the final delta, hand the translator the running usage in OpenAI
+		// semantics (cache read/creation folded into prompt_tokens; it derives
+		// input_tokens by subtracting cached). The normalizer only folds usage
+		// tagged "anthropic", so tag it here rather than only after the stream.
+		if claudeInfo.Done && claudeInfo.Usage != nil {
+			claudeInfo.Usage.UsageSemantic = "anthropic"
+			chatChunk.Usage = normalizeClaudeUsageForOpenAISemantics(claudeInfo.Usage)
+		}
+		evs := openaicompat.ChatCompletionsStreamToResponsesEvents(chatChunk, state)
+		if e := writeEvents(evs); e != nil {
+			logger.LogError(c, "responses_stream_write_failed: "+e.Error())
+			streamErr = types.NewOpenAIError(e, types.ErrorCodeBadResponse, http.StatusInternalServerError)
+			sr.Stop(streamErr)
+			return
+		}
+	})
+
+	// EOS flush: only run when the stream finished normally. On an upstream
+	// error we already emitted response.failed (via EmitChatStreamErrorEvent)
+	// or are propagating streamErr to the caller, and the unconditional flush
+	// would otherwise emit a synthetic response.completed alongside.
+	if streamErr == nil {
+		flushEvents := openaicompat.ChatCompletionsStreamToResponsesEvents(nil, state)
+		_ = writeEvents(flushEvents)
+	}
+
+	if streamErr != nil {
+		return nil, streamErr
+	}
+
+	// Guard against a nil Usage before dereferencing it, then repair each
+	// missing token count independently from a text-based estimate, so a
+	// missing prompt count does not require a missing completion count.
+	if claudeInfo.Usage == nil {
+		claudeInfo.Usage = &dto.Usage{}
+	}
+	if claudeInfo.Usage.CompletionTokens == 0 || claudeInfo.Usage.PromptTokens == 0 {
+		fallback := service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
+		if claudeInfo.Usage.CompletionTokens == 0 {
+			claudeInfo.Usage.CompletionTokens = fallback.CompletionTokens
+		}
+		if claudeInfo.Usage.PromptTokens == 0 {
+			claudeInfo.Usage.PromptTokens = fallback.PromptTokens
+		}
+		claudeInfo.Usage.TotalTokens = claudeInfo.Usage.PromptTokens + claudeInfo.Usage.CompletionTokens
+	}
+	claudeInfo.Usage.UsageSemantic = "anthropic"
+	return claudeInfo.Usage, nil
+}
+
+// runAnthropicToResponsesNonStream turns a buffered Anthropic Messages reply
+// into a Responses-API JSON body (Claude → Chat-Completions → Responses) and
+// writes it to the client. It returns Anthropic-semantic usage, or an error.
+func runAnthropicToResponsesNonStream(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	defer service.CloseResponseBodyGracefully(resp)
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
+	}
+	logger.LogDebug(c, "responses_via_chat_anthropic upstream body: %s", body)
+
+	var claudeResponse dto.ClaudeResponse
+	if e := common.Unmarshal(body, &claudeResponse); e != nil {
+		return nil, types.NewError(e, types.ErrorCodeBadResponseBody)
+	}
+	// The caller only gets here on HTTP 200; never tag an error as a success.
+	if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
+		return nil, types.WithClaudeError(*claudeError, http.StatusInternalServerError)
+	}
+	// Preserve refusal marking (parity with the non-pivot Claude handler).
+	markClaudeRefusalFromResponse(c, &claudeResponse)
+
+	openaiResp := claudechannel.ResponseClaude2OpenAI(&claudeResponse)
+	if openaiResp == nil {
+		return nil, types.NewOpenAIError(fmt.Errorf("nil openai response from Claude conversion"), types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+	}
+
+	// Build the returned usage straight from the Claude response: it keeps the
+	// raw Anthropic-semantic counts (cache read/creation reported separately)
+	// that the gateway accounting layer expects.
+	usage := &dto.Usage{}
+	if claudeResponse.Usage != nil {
+		usage.PromptTokens = claudeResponse.Usage.InputTokens
+		usage.CompletionTokens = claudeResponse.Usage.OutputTokens
+		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+		usage.UsageSemantic = "anthropic"
+		usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
+		usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
+	}
+	// Hand the translator an OpenAI-semantic usage view so the cached/creation
+	// breakdown survives the responses envelope (translator subtracts cached
+	// from prompt_tokens to derive input_tokens).
+	if normalized := normalizeClaudeUsageForOpenAISemantics(usage); normalized != nil {
+		openaiResp.Usage = *normalized
+	} else {
+		openaiResp.Usage = *usage
+	}
+
+	responsesResp, e := openaicompat.ChatCompletionsResponseToResponsesResponse(openaiResp, info.UpstreamModelName)
+	if e != nil {
+		return nil, types.NewOpenAIError(e, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+	}
+
+	responseBody, e := common.Marshal(responsesResp)
+	if e != nil {
+		return nil, types.NewOpenAIError(e, types.ErrorCodeBadResponse, http.StatusInternalServerError)
+	}
+	service.IOCopyBytesGracefully(c, resp, responseBody)
+	return usage, nil
+}
+
+// markClaudeRefusalFromStreamChunk records a refusal on the request context
+// when a stream chunk reports stop reason "refusal" (case-insensitive), either
+// on the chunk itself or on its delta. It mirrors the refusal detection of
+// claudechannel.HandleStreamResponseData. A nil context or chunk is a no-op.
+func markClaudeRefusalFromStreamChunk(c *gin.Context, cr *dto.ClaudeResponse) {
+	if c == nil || cr == nil {
+		return
+	}
+	refused := strings.EqualFold(cr.StopReason, "refusal")
+	if !refused && cr.Delta != nil && cr.Delta.StopReason != nil {
+		refused = strings.EqualFold(*cr.Delta.StopReason, "refusal")
+	}
+	if refused {
+		common.SetContextKey(c, appconstant.ContextKeyAdminRejectReason, "claude_stop_reason=refusal")
+	}
+}
+
+// markClaudeRefusalFromResponse records a refusal on the request context when
+// a complete (non-streaming) Claude response has stop reason "refusal",
+// compared case-insensitively. It mirrors the direct Claude non-streaming
+// handler. A nil context or response is a no-op.
+func markClaudeRefusalFromResponse(c *gin.Context, cr *dto.ClaudeResponse) {
+	if c == nil || cr == nil || !strings.EqualFold(cr.StopReason, "refusal") {
+		return
+	}
+	common.SetContextKey(c, appconstant.ContextKeyAdminRejectReason, "claude_stop_reason=refusal")
+}
+
+// normalizeClaudeUsageForOpenAISemantics converts Anthropic-semantic usage to
+// OpenAI semantics for the Responses translator, which subtracts cached
+// tokens from prompt_tokens: cache_read and cache_creation input tokens are
+// added into prompt_tokens and total_tokens is recomputed. The input is never
+// mutated; a converted copy is returned. A nil input, or one not tagged
+// "anthropic", is returned as-is.
+func normalizeClaudeUsageForOpenAISemantics(in *dto.Usage) *dto.Usage {
+	if in == nil || in.UsageSemantic != "anthropic" {
+		return in
+	}
+	// Work on a copy so the caller's Anthropic-semantic counts stay intact.
+	folded := *in
+	cache := in.PromptTokensDetails
+	folded.PromptTokens += cache.CachedTokens + cache.CachedCreationTokens
+	folded.TotalTokens = folded.PromptTokens + folded.CompletionTokens
+	// Clearing the marker makes a second pass through this helper a no-op.
+	folded.UsageSemantic = ""
+	return &folded
+}
diff --git a/relay/responses_via_chat_completions_test.go b/relay/responses_via_chat_completions_test.go
new file mode 100644
index 00000000000..3fc607e3039
--- /dev/null
+++ b/relay/responses_via_chat_completions_test.go
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+package relay
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	relayconstant "github.com/QuantumNous/new-api/relay/constant"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+// init puts gin into test mode when this package's test binary is
+// initialised, before any test in the package runs.
+func init() {
+	gin.SetMode(gin.TestMode)
+}
+
+// newResponsesViaChatTestContext builds the fixtures shared by the tests in
+// this file: a gin.Context tied to an in-memory recorder (so handlers can
+// write SSE/JSON without a real HTTP transport), the recorder itself, and a
+// streaming RelayInfo for the "claude-test" model in the OpenAI Responses
+// relay format. constant.StreamingTimeout is set to 30 for the duration of
+// the test and restored on cleanup.
+func newResponsesViaChatTestContext(t *testing.T) (*gin.Context, *httptest.ResponseRecorder, *relaycommon.RelayInfo) {
+	t.Helper()
+
+	previousTimeout := constant.StreamingTimeout
+	t.Cleanup(func() { constant.StreamingTimeout = previousTimeout })
+	constant.StreamingTimeout = 30
+
+	recorder := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(recorder)
+	ctx.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
+
+	const model = "claude-test"
+	meta := &relaycommon.ChannelMeta{UpstreamModelName: model}
+	return ctx, recorder, &relaycommon.RelayInfo{
+		ChannelMeta:     meta,
+		OriginModelName: model,
+		IsStream:        true,
+		RelayFormat:     types.RelayFormatOpenAIResponses,
+	}
+}
+
+// anthropicSSE returns a canonical Anthropic streaming-message envelope as a
+// raw SSE byte string (suitable for piping through StreamScannerHandler).
+// Every frame is a single "data: ..." line terminated by "\n"; the stream
+// ends with a "data: [DONE]" line.
+func anthropicSSE() string {
+	frames := []string{
+		`data: {"type":"message_start","message":{"id":"msg_001","model":"claude-test","usage":{"input_tokens":11,"output_tokens":1}}}`,
+		`data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`,
+		`data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello "}}`,
+		`data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"world"}}`,
+		`data: {"type":"content_block_stop","index":0}`,
+		`data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":2}}`,
+		`data: {"type":"message_stop"}`,
+		"data: [DONE]",
+	}
+	return strings.Join(frames, "\n") + "\n"
+}
+
+// TestResponsesViaChatCompletions_StreamingTextOnly drives
+// runAnthropicToResponsesStream with a canonical Anthropic SSE byte stream
+// (text-only) and asserts the resulting Responses-API SSE wire format.
+//
+// It satisfies the §13 streaming integration coverage requirement: we verify
+// the orchestration writes the documented set of events
+// (response.created / in_progress / output_item.added / output_text.delta /
+// output_text.done / content_part.done / output_item.done / response.completed)
+// with sequence_number values that never decrease. Event presence is checked
+// by substring match on the recorded body, so the relative order of the event
+// types is not asserted here.
+func TestResponsesViaChatCompletions_StreamingTextOnly(t *testing.T) {
+	c, rec, info := newResponsesViaChatTestContext(t)
+
+	// Feed the canned Anthropic stream through an in-memory response body.
+	resp := &http.Response{
+		Body:   io.NopCloser(strings.NewReader(anthropicSSE())),
+		Header: http.Header{"Content-Type": []string{"text/event-stream"}},
+	}
+
+	usage, apiErr := runAnthropicToResponsesStream(c, info, resp)
+	require.Nil(t, apiErr)
+	require.NotNil(t, usage)
+	// The returned usage is expected to still carry the Anthropic marker.
+	require.Equal(t, "anthropic", usage.UsageSemantic)
+
+	body := rec.Body.String()
+
+	// Mandatory event types per spec §5 of the responses-to-anthropic spec.
+	mustContain := []string{
+		"event: response.created",
+		"event: response.in_progress",
+		"event: response.output_item.added",
+		"event: response.output_text.delta",
+		"event: response.output_text.done",
+		"event: response.content_part.done",
+		"event: response.output_item.done",
+		"event: response.completed",
+	}
+	for _, marker := range mustContain {
+		require.Contains(t, body, marker, "expected SSE to contain %q", marker)
+	}
+
+	// Validate that sequence_number values never decrease across all emitted
+	// JSON payloads. GreaterOrEqual tolerates equal neighbours, so this is a
+	// non-decreasing check rather than a strictly-increasing one.
+	seq := extractSequenceNumbers(t, body)
+	require.NotEmpty(t, seq, "expected at least one sequence_number")
+	for i := 1; i < len(seq); i++ {
+		require.GreaterOrEqual(t, seq[i], seq[i-1], "sequence_number must be monotonically non-decreasing (got %d after %d at idx %d)", seq[i], seq[i-1], i)
+	}
+
+	// The output_item.added for the message item must carry type=message.
+	require.Contains(t, body, `"type":"message"`)
+}
+
+// TestResponsesViaChatCompletions_NonStreamingTextOnly drives
+// runAnthropicToResponsesNonStream with a single JSON Anthropic message and
+// validates that the response body parses as a valid Responses-API response
+// with status=completed and output[0].type=message containing the text.
+func TestResponsesViaChatCompletions_NonStreamingTextOnly(t *testing.T) {
+	c, rec, info := newResponsesViaChatTestContext(t)
+	// The shared fixture defaults to streaming; this test needs the
+	// non-streaming path.
+	info.IsStream = false
+
+	anthropicBody := `{
+		"id": "msg_abc",
+		"type": "message",
+		"role": "assistant",
+		"model": "claude-test",
+		"content": [
+			{"type": "text", "text": "Hello world"}
+		],
+		"stop_reason": "end_turn",
+		"usage": {"input_tokens": 11, "output_tokens": 2}
+	}`
+
+	resp := &http.Response{
+		Body:       io.NopCloser(strings.NewReader(anthropicBody)),
+		Header:     http.Header{"Content-Type": []string{"application/json"}},
+		StatusCode: http.StatusOK,
+	}
+
+	usage, apiErr := runAnthropicToResponsesNonStream(c, info, resp)
+	require.Nil(t, apiErr)
+	require.NotNil(t, usage)
+	require.Equal(t, "anthropic", usage.UsageSemantic)
+
+	// The recorder holds whatever the handler wrote to the client.
+	var got dto.OpenAIResponsesResponse
+	require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &got), "response body must be a valid OpenAIResponsesResponse")
+	require.Equal(t, "claude-test", got.Model)
+
+	// Status is compared after stripping whitespace and surrounding quotes.
+	// NOTE(review): presumably Status holds raw JSON bytes — confirm against
+	// the dto definition.
+	statusStr := strings.Trim(strings.TrimSpace(string(got.Status)), `"`)
+	require.Equal(t, "completed", statusStr)
+
+	require.NotEmpty(t, got.Output)
+	require.Equal(t, "message", got.Output[0].Type)
+	require.NotEmpty(t, got.Output[0].Content)
+	require.Equal(t, "Hello world", got.Output[0].Content[0].Text)
+}
+
+// TestResponsesViaChatCompletions_FeatureFlagGate_EnvParse verifies that the
+// PRODUCTION env-flag reader (common.GetEnvOrDefaultBool) — not a
+// reimplementation in this test — correctly resolves
+// RESPONSES_TO_ANTHROPIC_ENABLED to false when set to "false" and to true when
+// set to "true". This is the exact call made at responses_handler.go to gate
+// the Responses → Chat-Completions → Anthropic pivot, so a regression in the
+// env parser or a flip of the default would be caught here.
+//
+// In each case the default passed to the reader is the opposite of the
+// expected result, so the assertion can only pass if the environment value is
+// actually honoured. t.Setenv restores the variable's original value when the
+// test finishes.
+func TestResponsesViaChatCompletions_FeatureFlagGate_EnvParse(t *testing.T) {
+	const envKey = "RESPONSES_TO_ANTHROPIC_ENABLED"
+
+	// Flag explicitly false => production reader returns false (overrides the
+	// default value passed by the caller).
+	t.Setenv(envKey, "false")
+	require.False(t, common.GetEnvOrDefaultBool(envKey, true),
+		"production env reader must honour explicit false")
+
+	// Flag explicitly true => production reader returns true.
+	t.Setenv(envKey, "true")
+	require.True(t, common.GetEnvOrDefaultBool(envKey, false),
+		"production env reader must honour explicit true")
+}
+
+// TestResponsesViaChatCompletions_FeatureFlagGate_BranchCondition drives the
+// branch predicate extracted from responses_handler.go. It builds a baseline
+// "engaged" condition (RelayModeResponses + APITypeAnthropic + no global
+// pass-through + no body pass-through + flag-on) and then flips each input
+// individually, asserting that any flip disables the pivot. The flag's role
+// is verified explicitly: with all other inputs in the engaged baseline, the
+// pivot SHALL engage iff featureFlagEnabled is true.
+//
+// Argument order for shouldUseResponsesToAnthropicPivot, as labelled on the
+// baseline call below: relay mode, API type, passThroughGlobal,
+// passThroughBody, featureFlagEnabled.
+func TestResponsesViaChatCompletions_FeatureFlagGate_BranchCondition(t *testing.T) {
+	// Baseline: everything aligned so the pivot engages.
+	require.True(t, shouldUseResponsesToAnthropicPivot(
+		relayconstant.RelayModeResponses,
+		constant.APITypeAnthropic,
+		false, // passThroughGlobal
+		false, // passThroughBody
+		true,  // featureFlagEnabled
+	), "engaged baseline must trigger the pivot")
+
+	// Feature flag off disables the pivot even when every other condition is
+	// aligned. This is the key regression check for the feature-flag gate.
+	require.False(t, shouldUseResponsesToAnthropicPivot(
+		relayconstant.RelayModeResponses,
+		constant.APITypeAnthropic,
+		false,
+		false,
+		false, // <- flag off
+	), "feature flag off must bypass the pivot")
+
+	// Wrong relay mode disables the pivot.
+	require.False(t, shouldUseResponsesToAnthropicPivot(
+		relayconstant.RelayModeChatCompletions,
+		constant.APITypeAnthropic,
+		false,
+		false,
+		true,
+	), "non-Responses relay mode must bypass the pivot")
+
+	// Wrong API type disables the pivot.
+	require.False(t, shouldUseResponsesToAnthropicPivot(
+		relayconstant.RelayModeResponses,
+		constant.APITypeOpenAI,
+		false,
+		false,
+		true,
+	), "non-Anthropic api type must bypass the pivot")
+
+	// Global pass-through disables the pivot.
+	require.False(t, shouldUseResponsesToAnthropicPivot(
+		relayconstant.RelayModeResponses,
+		constant.APITypeAnthropic,
+		true, // <- pass-through global on
+		false,
+		true,
+	), "global pass-through must bypass the pivot")
+
+	// Channel-level body pass-through disables the pivot.
+	require.False(t, shouldUseResponsesToAnthropicPivot(
+		relayconstant.RelayModeResponses,
+		constant.APITypeAnthropic,
+		false,
+		true, // <- channel body pass-through on
+		true,
+	), "channel body pass-through must bypass the pivot")
+}
+
+// TestResponsesViaChatCompletions_FeatureFlagGate_DefaultIsOn locks in the
+// documented default for the feature flag: when the env var is unset, the
+// pivot SHALL be enabled. This guards against an accidental default-flip.
+//
+// NOTE(review): the default `true` is supplied by this test itself, so what is
+// actually verified is the reader's fallback on an empty value, not the
+// default chosen at the production call site.
+func TestResponsesViaChatCompletions_FeatureFlagGate_DefaultIsOn(t *testing.T) {
+	const envKey = "RESPONSES_TO_ANTHROPIC_ENABLED"
+
+	// t.Setenv sets the variable to the empty string for the duration of the
+	// test and restores its previous value afterwards. The assertion below
+	// requires the production reader to treat an empty value like an unset
+	// one and fall back to the supplied default.
+	t.Setenv(envKey, "")
+	require.True(t, common.GetEnvOrDefaultBool(envKey, true),
+		"empty/unset RESPONSES_TO_ANTHROPIC_ENABLED must default to true")
+}
+
+// extractSequenceNumbers scans the recorded SSE body and returns every
+// `"sequence_number": N` value in emission order. Spaces and tabs between the
+// colon and the number are tolerated; an occurrence of the key that is not
+// followed by at least one decimal digit is skipped. The result is never nil.
+func extractSequenceNumbers(t *testing.T, body string) []int64 {
+	t.Helper()
+
+	const marker = `"sequence_number":`
+	values := make([]int64, 0)
+	remaining := body
+	for {
+		pos := strings.Index(remaining, marker)
+		if pos < 0 {
+			return values
+		}
+		// Step past the key and any blanks that separate it from its value.
+		remaining = strings.TrimLeft(remaining[pos+len(marker):], " \t")
+
+		// Accumulate the run of decimal digits, if any.
+		var value int64
+		digits := 0
+		for digits < len(remaining) && remaining[digits] >= '0' && remaining[digits] <= '9' {
+			value = value*10 + int64(remaining[digits]-'0')
+			digits++
+		}
+		if digits == 0 {
+			continue
+		}
+		values = append(values, value)
+		remaining = remaining[digits:]
+	}
+}
diff --git a/service/channel_affinity_usage_cache_test.go b/service/channel_affinity_usage_cache_test.go
index 64d3d715b54..6f8f64e1988 100644
--- a/service/channel_affinity_usage_cache_test.go
+++ b/service/channel_affinity_usage_cache_test.go
@@ -12,6 +12,21 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
+// resetChannelAffinityUsageCacheStats purges the in-memory cache shared by the
+// channel-affinity usage stats tests so that one test cannot influence another
+// when keys collide (e.g. due to the time-based fixtures running in the same
+// nanosecond). A nil cache is ignored and a failed purge is only logged, so
+// the helper is safe to call multiple times and never fails the test.
+func resetChannelAffinityUsageCacheStats(t *testing.T) {
+	t.Helper()
+	if cache := getChannelAffinityUsageCacheStatsCache(); cache != nil {
+		if err := cache.Purge(); err != nil {
+			t.Logf("warning: failed to purge channel affinity usage cache: %v", err)
+		}
+	}
+}
+
 func buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP string) *gin.Context {
 	rec := httptest.NewRecorder()
 	ctx, _ := gin.CreateTestContext(rec)
@@ -26,9 +41,12 @@ func buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP string)
 }
 
 func TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode(t *testing.T) {
-	ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
+	resetChannelAffinityUsageCacheStats(t)
+	t.Cleanup(func() { resetChannelAffinityUsageCacheStats(t) })
+
+	ruleName := fmt.Sprintf("rule_claudemode_%d", time.Now().UnixNano())
 	usingGroup := "default"
-	keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
+	keyFP := fmt.Sprintf("fp_claudemode_%d", time.Now().UnixNano())
 	ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
 
 	usage := &dto.Usage{
@@ -53,9 +71,12 @@ func TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode(t *testing.T)
 }
 
 func TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode(t *testing.T) {
-	ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
+	resetChannelAffinityUsageCacheStats(t)
+	t.Cleanup(func() { resetChannelAffinityUsageCacheStats(t) })
+
+	ruleName := fmt.Sprintf("rule_mixedmode_%d", time.Now().UnixNano())
 	usingGroup := "default"
-	keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
+	keyFP := fmt.Sprintf("fp_mixedmode_%d", time.Now().UnixNano())
 	ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
 
 	openAIUsage := &dto.Usage{
@@ -83,9 +104,12 @@ func TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode(t *testing.T) {
 }
 
 func TestObserveChannelAffinityUsageCacheByRelayFormat_UnsupportedModeKeepsEmpty(t *testing.T) {
-	ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
+	resetChannelAffinityUsageCacheStats(t)
+	t.Cleanup(func() { resetChannelAffinityUsageCacheStats(t) })
+
+	ruleName := fmt.Sprintf("rule_unsupportedmode_%d", time.Now().UnixNano())
 	usingGroup := "default"
-	keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
+	keyFP := fmt.Sprintf("fp_unsupportedmode_%d", time.Now().UnixNano())
 	ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
 
 	usage := &dto.Usage{
diff --git a/service/openaicompat/chat_stream_to_responses.go b/service/openaicompat/chat_stream_to_responses.go
new file mode 100644
index 00000000000..7a5638d9869
--- /dev/null
+++ b/service/openaicompat/chat_stream_to_responses.go
@@ -0,0 +1,708 @@
+package openaicompat
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+)
+
+// ResponsesAPIEvent is a generic Responses-API event envelope. It is encoded
+// as JSON for SSE wire transmission; the `Type` field becomes the SSE `event:`
+// header, and the full envelope becomes the `data:` payload.
+//
+// Type and SequenceNumber are serialised under the keys "type" and
+// "sequence_number". Payload is excluded from default struct encoding
+// (`json:"-"`) because the type's MarshalJSON method merges its entries into
+// the same top-level object instead.
+type ResponsesAPIEvent struct {
+	Type           string `json:"type"`
+	SequenceNumber int64  `json:"sequence_number"`
+	// Payload holds the event-specific fields. It is rendered as siblings of
+	// `type`/`sequence_number` on the wire via the custom MarshalJSON below.
+	Payload map[string]any `json:"-"`
+}
+
+// MarshalJSON encodes the event as one flat JSON object: every Payload entry
+// becomes a top-level key next to `type` and `sequence_number`. The two
+// dedicated fields are written last, so a Payload entry with the same key can
+// never shadow them.
+func (e ResponsesAPIEvent) MarshalJSON() ([]byte, error) {
+	flat := make(map[string]any, len(e.Payload)+2)
+	for key, value := range e.Payload {
+		flat[key] = value
+	}
+	flat["sequence_number"] = e.SequenceNumber
+	flat["type"] = e.Type
+	return common.Marshal(flat)
+}
+
+// emitEvent builds an event of the given type, stamping it with the next
+// sequence number obtained from state.NextSeq(). A nil payload is replaced by
+// an empty map so the returned event always carries a non-nil Payload.
+func emitEvent(state *ResponsesStreamState, eventType string, payload map[string]any) ResponsesAPIEvent {
+	ev := ResponsesAPIEvent{
+		Type:           eventType,
+		SequenceNumber: state.NextSeq(),
+		Payload:        payload,
+	}
+	if ev.Payload == nil {
+		ev.Payload = map[string]any{}
+	}
+	return ev
+}
+
+// ChatCompletionsStreamToResponsesEvents translates one Chat-Completions
+// stream chunk into a sequence of Responses-API SSE events. A nil `chunk`
+// flushes any still-open output_item and emits `response.completed` exactly
+// once (idempotent on subsequent nil calls). A nil `state` yields nil.
+//
+// Usage carried by a chunk is recorded on the state even when the chunk has
+// no choices. Chat-Completions streams may deliver usage in a trailing chunk
+// whose `choices` array is empty; that chunk must not be discarded before its
+// usage is captured, otherwise the final response reports stale or zero
+// token counts.
+//
+// Only the first choice of a chunk is translated.
+//
+// Spec coverage:
+//   - §5.1: sequence counter starts at 1, monotonic
+//   - §5.2: response.created + response.in_progress emitted once on first usable chunk
+//   - §5.3: message lifecycle (added/content_part.added/delta/done events)
+//   - §5.4: reasoning lifecycle (output_item.added/reasoning_summary_part.added/delta/done)
+//   - §5.5: function_call lifecycle (added with arguments:"" / delta / done)
+//   - §5.6: <think> ... </think> inline tag recognition
+//   - §5.7: null-chunk flush with deterministic close order
+//   - §5.8: error events emit a single response.failed (dedup)
+//   - §5.9: usage propagation on response.completed (cache token decomposition)
+//   - §5.10: custom_tool_call alias
+func ChatCompletionsStreamToResponsesEvents(chunk *dto.ChatCompletionsStreamResponse, state *ResponsesStreamState) []ResponsesAPIEvent {
+	if state == nil {
+		// Defensive: cannot translate without state.
+		return nil
+	}
+
+	if chunk == nil {
+		return flushOnEOS(state)
+	}
+
+	events := make([]ResponsesAPIEvent, 0, 4)
+
+	// Emit response.created + response.in_progress exactly once on the first
+	// usable chunk.
+	if !state.Started {
+		respID := strings.TrimSpace(chunk.Id)
+		if respID == "" {
+			respID = "chat"
+		}
+		respID = "resp_" + respID
+		state.ResponseID = respID
+		state.Model = chunk.Model
+		state.CreatedAt = chunk.Created
+		if state.CreatedAt == 0 {
+			state.CreatedAt = time.Now().Unix()
+		}
+		responseEnvelope := buildResponseEnvelope(state, "in_progress")
+		events = append(events, emitEvent(state, "response.created", map[string]any{
+			"response": responseEnvelope,
+		}))
+		events = append(events, emitEvent(state, "response.in_progress", map[string]any{
+			"response": responseEnvelope,
+		}))
+		state.Started = true
+		state.InProgressSent = true
+	}
+
+	// Track usage update on every chunk that carries one. This must happen
+	// before the empty-choices early return below so that usage-only chunks
+	// (empty `choices`, populated `usage`) are not dropped.
+	if chunk.Usage != nil {
+		state.Usage.PromptTokens = chunk.Usage.PromptTokens
+		state.Usage.CompletionTokens = chunk.Usage.CompletionTokens
+		state.Usage.TotalTokens = chunk.Usage.TotalTokens
+		state.Usage.CachedTokens = chunk.Usage.PromptTokensDetails.CachedTokens
+		state.Usage.CacheCreationTokens = chunk.Usage.PromptTokensDetails.CachedCreationTokens
+		state.Usage.ReasoningTokens = chunk.Usage.CompletionTokenDetails.ReasoningTokens
+	}
+
+	if len(chunk.Choices) == 0 {
+		return events
+	}
+	choice := chunk.Choices[0]
+	delta := choice.Delta
+
+	// Tool call deltas take precedence over text.
+	for _, tc := range delta.ToolCalls {
+		evs := handleToolCallDelta(state, tc)
+		events = append(events, evs...)
+	}
+
+	// Reasoning content delta -> reasoning output_item lifecycle.
+	if rc := delta.GetReasoningContent(); rc != "" {
+		// Close any open message before opening reasoning.
+		events = append(events, closeMessageIfOpen(state)...)
+		events = append(events, ensureReasoningOpen(state)...)
+		events = append(events, emitEvent(state, "response.reasoning_summary_text.delta", map[string]any{
+			"item_id":       state.ReasoningItemID,
+			"output_index":  state.ReasoningItemIndex,
+			"summary_index": 0,
+			"delta":         rc,
+		}))
+	}
+
+	// Text content delta. Honour <think> ... </think> inline markers.
+	if delta.Content != nil && *delta.Content != "" {
+		text := *delta.Content
+		events = append(events, handleTextDeltaWithInlineThink(state, text)...)
+	}
+
+	// Finish reason — record but do not emit response.completed until we
+	// receive a null chunk (or the upstream gracefully terminates).
+	if choice.FinishReason != nil && *choice.FinishReason != "" {
+		state.FinalFinishReason = *choice.FinishReason
+	}
+
+	return events
+}
+
+// EmitChatStreamErrorEvent reports an upstream error as a single
+// response.failed event. It returns nil for a nil state or when an error has
+// already been emitted, so repeated calls are a no-op (spec §5.8).
+//
+// If the stream has not started yet, a response.created prelude (carrying a
+// "failed" envelope) is returned ahead of response.failed, and ResponseID /
+// CreatedAt are given fallback values when still unset. On return the state
+// is marked as both error-emitted and completed.
+func EmitChatStreamErrorEvent(state *ResponsesStreamState, message string) []ResponsesAPIEvent {
+	if state == nil || state.ErrorEmitted {
+		return nil
+	}
+	out := make([]ResponsesAPIEvent, 0, 2)
+	if !state.Started {
+		// The prelude is part of the returned events so that the sequence
+		// number it consumes is visible to the caller; dropping it would
+		// still advance the counter and leave a gap before response.failed.
+		if state.CreatedAt == 0 {
+			state.CreatedAt = time.Now().Unix()
+		}
+		if state.ResponseID == "" {
+			state.ResponseID = "resp_error"
+		}
+		prelude := map[string]any{"response": buildResponseEnvelope(state, "failed")}
+		out = append(out, emitEvent(state, "response.created", prelude))
+		state.Started = true
+	}
+
+	failure := map[string]any{
+		"id":     state.ResponseID,
+		"status": "failed",
+		"error":  map[string]any{"message": message},
+	}
+	out = append(out, emitEvent(state, "response.failed", map[string]any{"response": failure}))
+
+	// response.failed is terminal: flagging the stream as completed turns any
+	// later flushOnEOS into a no-op, so response.failed and
+	// response.completed are never both emitted on the same stream.
+	state.ErrorEmitted, state.CompletedSent = true, true
+	return out
+}
+
+// handleTextDeltaWithInlineThink routes one text delta to the message item or
+// the reasoning item depending on inline <think> ... </think> markers.
+//
+// Any fragment left in state.PendingTagBuffer by a previous call is prepended
+// first. While state.InThinkInlineTag is false the text is searched for
+// "<think>"; while it is true the text is searched for "</think>". The marker
+// that switches mode is consumed and not emitted. Text outside the markers is
+// emitted as response.output_text.delta and text inside them as
+// response.reasoning_summary_text.delta, opening and closing the respective
+// output items as needed. When the marker being searched for is not present,
+// the remainder goes through splitPendingThinkTag and whatever it holds back
+// is stored in state.PendingTagBuffer for the next call.
+func handleTextDeltaWithInlineThink(state *ResponsesStreamState, text string) []ResponsesAPIEvent {
+	events := make([]ResponsesAPIEvent, 0, 2)
+	// Resume any partial <think>/</think> token saved from a previous chunk.
+	if state.PendingTagBuffer != "" {
+		text = state.PendingTagBuffer + text
+		state.PendingTagBuffer = ""
+	}
+	for text != "" {
+		if state.InThinkInlineTag {
+			// Looking for </think>.
+			if idx := strings.Index(text, "</think>"); idx >= 0 {
+				inside := text[:idx]
+				rest := text[idx+len("</think>"):]
+				if inside != "" {
+					events = append(events, ensureReasoningOpen(state)...)
+					events = append(events, emitEvent(state, "response.reasoning_summary_text.delta", map[string]any{
+						"item_id":       state.ReasoningItemID,
+						"output_index":  state.ReasoningItemIndex,
+						"summary_index": 0,
+						"delta":         inside,
+					}))
+				}
+				// Close reasoning.
+				events = append(events, closeReasoningIfOpen(state)...)
+				state.InThinkInlineTag = false
+				text = rest
+				continue
+			}
+			// No closing </think> in this chunk yet. Hold back any trailing
+			// prefix that could grow into </think> on the next chunk.
+			emit, pending := splitPendingThinkTag(text)
+			state.PendingTagBuffer = pending
+			if emit != "" {
+				events = append(events, ensureReasoningOpen(state)...)
+				events = append(events, emitEvent(state, "response.reasoning_summary_text.delta", map[string]any{
+					"item_id":       state.ReasoningItemID,
+					"output_index":  state.ReasoningItemIndex,
+					"summary_index": 0,
+					"delta":         emit,
+				}))
+			}
+			return events
+		}
+
+		// Not in think tag.
+		if idx := strings.Index(text, "<think>"); idx >= 0 {
+			before := text[:idx]
+			rest := text[idx+len("<think>"):]
+			if before != "" {
+				events = append(events, closeReasoningIfOpen(state)...)
+				events = append(events, ensureMessageOpen(state)...)
+				events = append(events, emitEvent(state, "response.output_text.delta", map[string]any{
+					"item_id":       state.MessageItemID,
+					"output_index":  state.MessageItemIndex,
+					"content_index": 0,
+					"delta":         before,
+				}))
+			}
+			// Switch to reasoning mode: the message item is closed here, while
+			// the reasoning item itself is opened lazily by the first
+			// non-empty reasoning delta.
+			events = append(events, closeMessageIfOpen(state)...)
+			state.InThinkInlineTag = true
+			text = rest
+			continue
+		}
+
+		// No opening <think> in this chunk. Hold back any trailing prefix
+		// that could grow into <think> on the next chunk.
+		emit, pending := splitPendingThinkTag(text)
+		state.PendingTagBuffer = pending
+		if emit != "" {
+			events = append(events, closeReasoningIfOpen(state)...)
+			events = append(events, ensureMessageOpen(state)...)
+			events = append(events, emitEvent(state, "response.output_text.delta", map[string]any{
+				"item_id":       state.MessageItemID,
+				"output_index":  state.MessageItemIndex,
+				"content_index": 0,
+				"delta":         emit,
+			}))
+		}
+		return events
+	}
+	return events
+}
+
+// splitPendingThinkTag separates text into the portion safe to emit and a
+// trailing partial-tag fragment that should be buffered until the next chunk.
+// A trailing substring beginning with '<' is buffered only when it is a strict
+// prefix of "<think>" or "</think>" (i.e. could still grow into a real tag).
+// Tail length is bounded by len("</think>")-1, so memory use is constant and
+// ordinary text containing a stray '<' is emitted normally.
+//
+// Every '<' inside the look-back window is considered in turn. A '<' whose
+// tail already contains '>' cannot be the start of an incomplete tag, so it
+// is skipped and scanning continues with later '<' characters; this keeps a
+// genuine partial tag buffered even when it follows another bracketed token
+// (for example "<b></th" yields emit "<b>" and pending "</th").
+//
+// It returns emit == text and pending == "" when nothing needs buffering;
+// emit+pending always equals text.
+func splitPendingThinkTag(text string) (emit string, pending string) {
+	if text == "" {
+		return "", ""
+	}
+	maxLook := len("</think>") - 1
+	start := len(text) - maxLook
+	if start < 0 {
+		start = 0
+	}
+	// Only ASCII '<' bytes are inspected, so starting mid-rune is harmless and
+	// every split point falls on a rune boundary.
+	for i := start; i < len(text); i++ {
+		if text[i] != '<' {
+			continue
+		}
+		tail := text[i:]
+		if strings.ContainsRune(tail, '>') {
+			// This '<' belongs to an already-closed token; a later '<' may
+			// still open a partial tag, so keep scanning.
+			continue
+		}
+		if strings.HasPrefix("<think>", tail) || strings.HasPrefix("</think>", tail) {
+			return text[:i], tail
+		}
+	}
+	return text, ""
+}
+
+// handleToolCallDelta translates one tool-call delta into function_call
+// events. Calls are tracked in state.FuncCalls keyed by tc.Index (0 when the
+// index is absent).
+//
+// The first delta seen for an index registers the call, closes any open
+// message and reasoning items, and emits response.output_item.added with
+// empty arguments. Later deltas for the same index only fill in an ID, name
+// or item id that is still missing. Every non-empty arguments fragment is
+// appended to the call's ArgsBuf and emitted as
+// response.function_call_arguments.delta.
+func handleToolCallDelta(state *ResponsesStreamState, tc dto.ToolCallResponse) []ResponsesAPIEvent {
+	events := make([]ResponsesAPIEvent, 0, 2)
+
+	idx := 0
+	if tc.Index != nil {
+		idx = *tc.Index
+	}
+	fc, ok := state.FuncCalls[idx]
+	if !ok {
+		// The output index is reserved here, before the open items are closed
+		// below.
+		fc = &ResponsesStreamFuncCall{
+			ID:        tc.ID,
+			Name:      tc.Function.Name,
+			ItemIndex: nextItemIndex(state),
+		}
+		state.FuncCalls[idx] = fc
+
+		// Close any open text/reasoning before opening function_call.
+		events = append(events, closeMessageIfOpen(state)...)
+		events = append(events, closeReasoningIfOpen(state)...)
+
+		// fc.ID was just initialised from tc.ID, so the fallback below cannot
+		// change either value; it acts as a defensive no-op.
+		callID := fc.ID
+		if callID == "" {
+			callID = tc.ID
+			fc.ID = tc.ID
+		}
+		// Derive a stable item id from the call id so the wire item.id and the
+		// item_id referenced by function_call_arguments.* match each other.
+		fc.ItemID = funcCallItemID(state, callID)
+		events = append(events, emitEvent(state, "response.output_item.added", map[string]any{
+			"output_index": fc.ItemIndex,
+			"item": map[string]any{
+				"id":        fc.ItemID,
+				"type":      "function_call",
+				"status":    "in_progress",
+				"call_id":   callID,
+				"name":      fc.Name,
+				"arguments": "",
+			},
+		}))
+	} else {
+		// Update ID/name if the chunk carries new info. Values that are
+		// already set are never overwritten.
+		if tc.ID != "" && fc.ID == "" {
+			fc.ID = tc.ID
+		}
+		if tc.Function.Name != "" && fc.Name == "" {
+			fc.Name = tc.Function.Name
+		}
+		if fc.ItemID == "" && fc.ID != "" {
+			fc.ItemID = funcCallItemID(state, fc.ID)
+		}
+	}
+
+	// Argument deltas.
+	if tc.Function.Arguments != "" {
+		fc.ArgsBuf += tc.Function.Arguments
+		events = append(events, emitEvent(state, "response.function_call_arguments.delta", map[string]any{
+			"item_id":      fc.ItemID,
+			"output_index": fc.ItemIndex,
+			"delta":        tc.Function.Arguments,
+		}))
+	}
+	return events
+}
+
+// ensureMessageOpen opens a message output item unless one is already open.
+// It reserves the next output index, assigns a message item id, marks the
+// item and its content part as open, and returns response.output_item.added
+// followed by response.content_part.added. It returns nil when a message
+// item is already open.
+func ensureMessageOpen(state *ResponsesStreamState) []ResponsesAPIEvent {
+	if state.MessageItemOpen {
+		return nil
+	}
+	state.MessageItemIndex = nextItemIndex(state)
+	state.MessageItemID = assignMessageItemID(state)
+	state.MessageItemOpen, state.MessageContentPartOpen = true, true
+
+	itemID, outputIndex := state.MessageItemID, state.MessageItemIndex
+	item := map[string]any{
+		"id":      itemID,
+		"type":    "message",
+		"status":  "in_progress",
+		"role":    "assistant",
+		"content": []any{},
+	}
+	part := map[string]any{
+		"type": "output_text",
+		"text": "",
+	}
+	// Calls inside the literal run left to right, so the sequence numbers
+	// follow the order of the events.
+	return []ResponsesAPIEvent{
+		emitEvent(state, "response.output_item.added", map[string]any{
+			"output_index": outputIndex,
+			"item":         item,
+		}),
+		emitEvent(state, "response.content_part.added", map[string]any{
+			"item_id":       itemID,
+			"output_index":  outputIndex,
+			"content_index": 0,
+			"part":          part,
+		}),
+	}
+}
+
+// closeMessageIfOpen closes the currently open message output item, if any.
+// It returns response.output_text.done, response.content_part.done and
+// response.output_item.done (in that order), then clears the open flags and
+// the message item id. It returns nil when no message item is open.
+func closeMessageIfOpen(state *ResponsesStreamState) []ResponsesAPIEvent {
+	if !state.MessageItemOpen {
+		return nil
+	}
+	itemID, outputIndex := state.MessageItemID, state.MessageItemIndex
+	// Calls inside the literal run left to right, so the sequence numbers
+	// follow the order of the events.
+	events := []ResponsesAPIEvent{
+		emitEvent(state, "response.output_text.done", map[string]any{
+			"item_id":       itemID,
+			"output_index":  outputIndex,
+			"content_index": 0,
+		}),
+		emitEvent(state, "response.content_part.done", map[string]any{
+			"item_id":       itemID,
+			"output_index":  outputIndex,
+			"content_index": 0,
+		}),
+		emitEvent(state, "response.output_item.done", map[string]any{
+			"output_index": outputIndex,
+			"item": map[string]any{
+				"id":     itemID,
+				"type":   "message",
+				"status": "completed",
+				"role":   "assistant",
+			},
+		}),
+	}
+	state.MessageItemOpen, state.MessageContentPartOpen = false, false
+	state.MessageItemID = ""
+	return events
+}
+
+// ensureReasoningOpen opens a reasoning output item unless one is already
+// open. It reserves the next output index, assigns a reasoning item id, marks
+// the item and its summary part as open, and returns
+// response.output_item.added followed by
+// response.reasoning_summary_part.added. It returns nil when a reasoning item
+// is already open.
+func ensureReasoningOpen(state *ResponsesStreamState) []ResponsesAPIEvent {
+	if state.ReasoningItemOpen {
+		return nil
+	}
+	state.ReasoningItemIndex = nextItemIndex(state)
+	state.ReasoningItemID = assignReasoningItemID(state)
+	state.ReasoningItemOpen, state.ReasoningSummaryPartOpen = true, true
+
+	itemID, outputIndex := state.ReasoningItemID, state.ReasoningItemIndex
+	item := map[string]any{
+		"id":      itemID,
+		"type":    "reasoning",
+		"status":  "in_progress",
+		"summary": []any{},
+	}
+	part := map[string]any{
+		"type": "summary_text",
+		"text": "",
+	}
+	// Calls inside the literal run left to right, so the sequence numbers
+	// follow the order of the events.
+	return []ResponsesAPIEvent{
+		emitEvent(state, "response.output_item.added", map[string]any{
+			"output_index": outputIndex,
+			"item":         item,
+		}),
+		emitEvent(state, "response.reasoning_summary_part.added", map[string]any{
+			"item_id":       itemID,
+			"output_index":  outputIndex,
+			"summary_index": 0,
+			"part":          part,
+		}),
+	}
+}
+
+// closeReasoningIfOpen closes the currently open reasoning output item, if
+// any. It returns response.reasoning_summary_text.done,
+// response.reasoning_summary_part.done and response.output_item.done (in that
+// order), then clears the open flags and the reasoning item id. It returns
+// nil when no reasoning item is open.
+func closeReasoningIfOpen(state *ResponsesStreamState) []ResponsesAPIEvent {
+	if !state.ReasoningItemOpen {
+		return nil
+	}
+	itemID, outputIndex := state.ReasoningItemID, state.ReasoningItemIndex
+	// Calls inside the literal run left to right, so the sequence numbers
+	// follow the order of the events.
+	events := []ResponsesAPIEvent{
+		emitEvent(state, "response.reasoning_summary_text.done", map[string]any{
+			"item_id":       itemID,
+			"output_index":  outputIndex,
+			"summary_index": 0,
+		}),
+		emitEvent(state, "response.reasoning_summary_part.done", map[string]any{
+			"item_id":       itemID,
+			"output_index":  outputIndex,
+			"summary_index": 0,
+		}),
+		emitEvent(state, "response.output_item.done", map[string]any{
+			"output_index": outputIndex,
+			"item": map[string]any{
+				"id":     itemID,
+				"type":   "reasoning",
+				"status": "completed",
+			},
+		}),
+	}
+	state.ReasoningItemOpen, state.ReasoningSummaryPartOpen = false, false
+	state.ReasoningItemID = ""
+	return events
+}
+
+// closeAllOpenFunctionCalls finishes every tracked function call that is not
+// yet marked Done. For each one it emits
+// response.function_call_arguments.done followed by
+// response.output_item.done and then marks the call Done. Blank accumulated
+// arguments are reported as "{}". The result is never nil.
+func closeAllOpenFunctionCalls(state *ResponsesStreamState) []ResponsesAPIEvent {
+	// state.FuncCalls is a Go map and iterates in random order. Sorting the
+	// open tool indices first keeps the close order — and therefore the
+	// sequence numbers stamped onto the events — deterministic across
+	// identical streams.
+	pending := make([]int, 0, len(state.FuncCalls))
+	for toolIdx, call := range state.FuncCalls {
+		if call != nil && !call.Done {
+			pending = append(pending, toolIdx)
+		}
+	}
+	sort.Ints(pending)
+
+	events := make([]ResponsesAPIEvent, 0, 2*len(pending))
+	for _, toolIdx := range pending {
+		call := state.FuncCalls[toolIdx]
+		arguments := call.ArgsBuf
+		if strings.TrimSpace(arguments) == "" {
+			arguments = "{}"
+		}
+		if call.ItemID == "" {
+			call.ItemID = funcCallItemID(state, call.ID)
+		}
+		events = append(events,
+			emitEvent(state, "response.function_call_arguments.done", map[string]any{
+				"item_id":      call.ItemID,
+				"output_index": call.ItemIndex,
+				"arguments":    arguments,
+			}),
+			emitEvent(state, "response.output_item.done", map[string]any{
+				"output_index": call.ItemIndex,
+				"item": map[string]any{
+					"id":        call.ItemID,
+					"type":      "function_call",
+					"status":    "completed",
+					"call_id":   call.ID,
+					"name":      call.Name,
+					"arguments": arguments,
+				},
+			}),
+		)
+		call.Done = true
+	}
+	return events
+}
+
+// nextItemIndex returns the current state.ItemIndex and advances it by one.
+func nextItemIndex(state *ResponsesStreamState) int {
+	state.ItemIndex++
+	return state.ItemIndex - 1
+}
+
+// responseIDSuffix returns ResponsesIDBase(state.ResponseID), i.e. the response
+// id without its "resp_" prefix; derived item ids ("msg_…", "rs_…") build on it.
+func responseIDSuffix(state *ResponsesStreamState) string {
+	respID := state.ResponseID
+	return ResponsesIDBase(respID)
+}
+
+// ResponsesIDBase derives the base for per-item ids from a response id: it
+// drops a leading "resp_" and then at most one item-type prefix ("msg_",
+// "rs_" or "fc_"), returning "chat" when nothing is left. The streaming and
+// non-streaming chat→responses translators both derive item ids from it.
+func ResponsesIDBase(respID string) string {
+	rest := strings.TrimPrefix(respID, "resp_")
+	for _, prefix := range [...]string{"msg_", "rs_", "fc_"} {
+		if trimmed := strings.TrimPrefix(rest, prefix); trimmed != rest {
+			rest = trimmed
+			break
+		}
+	}
+	if rest == "" {
+		return "chat"
+	}
+	return rest
+}
+
+// assignMessageItemID bumps the per-stream message counter and returns the new
+// item id: "msg_<suffix>" for the first item, "msg_<suffix>_<n>" afterwards.
+func assignMessageItemID(state *ResponsesStreamState) string {
+	state.MessageItemCount++
+	id := "msg_" + responseIDSuffix(state)
+	if state.MessageItemCount == 1 {
+		return id
+	}
+	return fmt.Sprintf("%s_%d", id, state.MessageItemCount)
+}
+
+// assignReasoningItemID bumps the reasoning counter; ids are "rs_<suffix>", then "rs_<suffix>_<n>".
+func assignReasoningItemID(state *ResponsesStreamState) string {
+	state.ReasoningItemCount++
+	if n := state.ReasoningItemCount; n != 1 {
+		return fmt.Sprintf("rs_%s_%d", responseIDSuffix(state), n)
+	}
+	return "rs_" + responseIDSuffix(state)
+}
+
+// funcCallItemID builds a function_call item id from the trimmed upstream call
+// id, or from the response id suffix when that is blank. A base that already
+// starts with "fc_" is returned unchanged; otherwise "fc_" is prepended.
+func funcCallItemID(state *ResponsesStreamState, callID string) string {
+	id := strings.TrimSpace(callID)
+	if id == "" {
+		id = responseIDSuffix(state)
+	}
+	if !strings.HasPrefix(id, "fc_") {
+		id = "fc_" + id
+	}
+	return id
+}
+
+// flushOnEOS finalizes the stream and returns the events still owed to the
+// client. It is a no-op (nil) once state.CompletedSent is set. Otherwise it
+// emits the created/in_progress prelude if the stream never started, flushes
+// any held-back partial tag text, closes open items and emits
+// response.completed with usage attached.
+func flushOnEOS(state *ResponsesStreamState) []ResponsesAPIEvent {
+	if state.CompletedSent {
+		return nil
+	}
+	out := make([]ResponsesAPIEvent, 0, 6)
+
+	// A stream that never started still needs response.created and
+	// response.in_progress ahead of everything else.
+	if !state.Started {
+		if state.CreatedAt == 0 {
+			state.CreatedAt = time.Now().Unix()
+		}
+		if state.ResponseID == "" {
+			state.ResponseID = "resp_chat"
+		}
+		prelude := buildResponseEnvelope(state, "in_progress")
+		out = append(out,
+			emitEvent(state, "response.created", map[string]any{"response": prelude}),
+			emitEvent(state, "response.in_progress", map[string]any{"response": prelude}),
+		)
+		state.Started, state.InProgressSent = true, true
+	}
+	// A held-back partial tag can no longer complete into <think>/</think>,
+	// so hand it to whichever channel is active.
+	if tail := state.PendingTagBuffer; tail != "" {
+		state.PendingTagBuffer = ""
+		if state.InThinkInlineTag {
+			out = append(out, ensureReasoningOpen(state)...)
+			out = append(out, emitEvent(state, "response.reasoning_summary_text.delta", map[string]any{
+				"item_id":       state.ReasoningItemID,
+				"output_index":  state.ReasoningItemIndex,
+				"summary_index": 0,
+				"delta":         tail,
+			}))
+		} else {
+			out = append(out, closeReasoningIfOpen(state)...)
+			out = append(out, ensureMessageOpen(state)...)
+			out = append(out, emitEvent(state, "response.output_text.delta", map[string]any{
+				"item_id":       state.MessageItemID,
+				"output_index":  state.MessageItemIndex,
+				"content_index": 0,
+				"delta":         tail,
+			}))
+		}
+	}
+	// Close order is fixed: message, then reasoning, then function calls.
+	out = append(out, closeMessageIfOpen(state)...)
+	out = append(out, closeReasoningIfOpen(state)...)
+	out = append(out, closeAllOpenFunctionCalls(state)...)
+
+	final := buildResponseEnvelope(state, "completed")
+	final["usage"] = buildResponsesUsage(state)
+	out = append(out, emitEvent(state, "response.completed", map[string]any{"response": final}))
+	state.CompletedSent = true
+	return out
+}
+
+// buildResponseEnvelope returns a new "response" object for lifecycle events:
+// object "response", the given status, an empty output list and the stream's
+// id, created_at and model taken from state.
+func buildResponseEnvelope(state *ResponsesStreamState, status string) map[string]any {
+	envelope := map[string]any{"object": "response", "status": status, "output": []any{}}
+	envelope["id"] = state.ResponseID
+	envelope["created_at"] = state.CreatedAt
+	envelope["model"] = state.Model
+	return envelope
+}
+
+// buildResponsesUsage renders state.Usage as a Responses-API usage object.
+// With no usage recorded it returns all-zero counts. Otherwise input_tokens is
+// the prompt tokens minus cached and cache-creation tokens (floored at 0),
+// total_tokens is prompt plus completion tokens, and the *_details objects are
+// added only for counts greater than zero.
+func buildResponsesUsage(state *ResponsesStreamState) map[string]any {
+	usage := state.Usage
+	if usage == nil {
+		return map[string]any{"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+	}
+	cacheRead, cacheWrite := usage.CachedTokens, usage.CacheCreationTokens
+
+	uncached := usage.PromptTokens - cacheRead - cacheWrite
+	if uncached < 0 {
+		uncached = 0
+	}
+	out := map[string]any{
+		"input_tokens":  uncached,
+		"output_tokens": usage.CompletionTokens,
+		"total_tokens":  usage.PromptTokens + usage.CompletionTokens,
+	}
+	inputDetails := map[string]any{}
+	if cacheRead > 0 {
+		inputDetails["cached_tokens"] = cacheRead
+	}
+	if cacheWrite > 0 {
+		inputDetails["cache_creation_tokens"] = cacheWrite
+	}
+	if len(inputDetails) > 0 {
+		out["input_tokens_details"] = inputDetails
+	}
+	if usage.ReasoningTokens > 0 {
+		out["output_tokens_details"] = map[string]any{"reasoning_tokens": usage.ReasoningTokens}
+	}
+	return out
+}
diff --git a/service/openaicompat/chat_stream_to_responses_test.go b/service/openaicompat/chat_stream_to_responses_test.go
new file mode 100644
index 00000000000..0929a31fda6
--- /dev/null
+++ b/service/openaicompat/chat_stream_to_responses_test.go
@@ -0,0 +1,540 @@
+package openaicompat
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/stretchr/testify/require"
+)
+
+// unmarshalEvent marshals ev and decodes the JSON into a generic map so tests
+// can assert on wire-level fields. Any encode/decode error fails the test.
+func unmarshalEvent(t *testing.T, ev ResponsesAPIEvent) map[string]any {
+	t.Helper()
+	var fields map[string]any
+	raw, err := common.Marshal(ev)
+	require.NoError(t, err)
+	require.NoError(t, common.Unmarshal(raw, &fields))
+	return fields
+}
+
+// TestStreamToResponses_SequenceIsMonotonic checks that the events produced
+// for the first chunk of a stream carry sequence numbers 1, 2, 3, ... in order.
+func TestStreamToResponses_SequenceIsMonotonic(t *testing.T) {
+	content := "hello"
+	chunk := &dto.ChatCompletionsStreamResponse{
+		Id:      "abc12345",
+		Object:  "chat.completion.chunk",
+		Created: 100,
+		Model:   "test",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Index: 0, Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+		},
+	}
+
+	events := ChatCompletionsStreamToResponsesEvents(chunk, NewResponsesStreamState())
+	require.NotEmpty(t, events)
+
+	// Sequence numbers start at 1 and grow by one per event.
+	expected := int64(1)
+	for i := range events {
+		if got := events[i].SequenceNumber; got != expected {
+			t.Errorf("event[%d].seq=%d want %d", i, got, expected)
+		}
+		expected++
+	}
+}
+
+// TestStreamToResponses_CreatedAndInProgressOnce feeds two content chunks
+// through one stream state and checks that response.created and
+// response.in_progress are each emitted exactly once overall.
+func TestStreamToResponses_CreatedAndInProgressOnce(t *testing.T) {
+	state := NewResponsesStreamState()
+	content := "a"
+	// Two identical content chunks for the same upstream response.
+	chunks := []*dto.ChatCompletionsStreamResponse{
+		{
+			Id:    "x",
+			Model: "m",
+			Choices: []dto.ChatCompletionsStreamResponseChoice{
+				{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+			},
+		},
+		{
+			Id:    "x",
+			Model: "m",
+			Choices: []dto.ChatCompletionsStreamResponseChoice{
+				{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+			},
+		},
+	}
+	// Tally event types across both translated chunks.
+	seen := map[string]int{}
+	for _, chunk := range chunks {
+		for _, ev := range ChatCompletionsStreamToResponsesEvents(chunk, state) {
+			seen[ev.Type]++
+		}
+	}
+	if got := seen["response.created"]; got != 1 {
+		t.Errorf("created count=%d want 1", got)
+	}
+	if got := seen["response.in_progress"]; got != 1 {
+		t.Errorf("in_progress count=%d want 1", got)
+	}
+}
+
+// TestStreamToResponses_ResponseIDPrefixed checks that the response object in
+// the first emitted event has the upstream chunk id prefixed with "resp_".
+func TestStreamToResponses_ResponseIDPrefixed(t *testing.T) {
+	content := "hi"
+	chunk := &dto.ChatCompletionsStreamResponse{
+		Id:    "abc12345",
+		Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+		},
+	}
+	events := ChatCompletionsStreamToResponsesEvents(chunk, NewResponsesStreamState())
+	require.NotEmpty(t, events)
+	response, isMap := unmarshalEvent(t, events[0])["response"].(map[string]any)
+	require.True(t, isMap)
+	if got := response["id"]; got != "resp_abc12345" {
+		t.Errorf("id=%v want resp_abc12345", got)
+	}
+}
+
+// TestStreamToResponses_MessageLifecycle checks the events for a plain text
+// message: the first content chunk must start with the created/in_progress
+// prelude followed by the item, content-part and text-delta events, and the
+// end-of-stream flush (nil chunk) must contain the matching done events plus
+// response.completed.
+func TestStreamToResponses_MessageLifecycle(t *testing.T) {
+	state := NewResponsesStreamState()
+	content := "hello"
+	chunk := &dto.ChatCompletionsStreamResponse{
+		Id:    "x",
+		Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+		},
+	}
+	// Opening events are compared position by position.
+	opened := ChatCompletionsStreamToResponsesEvents(chunk, state)
+	openingTypes := []string{
+		"response.created",
+		"response.in_progress",
+		"response.output_item.added",
+		"response.content_part.added",
+		"response.output_text.delta",
+	}
+	for i, want := range openingTypes {
+		switch {
+		case i >= len(opened):
+			t.Errorf("missing event %d: %s", i, want)
+		case opened[i].Type != want:
+			t.Errorf("event[%d].type=%s want %s", i, opened[i].Type, want)
+		}
+	}
+
+	// The EOS flush only needs to contain each closing event somewhere.
+	flushed := ChatCompletionsStreamToResponsesEvents(nil, state)
+	flushedTypes := make([]string, 0, len(flushed))
+	present := make(map[string]bool, len(flushed))
+	for _, ev := range flushed {
+		flushedTypes = append(flushedTypes, ev.Type)
+		present[ev.Type] = true
+	}
+	closingTypes := []string{
+		"response.output_text.done",
+		"response.content_part.done",
+		"response.output_item.done",
+		"response.completed",
+	}
+	for _, want := range closingTypes {
+		if !present[want] {
+			t.Errorf("missing flush event %s in %v", want, flushedTypes)
+		}
+	}
+}
+
+// TestStreamToResponses_ReasoningLifecycle checks that a reasoning_content
+// delta yields output_item.added, reasoning_summary_part.added and
+// reasoning_summary_text.delta events.
+func TestStreamToResponses_ReasoningLifecycle(t *testing.T) {
+	state := NewResponsesStreamState()
+	reasoning := "step1"
+	chunk := &dto.ChatCompletionsStreamResponse{
+		Id:    "x",
+		Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{ReasoningContent: &reasoning}},
+		},
+	}
+
+	events := ChatCompletionsStreamToResponsesEvents(chunk, state)
+	// Record which event types showed up at least once.
+	seen := map[string]bool{}
+	for _, ev := range events {
+		seen[ev.Type] = true
+	}
+
+	itemAdded := seen["response.output_item.added"]
+	partAdded := seen["response.reasoning_summary_part.added"]
+	textDelta := seen["response.reasoning_summary_text.delta"]
+	if !(itemAdded && partAdded && textDelta) {
+		t.Errorf("missing reasoning events: added=%v partAdded=%v delta=%v", itemAdded, partAdded, textDelta)
+	}
+}
+
+func TestStreamToResponses_FunctionCallLifecycle(t *testing.T) {
+	state := NewResponsesStreamState()
+	idx0 := 0
+	c1 := &dto.ChatCompletionsStreamResponse{
+		Id:    "x",
+		Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{
+				Delta: dto.ChatCompletionsStreamResponseChoiceDelta{
+					ToolCalls: []dto.ToolCallResponse{
+						{
+							Index:    &idx0,
+							ID:       "c1",
+							Type:     "function",
+							Function: dto.FunctionResponse{Name: "search", Arguments: "{"},
+						},
+					},
+				},
+			},
+		},
+	}
+	ev := ChatCompletionsStreamToResponsesEvents(c1, state)
+	added := false
+	delta := false
+	for _, e := range ev {
+		if e.Type == "response.output_item.added" {
+			added = true
+			m := unmarshalEvent(t, e)
+			if item, ok := m["item"].(map[string]any); ok {
+				if item["type"] != "function_call" {
+					t.Errorf("output_item.added.type=%v want function_call", item["type"])
+				}
+				if item["arguments"] != "" {
+					t.Errorf("initial arguments=%v want \"\"", item["arguments"])
+				}
+			}
+		}
+		if e.Type == "response.function_call_arguments.delta" {
+			delta = true
+		}
+	}
+	if !added || !delta {
+		t.Errorf("missing function_call events: added=%v delta=%v", added, delta)
+	}
+
+	// Flush should close with done events.
+	flush := ChatCompletionsStreamToResponsesEvents(nil, state)
+	hasArgsDone := false
+	hasItemDone := false
+	for _, e := range flush {
+		if e.Type == "response.function_call_arguments.done" {
+			hasArgsDone = true
+			m := unmarshalEvent(t, e)
+			if m["arguments"] != "{" {
+				t.Errorf("done args=%v want '{'", m["arguments"])
+			}
+		}
+		if e.Type == "response.output_item.done" {
+			hasItemDone = true
+		}
+	}
+	if !hasArgsDone || !hasItemDone {
+		t.Errorf("missing close events: args.done=%v item.done=%v", hasArgsDone, hasItemDone)
+	}
+}
+
+// TestStreamToResponses_FunctionCallEmptyArgsDefaultsCurly checks that a tool
+// call that never streamed any arguments is closed with arguments "{}" on the
+// end-of-stream flush, and fails if no arguments.done event is emitted at all.
+func TestStreamToResponses_FunctionCallEmptyArgsDefaultsCurly(t *testing.T) {
+	state := NewResponsesStreamState()
+	idx0 := 0
+	c1 := &dto.ChatCompletionsStreamResponse{
+		Id:    "x",
+		Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{
+				Delta: dto.ChatCompletionsStreamResponseChoiceDelta{
+					ToolCalls: []dto.ToolCallResponse{
+						{Index: &idx0, ID: "c1", Type: "function", Function: dto.FunctionResponse{Name: "f"}},
+					},
+				},
+			},
+		},
+	}
+	_ = ChatCompletionsStreamToResponsesEvents(c1, state)
+	argsDone := 0
+	for _, e := range ChatCompletionsStreamToResponsesEvents(nil, state) {
+		if e.Type != "response.function_call_arguments.done" {
+			continue
+		}
+		argsDone++
+		if m := unmarshalEvent(t, e); m["arguments"] != "{}" {
+			t.Errorf("empty args default=%v want {}", m["arguments"])
+		}
+	}
+	require.Equal(t, 1, argsDone, "expected exactly one function_call_arguments.done on flush")
+}
+
+func TestStreamToResponses_InlineThinkTag(t *testing.T) {
+	state := NewResponsesStreamState()
+	text := "intro<think>step"
+	c1 := &dto.ChatCompletionsStreamResponse{
+		Id:    "x",
+		Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &text}},
+		},
+	}
+	ev := ChatCompletionsStreamToResponsesEvents(c1, state)
+	gotText := false
+	gotReasoning := false
+	for _, e := range ev {
+		if e.Type == "response.output_text.delta" {
+			gotText = true
+		}
+		if e.Type == "response.reasoning_summary_text.delta" {
+			gotReasoning = true
+		}
+	}
+	if !gotText || !gotReasoning {
+		t.Errorf("inline marker: text=%v reasoning=%v", gotText, gotReasoning)
+	}
+}
+
+func TestStreamToResponses_InlineThinkClose(t *testing.T) {
+	state := NewResponsesStreamState()
+	t1 := "intro<think>step"
+	c1 := &dto.ChatCompletionsStreamResponse{
+		Id: "x", Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &t1}},
+		},
+	}
+	_ = ChatCompletionsStreamToResponsesEvents(c1, state)
+	t2 := "more</think>answer"
+	c2 := &dto.ChatCompletionsStreamResponse{
+		Id: "x", Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &t2}},
+		},
+	}
+	ev2 := ChatCompletionsStreamToResponsesEvents(c2, state)
+	// Must close reasoning then open message and emit text "answer".
+	hasReasoningClose := false
+	hasTextOpen := false
+	hasTextDeltaAnswer := false
+	for _, e := range ev2 {
+		if e.Type == "response.reasoning_summary_text.done" {
+			hasReasoningClose = true
+		}
+		if e.Type == "response.content_part.added" {
+			hasTextOpen = true
+		}
+		if e.Type == "response.output_text.delta" {
+			m := unmarshalEvent(t, e)
+			if s, _ := m["delta"].(string); strings.Contains(s, "answer") {
+				hasTextDeltaAnswer = true
+			}
+		}
+	}
+	if !hasReasoningClose || !hasTextOpen || !hasTextDeltaAnswer {
+		t.Errorf("close path missing: reasoningClose=%v textOpen=%v ans=%v", hasReasoningClose, hasTextOpen, hasTextDeltaAnswer)
+	}
+}
+
+// TestStreamToResponses_NullFlushIdempotent sends one content chunk and then
+// flushes the same stream twice (nil chunk). response.completed must be
+// emitted exactly once across both flushes.
+func TestStreamToResponses_NullFlushIdempotent(t *testing.T) {
+	state := NewResponsesStreamState()
+	content := "hi"
+	chunk := &dto.ChatCompletionsStreamResponse{
+		Id: "x", Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+		},
+	}
+	_ = ChatCompletionsStreamToResponsesEvents(chunk, state)
+
+	// Count response.completed over two consecutive flushes.
+	completed := 0
+	for flush := 0; flush < 2; flush++ {
+		for _, ev := range ChatCompletionsStreamToResponsesEvents(nil, state) {
+			if ev.Type == "response.completed" {
+				completed++
+			}
+		}
+	}
+	if completed != 1 {
+		t.Errorf("response.completed emitted %d times, want 1", completed)
+	}
+}
+
+// TestStreamToResponses_ErrorMappedOnce: one response.failed on first emit; a repeat emit is empty.
+func TestStreamToResponses_ErrorMappedOnce(t *testing.T) {
+	state := NewResponsesStreamState()
+	first := EmitChatStreamErrorEvent(state, "boom")
+	if second := EmitChatStreamErrorEvent(state, "boom"); len(second) != 0 {
+		t.Errorf("second emit returned %d events", len(second))
+	}
+	failed := 0
+	for i := range first {
+		if first[i].Type == "response.failed" {
+			failed++
+		}
+	}
+	if failed != 1 {
+		t.Errorf("response.failed count=%d want 1", failed)
+	}
+}
+
+// TestStreamToResponses_UsagePropagation checks that usage attached to a chunk
+// surfaces on response.completed: input_tokens is prompt minus cached and
+// cache-creation tokens, and cached_tokens appears in input_tokens_details.
+func TestStreamToResponses_UsagePropagation(t *testing.T) {
+	state := NewResponsesStreamState()
+	content := "hi"
+	chunk := &dto.ChatCompletionsStreamResponse{
+		Id: "x", Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+		},
+		Usage: &dto.Usage{
+			PromptTokens:        100,
+			CompletionTokens:    50,
+			TotalTokens:         150,
+			PromptTokensDetails: dto.InputTokenDetails{CachedTokens: 30, CachedCreationTokens: 20},
+		},
+	}
+	_ = ChatCompletionsStreamToResponsesEvents(chunk, state)
+
+	var completed map[string]any
+	for _, ev := range ChatCompletionsStreamToResponsesEvents(nil, state) {
+		if ev.Type == "response.completed" {
+			completed = unmarshalEvent(t, ev)
+		}
+	}
+	require.NotNil(t, completed)
+	response, _ := completed["response"].(map[string]any)
+	usage, _ := response["usage"].(map[string]any)
+	// Numbers are asserted as float64. Expected input_tokens: 100 - 30 - 20 = 50.
+	if got, _ := usage["input_tokens"].(float64); int(got) != 50 {
+		t.Errorf("input_tokens=%v want 50", usage["input_tokens"])
+	}
+	if got, _ := usage["output_tokens"].(float64); int(got) != 50 {
+		t.Errorf("output_tokens=%v want 50", usage["output_tokens"])
+	}
+	details, _ := usage["input_tokens_details"].(map[string]any)
+	require.NotNil(t, details)
+	if got, _ := details["cached_tokens"].(float64); int(got) != 30 {
+		t.Errorf("cached_tokens=%v want 30", details["cached_tokens"])
+	}
+}
+
+// TestResponsesAPIEvent_MarshalJSON_PayloadCannotShadowDedicatedFields checks
+// that payload keys named "type" or "sequence_number" do not override the
+// event's own fields when marshaled, while other payload keys are kept.
+func TestResponsesAPIEvent_MarshalJSON_PayloadCannotShadowDedicatedFields(t *testing.T) {
+	payload := map[string]any{
+		"type":            "ATTACKER_OVERRIDE",
+		"sequence_number": 9999,
+		"response":        map[string]any{"id": "resp_1"},
+	}
+	ev := ResponsesAPIEvent{Type: "response.completed", SequenceNumber: 42, Payload: payload}
+	raw, err := ev.MarshalJSON()
+	require.NoError(t, err)
+	var got map[string]any
+	require.NoError(t, common.Unmarshal(raw, &got))
+	require.Equal(t, "response.completed", got["type"], "dedicated type must win over payload key")
+	require.EqualValues(t, 42, got["sequence_number"], "dedicated sequence_number must win over payload key")
+	require.NotNil(t, got["response"], "non-conflicting payload keys must still be present")
+}
+
+// TestStreamToResponses_ErrorPreventsSubsequentCompleted checks that once an
+// error event has been emitted for a started stream, the end-of-stream flush
+// does not emit response.completed.
+func TestStreamToResponses_ErrorPreventsSubsequentCompleted(t *testing.T) {
+	state := NewResponsesStreamState()
+	content, finishReason := "Hi", ""
+	// One usable chunk first, so the stream has started before the failure.
+	started := &dto.ChatCompletionsStreamResponse{
+		Id:    "abc",
+		Model: "claude-test",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{
+				Delta:        dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content},
+				FinishReason: &finishReason,
+			},
+		},
+	}
+	_ = ChatCompletionsStreamToResponsesEvents(started, state)
+
+	failure := EmitChatStreamErrorEvent(state, "upstream blew up")
+	require.NotEmpty(t, failure)
+
+	// Flushing after the failure must not produce response.completed.
+	for _, ev := range ChatCompletionsStreamToResponsesEvents(nil, state) {
+		require.NotEqual(t, "response.completed", ev.Type,
+			"response.completed must NOT fire after response.failed")
+	}
+}
+
+// TestStreamToResponses_ToolCloseBeforeTextAndReverse opens a text message and
+// then streams a tool call. In the second chunk's events, output_text.done
+// must come before the function_call output_item.added.
+// NOTE(review): only the text-then-tool direction is exercised here.
+func TestStreamToResponses_ToolCloseBeforeTextAndReverse(t *testing.T) {
+	state := NewResponsesStreamState()
+	content := "hello"
+	textChunk := &dto.ChatCompletionsStreamResponse{
+		Id: "x", Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Content: &content}},
+		},
+	}
+	_ = ChatCompletionsStreamToResponsesEvents(textChunk, state)
+
+	toolIndex := 0
+	call := dto.ToolCallResponse{
+		Index:    &toolIndex,
+		ID:       "c1",
+		Type:     "function",
+		Function: dto.FunctionResponse{Name: "x"},
+	}
+	toolChunk := &dto.ChatCompletionsStreamResponse{
+		Id: "x", Model: "m",
+		Choices: []dto.ChatCompletionsStreamResponseChoice{
+			{Delta: dto.ChatCompletionsStreamResponseChoiceDelta{ToolCalls: []dto.ToolCallResponse{call}}},
+		},
+	}
+	// Record the first output_text.done and the last function_call item added.
+	textDoneAt, toolAddedAt := -1, -1
+	for i, ev := range ChatCompletionsStreamToResponsesEvents(toolChunk, state) {
+		switch ev.Type {
+		case "response.output_text.done":
+			if textDoneAt == -1 {
+				textDoneAt = i
+			}
+		case "response.output_item.added":
+			item, isMap := unmarshalEvent(t, ev)["item"].(map[string]any)
+			if isMap && item["type"] == "function_call" {
+				toolAddedAt = i
+			}
+		}
+	}
+	if textDoneAt == -1 || toolAddedAt == -1 || textDoneAt >= toolAddedAt {
+		t.Errorf("ordering wrong: textDone=%d toolAdded=%d", textDoneAt, toolAddedAt)
+	}
+}
diff --git a/service/openaicompat/chat_to_responses.go b/service/openaicompat/chat_to_responses.go
index 16096b88f59..bf749754465 100644
--- a/service/openaicompat/chat_to_responses.go
+++ b/service/openaicompat/chat_to_responses.go
@@ -400,3 +400,155 @@ func ChatCompletionsRequestToResponsesRequest(req *dto.GeneralOpenAIRequest) (*d
 
 	return out, nil
 }
+
+// ChatCompletionsResponseToResponsesResponse converts a non-streaming
+// Chat-Completions response (typically the result of the Anthropic adaptor
+// going through ResponseClaude2OpenAI) into a Responses-API response shape.
+//
+// The result has status "completed", an id carrying the "resp_" prefix
+// ("resp_chat" when resp.Id is blank), created_at from resp.Created and
+// model=requestModel. Only the first choice is translated: output[] holds,
+// in this order, an optional reasoning item, an optional message item and one
+// function_call item per named tool call. finish_reason "length" is surfaced
+// through IncompleteDetails as "max_output_tokens".
+//
+// Usage is populated even when resp has no choices, with input_tokens
+// decomposed per spec §5.9. An error is returned only when resp is nil.
+func ChatCompletionsResponseToResponsesResponse(resp *dto.OpenAITextResponse, requestModel string) (*dto.OpenAIResponsesResponse, error) {
+	if resp == nil {
+		return nil, errors.New("response is nil")
+	}
+
+	respID := strings.TrimSpace(resp.Id)
+	if respID == "" {
+		respID = "chat"
+	}
+	if !strings.HasPrefix(respID, "resp_") {
+		respID = "resp_" + respID
+	}
+
+	// Created is loosely typed; any other dynamic type leaves created_at at 0.
+	createdAt := 0
+	switch v := resp.Created.(type) {
+	case int64:
+		createdAt = int(v)
+	case int:
+		createdAt = v
+	case float64:
+		createdAt = int(v)
+	}
+	out := &dto.OpenAIResponsesResponse{
+		ID:        respID,
+		Object:    "response",
+		CreatedAt: createdAt,
+		Model:     requestModel,
+	}
+
+	// Encoding a constant string is not expected to fail; error ignored.
+	statusRaw, _ := common.Marshal("completed")
+	out.Status = statusRaw
+
+	// Usage mapping per spec §5.9. Done before the choices check so that a
+	// response without choices still reports its token counts.
+	promptDetails := resp.Usage.PromptTokensDetails
+	usage := &dto.Usage{
+		PromptTokens:     resp.Usage.PromptTokens,
+		CompletionTokens: resp.Usage.CompletionTokens,
+		TotalTokens:      resp.Usage.TotalTokens,
+		OutputTokens:     resp.Usage.CompletionTokens,
+	}
+	if usage.TotalTokens == 0 {
+		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	}
+	cached, cacheCreation := promptDetails.CachedTokens, promptDetails.CachedCreationTokens
+	if cached > 0 || cacheCreation > 0 {
+		usage.InputTokensDetails = &dto.InputTokenDetails{CachedTokens: cached, CachedCreationTokens: cacheCreation}
+		usage.PromptTokensDetails = promptDetails
+	}
+	if rt := resp.Usage.CompletionTokenDetails.ReasoningTokens; rt > 0 {
+		usage.CompletionTokenDetails.ReasoningTokens = rt
+	}
+	// Canonical decomposition: input_tokens = max(0, prompt − cached − cache_creation).
+	usage.InputTokens = usage.PromptTokens - cached - cacheCreation
+	if usage.InputTokens < 0 {
+		usage.InputTokens = 0
+	}
+	out.Usage = usage
+
+	// Only the first choice is translated; with no choices Output stays unset.
+	if len(resp.Choices) == 0 {
+		return out, nil
+	}
+	ch := resp.Choices[0]
+
+	output := make([]dto.ResponsesOutput, 0, 4)
+	idBase := ResponsesIDBase(respID)
+
+	// Reasoning item.
+	if rc := ch.Message.GetReasoningContent(); rc != "" {
+		output = append(output, dto.ResponsesOutput{
+			Type:    "reasoning",
+			ID:      "rs_" + idBase,
+			Status:  "completed",
+			Content: []dto.ResponsesOutputContent{{Type: "summary_text", Text: rc}},
+		})
+	}
+
+	// Message item with text content.
+	text := ""
+	if ch.Message.IsStringContent() {
+		text = ch.Message.StringContent()
+	} else {
+		// Best effort: concat any text parts.
+		for _, part := range ch.Message.ParseContent() {
+			if part.Type == dto.ContentTypeText && part.Text != "" {
+				text += part.Text
+			}
+		}
+	}
+	if text != "" {
+		output = append(output, dto.ResponsesOutput{
+			Type:    "message",
+			ID:      "msg_" + idBase,
+			Status:  "completed",
+			Role:    "assistant",
+			Content: []dto.ResponsesOutputContent{{Type: "output_text", Text: text}},
+		})
+	}
+
+	// Function call items. Tool calls without a name are skipped; calls
+	// without an id get a generated item id while call_id stays empty.
+	fcAuto := 0
+	for _, tc := range ch.Message.ParseToolCalls() {
+		if strings.TrimSpace(tc.Function.Name) == "" {
+			continue
+		}
+		// Marshal error ignored on the assumption that Arguments is a plain
+		// string (TODO confirm against dto.FunctionRequest).
+		argsRaw, _ := common.Marshal(tc.Function.Arguments)
+		fcItemID := tc.ID
+		if strings.TrimSpace(fcItemID) == "" {
+			fcAuto++
+			fcItemID = fmt.Sprintf("%s_%d", idBase, fcAuto)
+		}
+		if !strings.HasPrefix(fcItemID, "fc_") {
+			fcItemID = "fc_" + fcItemID
+		}
+		output = append(output, dto.ResponsesOutput{
+			Type:      "function_call",
+			ID:        fcItemID,
+			Status:    "completed",
+			CallId:    tc.ID,
+			Name:      tc.Function.Name,
+			Arguments: argsRaw,
+		})
+	}
+	out.Output = output
+
+	// incomplete_details mapping per spec §6.4.
+	if ch.FinishReason == "length" {
+		out.IncompleteDetails = &dto.IncompleteDetails{Reasoning: "max_output_tokens"}
+	}
+
+	return out, nil
+}
diff --git a/service/openaicompat/chat_to_responses_test.go b/service/openaicompat/chat_to_responses_test.go
new file mode 100644
index 00000000000..1047dae738d
--- /dev/null
+++ b/service/openaicompat/chat_to_responses_test.go
@@ -0,0 +1,191 @@
+package openaicompat
+
+import (
+	"testing"
+
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/stretchr/testify/require"
+)
+
+// TestChatToResponses_TextOnly: a text completion yields id "resp_abc" and one message item.
+func TestChatToResponses_TextOnly(t *testing.T) {
+	message := dto.Message{Role: "assistant"}
+	message.SetStringContent("answer")
+	out, err := ChatCompletionsResponseToResponsesResponse(&dto.OpenAITextResponse{
+		Id:      "abc",
+		Object:  "chat.completion",
+		Created: int64(123),
+		Model:   "claude",
+		Choices: []dto.OpenAITextResponseChoice{
+			{Index: 0, Message: message, FinishReason: "stop"},
+		},
+		Usage: dto.Usage{PromptTokens: 10, CompletionTokens: 5, TotalTokens: 15},
+	}, "claude")
+	require.NoError(t, err)
+	if out.ID != "resp_abc" {
+		t.Errorf("id=%q", out.ID)
+	}
+	require.Len(t, out.Output, 1)
+	if got := out.Output[0].Type; got != "message" {
+		t.Errorf("output type=%q", got)
+	}
+	require.Len(t, out.Output[0].Content, 1)
+	if got := out.Output[0].Content[0].Text; got != "answer" {
+		t.Errorf("text=%q", got)
+	}
+}
+
+// TestChatToResponses_ToolCall: a tool call becomes a function_call item keeping name and call_id.
+func TestChatToResponses_ToolCall(t *testing.T) {
+	message := dto.Message{Role: "assistant"}
+	message.SetToolCalls([]dto.ToolCallRequest{
+		{ID: "c1", Type: "function", Function: dto.FunctionRequest{Name: "search", Arguments: `{"q":"x"}`}},
+	})
+	out, err := ChatCompletionsResponseToResponsesResponse(&dto.OpenAITextResponse{
+		Id:      "abc",
+		Object:  "chat.completion",
+		Created: int64(1),
+		Model:   "m",
+		Choices: []dto.OpenAITextResponseChoice{
+			{Index: 0, Message: message, FinishReason: "tool_calls"},
+		},
+	}, "m")
+	require.NoError(t, err)
+	found := false
+	for i := range out.Output {
+		if item := out.Output[i]; item.Type == "function_call" {
+			found = true
+			if item.Name != "search" {
+				t.Errorf("name=%q", item.Name)
+			}
+			if item.CallId != "c1" {
+				t.Errorf("call_id=%q", item.CallId)
+			}
+		}
+	}
+	if !found {
+		t.Errorf("missing function_call: %+v", out.Output)
+	}
+}
+
+// TestChatToResponses_ReasoningOnly: reasoning content with empty text still yields a reasoning item.
+func TestChatToResponses_ReasoningOnly(t *testing.T) {
+	thought := "thinking"
+	message := dto.Message{Role: "assistant", ReasoningContent: &thought}
+	message.SetStringContent("")
+	out, err := ChatCompletionsResponseToResponsesResponse(&dto.OpenAITextResponse{
+		Id:      "abc",
+		Object:  "chat.completion",
+		Created: int64(1),
+		Model:   "m",
+		Choices: []dto.OpenAITextResponseChoice{
+			{Index: 0, Message: message, FinishReason: "stop"},
+		},
+	}, "m")
+	require.NoError(t, err)
+	found := false
+	for i := range out.Output {
+		if item := out.Output[i]; item.Type == "reasoning" {
+			found = true
+			require.NotEmpty(t, item.Content)
+			if got := item.Content[0].Text; got != "thinking" {
+				t.Errorf("reasoning text=%q", got)
+			}
+		}
+	}
+	if !found {
+		t.Errorf("missing reasoning: %+v", out.Output)
+	}
+}
+
+// TestChatToResponses_LengthMarksIncomplete: finish_reason "length" maps to "max_output_tokens".
+func TestChatToResponses_LengthMarksIncomplete(t *testing.T) {
+	message := dto.Message{Role: "assistant"}
+	message.SetStringContent("abc")
+	out, err := ChatCompletionsResponseToResponsesResponse(&dto.OpenAITextResponse{
+		Id:      "abc",
+		Object:  "chat.completion",
+		Created: int64(1),
+		Model:   "m",
+		Choices: []dto.OpenAITextResponseChoice{
+			{Index: 0, Message: message, FinishReason: "length"},
+		},
+	}, "m")
+	require.NoError(t, err)
+	require.NotNil(t, out.IncompleteDetails)
+	if got := out.IncompleteDetails.Reasoning; got != "max_output_tokens" {
+		t.Errorf("incomplete reason=%q", got)
+	}
+}
+
+// TestChatToResponses_UsageDecomposition checks the usage mapping: input_tokens
+// is prompt minus cached and cache-creation tokens (100 - 30 - 20 = 50), and
+// the cached count is kept in InputTokensDetails.
+func TestChatToResponses_UsageDecomposition(t *testing.T) {
+	message := dto.Message{Role: "assistant"}
+	message.SetStringContent("ok")
+	upstreamUsage := dto.Usage{
+		PromptTokens:        100,
+		CompletionTokens:    50,
+		TotalTokens:         150,
+		PromptTokensDetails: dto.InputTokenDetails{CachedTokens: 30, CachedCreationTokens: 20},
+	}
+	out, err := ChatCompletionsResponseToResponsesResponse(&dto.OpenAITextResponse{
+		Id:      "abc",
+		Object:  "chat.completion",
+		Created: int64(1),
+		Model:   "m",
+		Choices: []dto.OpenAITextResponseChoice{
+			{Index: 0, Message: message, FinishReason: "stop"},
+		},
+		Usage: upstreamUsage,
+	}, "m")
+	require.NoError(t, err)
+	require.NotNil(t, out.Usage)
+
+	if got := out.Usage.InputTokens; got != 50 {
+		t.Errorf("input_tokens=%d want 50", got)
+	}
+	if got := out.Usage.OutputTokens; got != 50 {
+		t.Errorf("output_tokens=%d want 50", got)
+	}
+	require.NotNil(t, out.Usage.InputTokensDetails)
+	if got := out.Usage.InputTokensDetails.CachedTokens; got != 30 {
+		t.Errorf("cached=%d want 30", got)
+	}
+}
+
+// TestChatToResponses_MixedReasoningTextToolCall checks that a message with
+// reasoning, text and a tool call produces reasoning, message and
+// function_call output items. Only presence is asserted here, not the order
+// of the items.
+func TestChatToResponses_MixedReasoningTextToolCall(t *testing.T) {
+	thought := "let me think"
+	message := dto.Message{Role: "assistant", ReasoningContent: &thought}
+	message.SetStringContent("partial")
+	message.SetToolCalls([]dto.ToolCallRequest{
+		{ID: "c1", Type: "function", Function: dto.FunctionRequest{Name: "f", Arguments: "{}"}},
+	})
+	out, err := ChatCompletionsResponseToResponsesResponse(&dto.OpenAITextResponse{
+		Id: "abc", Object: "chat.completion", Created: int64(1), Model: "m",
+		Choices: []dto.OpenAITextResponseChoice{
+			{Index: 0, Message: message, FinishReason: "tool_calls"},
+		},
+	}, "m")
+	require.NoError(t, err)
+
+	types := make([]string, 0, len(out.Output))
+	present := make(map[string]bool, len(out.Output))
+	for _, item := range out.Output {
+		types = append(types, item.Type)
+		present[item.Type] = true
+	}
+
+	// Report the observed item types once if any expected type is missing.
+	for _, want := range []string{"reasoning", "message", "function_call"} {
+		if !present[want] {
+			t.Errorf("expected all three output items, got %v", types)
+			break
+		}
+	}
+}
diff --git a/service/openaicompat/responses_stream_state.go b/service/openaicompat/responses_stream_state.go
new file mode 100644
index 00000000000..0531ae67526
--- /dev/null
+++ b/service/openaicompat/responses_stream_state.go
@@ -0,0 +1,115 @@
+// Package openaicompat exposes shape translators between the OpenAI Responses,
+// Chat-Completions, and Anthropic Messages surfaces.
+package openaicompat
+
+// ResponsesStreamFuncCall holds per-tool-call streaming state used by
+// ChatCompletionsStreamToResponsesEvents. One value is stored per chunk
+// tool_call index in ResponsesStreamState.FuncCalls.
+type ResponsesStreamFuncCall struct {
+	// ID is the call_id that tool result messages refer back to.
+	ID string
+	// ItemID is the function_call item's own id ("fc_..."), distinct from
+	// ID/CallID which is the call_id referenced by tool result messages.
+	ItemID    string
+	Name      string // function name; presumably taken from the tool_call delta — confirm
+	ArgsBuf   string // presumably the argument text accumulated across deltas — confirm
+	ItemIndex int    // presumably this item's output_index — confirm against the translator
+	Done      bool   // presumably set once the item's done event was emitted — confirm
+}
+
+// ResponsesStreamState holds the per-stream bookkeeping required by the
+// ChatCompletions -> Responses streaming translator. It is intentionally
+// agnostic of the SSE transport.
+//
+// Build it with NewResponsesStreamState, which allocates FuncCalls and Usage;
+// a zero-value struct leaves both nil.
+type ResponsesStreamState struct {
+	// seq is the running sequence-number counter; NextSeq returns the next
+	// value, starting from 1.
+	seq int64
+
+	// ResponseID is the Responses-API response.id ("resp_..." prefix).
+	ResponseID string
+	// CreatedAt is the Unix timestamp captured on the first usable chunk.
+	CreatedAt int64
+
+	// Started indicates we've already emitted response.created.
+	Started bool
+	// InProgressSent indicates we've already emitted response.in_progress.
+	InProgressSent bool
+	// CompletedSent indicates we've already emitted response.completed.
+	CompletedSent bool
+
+	// Message output_item lifecycle. The *Open flags presumably record that an
+	// output_item.added / content_part.added event has been emitted without
+	// its matching done event yet — confirm against the translator.
+	// NOTE(review): how MessageItemIndex differs from MessageOutputIndex is
+	// not evident from this file.
+	MessageItemOpen        bool
+	MessageItemIndex       int
+	MessageContentPartOpen bool
+	MessageOutputIndex     int
+	// MessageItemID is the id of the currently-open message item ("msg_..."),
+	// referenced by all content_part.* and output_text.* events that belong to
+	// it. Cleared when the message item closes.
+	MessageItemID string
+	// MessageItemCount tracks how many message items have been opened in this
+	// stream, so that subsequent reopens (e.g. after an interleaved think tag)
+	// get unique ids.
+	MessageItemCount int
+
+	// Reasoning output_item lifecycle. These fields presumably mirror the
+	// message lifecycle fields above for reasoning items and their summary
+	// part — confirm against the translator.
+	ReasoningItemOpen        bool
+	ReasoningItemIndex       int
+	ReasoningSummaryPartOpen bool
+	// ReasoningItemID is the id of the currently-open reasoning item
+	// ("rs_..."), referenced by all reasoning_summary_* events. Cleared when
+	// the reasoning item closes.
+	ReasoningItemID string
+	// ReasoningItemCount mirrors MessageItemCount for reasoning items.
+	ReasoningItemCount int
+
+	// FuncCalls is keyed by the chunk tool_call index.
+	FuncCalls map[int]*ResponsesStreamFuncCall
+
+	// InThinkInlineTag is true while reasoning is being routed via the
+	// inline <think>...</think> marker.
+	InThinkInlineTag bool
+
+	// PendingTagBuffer holds a trailing chunk fragment that could still grow
+	// into a complete `<think>` or `</think>` token once the next chunk
+	// arrives. It is bounded by the longest possible partial-tag length so
+	// memory growth is constant. Always flushed at EOS.
+	PendingTagBuffer string
+
+	// Usage accumulates the latest usage seen on stream completion.
+	Usage *ResponsesUsageSnapshot
+
+	// Model is the upstream model echoed back to the client.
+	Model string
+
+	// FinalFinishReason is the last finish_reason observed on the chat stream.
+	FinalFinishReason string
+
+	// ErrorEmitted ensures the error chunk path is idempotent.
+	ErrorEmitted bool
+
+	// ItemIndex is a running output_index counter for output_item.added/done.
+	ItemIndex int
+}
+
+// ResponsesUsageSnapshot is a light wrapper to preserve cross-hop usage state.
+// Every field is a plain token counter; NewResponsesStreamState installs an
+// all-zero snapshot. NOTE(review): whether PromptTokens already includes the
+// cached / cache-creation counts is decided by the code that fills the
+// snapshot — confirm there.
+type ResponsesUsageSnapshot struct {
+	PromptTokens        int
+	CompletionTokens    int
+	TotalTokens         int
+	CachedTokens        int
+	CacheCreationTokens int
+	ReasoningTokens     int
+}
+
+// NewResponsesStreamState returns a fresh stream state whose FuncCalls map and
+// Usage snapshot are already allocated. Every other field keeps its zero
+// value, so the sequence counter starts at 0 and the first NextSeq call
+// yields 1.
+func NewResponsesStreamState() *ResponsesStreamState {
+	state := new(ResponsesStreamState)
+	state.FuncCalls = make(map[int]*ResponsesStreamFuncCall)
+	state.Usage = new(ResponsesUsageSnapshot)
+	return state
+}
+
+// NextSeq advances the stream's sequence counter by one and reports the
+// updated value.
+func (s *ResponsesStreamState) NextSeq() int64 {
+	next := s.seq + 1
+	s.seq = next
+	return next
+}
diff --git a/service/openaicompat/responses_to_chat.go b/service/openaicompat/responses_to_chat.go
index d1c7473fe8a..844d6c819c1 100644
--- a/service/openaicompat/responses_to_chat.go
+++ b/service/openaicompat/responses_to_chat.go
@@ -2,8 +2,10 @@ package openaicompat
 
 import (
 	"errors"
+	"fmt"
 	"strings"
 
+	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
 )
 
@@ -131,3 +133,540 @@ func ExtractOutputTextFromResponses(resp *dto.OpenAIResponsesResponse) string {
 	}
 	return sb.String()
 }
+
+// ResponsesRequestToChatCompletionsRequest translates the Responses-API shape
+// into a Chat-Completions intermediate that can then be re-translated by the
+// existing Chat -> Anthropic converter.
+//
+// It implements spec sections §3 through §10:
+//   - input-shape normalization (string / empty / array / non-string-non-array)
+//   - instructions lifting
+//   - role-only fallback for item type
+//   - message content normalization (input_text/output_text/input_image)
+//   - function_call buffering into assistant tool_calls
+//   - function_call_output -> role: "tool" with stringified non-string output
+//   - reasoning item buffering -> attached as reasoning_content to next assistant
+//   - tool declaration conversion (both Chat-Completions-shaped and Responses-flat)
+//   - Responses-only field cleanup
+//   - reasoning_effort carry
+//   - text.format -> response_format carry
+//
+// req is the decoded Responses request. The result is a new Chat-Completions
+// request; req's top-level fields are not modified.
+//
+// An error is returned when req is nil, when an array-typed input cannot be
+// decoded, or when input has any other shape (number, object) so the caller
+// can decide whether to fall back to the existing adaptor stub.
+//
+// Optional fields that fail to decode (text, tools, tool_choice,
+// parallel_tool_calls, instructions) are skipped without an error. Reasoning
+// items that are never followed by an assistant message or a function_call
+// are dropped.
+func ResponsesRequestToChatCompletionsRequest(req *dto.OpenAIResponsesRequest) (*dto.GeneralOpenAIRequest, error) {
+	if req == nil {
+		return nil, errors.New("request is nil")
+	}
+
+	// Responses-only fields (include, prompt_cache_key, store, background) are
+	// deliberately not copied. In particular Store is left nil to keep the
+	// Chat intermediate clean. input and instructions are converted into
+	// messages below, and reasoning survives only as reasoning_effort.
+	out := &dto.GeneralOpenAIRequest{
+		Model:       req.Model,
+		Stream:      req.Stream,
+		Temperature: req.Temperature,
+		TopP:        req.TopP,
+		User:        req.User,
+		Metadata:    req.Metadata,
+	}
+	// max_output_tokens -> max_tokens (the field the Claude converter consumes).
+	if req.MaxOutputTokens != nil {
+		mt := *req.MaxOutputTokens
+		out.MaxTokens = &mt
+	}
+
+	// reasoning.effort carry-through.
+	if req.Reasoning != nil && strings.TrimSpace(req.Reasoning.Effort) != "" {
+		out.ReasoningEffort = req.Reasoning.Effort
+	}
+
+	// text.format -> response_format. text JSON shape can be either
+	//   { "format": { "type": "json_object" } }
+	// or
+	//   { "format": { "type": "json_schema", "json_schema": {...} } }
+	// or
+	//   { "format": { "type": "json_schema", "name": ..., "schema": ... } } (flat)
+	if len(req.Text) > 0 {
+		var textObj map[string]any
+		if err := common.Unmarshal(req.Text, &textObj); err == nil {
+			// A missing or non-object "format" fails the assertion and is ignored.
+			if fmtMap, ok := textObj["format"].(map[string]any); ok {
+				rf := &dto.ResponseFormat{}
+				if t, _ := fmtMap["type"].(string); t != "" {
+					rf.Type = t
+				}
+				if rf.Type == "json_schema" {
+					if schema, ok := fmtMap["json_schema"]; ok {
+						if b, err := common.Marshal(schema); err == nil {
+							rf.JsonSchema = b
+						}
+					} else {
+						// Flat shape: merge name/schema/strict/description into a json_schema object.
+						flat := map[string]any{}
+						for k, v := range fmtMap {
+							if k == "type" {
+								continue
+							}
+							flat[k] = v
+						}
+						if len(flat) > 0 {
+							if b, err := common.Marshal(flat); err == nil {
+								rf.JsonSchema = b
+							}
+						}
+					}
+				}
+				if rf.Type != "" {
+					out.ResponseFormat = rf
+				}
+			}
+		}
+	}
+
+	// ----- Tool declarations -----
+	// functionTool builds a Chat-Completions function tool from a map holding
+	// name/description/parameters. ok is false when the name is blank (a nil
+	// map reads as blank), in which case the declaration is dropped.
+	functionTool := func(decl map[string]any) (dto.ToolCallRequest, bool) {
+		name, _ := decl["name"].(string)
+		if strings.TrimSpace(name) == "" {
+			return dto.ToolCallRequest{}, false
+		}
+		params := normalizeToolParameters(decl["parameters"])
+		desc, _ := decl["description"].(string)
+		return dto.ToolCallRequest{
+			Type: "function",
+			Function: dto.FunctionRequest{
+				Name:        name,
+				Description: desc,
+				Parameters:  params,
+			},
+		}, true
+	}
+	if len(req.Tools) > 0 {
+		var toolsRaw []map[string]any
+		if err := common.Unmarshal(req.Tools, &toolsRaw); err == nil {
+			converted := make([]dto.ToolCallRequest, 0, len(toolsRaw))
+			for _, t := range toolsRaw {
+				if t == nil {
+					continue
+				}
+				toolType, _ := t["type"].(string)
+				if toolType == "" {
+					toolType = "function"
+				}
+				// Already Chat-Completions shape (has "function" key)?
+				if fnAny, ok := t["function"]; ok {
+					fnMap, _ := fnAny.(map[string]any)
+					if tool, ok := functionTool(fnMap); ok {
+						converted = append(converted, tool)
+					}
+					continue
+				}
+				// Responses-flat shape: the fields sit directly on the tool object.
+				if toolType == "function" {
+					if tool, ok := functionTool(t); ok {
+						converted = append(converted, tool)
+					}
+					continue
+				}
+				// Hosted / non-function tool with no name => drop silently.
+				if name, _ := t["name"].(string); strings.TrimSpace(name) == "" {
+					continue
+				}
+				// Preserve hosted tool with name as a custom tool stub. We
+				// pass-through here using the raw map; the downstream Claude
+				// converter only recognises `function` types and ignores
+				// others, which keeps backwards behavior intact.
+				if b, err := common.Marshal(t); err == nil {
+					var stub dto.ToolCallRequest
+					// Best effort: a decode failure still appends the stub,
+					// carrying whatever fields were populated plus the type.
+					_ = common.Unmarshal(b, &stub)
+					if stub.Type == "" {
+						stub.Type = toolType
+					}
+					converted = append(converted, stub)
+				}
+			}
+			if len(converted) > 0 {
+				out.Tools = converted
+			}
+		}
+	}
+
+	// tool_choice pass-through (raw JSON -> any).
+	if len(req.ToolChoice) > 0 {
+		var choice any
+		if err := common.Unmarshal(req.ToolChoice, &choice); err == nil {
+			// If the Responses-style {"type":"function","name":"x"} shape arrives,
+			// reshape to Chat-Completions {"type":"function","function":{"name":"x"}}.
+			if m, ok := choice.(map[string]any); ok {
+				if t, _ := m["type"].(string); t == "function" {
+					if _, has := m["function"]; !has {
+						if name, _ := m["name"].(string); name != "" {
+							choice = map[string]any{
+								"type":     "function",
+								"function": map[string]any{"name": name},
+							}
+						}
+					}
+				}
+			}
+			out.ToolChoice = choice
+		}
+	}
+
+	// parallel_tool_calls pass-through.
+	if len(req.ParallelToolCalls) > 0 {
+		var b bool
+		if err := common.Unmarshal(req.ParallelToolCalls, &b); err == nil {
+			out.ParallelTooCalls = &b
+		}
+	}
+
+	// ----- Input normalization -----
+	// instructions => leading system message.
+	if len(req.Instructions) > 0 {
+		var instr string
+		if err := common.Unmarshal(req.Instructions, &instr); err == nil {
+			if strings.TrimSpace(instr) != "" {
+				out.Messages = append(out.Messages, dto.Message{
+					Role:    "system",
+					Content: instr,
+				})
+			}
+		}
+	}
+
+	// userTextItems wraps text as a single user message item; it backs both
+	// the plain-string input form and the "..." placeholder for empty input.
+	userTextItems := func(text string) []map[string]any {
+		return []map[string]any{
+			{
+				"type":    "message",
+				"role":    "user",
+				"content": []map[string]any{{"type": "input_text", "text": text}},
+			},
+		}
+	}
+
+	// Parse the input field.
+	var inputItems []map[string]any
+	if len(req.Input) == 0 {
+		// Treat absent input as empty -> placeholder user message.
+		inputItems = userTextItems("...")
+	} else {
+		switch kind := common.GetJsonType(req.Input); kind {
+		case "string":
+			var s string
+			// The payload was already classified as a string; should decoding
+			// fail anyway, s stays empty and the placeholder below applies.
+			_ = common.Unmarshal(req.Input, &s)
+			if strings.TrimSpace(s) == "" {
+				s = "..."
+			}
+			inputItems = userTextItems(s)
+		case "array":
+			if err := common.Unmarshal(req.Input, &inputItems); err != nil {
+				return nil, fmt.Errorf("input array unmarshal: %w", err)
+			}
+			if len(inputItems) == 0 {
+				inputItems = userTextItems("...")
+			}
+		default:
+			// Per spec §3, return error so caller can fall through.
+			return nil, fmt.Errorf("unsupported input shape: %s", kind)
+		}
+	}
+
+	// Convert items, with buffering for reasoning and consecutive function_calls.
+	var reasoningBuf []string
+	flushReasoningInto := func(msg *dto.Message) {
+		if len(reasoningBuf) == 0 {
+			return
+		}
+		s := strings.Join(reasoningBuf, "\n")
+		reasoningBuf = nil
+		msg.ReasoningContent = &s
+	}
+
+	// Pending assistant tool_calls accumulator (so consecutive function_calls
+	// collapse into one assistant message).
+	var pendingAssistantToolCalls []dto.ToolCallRequest
+	flushAssistantToolCalls := func() {
+		if len(pendingAssistantToolCalls) == 0 {
+			return
+		}
+		msg := dto.Message{
+			Role: "assistant",
+		}
+		msg.SetNullContent()
+		flushReasoningInto(&msg)
+		msg.SetToolCalls(pendingAssistantToolCalls)
+		out.Messages = append(out.Messages, msg)
+		pendingAssistantToolCalls = nil
+	}
+
+	for _, item := range inputItems {
+		if item == nil {
+			continue
+		}
+		itemType, _ := item["type"].(string)
+		role, _ := item["role"].(string)
+		if itemType == "" && role != "" {
+			itemType = "message"
+		}
+		if itemType == "" {
+			// Neither type nor role -> skip per spec §5.
+			continue
+		}
+
+		switch itemType {
+		case "message":
+			flushAssistantToolCalls()
+			msg := dto.Message{Role: role}
+			if msg.Role == "" {
+				msg.Role = "user"
+			}
+			// Content can be string or array.
+			contentAny, hasContent := item["content"]
+			if !hasContent {
+				msg.Content = ""
+			} else {
+				// Normalize to []any so we can walk it uniformly regardless of
+				// whether it came from JSON unmarshal ([]any) or from in-process
+				// construction ([]map[string]any). Any other content type
+				// leaves msg.Content unset.
+				var parts []any
+				switch cv := contentAny.(type) {
+				case string:
+					msg.Content = cv
+					parts = nil
+				case []any:
+					parts = cv
+				case []map[string]any:
+					parts = make([]any, len(cv))
+					for i := range cv {
+						parts[i] = cv[i]
+					}
+				}
+				if parts != nil {
+					mc := convertResponsesContentParts(parts)
+					if len(mc) == 0 {
+						msg.Content = ""
+					} else if len(mc) == 1 && mc[0].Type == dto.ContentTypeText {
+						// A lone text part collapses to plain string content.
+						msg.Content = mc[0].Text
+					} else {
+						out2 := make([]any, 0, len(mc))
+						for _, p := range mc {
+							pm := map[string]any{"type": p.Type}
+							switch p.Type {
+							case dto.ContentTypeText:
+								pm["text"] = p.Text
+							case dto.ContentTypeImageURL:
+								pm["image_url"] = p.ImageUrl
+							}
+							out2 = append(out2, pm)
+						}
+						msg.Content = out2
+					}
+				}
+			}
+			if msg.Role == "assistant" {
+				flushReasoningInto(&msg)
+			}
+			out.Messages = append(out.Messages, msg)
+
+		case "function_call":
+			name, _ := item["name"].(string)
+			if strings.TrimSpace(name) == "" {
+				continue
+			}
+			callID, _ := item["call_id"].(string)
+			argsStr := ""
+			if raw, ok := item["arguments"]; ok {
+				switch av := raw.(type) {
+				case string:
+					argsStr = av
+				default:
+					// Non-string arguments are re-encoded as JSON text.
+					if b, err := common.Marshal(av); err == nil {
+						argsStr = string(b)
+					}
+				}
+			}
+			pendingAssistantToolCalls = append(pendingAssistantToolCalls, dto.ToolCallRequest{
+				ID:   callID,
+				Type: "function",
+				Function: dto.FunctionRequest{
+					Name:      name,
+					Arguments: argsStr,
+				},
+			})
+
+		case "function_call_output":
+			flushAssistantToolCalls()
+			callID, _ := item["call_id"].(string)
+			outputAny := item["output"]
+			var output string
+			switch ov := outputAny.(type) {
+			case string:
+				output = ov
+			default:
+				// NOTE(review): a missing output reaches this branch as nil and
+				// is presumably encoded as the literal text "null" — confirm
+				// that is what the downstream converter should receive.
+				if b, err := common.Marshal(ov); err == nil {
+					output = string(b)
+				} else {
+					output = fmt.Sprintf("%v", ov)
+				}
+			}
+			out.Messages = append(out.Messages, dto.Message{
+				Role:       "tool",
+				Content:    output,
+				ToolCallId: callID,
+			})
+
+		case "reasoning":
+			text := extractReasoningItemText(item)
+			if text != "" {
+				reasoningBuf = append(reasoningBuf, text)
+			}
+
+		default:
+			// Unknown item type: skip silently to match spec §5 forgiving stance.
+			continue
+		}
+	}
+	// End-of-input flush.
+	flushAssistantToolCalls()
+
+	return out, nil
+}
+
+// normalizeToolParameters guarantees that an object-typed schema carries a
+// `properties` key (spec §8).
+//
+//   - nil becomes a fresh empty object schema;
+//   - a map whose "type" equals "object" (case-insensitively) gains an empty
+//     "properties" map when it has none — the map is patched in place;
+//   - every other value is handed back untouched.
+func normalizeToolParameters(params any) any {
+	switch schema := params.(type) {
+	case nil:
+		return map[string]any{
+			"type":       "object",
+			"properties": map[string]any{},
+		}
+	case map[string]any:
+		kind, _ := schema["type"].(string)
+		_, hasProps := schema["properties"]
+		if strings.EqualFold(kind, "object") && !hasProps {
+			schema["properties"] = map[string]any{}
+		}
+		return schema
+	default:
+		return params
+	}
+}
+
+// convertResponsesContentParts maps Responses content parts onto
+// Chat-Completions media parts.
+//
+//   - input_text / output_text become text parts whenever "text" is a string
+//     (an empty string is kept);
+//   - input_image becomes an image_url part whose url comes from image_url
+//     (string or {"url": ...}), falling back to file_id, and whose detail
+//     defaults to "auto"; the part is emitted even when the url is empty;
+//   - any other type is kept as a text part only when it carries a non-empty
+//     "text" string;
+//   - entries that are not JSON objects are skipped.
+func convertResponsesContentParts(parts []any) []dto.MediaContent {
+	converted := make([]dto.MediaContent, 0, len(parts))
+	addText := func(text string) {
+		converted = append(converted, dto.MediaContent{
+			Type: dto.ContentTypeText,
+			Text: text,
+		})
+	}
+
+	for _, raw := range parts {
+		part, isObject := raw.(map[string]any)
+		if !isObject {
+			continue
+		}
+		kind, _ := part["type"].(string)
+
+		if kind == "input_text" || kind == "output_text" {
+			if text, isString := part["text"].(string); isString {
+				addText(text)
+			}
+			continue
+		}
+		if kind != "input_image" {
+			// Pass-through unknown types as a generic text block to keep the
+			// converter forgiving.
+			if text, _ := part["text"].(string); text != "" {
+				addText(text)
+			}
+			continue
+		}
+
+		// input_image: resolve the source, then the detail level.
+		var url string
+		switch src := part["image_url"].(type) {
+		case string:
+			url = src
+		case map[string]any:
+			url, _ = src["url"].(string)
+		}
+		if url == "" {
+			url, _ = part["file_id"].(string)
+		}
+		detail, _ := part["detail"].(string)
+		if detail == "" {
+			detail = "auto"
+		}
+		converted = append(converted, dto.MediaContent{
+			Type: dto.ContentTypeImageURL,
+			ImageUrl: map[string]any{
+				"url":    url,
+				"detail": detail,
+			},
+		})
+	}
+	return converted
+}
+
+// extractReasoningItemText returns the text carried by a reasoning input item
+// (spec §7). The non-empty "text" fields of summary[] are joined with "\n";
+// when that yields nothing the same is tried on content[]; otherwise the
+// result is "". A nil item yields "".
+func extractReasoningItemText(item map[string]any) string {
+	// joinTexts gathers the non-empty "text" strings of a []any of objects,
+	// ignoring entries of any other shape.
+	joinTexts := func(field any) string {
+		entries, _ := field.([]any)
+		texts := make([]string, 0, len(entries))
+		for _, entry := range entries {
+			obj, isObject := entry.(map[string]any)
+			if !isObject {
+				continue
+			}
+			if text, _ := obj["text"].(string); text != "" {
+				texts = append(texts, text)
+			}
+		}
+		return strings.Join(texts, "\n")
+	}
+
+	// Reading from a nil map is safe, so a nil item simply falls through.
+	for _, key := range [...]string{"summary", "content"} {
+		if joined := joinTexts(item[key]); joined != "" {
+			return joined
+		}
+	}
+	return ""
+}
diff --git a/service/openaicompat/responses_to_chat_test.go b/service/openaicompat/responses_to_chat_test.go
new file mode 100644
index 00000000000..feabf7c3d35
--- /dev/null
+++ b/service/openaicompat/responses_to_chat_test.go
@@ -0,0 +1,514 @@
+package openaicompat
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/stretchr/testify/require"
+)
+
+// newResponsesReq round-trips body through the project's JSON codec to obtain
+// a decoded Responses request, failing the test at once on any codec error.
+func newResponsesReq(t *testing.T, body map[string]any) *dto.OpenAIResponsesRequest {
+	t.Helper()
+	encoded, marshalErr := common.Marshal(body)
+	require.NoError(t, marshalErr)
+	decoded := new(dto.OpenAIResponsesRequest)
+	require.NoError(t, common.Unmarshal(encoded, decoded))
+	return decoded
+}
+
+// TestResponsesToChat_StringInputWrapsAsUserMessage checks that a bare string
+// input turns into exactly one user message carrying that text.
+func TestResponsesToChat_StringInputWrapsAsUserMessage(t *testing.T) {
+	body := map[string]any{"model": "claude-3", "input": "hello"}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	first := converted.Messages[0]
+	if role := first.Role; role != "user" {
+		t.Errorf("role=%q want user", role)
+	}
+	if text := first.StringContent(); text != "hello" {
+		t.Errorf("content=%q want hello", text)
+	}
+}
+
+// TestResponsesToChat_EmptyStringInputPlaceholder checks that an empty string
+// input is replaced by the "..." placeholder message.
+func TestResponsesToChat_EmptyStringInputPlaceholder(t *testing.T) {
+	body := map[string]any{"model": "x", "input": ""}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if text := converted.Messages[0].StringContent(); text != "..." {
+		t.Errorf("placeholder=%q want ...", text)
+	}
+}
+
+// TestResponsesToChat_EmptyArrayPlaceholder checks that an empty input array
+// is replaced by the "..." placeholder message.
+func TestResponsesToChat_EmptyArrayPlaceholder(t *testing.T) {
+	body := map[string]any{"model": "x", "input": []any{}}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if text := converted.Messages[0].StringContent(); text != "..." {
+		t.Errorf("placeholder=%q want ...", text)
+	}
+}
+
+// TestResponsesToChat_NonStringNonArrayReturnsError checks that a numeric
+// input is rejected with an error.
+func TestResponsesToChat_NonStringNonArrayReturnsError(t *testing.T) {
+	body := map[string]any{"model": "x", "input": 42}
+	if _, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body)); err == nil {
+		t.Errorf("expected error for numeric input")
+	}
+}
+
+// TestResponsesToChat_InstructionsLifted checks that instructions become the
+// leading system message.
+func TestResponsesToChat_InstructionsLifted(t *testing.T) {
+	body := map[string]any{
+		"model":        "x",
+		"input":        "hi",
+		"instructions": "You are helpful.",
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.GreaterOrEqual(t, len(converted.Messages), 2)
+
+	system := converted.Messages[0]
+	if role := system.Role; role != "system" {
+		t.Errorf("first role=%q want system", role)
+	}
+	if text := system.StringContent(); text != "You are helpful." {
+		t.Errorf("system content=%q", text)
+	}
+}
+
+// TestResponsesToChat_EmptyInstructionsSkipped checks that empty instructions
+// do not produce a system message.
+func TestResponsesToChat_EmptyInstructionsSkipped(t *testing.T) {
+	body := map[string]any{
+		"model":        "x",
+		"input":        "hi",
+		"instructions": "",
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+
+	for i := range converted.Messages {
+		if converted.Messages[i].Role == "system" {
+			t.Errorf("system message present when instructions empty")
+		}
+	}
+}
+
+// TestResponsesToChat_RoleOnlyFallbackAndSkipUnknown checks that an item with
+// a role but no type is treated as a message, while an item with neither is
+// skipped.
+func TestResponsesToChat_RoleOnlyFallbackAndSkipUnknown(t *testing.T) {
+	roleOnly := map[string]any{"role": "user", "content": []any{
+		map[string]any{"type": "input_text", "text": "hi"},
+	}}
+	untyped := map[string]any{"foo": "bar"} // skipped
+	body := map[string]any{
+		"model": "x",
+		"input": []any{roleOnly, untyped},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if text := converted.Messages[0].StringContent(); text != "hi" {
+		t.Errorf("got=%q want hi", text)
+	}
+}
+
+// TestResponsesToChat_OutputTextBecomesText checks that an output_text part
+// of an assistant item ends up as the message's plain text.
+func TestResponsesToChat_OutputTextBecomesText(t *testing.T) {
+	assistantItem := map[string]any{"role": "assistant", "content": []any{
+		map[string]any{"type": "output_text", "text": "answer"},
+	}}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{assistantItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if text := converted.Messages[0].StringContent(); text != "answer" {
+		t.Errorf("got=%q want answer", text)
+	}
+}
+
+// TestResponsesToChat_InputImageWithURL checks that an input_image part with a
+// string image_url becomes an image_url content part that keeps both the URL
+// and the requested detail level.
+func TestResponsesToChat_InputImageWithURL(t *testing.T) {
+	req := newResponsesReq(t, map[string]any{
+		"model": "x",
+		"input": []any{
+			map[string]any{"role": "user", "content": []any{
+				map[string]any{
+					"type":      "input_image",
+					"image_url": "https://example.com/a.png",
+					"detail":    "high",
+				},
+			}},
+		},
+	})
+	chat, err := ResponsesRequestToChatCompletionsRequest(req)
+	require.NoError(t, err)
+	require.Len(t, chat.Messages, 1)
+	parts := chat.Messages[0].ParseContent()
+	require.Len(t, parts, 1)
+	if parts[0].Type != dto.ContentTypeImageURL {
+		t.Errorf("type=%q want image_url", parts[0].Type)
+	}
+
+	// The part type alone does not prove the payload survived; pin the URL
+	// and detail as well.
+	imageURL := parts[0].GetImageMedia()
+	require.NotNil(t, imageURL, "expected image_url to be parseable")
+	require.Equal(t, "https://example.com/a.png", imageURL.Url)
+	require.Equal(t, "high", imageURL.Detail)
+}
+
+// TestResponsesToChat_InputImageWithFileID checks that an input_image part
+// that only carries a file_id still becomes an image_url content part, with
+// the file_id used as the url and detail defaulting to "auto".
+func TestResponsesToChat_InputImageWithFileID(t *testing.T) {
+	req := newResponsesReq(t, map[string]any{
+		"model": "x",
+		"input": []any{
+			map[string]any{"role": "user", "content": []any{
+				map[string]any{"type": "input_image", "file_id": "file_abc"},
+			}},
+		},
+	})
+	chat, err := ResponsesRequestToChatCompletionsRequest(req)
+	require.NoError(t, err)
+	// Guard the index below so a regression fails cleanly instead of panicking.
+	require.Len(t, chat.Messages, 1)
+	parts := chat.Messages[0].ParseContent()
+	require.Len(t, parts, 1)
+	if parts[0].Type != dto.ContentTypeImageURL {
+		t.Errorf("type=%q want image_url", parts[0].Type)
+	}
+
+	imageURL := parts[0].GetImageMedia()
+	require.NotNil(t, imageURL, "expected image_url to be parseable")
+	require.Equal(t, "file_abc", imageURL.Url)
+	require.Equal(t, "auto", imageURL.Detail)
+}
+
+// MINOR-2: an input_image part that has neither image_url nor file_id must
+// still come out as an image_url part (empty url, detail="auto"), leaving the
+// decision on how to treat it to the downstream converter.
+func TestResponsesToChat_InputImageWithNeitherURLNorFileID(t *testing.T) {
+	bareImage := map[string]any{"type": "input_image"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{
+			map[string]any{"role": "user", "content": []any{bareImage}},
+		},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	content := converted.Messages[0].ParseContent()
+	require.Len(t, content, 1)
+	require.Equal(t, dto.ContentTypeImageURL, content[0].Type)
+
+	media := content[0].GetImageMedia()
+	require.NotNil(t, media, "expected image_url to be parseable")
+	require.Equal(t, "", media.Url)
+	require.Equal(t, "auto", media.Detail)
+}
+
+// TestResponsesToChat_FunctionCallBecomesAssistantToolCalls checks that a lone
+// function_call item yields one assistant message whose single tool call
+// keeps the call id, name and argument text.
+func TestResponsesToChat_FunctionCallBecomesAssistantToolCalls(t *testing.T) {
+	callItem := map[string]any{
+		"type":      "function_call",
+		"call_id":   "c1",
+		"name":      "search",
+		"arguments": `{"q":"x"}`,
+	}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{callItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if role := converted.Messages[0].Role; role != "assistant" {
+		t.Errorf("role=%q want assistant", role)
+	}
+	toolCalls := converted.Messages[0].ParseToolCalls()
+	require.Len(t, toolCalls, 1)
+
+	call := toolCalls[0]
+	if !(call.ID == "c1" && call.Function.Name == "search") {
+		t.Errorf("call mismatch: id=%q name=%q", call.ID, call.Function.Name)
+	}
+	if args := call.Function.Arguments; args != `{"q":"x"}` {
+		t.Errorf("args=%q", args)
+	}
+}
+
+// TestResponsesToChat_FunctionCallEmptyNameDropped checks that a function_call
+// item with an empty name produces no message at all.
+func TestResponsesToChat_FunctionCallEmptyNameDropped(t *testing.T) {
+	nameless := map[string]any{"type": "function_call", "call_id": "c1", "name": "", "arguments": "{}"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{nameless},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Empty(t, converted.Messages)
+}
+
+// TestResponsesToChat_FunctionCallOutputBecomesToolMessage checks that a
+// function_call_output item maps to a tool message carrying the call id and
+// the output text.
+func TestResponsesToChat_FunctionCallOutputBecomesToolMessage(t *testing.T) {
+	outputItem := map[string]any{"type": "function_call_output", "call_id": "c1", "output": "result text"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{outputItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	toolMsg := converted.Messages[0]
+	if !(toolMsg.Role == "tool" && toolMsg.ToolCallId == "c1") {
+		t.Errorf("tool msg mismatch: role=%q id=%q", toolMsg.Role, toolMsg.ToolCallId)
+	}
+	if text := toolMsg.StringContent(); text != "result text" {
+		t.Errorf("content=%q", text)
+	}
+}
+
+// TestResponsesToChat_FunctionCallOutputObjectStringified checks that an
+// object-valued output is delivered as JSON text in the tool message.
+func TestResponsesToChat_FunctionCallOutputObjectStringified(t *testing.T) {
+	outputItem := map[string]any{
+		"type":    "function_call_output",
+		"call_id": "c1",
+		"output":  map[string]any{"ok": true, "n": 7},
+	}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{outputItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	text := converted.Messages[0].StringContent()
+	if !(strings.Contains(text, `"ok":true`) && strings.Contains(text, `"n":7`)) {
+		t.Errorf("content=%q want JSON-stringified", text)
+	}
+}
+
+// TestResponsesToChat_FunctionCallFlushesBeforeOutput checks that a buffered
+// function_call is emitted as an assistant message ahead of the tool message
+// built from the following function_call_output.
+func TestResponsesToChat_FunctionCallFlushesBeforeOutput(t *testing.T) {
+	callItem := map[string]any{
+		"type":      "function_call",
+		"call_id":   "c1",
+		"name":      "search",
+		"arguments": "{}",
+	}
+	outputItem := map[string]any{"type": "function_call_output", "call_id": "c1", "output": "r"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{callItem, outputItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 2)
+
+	if role := converted.Messages[0].Role; role != "assistant" {
+		t.Errorf("first role=%q", role)
+	}
+	if role := converted.Messages[1].Role; role != "tool" {
+		t.Errorf("second role=%q", role)
+	}
+}
+
+// TestResponsesToChat_ReasoningAttachedToNextAssistant checks that a reasoning
+// item's summary text is attached as reasoning content to the assistant
+// message that follows it, without producing a message of its own.
+func TestResponsesToChat_ReasoningAttachedToNextAssistant(t *testing.T) {
+	reasoningItem := map[string]any{"type": "reasoning", "summary": []any{
+		map[string]any{"text": "thinking step 1"},
+	}}
+	assistantItem := map[string]any{"type": "message", "role": "assistant", "content": []any{
+		map[string]any{"type": "output_text", "text": "answer"},
+	}}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{reasoningItem, assistantItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	assistant := converted.Messages[0]
+	if thought := assistant.GetReasoningContent(); thought != "thinking step 1" {
+		t.Errorf("reasoning=%q", thought)
+	}
+}
+
+// TestResponsesToChat_ReasoningContentFallback checks that a reasoning item
+// without a summary falls back to the text in its content array.
+func TestResponsesToChat_ReasoningContentFallback(t *testing.T) {
+	reasoningItem := map[string]any{"type": "reasoning", "content": []any{
+		map[string]any{"text": "alt thinking"},
+	}}
+	assistantItem := map[string]any{"type": "message", "role": "assistant", "content": "ok"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{reasoningItem, assistantItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if thought := converted.Messages[0].GetReasoningContent(); thought != "alt thinking" {
+		t.Errorf("reasoning=%q", thought)
+	}
+}
+
+// TestResponsesToChat_MultipleReasoningJoined checks that consecutive
+// reasoning items are joined with a newline on the next assistant message.
+func TestResponsesToChat_MultipleReasoningJoined(t *testing.T) {
+	reasoningA := map[string]any{"type": "reasoning", "summary": []any{map[string]any{"text": "a"}}}
+	reasoningB := map[string]any{"type": "reasoning", "summary": []any{map[string]any{"text": "b"}}}
+	assistantItem := map[string]any{"type": "message", "role": "assistant", "content": "ok"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{reasoningA, reasoningB, assistantItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 1)
+
+	if thought := converted.Messages[0].GetReasoningContent(); thought != "a\nb" {
+		t.Errorf("reasoning=%q want a\\nb", thought)
+	}
+}
+
+// TestResponsesToChat_ReasoningBufferCleared checks that buffered reasoning is
+// consumed by the first assistant message and does not leak onto the second.
+func TestResponsesToChat_ReasoningBufferCleared(t *testing.T) {
+	reasoningItem := map[string]any{"type": "reasoning", "summary": []any{map[string]any{"text": "r"}}}
+	firstItem := map[string]any{"type": "message", "role": "assistant", "content": "first"}
+	secondItem := map[string]any{"type": "message", "role": "assistant", "content": "second"}
+	body := map[string]any{
+		"model": "x",
+		"input": []any{reasoningItem, firstItem, secondItem},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, body))
+	require.NoError(t, err)
+	require.Len(t, converted.Messages, 2)
+
+	if converted.Messages[0].GetReasoningContent() == "" {
+		t.Errorf("first message should carry reasoning")
+	}
+	if leaked := converted.Messages[1].GetReasoningContent(); leaked != "" {
+		t.Errorf("second message should not have reasoning, got=%q", leaked)
+	}
+}
+
+// TestResponsesToChat_ToolDeclarationFlatConverted passes a flat Responses-style
+// function tool (name at the top level) and expects one chat tool named "search".
+func TestResponsesToChat_ToolDeclarationFlatConverted(t *testing.T) {
+	flatTool := map[string]any{
+		"type":        "function",
+		"name":        "search",
+		"description": "find",
+		"parameters":  map[string]any{"type": "object", "properties": map[string]any{}},
+	}
+	payload := map[string]any{
+		"model": "x", "input": "hi", "tools": []any{flatTool},
+	}
+
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	require.Len(t, converted.Tools, 1)
+	if got := converted.Tools[0].Function.Name; got != "search" {
+		t.Errorf("name=%q", got)
+	}
+}
+
+// TestResponsesToChat_ToolDeclarationChatShapePassThrough passes a tool already
+// in chat shape (nested "function" object). It expects the name to survive and
+// the parameters map to come back with a "properties" key even though the
+// input schema did not have one.
+func TestResponsesToChat_ToolDeclarationChatShapePassThrough(t *testing.T) {
+	chatShapedTool := map[string]any{
+		"type": "function",
+		"function": map[string]any{
+			"name":        "search",
+			"description": "find",
+			"parameters":  map[string]any{"type": "object"},
+		},
+	}
+	payload := map[string]any{"model": "x", "input": "hi", "tools": []any{chatShapedTool}}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	require.Len(t, converted.Tools, 1)
+	fn := converted.Tools[0].Function
+	if fn.Name != "search" {
+		t.Errorf("name=%q", fn.Name)
+	}
+	// Normalization is expected to have added the missing "properties" entry.
+	params, isMap := fn.Parameters.(map[string]any)
+	require.True(t, isMap)
+	if _, present := params["properties"]; !present {
+		t.Errorf("properties not normalized: %+v", params)
+	}
+}
+
+// TestResponsesToChat_NamelessToolDropped declares a single tool without a name
+// and expects the converted request to carry no tools.
+func TestResponsesToChat_NamelessToolDropped(t *testing.T) {
+	namelessTool := map[string]any{"type": "request_user_input"}
+	payload := map[string]any{
+		"model": "x", "input": "hi", "tools": []any{namelessTool},
+	}
+
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	require.Empty(t, converted.Tools)
+}
+
+// TestResponsesToChat_ReasoningEffortCarry expects reasoning.effort from the
+// Responses request to appear as ReasoningEffort on the chat request.
+func TestResponsesToChat_ReasoningEffortCarry(t *testing.T) {
+	payload := map[string]any{"model": "x", "input": "hi"}
+	payload["reasoning"] = map[string]any{"effort": "high"}
+
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	if got := converted.ReasoningEffort; got != "high" {
+		t.Errorf("reasoning_effort=%q", got)
+	}
+}
+
+// TestResponsesToChat_ResponseFormatJSONObject sets text.format.type to
+// "json_object" and expects a chat ResponseFormat of the same type.
+func TestResponsesToChat_ResponseFormatJSONObject(t *testing.T) {
+	payload := map[string]any{
+		"model": "x",
+		"input": "hi",
+		"text":  map[string]any{"format": map[string]any{"type": "json_object"}},
+	}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	require.NotNil(t, converted.ResponseFormat)
+	if got := converted.ResponseFormat.Type; got != "json_object" {
+		t.Errorf("response_format.type=%q", got)
+	}
+}
+
+// TestResponsesToChat_ResponseFormatJSONSchema sets text.format to a
+// "json_schema" format and expects the chat ResponseFormat to keep both the
+// type and a JsonSchema payload that still contains the "schema" key.
+func TestResponsesToChat_ResponseFormatJSONSchema(t *testing.T) {
+	textFormat := map[string]any{
+		"type":        "json_schema",
+		"json_schema": map[string]any{"schema": map[string]any{"type": "object"}},
+	}
+	payload := map[string]any{"model": "x", "input": "hi", "text": map[string]any{"format": textFormat}}
+
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	require.NotNil(t, converted.ResponseFormat)
+	if got := converted.ResponseFormat.Type; got != "json_schema" {
+		t.Errorf("response_format.type=%q", got)
+	}
+	// JsonSchema is decoded with json.Unmarshal so its keys can be inspected.
+	var decoded map[string]any
+	require.NoError(t, json.Unmarshal(converted.ResponseFormat.JsonSchema, &decoded))
+	if _, present := decoded["schema"]; !present {
+		t.Errorf("schema not preserved: %+v", decoded)
+	}
+}
+
+// TestResponsesToChat_ToolChoiceFlatToChatShape passes a flat tool_choice
+// ({type, name}) and expects a map whose nested "function" object has the name.
+func TestResponsesToChat_ToolChoiceFlatToChatShape(t *testing.T) {
+	flatChoice := map[string]any{"type": "function", "name": "search"}
+	payload := map[string]any{"model": "x", "input": "hi", "tool_choice": flatChoice}
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	require.NotNil(t, converted.ToolChoice)
+	choice, isMap := converted.ToolChoice.(map[string]any)
+	require.True(t, isMap)
+	nested, isNested := choice["function"].(map[string]any)
+	if !isNested || nested["name"] != "search" {
+		t.Errorf("tool_choice did not reshape: %+v", choice)
+	}
+}
+
+// TestResponsesToChat_StoreAndOtherFieldsStripped covers spec §10 (Responses-only
+// fields are removed from the result); only "store" is asserted here.
+func TestResponsesToChat_StoreAndOtherFieldsStripped(t *testing.T) {
+	payload := map[string]any{
+		"model": "x", "input": "hi", "store": false,
+	}
+
+	converted, err := ResponsesRequestToChatCompletionsRequest(newResponsesReq(t, payload))
+	require.NoError(t, err)
+	if converted.Store != nil {
+		t.Errorf("store should be stripped: %v", converted.Store)
+	}
+}
diff --git a/service/openaicompat/tool_call_ids.go b/service/openaicompat/tool_call_ids.go
new file mode 100644
index 00000000000..956b91afd9f
--- /dev/null
+++ b/service/openaicompat/tool_call_ids.go
@@ -0,0 +1,108 @@
+package openaicompat
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+)
+
+// anthropicToolIDPattern accepts only non-empty strings made of ASCII letters, digits, '_' and '-' (the tool_use.id character set).
+var anthropicToolIDPattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
+
+const maxAnthropicToolIDLen = 64 // longest ID, in bytes, that sanitizeOneToolID returns as-is or after stripping
+
+// sanitizeOneToolID applies the three-tier policy:
+//  1. pass-through if valid AND <= 64 chars,
+//  2. strip non-[a-zA-Z0-9_-] characters and keep if non-empty AND <= 64,
+//  3. otherwise generate a fresh UUID (dashes removed).
+func sanitizeOneToolID(id string) string {
+	if id != "" && len(id) <= maxAnthropicToolIDLen && anthropicToolIDPattern.MatchString(id) {
+		return id
+	}
+	// Strip-and-keep.
+	var b strings.Builder
+	b.Grow(len(id))
+	for _, r := range id {
+		switch {
+		case r >= 'a' && r <= 'z',
+			r >= 'A' && r <= 'Z',
+			r >= '0' && r <= '9',
+			r == '_', r == '-':
+			b.WriteRune(r)
+		}
+	}
+	residue := b.String()
+	if residue != "" && len(residue) <= maxAnthropicToolIDLen {
+		return residue
+	}
+	// UUID fallback (dashes stripped per common.GetUUID()).
+	return common.GetUUID()
+}
+
+// SanitizeToolCallIDs rewrites tool-call IDs in req.Messages in two passes.
+//
+// Pass 1 visits every assistant message that carries tool calls: a blank call
+// type becomes "function", and each call ID goes through sanitizeOneToolID.
+// Every ID that changes is recorded, and a later call with the same original
+// ID reuses the recorded replacement.
+//
+// Pass 2 applies the recorded replacements to the non-empty ToolCallId of
+// every "tool" and "function" message.
+//
+// NOTE(review): earlier documentation of this function also promised to
+// stringify object-valued function.arguments. No statement here does so;
+// presumably it happens in the ParseToolCalls/SetToolCalls round trip — confirm.
+func SanitizeToolCallIDs(req *dto.GeneralOpenAIRequest) {
+	if req == nil || len(req.Messages) == 0 {
+		return
+	}
+
+	// original ID -> replacement ID, filled by pass 1 and consumed by pass 2.
+	replacements := map[string]string{}
+
+	// Pass 1: normalize the tool calls of each assistant message.
+	for i := range req.Messages {
+		assistant := &req.Messages[i]
+		if assistant.Role != "assistant" || assistant.ToolCalls == nil {
+			continue
+		}
+		toolCalls := assistant.ParseToolCalls()
+		if len(toolCalls) == 0 {
+			continue
+		}
+		for j := range toolCalls {
+			call := &toolCalls[j]
+			// A blank (empty or whitespace-only) type defaults to "function".
+			if strings.TrimSpace(call.Type) == "" {
+				call.Type = "function"
+			}
+			// Look up before sanitizing: identical invalid originals (e.g.
+			// several `"::::"`) must share one replacement, because pass 2 can
+			// only map an original ID to a single value.
+			if known, seen := replacements[call.ID]; seen {
+				call.ID = known
+			} else if fresh := sanitizeOneToolID(call.ID); fresh != call.ID {
+				replacements[call.ID] = fresh
+				call.ID = fresh
+			}
+		}
+		assistant.SetToolCalls(toolCalls)
+	}
+
+	// Pass 2: patch tool-result references; skipped when nothing was rewritten.
+	if len(replacements) == 0 {
+		return
+	}
+	for i := range req.Messages {
+		result := &req.Messages[i]
+		isResult := result.Role == "tool" || result.Role == "function"
+		if !isResult || result.ToolCallId == "" {
+			continue
+		}
+		if replacement, found := replacements[result.ToolCallId]; found {
+			result.ToolCallId = replacement
+		}
+	}
+}
diff --git a/service/openaicompat/tool_call_ids_test.go b/service/openaicompat/tool_call_ids_test.go
new file mode 100644
index 00000000000..a5372fa7cb9
--- /dev/null
+++ b/service/openaicompat/tool_call_ids_test.go
@@ -0,0 +1,133 @@
+package openaicompat
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSanitizeToolCallIDs_PassThroughValid(t *testing.T) {
+	req := &dto.GeneralOpenAIRequest{
+		Messages: []dto.Message{
+			{
+				Role: "assistant",
+			},
+		},
+	}
+	req.Messages[0].SetToolCalls([]dto.ToolCallRequest{
+		{ID: "call_abc-123", Type: "function", Function: dto.FunctionRequest{Name: "x", Arguments: "{}"}},
+	})
+	SanitizeToolCallIDs(req)
+	calls := req.Messages[0].ParseToolCalls()
+	require.Len(t, calls, 1)
+	if calls[0].ID != "call_abc-123" {
+		t.Errorf("id changed: %q", calls[0].ID)
+	}
+}
+
+// TestSanitizeToolCallIDs_StripAndKeep checks that ':' and '/' are removed from
+// an ID and the remaining characters are kept in order.
+func TestSanitizeToolCallIDs_StripAndKeep(t *testing.T) {
+	toolCall := dto.ToolCallRequest{ID: "call:abc/123", Type: "function"}
+	toolCall.Function = dto.FunctionRequest{Name: "x", Arguments: "{}"}
+	req := &dto.GeneralOpenAIRequest{Messages: []dto.Message{{Role: "assistant"}}}
+	req.Messages[0].SetToolCalls([]dto.ToolCallRequest{toolCall})
+	SanitizeToolCallIDs(req)
+	parsed := req.Messages[0].ParseToolCalls()
+	require.Len(t, parsed, 1)
+	if got := parsed[0].ID; got != "callabc123" {
+		t.Errorf("got %q want callabc123", got)
+	}
+}
+
+// TestSanitizeToolCallIDs_UUIDFallbackEmptyResidue uses an ID with no keepable characters
+// ("::::") and expects a generated ID of at least 16 bytes, all ASCII alphanumeric.
+func TestSanitizeToolCallIDs_UUIDFallbackEmptyResidue(t *testing.T) {
+	toolCall := dto.ToolCallRequest{ID: "::::", Type: "function"}
+	toolCall.Function = dto.FunctionRequest{Name: "x", Arguments: "{}"}
+	req := &dto.GeneralOpenAIRequest{Messages: []dto.Message{{Role: "assistant"}}}
+	req.Messages[0].SetToolCalls([]dto.ToolCallRequest{toolCall})
+	SanitizeToolCallIDs(req)
+	parsed := req.Messages[0].ParseToolCalls()
+	require.Len(t, parsed, 1)
+	generated := parsed[0].ID
+	if len(generated) < 16 {
+		t.Errorf("uuid too short: %q", generated)
+	}
+	isAlnum := func(r rune) bool { return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') }
+	for _, r := range generated {
+		if !isAlnum(r) {
+			t.Errorf("uuid has bad char: %q", generated)
+		}
+	}
+}
+
+// TestSanitizeToolCallIDs_UUIDFallbackOver64 uses a 70-character ID made of
+// valid characters and expects a different ID of at most 64 bytes.
+func TestSanitizeToolCallIDs_UUIDFallbackOver64(t *testing.T) {
+	oversized := strings.Repeat("a", 70)
+	toolCall := dto.ToolCallRequest{ID: oversized, Type: "function", Function: dto.FunctionRequest{Name: "x", Arguments: "{}"}}
+	req := &dto.GeneralOpenAIRequest{Messages: []dto.Message{{Role: "assistant"}}}
+	req.Messages[0].SetToolCalls([]dto.ToolCallRequest{toolCall})
+	SanitizeToolCallIDs(req)
+	parsed := req.Messages[0].ParseToolCalls()
+	require.Len(t, parsed, 1)
+	replaced := parsed[0].ID
+	if replaced == oversized {
+		t.Errorf("70-char id should have been replaced")
+	}
+	if len(replaced) > 64 {
+		t.Errorf("replacement too long: %d", len(replaced))
+	}
+}
+
+func TestSanitizeToolCallIDs_ConsistentRemap(t *testing.T) {
+	req := &dto.GeneralOpenAIRequest{
+		Messages: []dto.Message{
+			{Role: "assistant"},
+			{Role: "tool", Content: "ok", ToolCallId: "::::"},
+		},
+	}
+	req.Messages[0].SetToolCalls([]dto.ToolCallRequest{
+		{ID: "::::", Type: "function", Function: dto.FunctionRequest{Name: "x", Arguments: "{}"}},
+	})
+	SanitizeToolCallIDs(req)
+	calls := req.Messages[0].ParseToolCalls()
+	require.Len(t, calls, 1)
+	newID := calls[0].ID
+	if req.Messages[1].ToolCallId != newID {
+		t.Errorf("tool message id not remapped: got=%q want=%q", req.Messages[1].ToolCallId, newID)
+	}
+}
+
+// TestSanitizeToolCallIDs_TypeDefaulted checks that a tool call declared
+// without a Type comes back with Type "function".
+func TestSanitizeToolCallIDs_TypeDefaulted(t *testing.T) {
+	untyped := dto.ToolCallRequest{ID: "ok", Function: dto.FunctionRequest{Name: "x", Arguments: "{}"}}
+	req := &dto.GeneralOpenAIRequest{Messages: []dto.Message{{Role: "assistant"}}}
+	req.Messages[0].SetToolCalls([]dto.ToolCallRequest{untyped})
+
+	SanitizeToolCallIDs(req)
+	parsed := req.Messages[0].ParseToolCalls()
+	require.Len(t, parsed, 1)
+	if got := parsed[0].Type; got != "function" {
+		t.Errorf("type=%q want function", got)
+	}
+}
+
+// TestSanitizeToolCallIDs_NoToolCallsNoOp runs the sanitizer over a lone user
+// message; it must not panic and must leave the message content untouched.
+func TestSanitizeToolCallIDs_NoToolCallsNoOp(t *testing.T) {
+	userMsg := dto.Message{Role: "user", Content: "hello"}
+	req := &dto.GeneralOpenAIRequest{Messages: []dto.Message{userMsg}}
+	SanitizeToolCallIDs(req)
+	if got := req.Messages[0].StringContent(); got != "hello" {
+		t.Errorf("content changed: %q", got)
+	}
+}
+
+func TestSanitizeToolCallIDs_NilRequest(t *testing.T) {
+	SanitizeToolCallIDs(nil) // a nil request must be tolerated; the test fails only if this call panics
+}