From 26072d7f48cec5310e43d5033a4dcd9c808f231b Mon Sep 17 00:00:00 2001
From: hanyunsushi <2424791234@qq.com>
Date: Sun, 31 May 2026 04:16:21 +0800
Subject: [PATCH 1/2] fix(apicompat): repair codex
 Responses<->Anthropic/ChatCompletions conversion

Several bugs in the OpenAI Responses API conversion layer caused requests
from codex (and other Responses clients) to fail or render blank when routed
through Anthropic upstreams or chat/completions-only upstreams.

Request direction (ResponsesInputItem):
- Arguments/Output were typed string but clients send object/array, causing
  502 (Anthropic path) or silent data loss (chat/completions path). Both are
  now json.RawMessage with normalization helpers.
- Tools with no parameters (e.g. namespace/web_search) produced a null
  input_schema -> Anthropic 422. Now backfilled with an empty object schema.
- web_search was emitted as the Anthropic server tool web_search_20250305,
  which some Anthropic-compatible upstreams do not implement (422). It is now
  a plain function tool.
- Top-level instructions and developer-role items were dropped instead of
  being mapped to the Anthropic system field; an empty or whitespace-only
  system is now omitted.
- system is now emitted in array form, which some upstreams require when
  tools are present.
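- reasoning_effort 'xhigh' is normalized to 'high' for chat/completions
  upstreams that only accept low/medium/high. Likewise 'extrahigh'/'max' map
  to 'high' and 'minimal'/'none' map to 'low'; unrecognized values are omitted
  so the upstream falls back to its default.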

Response direction (streaming -> Responses events):
- Emit response.content_part.added/done around output_text so strict clients
  (codex) have a content part to attach text to.
- output_item.done for message items now carries the full content, and for
  function_call items carries call_id/name/arguments, since codex collects
  final output and tool calls from OutputItemDone items.
- Skip empty-string content/reasoning deltas that produced ghost
  output_text.delta events and a blank render.

Adds polymorphic/tools/system/streaming test coverage.
---
 .../pkg/apicompat/anthropic_responses_test.go |   8 +-
 .../pkg/apicompat/anthropic_to_responses.go   |   4 +-
 .../anthropic_to_responses_response.go        |  88 +++++++-
 .../chatcompletions_empty_delta_test.go       | 162 ++++++++++++++
 .../chatcompletions_responses_bridge.go       | 101 +++++++--
 .../chatcompletions_responses_test.go         |   4 +-
 .../apicompat/chatcompletions_to_responses.go |   6 +-
 .../responses_input_item_polymorphic_test.go  | 191 +++++++++++++++++
 .../responses_to_anthropic_request.go         | 174 +++++++++++----
 ...esponses_to_anthropic_tools_system_test.go | 201 ++++++++++++++++++
 backend/internal/pkg/apicompat/types.go       |  31 ++-
 11 files changed, 896 insertions(+), 74 deletions(-)
 create mode 100644 backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
 create mode 100644 backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go
 create mode 100644 backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go

diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index 8997835c2aa..d8bcf5229b7 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -143,7 +143,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) {
 	assert.Empty(t, items[2].ID)
 	assert.Equal(t, "function_call_output", items[3].Type)
 	assert.Equal(t, "call_1", items[3].CallID)
-	assert.Equal(t, "Sunny, 72°F", items[3].Output)
+	assert.Equal(t, `"Sunny, 72°F"`, string(items[3].Output))
 }
 
 func TestAnthropicToResponses_ThinkingIgnored(t *testing.T) {
@@ -1340,7 +1340,7 @@ func TestAnthropicToResponses_ToolResultWithImage(t *testing.T) {
 	// function_call_output should have text-only output (no image).
 	assert.Equal(t, "function_call_output", items[2].Type)
 	assert.Equal(t, "toolu_1", items[2].CallID)
-	assert.Equal(t, "(empty)", items[2].Output)
+	assert.Equal(t, `"(empty)"`, string(items[2].Output))
 
 	// Image should be in a separate user message.
 	assert.Equal(t, "user", items[3].Role)
@@ -1377,7 +1377,7 @@ func TestAnthropicToResponses_ToolResultMixed(t *testing.T) {
 
 	// function_call_output should have text-only output.
 	assert.Equal(t, "function_call_output", items[2].Type)
-	assert.Equal(t, "File metadata: 800x600 PNG", items[2].Output)
+	assert.Equal(t, `"File metadata: 800x600 PNG"`, string(items[2].Output))
 
 	// Image should be in a separate user message.
 	assert.Equal(t, "user", items[3].Role)
@@ -1412,7 +1412,7 @@ func TestAnthropicToResponses_TextOnlyToolResultBackwardCompat(t *testing.T) {
 	require.Len(t, items, 3)
 
 	// Text-only tool_result should produce a plain string.
-	assert.Equal(t, "Sunny, 72°F", items[2].Output)
+	assert.Equal(t, `"Sunny, 72°F"`, string(items[2].Output))
 }
 
 func TestAnthropicToResponses_ImageEmptyMediaType(t *testing.T) {
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
index e2011bee0bf..bc29da07dd5 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -221,7 +221,7 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error)
 		out = append(out, ResponsesInputItem{
 			Type:   "function_call_output",
 			CallID: toResponsesCallID(b.ToolUseID),
-			Output: outputText,
+			Output: jsonRawString(outputText),
 		})
 		toolResultImageParts = append(toolResultImageParts, imageParts...)
 	}
@@ -302,7 +302,7 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e
 			Type:      "function_call",
 			CallID:    fcID,
 			Name:      b.Name,
-			Arguments: args,
+			Arguments: jsonRawString(args),
 		})
 	}
 
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
index de8ab78df89..d706339340d 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
@@ -5,6 +5,7 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
+	"strings"
 	"time"
 )
 
@@ -151,10 +152,20 @@ type AnthropicEventToResponsesState struct {
 
 	// For message output: accumulate text parts
 	ContentIndex int
+	// CurrentText accumulates the message's output_text so the terminal
+	// output_item.done can carry the full content. codex collects final text
+	// from OutputItemDone items, not from output_text.delta events, so the
+	// message item MUST include content:[{type:output_text,text:...}].
+	CurrentText string
 
 	// For function_call: track per-output info
 	CurrentCallID string
 	CurrentName   string
+	// CurrentArguments accumulates the function_call's argument JSON so the
+	// terminal output_item.done (and arguments.done) can carry the full args.
+	// codex reads the tool call from the OutputItemDone item; without
+	// call_id/name/arguments it cannot execute the tool and stalls.
+	CurrentArguments string
 
 	// Usage from message_start / message_delta. InputTokens here follows
 	// Anthropic semantics (excludes cached tokens); they are added back when
@@ -278,6 +289,7 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi
 			state.CurrentItemID = generateItemID()
 			state.CurrentItemType = "message"
 			state.ContentIndex = 0
+			state.CurrentText = ""
 
 			events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
 				OutputIndex: state.OutputIndex,
@@ -288,6 +300,21 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi
 					Status: "in_progress",
 				},
 			}))
+
+			// Emit response.content_part.added so clients (e.g. codex) know a
+			// text content part is starting. Without it the subsequent
+			// output_text.delta events have no part to attach to and the client
+			// renders nothing. Counterpart of the content_part.done emitted by
+			// anthToResHandleContentBlockStop.
+			events = append(events, makeResponsesEvent(state, "response.content_part.added", &ResponsesStreamEvent{
+				OutputIndex:  state.OutputIndex,
+				ContentIndex: state.ContentIndex,
+				ItemID:       state.CurrentItemID,
+				Part: &ResponsesContentPart{
+					Type: "output_text",
+					Text: "",
+				},
+			}))
 		}
 
 	case "tool_use":
@@ -298,6 +325,7 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi
 		state.CurrentItemType = "function_call"
 		state.CurrentCallID = toResponsesCallID(evt.ContentBlock.ID)
 		state.CurrentName = evt.ContentBlock.Name
+		state.CurrentArguments = ""
 
 		events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
 			OutputIndex: state.OutputIndex,
@@ -324,6 +352,7 @@ func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *Anthropi
 		if evt.Delta.Text == "" {
 			return nil
 		}
+		state.CurrentText += evt.Delta.Text
 		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{
 			OutputIndex:  state.OutputIndex,
 			ContentIndex: state.ContentIndex,
@@ -346,6 +375,7 @@ func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *Anthropi
 		if evt.Delta.PartialJSON == "" {
 			return nil
 		}
+		state.CurrentArguments += evt.Delta.PartialJSON
 		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.function_call_arguments.delta", &ResponsesStreamEvent{
 			OutputIndex: state.OutputIndex,
 			Delta:       evt.Delta.PartialJSON,
@@ -384,18 +414,32 @@ func anthToResHandleContentBlockStop(evt *AnthropicStreamEvent, state *Anthropic
 				ItemID:      state.CurrentItemID,
 				CallID:      state.CurrentCallID,
 				Name:        state.CurrentName,
+				Arguments:   nonEmptyArguments(state.CurrentArguments),
 			}),
 		}
 		events = append(events, closeCurrentResponsesItem(state)...)
 		return events
 
 	case "message":
-		// Emit output_text.done (text block is done, but message item stays open for potential more blocks)
+		// Text block done: emit output_text.done then content_part.done.
+		// The message item stays open for potential more blocks; it is closed
+		// later by closeCurrentResponsesItem. content_part.done mirrors the
+		// content_part.added emitted in anthToResHandleContentBlockStart.
 		return []ResponsesStreamEvent{
 			makeResponsesEvent(state, "response.output_text.done", &ResponsesStreamEvent{
 				OutputIndex:  state.OutputIndex,
 				ContentIndex: state.ContentIndex,
 				ItemID:       state.CurrentItemID,
+				Text:         state.CurrentText,
+			}),
+			makeResponsesEvent(state, "response.content_part.done", &ResponsesStreamEvent{
+				OutputIndex:  state.OutputIndex,
+				ContentIndex: state.ContentIndex,
+				ItemID:       state.CurrentItemID,
+				Part: &ResponsesContentPart{
+					Type: "output_text",
+					Text: state.CurrentText,
+				},
 			}),
 		}
 	}
@@ -450,25 +494,57 @@ func closeCurrentResponsesItem(state *AnthropicEventToResponsesState) []Response
 
 	itemType := state.CurrentItemType
 	itemID := state.CurrentItemID
+	currentText := state.CurrentText
+	currentCallID := state.CurrentCallID
+	currentName := state.CurrentName
+	currentArgs := state.CurrentArguments
 
 	// Reset
 	state.CurrentItemType = ""
 	state.CurrentItemID = ""
 	state.CurrentCallID = ""
 	state.CurrentName = ""
+	state.CurrentText = ""
+	state.CurrentArguments = ""
 	state.OutputIndex++
 	state.ContentIndex = 0
 
+	// The terminal item carries its full content. codex collects final output
+	// from OutputItemDone items (not from the delta events), so an item missing
+	// its content/arguments renders blank or cannot be executed as a tool call.
+	doneItem := &ResponsesOutput{
+		Type:   itemType,
+		ID:     itemID,
+		Status: "completed",
+	}
+	switch itemType {
+	case "message":
+		doneItem.Role = "assistant"
+		doneItem.Content = []ResponsesContentPart{{
+			Type: "output_text",
+			Text: currentText,
+		}}
+	case "function_call":
+		doneItem.CallID = currentCallID
+		doneItem.Name = currentName
+		doneItem.Arguments = nonEmptyArguments(currentArgs)
+	}
+
 	return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
 		OutputIndex: state.OutputIndex - 1, // Use the index before increment
-		Item: &ResponsesOutput{
-			Type:   itemType,
-			ID:     itemID,
-			Status: "completed",
-		},
+		Item:        doneItem,
 	})}
 }
 
+// nonEmptyArguments replaces blank function_call arguments with "{}" (no JSON validation).
+// Anthropic tool_use with no input produces an empty string; codex expects at least "{}".
+func nonEmptyArguments(args string) string {
+	if strings.TrimSpace(args) == "" {
+		return "{}"
+	}
+	return args
+}
+
 func makeResponsesCreatedEvent(state *AnthropicEventToResponsesState) ResponsesStreamEvent {
 	seq := state.SequenceNumber
 	state.SequenceNumber++
diff --git a/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
new file mode 100644
index 00000000000..7bd5d889d23
--- /dev/null
+++ b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
@@ -0,0 +1,162 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// strptr is a local helper for *string fields.
+func strptr(s string) *string { return &s }
+
+// Reproduces the mimo "thinking done, nothing shown" bug: the upstream emits a
+// leading {"content":""} chunk (non-nil, empty). The bridge must NOT emit a
+// response.output_text.delta for it (the delta would serialize empty and a
+// premature message item would be created), and must still stream the real
+// content that follows.
+func TestChatChunkToResponses_SkipsEmptyContentDelta(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+
+	// chunk 1: empty content (some upstreams send a leading empty chunk) — no text delta
+	c1 := &ChatCompletionsChunk{
+		ID:      "c1",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{Role: "assistant", Content: strptr("")}}},
+	}
+	ev1 := ChatCompletionsChunkToResponsesEvents(c1, state)
+	for _, e := range ev1 {
+		assert.NotEqual(t, "response.output_text.delta", e.Type,
+			"empty content must not emit an output_text delta")
+	}
+
+	// chunk 2: real content — must emit a delta carrying the text
+	c2 := &ChatCompletionsChunk{
+		ID:      "c1",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hello")}}},
+	}
+	ev2 := ChatCompletionsChunkToResponsesEvents(c2, state)
+	var sawDelta bool
+	for _, e := range ev2 {
+		if e.Type == "response.output_text.delta" {
+			sawDelta = true
+			assert.Equal(t, "Hello", e.Delta)
+		}
+	}
+	assert.True(t, sawDelta, "real content must emit an output_text delta")
+}
+
+func TestChatChunkToResponses_SkipsEmptyReasoningDelta(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	c := &ChatCompletionsChunk{
+		ID:      "c1",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{ReasoningContent: strptr("")}}},
+	}
+	ev := ChatCompletionsChunkToResponsesEvents(c, state)
+	for _, e := range ev {
+		assert.NotEqual(t, "response.reasoning_summary_text.delta", e.Type,
+			"empty reasoning_content must not emit a reasoning delta")
+	}
+}
+
+// Full mimo-shaped stream: empty content → reasoning → real content. The final
+// visible text must be exactly the real content, and at least one non-empty
+// output_text delta must reach the client.
+func TestChatChunkToResponses_MimoShapedStream(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	chunks := []*ChatCompletionsChunk{
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Role: "assistant", Content: strptr("")}}}},
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{ReasoningContent: strptr("thinking...")}}}},
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hi")}}}},
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("!")}}}},
+	}
+	var textDeltas []string
+	for _, c := range chunks {
+		for _, e := range ChatCompletionsChunkToResponsesEvents(c, state) {
+			if e.Type == "response.output_text.delta" {
+				textDeltas = append(textDeltas, e.Delta)
+			}
+		}
+	}
+	// every emitted text delta is non-empty
+	for _, d := range textDeltas {
+		assert.NotEqual(t, "", d)
+	}
+	assert.Equal(t, "Hi!", strings.Join(textDeltas, ""))
+}
+
+// codex requires response.content_part.added before output_text deltas and
+// content_part.done at the end; without them it renders nothing.
+func TestChatChunkToResponses_EmitsContentPartEvents(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	var types []string
+	for _, c := range []*ChatCompletionsChunk{
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hi")}}}},
+	} {
+		for _, e := range ChatCompletionsChunkToResponsesEvents(c, state) {
+			types = append(types, e.Type)
+		}
+	}
+	for _, e := range FinalizeChatCompletionsResponsesStream(state) {
+		types = append(types, e.Type)
+	}
+	assert.Contains(t, types, "response.content_part.added")
+	assert.Contains(t, types, "response.content_part.done")
+	// content_part.added must come before the first output_text.delta
+	iAdded, iDelta := -1, -1
+	for i, ty := range types {
+		if ty == "response.content_part.added" && iAdded < 0 {
+			iAdded = i
+		}
+		if ty == "response.output_text.delta" && iDelta < 0 {
+			iDelta = i
+		}
+	}
+	assert.GreaterOrEqual(t, iDelta, 0)
+	assert.GreaterOrEqual(t, iAdded, 0)
+	assert.Less(t, iAdded, iDelta, "content_part.added must precede output_text.delta")
+}
+
+// codex collects final text from OutputItemDone items, so the message item in
+// response.output_item.done must carry content with the accumulated text.
+func TestChatChunkToResponses_OutputItemDoneCarriesContent(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	for _, c := range []*ChatCompletionsChunk{
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hello world")}}}},
+	} {
+		ChatCompletionsChunkToResponsesEvents(c, state)
+	}
+	var found bool
+	for _, e := range FinalizeChatCompletionsResponsesStream(state) {
+		if e.Type == "response.output_item.done" && e.Item != nil && e.Item.Type == "message" {
+			found = true
+			require.Len(t, e.Item.Content, 1)
+			assert.Equal(t, "output_text", e.Item.Content[0].Type)
+			assert.Equal(t, "Hello world", e.Item.Content[0].Text)
+		}
+	}
+	assert.True(t, found, "must emit message output_item.done with content")
+}
+
+// Some chat/completions upstreams reject reasoning_effort "xhigh"
+// (only low/medium/high allowed). It must be normalized to high.
+func TestResponsesToChatCompletions_XhighReasoningNormalized(t *testing.T) {
+	body := []byte(`{"model":"gpt-5.5","reasoning":{"effort":"xhigh"},"input":[{"role":"user","content":[{"type":"input_text","text":"hi"}]}]}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+	cc, err := ResponsesToChatCompletionsRequest(&req)
+	require.NoError(t, err)
+	assert.Equal(t, "high", cc.ReasoningEffort, "xhigh must be normalized to high for chat/completions")
+}
+
+func TestNormalizeChatReasoningEffort(t *testing.T) {
+	assert.Equal(t, "high", normalizeChatReasoningEffort("xhigh"))
+	assert.Equal(t, "high", normalizeChatReasoningEffort("high"))
+	assert.Equal(t, "high", normalizeChatReasoningEffort("max"))
+	assert.Equal(t, "medium", normalizeChatReasoningEffort("medium"))
+	assert.Equal(t, "low", normalizeChatReasoningEffort("low"))
+	assert.Equal(t, "low", normalizeChatReasoningEffort("minimal"))
+	assert.Equal(t, "", normalizeChatReasoningEffort(""))
+	assert.Equal(t, "", normalizeChatReasoningEffort("bogus"))
+}
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
index 09b680c7c73..0647a260b3c 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
@@ -30,7 +30,7 @@ func ResponsesToChatCompletionsRequest(req *ResponsesRequest) (*ChatCompletionsR
 		ServiceTier:         req.ServiceTier,
 	}
 	if req.Reasoning != nil {
-		out.ReasoningEffort = req.Reasoning.Effort
+		out.ReasoningEffort = normalizeChatReasoningEffort(req.Reasoning.Effort)
 	}
 	if len(req.Tools) > 0 {
 		out.Tools = responsesToolsToChatTools(req.Tools)
@@ -93,7 +93,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage)
 		itemType := rawString(item["type"])
 		switch itemType {
 		case "function_call":
-			arguments := rawString(item["arguments"])
+			arguments := responsesArgumentsToChatString(item["arguments"])
 			if strings.TrimSpace(arguments) == "" {
 				arguments = "{}"
 			}
@@ -110,7 +110,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage)
 			})
 			continue
 		case "function_call_output":
-			content, _ := json.Marshal(rawString(item["output"]))
+			content, _ := json.Marshal(extractResponsesOutputText(item["output"]))
 			messages = append(messages, ChatMessage{
 				Role:       "tool",
 				ToolCallID: rawString(item["call_id"]),
@@ -490,7 +490,12 @@ func ChatCompletionsChunkToResponsesEvents(
 	events = append(events, ensureChatToResponsesCreated(state)...)
 
 	for _, choice := range chunk.Choices {
-		if choice.Delta.Content != nil {
+		// Skip empty-string content deltas. Some upstreams emit a
+		// leading {"content":""} chunk; it is non-nil but carries no text, and
+		// emitting it produces a response.output_text.delta with an empty delta
+		// (omitempty drops the field entirely) plus a premature message item —
+		// codex then shows "thinking" with no visible output.
+		if choice.Delta.Content != nil && *choice.Delta.Content != "" {
 			events = append(events, ensureChatToResponsesMessageItem(state)...)
 			_, _ = state.Text.WriteString(*choice.Delta.Content)
 			events = append(events, chatToResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{
@@ -500,7 +505,7 @@ func ChatCompletionsChunkToResponsesEvents(
 				ItemID:       state.MessageItemID,
 			}))
 		}
-		if choice.Delta.ReasoningContent != nil {
+		if choice.Delta.ReasoningContent != nil && *choice.Delta.ReasoningContent != "" {
 			_, _ = state.Reasoning.WriteString(*choice.Delta.ReasoningContent)
 			events = append(events, chatToResponsesEvent(state, "response.reasoning_summary_text.delta", &ResponsesStreamEvent{
 				OutputIndex:  0,
@@ -572,6 +577,16 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr
 			Text:         state.Text.String(),
 			ItemID:       state.MessageItemID,
 		}))
+		// content_part.done mirrors content_part.added from ensureChatToResponsesMessageItem.
+		events = append(events, chatToResponsesEvent(state, "response.content_part.done", &ResponsesStreamEvent{
+			OutputIndex:  0,
+			ContentIndex: 0,
+			ItemID:       state.MessageItemID,
+			Part: &ResponsesContentPart{
+				Type: "output_text",
+				Text: state.Text.String(),
+			},
+		}))
 		events = append(events, chatToResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
 			OutputIndex: 0,
 			Item: &ResponsesOutput{
@@ -579,6 +594,12 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr
 				ID:     state.MessageItemID,
 				Role:   "assistant",
 				Status: "completed",
+				// codex collects final text from OutputItemDone items, so the
+				// message item must carry its full content, not just status.
+				Content: []ResponsesContentPart{{
+					Type: "output_text",
+					Text: state.Text.String(),
+				}},
 			},
 		}))
 	}
@@ -626,15 +647,28 @@ func ensureChatToResponsesMessageItem(state *ChatCompletionsToResponsesStreamSta
 		return nil
 	}
 	state.MessageItemID = generateItemID()
-	return []ResponsesStreamEvent{chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
-		OutputIndex: 0,
-		Item: &ResponsesOutput{
-			Type:   "message",
-			ID:     state.MessageItemID,
-			Role:   "assistant",
-			Status: "in_progress",
-		},
-	})}
+	return []ResponsesStreamEvent{
+		chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
+			OutputIndex: 0,
+			Item: &ResponsesOutput{
+				Type:   "message",
+				ID:     state.MessageItemID,
+				Role:   "assistant",
+				Status: "in_progress",
+			},
+		}),
+		// content_part.added must precede output_text.delta or strict clients
+		// (codex) have no part to attach text to and render nothing.
+		chatToResponsesEvent(state, "response.content_part.added", &ResponsesStreamEvent{
+			OutputIndex:  0,
+			ContentIndex: 0,
+			ItemID:       state.MessageItemID,
+			Part: &ResponsesContentPart{
+				Type: "output_text",
+				Text: "",
+			},
+		}),
+	}
 }
 
 func (state *ChatCompletionsToResponsesStreamState) chatOutput() []ResponsesOutput {
@@ -695,6 +729,45 @@ func chatToResponsesEvent(
 	return evt
 }
 
+// normalizeChatReasoningEffort maps a Responses reasoning effort to a value the
+// Chat Completions protocol accepts. The Responses API allows "xhigh" (codex's
+// highest tier for gpt-5.5 etc.), but chat/completions upstreams (and the OpenAI
+// chat/completions schema) only accept low/medium/high and reject "xhigh" with a 400.
+// Maps xhigh/extrahigh/max→high and minimal/none→low; passes low/medium/high through; drops unknown/empty.
+func normalizeChatReasoningEffort(effort string) string {
+	switch strings.ToLower(strings.TrimSpace(effort)) {
+	case "xhigh", "extrahigh", "max", "high":
+		return "high"
+	case "medium":
+		return "medium"
+	case "low", "minimal", "none":
+		return "low"
+	default:
+		return "" // omit unknown/empty so the upstream uses its default
+	}
+}
+
+// responsesArgumentsToChatString converts a Responses function_call.arguments
+// field into the stringified-JSON form required by Chat Completions
+// (ChatFunctionCall.Arguments is a string).
+//
+//   - stringified JSON: "{\"x\":1}" → use the inner string as-is
+//   - raw JSON object:   {"x":1}     → use the raw JSON text as the string
+//   - empty/absent                   → ""
+func responsesArgumentsToChatString(raw json.RawMessage) string {
+	trimmed := json.RawMessage(strings.TrimSpace(string(raw)))
+	if len(trimmed) == 0 || string(trimmed) == "null" {
+		return ""
+	}
+	// Already a JSON string — return the inner value verbatim.
+	var s string
+	if err := json.Unmarshal(trimmed, &s); err == nil {
+		return s
+	}
+	// Object/array/other JSON — return the trimmed raw text as-is (no re-encoding or compaction).
+	return string(trimmed)
+}
+
 func rawString(raw json.RawMessage) string {
 	raw = bytesTrimSpace(raw)
 	if len(raw) == 0 || string(raw) == "null" {
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
index b03b012fc7a..c0c7384b1b2 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -105,7 +105,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) {
 	// Check function_call_output item
 	assert.Equal(t, "function_call_output", items[2].Type)
 	assert.Equal(t, "call_1", items[2].CallID)
-	assert.Equal(t, "pong", items[2].Output)
+	assert.Equal(t, `"pong"`, string(items[2].Output))
 
 	// Check tools
 	require.Len(t, resp.Tools, 1)
@@ -614,7 +614,7 @@ func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) {
 	require.Len(t, items, 3)
 	assert.Equal(t, "function_call_output", items[2].Type)
 	assert.Equal(t, "call_1", items[2].CallID)
-	assert.Equal(t, "image width: 100; image height: 200", items[2].Output)
+	assert.Equal(t, `"image width: 100; image height: 200"`, string(items[2].Output))
 }
 
 func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
index 463bdd0d15d..a7459bdeb45 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -194,7 +194,7 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 			Type:      "function_call",
 			CallID:    tc.ID,
 			Name:      tc.Function.Name,
-			Arguments: args,
+			Arguments: jsonRawString(args),
 		})
 	}
 
@@ -284,7 +284,7 @@ func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 	return []ResponsesInputItem{{
 		Type:   "function_call_output",
 		CallID: m.ToolCallID,
-		Output: output,
+		Output: jsonRawString(output),
 	}}, nil
 }
 
@@ -302,7 +302,7 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 	return []ResponsesInputItem{{
 		Type:   "function_call_output",
 		CallID: m.Name,
-		Output: output,
+		Output: jsonRawString(output),
 	}}, nil
 }
 
diff --git a/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go b/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go
new file mode 100644
index 00000000000..e0108512de4
--- /dev/null
+++ b/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go
@@ -0,0 +1,191 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// These tests cover the fix for codex (and newer Responses clients) sending
+// function_call.arguments as a JSON object and function_call_output.output as
+// a JSON array. Before the fix, ResponsesInputItem.Arguments / .Output were
+// typed `string`, so json.Unmarshal failed:
+//   - Responses→Anthropic path (ResponsesToAnthropicRequest): HTTP 502
+//   - Responses→ChatCompletions path (ResponsesToChatCompletionsRequest):
+//     silent data loss (rawString returned "" for non-string values)
+
+// --- helper-level tests ---------------------------------------------------
+
+func TestNormalizeResponsesArguments(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{"object", `{"x":1}`, `{"x":1}`},
+		{"stringified", `"{\"x\":1}"`, `{"x":1}`},
+		{"empty string", `""`, `{}`},
+		{"empty raw", ``, `{}`},
+		{"null", `null`, `{}`},
+		{"non-json string", `"not json"`, `{}`},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := normalizeResponsesArguments(json.RawMessage(tc.in))
+			assert.JSONEq(t, tc.want, string(got))
+		})
+	}
+}
+
+func TestExtractResponsesOutputText(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{"plain string", `"result"`, "result"},
+		{"array one part", `[{"type":"output_text","text":"result"}]`, "result"},
+		{"array two parts", `[{"type":"output_text","text":"a"},{"type":"output_text","text":"b"}]`, "a\n\nb"},
+		{"empty raw", ``, ""},
+		{"null", `null`, ""},
+		{"empty array", `[]`, ""},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := extractResponsesOutputText(json.RawMessage(tc.in))
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// --- Responses→Anthropic path: must not 502 ----------------------------
+
+func TestResponsesToAnthropicRequest_FunctionCallObjectArguments(t *testing.T) {
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call", "call_id": "c1", "name": "foo", "arguments": {"x": 1}}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err) // before fix: "cannot unmarshal object ... arguments of type string"
+	require.NotNil(t, anth)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.Equal(t, "tool_use", blocks[0].Type)
+	assert.Equal(t, "foo", blocks[0].Name)
+	assert.JSONEq(t, `{"x":1}`, string(blocks[0].Input))
+}
+
+func TestResponsesToAnthropicRequest_FunctionCallStringifiedArguments(t *testing.T) {
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call", "call_id": "c1", "name": "foo", "arguments": "{\"x\":1}"}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.JSONEq(t, `{"x":1}`, string(blocks[0].Input))
+}
+
+func TestResponsesToAnthropicRequest_FunctionCallOutputArray(t *testing.T) {
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call_output", "call_id": "c1",
+			 "output": [{"type": "output_text", "text": "result"}]}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err) // before fix: "cannot unmarshal array ... output of type string"
+	require.NotNil(t, anth)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.Equal(t, "tool_result", blocks[0].Type)
+	assert.Equal(t, "toolu_c1", blocks[0].ToolUseID) // call_id is namespaced for Anthropic
+	assert.JSONEq(t, `"result"`, string(blocks[0].Content))
+}
+
+func TestResponsesToAnthropicRequest_FunctionCallOutputString(t *testing.T) {
+	// Backward compatibility: older clients send output as a plain string.
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call_output", "call_id": "c1", "output": "result"}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.JSONEq(t, `"result"`, string(blocks[0].Content))
+}
+
+// --- Responses→ChatCompletions path: must not drop data ----------------
+
+func TestResponsesToChatCompletionsRequest_FunctionCallObjectArguments(t *testing.T) {
+	body := []byte(`{
+		"model": "gpt-5.4",
+		"input": [
+			{"type": "function_call", "call_id": "c1", "name": "foo", "arguments": {"x": 1}}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	cc, err := ResponsesToChatCompletionsRequest(&req)
+	require.NoError(t, err)
+	require.Len(t, cc.Messages, 1)
+	require.Len(t, cc.Messages[0].ToolCalls, 1)
+	// Chat Completions requires arguments to be a stringified JSON object;
+	// before the fix rawString returned "" and it degraded to "{}".
+	assert.JSONEq(t, `{"x":1}`, cc.Messages[0].ToolCalls[0].Function.Arguments)
+}
+
+func TestResponsesToChatCompletionsRequest_FunctionCallOutputArray(t *testing.T) {
+	body := []byte(`{
+		"model": "gpt-5.4",
+		"input": [
+			{"type": "function_call_output", "call_id": "c1",
+			 "output": [{"type": "output_text", "text": "result"}]}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	cc, err := ResponsesToChatCompletionsRequest(&req)
+	require.NoError(t, err)
+	require.Len(t, cc.Messages, 1)
+	assert.Equal(t, "tool", cc.Messages[0].Role)
+	// before the fix rawString returned "" → tool result content lost.
+	assert.JSONEq(t, `"result"`, string(cc.Messages[0].Content))
+}
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
index 8fa652f2bd1..37c8f258e13 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
@@ -11,7 +11,7 @@ import (
 // enables Anthropic platform groups to accept OpenAI Responses API requests
 // by converting them to the native /v1/messages format before forwarding upstream.
 func ResponsesToAnthropicRequest(req *ResponsesRequest) (*AnthropicRequest, error) {
-	system, messages, err := convertResponsesInputToAnthropic(req.Input)
+	system, messages, err := convertResponsesInputToAnthropic(req.Instructions, req.Input)
 	if err != nil {
 		return nil, err
 	}
@@ -98,14 +98,27 @@ func mapResponsesEffortToAnthropic(effort string) string {
 }
 
 // convertResponsesInputToAnthropic extracts system prompt and messages from
-// a Responses API input array. Returns the system as raw JSON (for Anthropic's
-// polymorphic system field) and a list of Anthropic messages.
-func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) {
+// a Responses API request. The system prompt is sourced from (in priority
+// order, concatenated): the top-level `instructions` field (codex's primary
+// system prompt) and any system/developer role items in the input array.
+// Returns the system as raw JSON (for Anthropic's polymorphic system field)
+// and a list of Anthropic messages.
+//
+// codex sends its ~20KB system prompt in `instructions` and additional context
+// in `developer` role items. Both must map to Anthropic's system field: neither
+// dropped (the old code ignored both, leaving Claude without instructions) nor
+// leaked into a user message as raw input_text blocks (which caused 422).
+func convertResponsesInputToAnthropic(instructions string, inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) {
+	var systemParts []string
+	if s := strings.TrimSpace(instructions); s != "" {
+		systemParts = append(systemParts, s)
+	}
+
 	// Try as plain string input.
 	var inputStr string
 	if err := json.Unmarshal(inputRaw, &inputStr); err == nil {
 		content, _ := json.Marshal(inputStr)
-		return nil, []AnthropicMessage{{Role: "user", Content: content}}, nil
+		return buildSystemJSON(systemParts), []AnthropicMessage{{Role: "user", Content: content}}, nil
 	}
 
 	var items []ResponsesInputItem
@@ -113,29 +126,23 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage
 		return nil, nil, fmt.Errorf("parse responses input: %w", err)
 	}
 
-	var system json.RawMessage
 	var messages []AnthropicMessage
 
 	for _, item := range items {
 		switch {
-		case item.Role == "system":
-			// System prompt → Anthropic system field
-			text := extractTextFromContent(item.Content)
-			if text != "" {
-				system, _ = json.Marshal(text)
+		case item.Role == "system" || item.Role == "developer":
+			// system / developer → Anthropic system field
+			if text := strings.TrimSpace(extractTextFromContent(item.Content)); text != "" {
+				systemParts = append(systemParts, text)
 			}
 
 		case item.Type == "function_call":
 			// function_call → assistant message with tool_use block
-			input := json.RawMessage("{}")
-			if item.Arguments != "" {
-				input = json.RawMessage(item.Arguments)
-			}
 			block := AnthropicContentBlock{
 				Type:  "tool_use",
 				ID:    fromResponsesCallIDToAnthropic(item.CallID),
 				Name:  item.Name,
-				Input: input,
+				Input: normalizeResponsesArguments(item.Arguments),
 			}
 			blockJSON, _ := json.Marshal([]AnthropicContentBlock{block})
 			messages = append(messages, AnthropicMessage{
@@ -145,7 +152,7 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage
 
 		case item.Type == "function_call_output":
 			// function_call_output → user message with tool_result block
-			outputContent := item.Output
+			outputContent := extractResponsesOutputText(item.Output)
 			if outputContent == "" {
 				outputContent = "(empty)"
 			}
@@ -195,7 +202,31 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage
 	// Merge consecutive same-role messages (Anthropic requires alternating roles)
 	messages = mergeConsecutiveMessages(messages)
 
-	return system, messages, nil
+	return buildSystemJSON(systemParts), messages, nil
+}
+
+// buildSystemJSON joins collected system prompt fragments into Anthropic's
+// system field. Returns nil when there is no non-empty content, so the system
+// field is omitted entirely — Anthropic returns 422 for an empty or
+// whitespace-only system.
+//
+// The system is emitted in ARRAY form ([{"type":"text","text":...}]), not as a
+// bare JSON string. Both are valid per the Anthropic spec and the official
+// Claude Code client uses the array form, but some third-party Anthropic-
+// compatible upstreams return 422 when a string-form system is
+// combined with tools. The array form works in every case.
+func buildSystemJSON(parts []string) json.RawMessage {
+	joined := strings.TrimSpace(strings.Join(parts, "\n\n"))
+	if joined == "" {
+		return nil
+	}
+	out, err := json.Marshal([]map[string]string{
+		{"type": "text", "text": joined},
+	})
+	if err != nil {
+		return nil
+	}
+	return out
 }
 
 // extractTextFromContent extracts text from a content field that may be a
@@ -386,30 +417,29 @@ func parseContentBlocks(raw json.RawMessage) []AnthropicContentBlock {
 
 // convertResponsesToAnthropicTools maps Responses API tools to Anthropic format.
 // Reverse of convertAnthropicToolsToResponses.
+//
+// Every emitted tool must carry a valid input_schema: Anthropic rejects the
+// whole request with 422 if any tool has a null/missing schema. Responses tools
+// of type "namespace" (codex MCP/agent tools) and bare "web_search" carry no
+// `parameters`, so they must be backfilled with an empty object schema.
+//
+// web_search is intentionally NOT translated to the Anthropic server-side
+// web_search_20250305 tool: some third-party Anthropic-compatible upstreams do
+// not implement server tools and return 422. Emitting it as a regular function
+// tool keeps the request valid; the upstream model simply sees a callable
+// named web_search.
 func convertResponsesToAnthropicTools(tools []ResponsesTool) []AnthropicTool {
 	var out []AnthropicTool
 	for _, t := range tools {
-		switch t.Type {
-		case "web_search", "google_search", "web_search_20250305":
-			out = append(out, AnthropicTool{
-				Type: "web_search_20250305",
-				Name: "web_search",
-			})
-		case "function":
-			out = append(out, AnthropicTool{
-				Name:        t.Name,
-				Description: t.Description,
-				InputSchema: normalizeAnthropicInputSchema(t.Parameters),
-			})
-		default:
-			// Pass through unknown tool types
-			out = append(out, AnthropicTool{
-				Type:        t.Type,
-				Name:        t.Name,
-				Description: t.Description,
-				InputSchema: t.Parameters,
-			})
+		name := t.Name
+		if name == "" && t.Type == "web_search" {
+			name = "web_search"
 		}
+		out = append(out, AnthropicTool{
+			Name:        name,
+			Description: t.Description,
+			InputSchema: normalizeAnthropicInputSchema(t.Parameters),
+		})
 	}
 	return out
 }
@@ -471,3 +501,71 @@ func convertResponsesToAnthropicToolChoice(raw json.RawMessage) (json.RawMessage
 	// Pass through unknown
 	return raw, nil
 }
+
+// normalizeResponsesArguments converts a Responses function_call.arguments
+// field into a JSON object suitable for Anthropic's tool_use.input.
+//
+// The arguments field has three observed shapes:
+//   - stringified JSON: "{\"x\":1}"  → unwrap one layer → {"x":1}
+//   - raw JSON object:   {"x":1}      → use as-is
+//   - empty/absent                    → {}
+//
+// Empty, null, and empty or non-JSON string payloads fall back to {}. Any other
+// valid JSON value is passed through unchanged, even when it is not an object.
+func normalizeResponsesArguments(raw json.RawMessage) json.RawMessage {
+	trimmed := json.RawMessage(strings.TrimSpace(string(raw)))
+	if len(trimmed) == 0 || string(trimmed) == "null" {
+		return json.RawMessage("{}")
+	}
+
+	// Case 1: stringified JSON — unwrap one layer.
+	var s string
+	if err := json.Unmarshal(trimmed, &s); err == nil {
+		inner := strings.TrimSpace(s)
+		if inner == "" {
+			return json.RawMessage("{}")
+		}
+		if json.Valid([]byte(inner)) {
+			return json.RawMessage(inner)
+		}
+		return json.RawMessage("{}")
+	}
+
+	// Case 2: already a JSON object/value — use as-is.
+	return trimmed
+}
+
+// extractResponsesOutputText converts a Responses function_call_output.output
+// field into a plain string for Anthropic's tool_result.content.
+//
+// The output field has three observed shapes:
+//   - plain string: "result"                                  → use as-is
+//   - array of content parts: [{"type":"output_text",...}]    → join the text
+//   - empty/absent                                            → ""
+func extractResponsesOutputText(raw json.RawMessage) string {
+	trimmed := json.RawMessage(strings.TrimSpace(string(raw)))
+	if len(trimmed) == 0 || string(trimmed) == "null" {
+		return ""
+	}
+
+	// Case 1: plain string.
+	var s string
+	if err := json.Unmarshal(trimmed, &s); err == nil {
+		return s
+	}
+
+	// Case 2: array of content parts.
+	var parts []ResponsesContentPart
+	if err := json.Unmarshal(trimmed, &parts); err == nil {
+		var texts []string
+		for _, p := range parts {
+			if p.Text != "" {
+				texts = append(texts, p.Text)
+			}
+		}
+		return strings.Join(texts, "\n\n")
+	}
+
+	// Case 3: unknown structure — pass through raw JSON so content is not lost.
+	return string(trimmed)
+}
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go b/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go
new file mode 100644
index 00000000000..5b270f8ffde
--- /dev/null
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go
@@ -0,0 +1,201 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// These tests cover the codex → Responses → Anthropic conversion fixes that
+// eliminated upstream 422s:
+//  1. tools with no parameters (type "namespace"/"web_search") must get a
+//     valid input_schema, never null
+//  2. web_search must be a regular function tool, not an Anthropic server tool
+//     (some third-party upstreams do not implement server tools → 422)
+//  3. codex's top-level `instructions` must map to the Anthropic system field
+//  4. `developer` role items must map to system, not leak as user input_text
+//  5. an empty/whitespace system must be omitted (Anthropic 422s on empty system)
+
+func anthReqFrom(t *testing.T, body string) *AnthropicRequest {
+	t.Helper()
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal([]byte(body), &req))
+	out, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err)
+	return out
+}
+
+// systemText extracts the concatenated text from an Anthropic system field,
+// which buildSystemJSON emits in array form ([{"type":"text","text":...}]).
+func systemText(t *testing.T, raw json.RawMessage) string {
+	t.Helper()
+	if len(raw) == 0 {
+		return ""
+	}
+	// array form
+	var parts []struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	}
+	if err := json.Unmarshal(raw, &parts); err == nil {
+		var sb []string
+		for _, p := range parts {
+			sb = append(sb, p.Text)
+		}
+		return strings.Join(sb, "\n\n")
+	}
+	// string form (fallback)
+	var s string
+	require.NoError(t, json.Unmarshal(raw, &s))
+	return s
+}
+
+func TestResponsesToAnthropic_ToolWithoutParametersGetsSchema(t *testing.T) {
+	// codex namespace tools (mcp__*, multi_agent_v1, codex_app) carry no parameters.
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
+		"tools": [
+			{"type":"namespace","name":"mcp__codegraph","description":"graph"},
+			{"type":"namespace","name":"codex_app"}
+		]
+	}`)
+	require.Len(t, out.Tools, 2)
+	for _, tool := range out.Tools {
+		require.NotEmpty(t, tool.InputSchema, "tool %s must have non-null input_schema", tool.Name)
+		assert.NotEqual(t, "null", string(tool.InputSchema))
+		// must be a valid object schema
+		var sch map[string]any
+		require.NoError(t, json.Unmarshal(tool.InputSchema, &sch))
+		assert.Equal(t, "object", sch["type"])
+	}
+}
+
+func TestResponsesToAnthropic_WebSearchIsFunctionToolNotServerTool(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
+		"tools": [{"type":"web_search"}]
+	}`)
+	require.Len(t, out.Tools, 1)
+	tool := out.Tools[0]
+	assert.Equal(t, "web_search", tool.Name)
+	// must NOT be emitted as Anthropic server tool web_search_20250305
+	assert.NotEqual(t, "web_search_20250305", tool.Type)
+	assert.Empty(t, tool.Type, "web_search must be a plain function tool, not a server tool")
+	require.NotEmpty(t, tool.InputSchema)
+	assert.NotEqual(t, "null", string(tool.InputSchema))
+}
+
+func TestResponsesToAnthropic_FunctionToolSchemaPreserved(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
+		"tools": [{"type":"function","name":"exec","description":"run","parameters":{"type":"object","properties":{"cmd":{"type":"string"}}}}]
+	}`)
+	require.Len(t, out.Tools, 1)
+	assert.Equal(t, "exec", out.Tools[0].Name)
+	var sch map[string]any
+	require.NoError(t, json.Unmarshal(out.Tools[0].InputSchema, &sch))
+	props, _ := sch["properties"].(map[string]any)
+	assert.Contains(t, props, "cmd")
+}
+
+func TestResponsesToAnthropic_InstructionsBecomeSystem(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"instructions": "You are a coding agent.",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}]
+	}`)
+	require.NotEmpty(t, out.System)
+	sys := systemText(t, out.System)
+	assert.Contains(t, sys, "You are a coding agent.")
+}
+
+func TestResponsesToAnthropic_DeveloperRoleBecomesSystem(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"role":"developer","content":[{"type":"input_text","text":"Follow the rules."}]},
+			{"role":"user","content":[{"type":"input_text","text":"hi"}]}
+		]
+	}`)
+	// developer content must be in system, not leaked into a user message
+	require.NotEmpty(t, out.System)
+	sys := systemText(t, out.System)
+	assert.Contains(t, sys, "Follow the rules.")
+
+	// no message content may carry input_text (Anthropic only knows "text")
+	for _, m := range out.Messages {
+		assert.NotContains(t, string(m.Content), "input_text",
+			"input_text must not leak into Anthropic messages")
+	}
+}
+
+func TestResponsesToAnthropic_InstructionsAndDeveloperConcatenated(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"instructions": "Primary prompt.",
+		"input": [
+			{"role":"developer","content":[{"type":"input_text","text":"Extra context."}]},
+			{"role":"user","content":[{"type":"input_text","text":"hi"}]}
+		]
+	}`)
+	sys := systemText(t, out.System)
+	assert.Contains(t, sys, "Primary prompt.")
+	assert.Contains(t, sys, "Extra context.")
+}
+
+func TestResponsesToAnthropic_EmptySystemOmitted(t *testing.T) {
+	// Whitespace-only instructions and developer items → System must be nil/absent,
+	// never an empty or whitespace string (Anthropic 422s on empty system).
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"instructions": "   ",
+		"input": [
+			{"role":"developer","content":[{"type":"input_text","text":"  "}]},
+			{"role":"user","content":[{"type":"input_text","text":"hi"}]}
+		]
+	}`)
+	if len(out.System) > 0 {
+		sys := systemText(t, out.System)
+		assert.NotEqual(t, "", strings.TrimSpace(sys), "system must never be empty/whitespace")
+	}
+}
+
+// codex reads the tool call from the OutputItemDone item, so a streamed
+// function_call's output_item.done must carry call_id, name and arguments —
+// without them codex cannot execute the tool and stalls.
+func TestAnthropicStream_FunctionCallDoneCarriesCallFields(t *testing.T) {
+	state := &AnthropicEventToResponsesState{}
+	idx := 0
+	var all []ResponsesStreamEvent
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "message_start", Message: &AnthropicResponse{ID: "msg_1", Model: "claude-opus-4-8"},
+	}, state)...)
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "content_block_start", Index: &idx,
+		ContentBlock: &AnthropicContentBlock{Type: "tool_use", ID: "tu_1", Name: "exec"},
+	}, state)...)
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "content_block_delta", Index: &idx,
+		Delta: &AnthropicDelta{Type: "input_json_delta", PartialJSON: `{"cmd":"ls"}`},
+	}, state)...)
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "content_block_stop", Index: &idx,
+	}, state)...)
+
+	var fcDone *ResponsesOutput
+	for _, e := range all {
+		if e.Type == "response.output_item.done" && e.Item != nil && e.Item.Type == "function_call" {
+			fcDone = e.Item
+		}
+	}
+	require.NotNil(t, fcDone, "must emit function_call output_item.done")
+	assert.NotEmpty(t, fcDone.CallID, "call_id required")
+	assert.Equal(t, "exec", fcDone.Name)
+	assert.JSONEq(t, `{"cmd":"ls"}`, fcDone.Arguments)
+}
diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go
index b4451f235bb..d046f560bfc 100644
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -230,13 +230,31 @@ type ResponsesInputItem struct {
 	Content json.RawMessage `json:"content,omitempty"` // string or []ResponsesContentPart
 
 	// type=function_call
-	CallID    string `json:"call_id,omitempty"`
-	Name      string `json:"name,omitempty"`
-	Arguments string `json:"arguments,omitempty"`
-	ID        string `json:"id,omitempty"`
+	CallID string `json:"call_id,omitempty"`
+	Name   string `json:"name,omitempty"`
+	// Arguments is stringified JSON per the OpenAI spec, but codex / newer
+	// clients may send a raw JSON object. RawMessage accepts both; callers
+	// normalize via normalizeResponsesArguments.
+	Arguments json.RawMessage `json:"arguments,omitempty"`
+	ID        string          `json:"id,omitempty"`
 
 	// type=function_call_output
-	Output string `json:"output,omitempty"`
+	// Output is a plain string in older clients, but newer Responses clients
+	// (codex) send an array like [{"type":"output_text","text":"..."}].
+	// RawMessage accepts both; callers normalize via extractResponsesOutputText.
+	Output json.RawMessage `json:"output,omitempty"`
+}
+
+// jsonRawString marshals a Go string into a JSON-string RawMessage (i.e. a
+// quoted value). Used when building ResponsesInputItem.Arguments / .Output from
+// a string source, preserving the OpenAI wire format where these fields are
+// emitted as JSON strings.
+func jsonRawString(s string) json.RawMessage {
+	b, err := json.Marshal(s)
+	if err != nil {
+		return json.RawMessage(`""`)
+	}
+	return json.RawMessage(b)
 }
 
 // ResponsesContentPart is a typed content part in a Responses message.
@@ -390,6 +408,9 @@ type ResponsesStreamEvent struct {
 	// response.output_item.added / response.output_item.done
 	Item *ResponsesOutput `json:"item,omitempty"`
 
+	// response.content_part.added / response.content_part.done
+	Part *ResponsesContentPart `json:"part,omitempty"`
+
 	// response.output_text.delta / response.output_text.done
 	OutputIndex  int    `json:"output_index,omitempty"`
 	ContentIndex int    `json:"content_index,omitempty"`

From d806b6d5f93ae13f79d0566fe6cdba0a0c2fee7f Mon Sep 17 00:00:00 2001
From: hanyunsushi <2424791234@qq.com>
Date: Sun, 31 May 2026 04:52:01 +0800
Subject: [PATCH 2/2] fix(apicompat): finalize streamed tool calls on
 chat/completions->Responses path

The ChatCompletions->Responses streaming bridge emitted output_item.added and
function_call_arguments.delta per tool call but never the terminal
function_call_arguments.done / output_item.done. codex collects tool calls
from OutputItemDone items, so an unterminated tool call left it stalled with a
blank render (observed when asking mimo-v2.5-pro to e.g. open a browser).
FinalizeChatCompletionsResponsesStream now emits both terminal events with
call_id/name/arguments.

Also fixes argument duplication: a tool call whose first chunk carried both
name and arguments had its arguments counted twice (copyCall already held them
and the accumulator appended them again), producing invalid JSON like
{...}{...}. New tool-call state now starts with empty arguments.
---
 .../chatcompletions_empty_delta_test.go       | 39 +++++++++++++++++++
 .../chatcompletions_responses_bridge.go       | 37 ++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
index 7bd5d889d23..d2aa04bbafa 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
@@ -160,3 +160,42 @@ func TestNormalizeChatReasoningEffort(t *testing.T) {
 	assert.Equal(t, "", normalizeChatReasoningEffort(""))
 	assert.Equal(t, "", normalizeChatReasoningEffort("bogus"))
 }
+
+// mimo and other chat/completions upstreams stream tool calls; the bridge must
+// emit terminal function_call_arguments.done + output_item.done (with
+// call_id/name/arguments) at stream end, or codex receives an unterminated
+// tool call and stalls/renders blank.
+func TestChatChunkToResponses_StreamedToolCallFinalized(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("test-reasoning-model")
+	idx := 0
+	chunk := &ChatCompletionsChunk{
+		ID: "x",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{ToolCalls: []ChatToolCall{{
+			Index:    &idx,
+			ID:       "call_abc",
+			Type:     "function",
+			Function: ChatFunctionCall{Name: "open_browser", Arguments: `{"url":"google.com"}`},
+		}}}}},
+	}
+	ChatCompletionsChunkToResponsesEvents(chunk, state)
+	final := FinalizeChatCompletionsResponsesStream(state)
+
+	var argsDone, itemDone *ResponsesStreamEvent
+	for i := range final {
+		switch final[i].Type {
+		case "response.function_call_arguments.done":
+			argsDone = &final[i]
+		case "response.output_item.done":
+			if final[i].Item != nil && final[i].Item.Type == "function_call" {
+				itemDone = &final[i]
+			}
+		}
+	}
+	require.NotNil(t, argsDone, "must emit function_call_arguments.done")
+	assert.Equal(t, "call_abc", argsDone.CallID)
+	assert.JSONEq(t, `{"url":"google.com"}`, argsDone.Arguments)
+	require.NotNil(t, itemDone, "must emit function_call output_item.done")
+	assert.Equal(t, "call_abc", itemDone.Item.CallID)
+	assert.Equal(t, "open_browser", itemDone.Item.Name)
+	assert.JSONEq(t, `{"url":"google.com"}`, itemDone.Item.Arguments)
+}
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
index 0647a260b3c..b261519e335 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
@@ -525,6 +525,10 @@ func ChatCompletionsChunkToResponsesEvents(
 					copyCall.ID = generateItemID()
 				}
 				copyCall.Type = "function"
+				// Arguments are accumulated further down by the
+				// `stored.Function.Arguments += ...` step. Clear them here so the first
+				// chunk's arguments are not counted twice (duplicated JSON `{...}{...}`).
+				copyCall.Function.Arguments = ""
 				state.ToolCalls[idx] = &copyCall
 				stored = &copyCall
 				events = append(events, chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
@@ -611,6 +615,39 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr
 		incompleteDetails = &ResponsesIncompleteDetails{Reason: "max_output_tokens"}
 	}
 
+	// Finalize streamed tool calls. The streaming loop emits
+	// output_item.added + function_call_arguments.delta per tool call but never
+	// their terminal events; without function_call_arguments.done and
+	// output_item.done (carrying call_id/name/arguments) codex receives an
+	// unterminated tool call, cannot execute it, and renders nothing.
+	for i := 0; i < len(state.ToolCalls); i++ {
+		toolCall, ok := state.ToolCalls[i]
+		if !ok || toolCall == nil {
+			continue
+		}
+		arguments := toolCall.Function.Arguments
+		if strings.TrimSpace(arguments) == "" {
+			arguments = "{}"
+		}
+		events = append(events, chatToResponsesEvent(state, "response.function_call_arguments.done", &ResponsesStreamEvent{
+			OutputIndex: i + 1,
+			CallID:      toolCall.ID,
+			Name:        toolCall.Function.Name,
+			Arguments:   arguments,
+		}))
+		events = append(events, chatToResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
+			OutputIndex: i + 1,
+			Item: &ResponsesOutput{
+				Type:      "function_call",
+				ID:        generateItemID(),
+				CallID:    toolCall.ID,
+				Name:      toolCall.Function.Name,
+				Arguments: arguments,
+				Status:    "completed",
+			},
+		}))
+	}
+
 	state.CompletedSent = true
 	events = append(events, chatToResponsesEvent(state, "response.completed", &ResponsesStreamEvent{
 		Response: &ResponsesResponse{