diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index 8997835c2aa..d8bcf5229b7 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -143,7 +143,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) {
 	assert.Empty(t, items[2].ID)
 	assert.Equal(t, "function_call_output", items[3].Type)
 	assert.Equal(t, "call_1", items[3].CallID)
-	assert.Equal(t, "Sunny, 72°F", items[3].Output)
+	assert.Equal(t, `"Sunny, 72°F"`, string(items[3].Output))
 }
 
 func TestAnthropicToResponses_ThinkingIgnored(t *testing.T) {
@@ -1340,7 +1340,7 @@ func TestAnthropicToResponses_ToolResultWithImage(t *testing.T) {
 	// function_call_output should have text-only output (no image).
 	assert.Equal(t, "function_call_output", items[2].Type)
 	assert.Equal(t, "toolu_1", items[2].CallID)
-	assert.Equal(t, "(empty)", items[2].Output)
+	assert.Equal(t, `"(empty)"`, string(items[2].Output))
 
 	// Image should be in a separate user message.
 	assert.Equal(t, "user", items[3].Role)
@@ -1377,7 +1377,7 @@ func TestAnthropicToResponses_ToolResultMixed(t *testing.T) {
 
 	// function_call_output should have text-only output.
 	assert.Equal(t, "function_call_output", items[2].Type)
-	assert.Equal(t, "File metadata: 800x600 PNG", items[2].Output)
+	assert.Equal(t, `"File metadata: 800x600 PNG"`, string(items[2].Output))
 
 	// Image should be in a separate user message.
 	assert.Equal(t, "user", items[3].Role)
@@ -1412,7 +1412,7 @@ func TestAnthropicToResponses_TextOnlyToolResultBackwardCompat(t *testing.T) {
 	require.Len(t, items, 3)
 
 	// Text-only tool_result should produce a plain string.
-	assert.Equal(t, "Sunny, 72°F", items[2].Output)
+	assert.Equal(t, `"Sunny, 72°F"`, string(items[2].Output))
 }
 
 func TestAnthropicToResponses_ImageEmptyMediaType(t *testing.T) {
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
index e2011bee0bf..bc29da07dd5 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -221,7 +221,7 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error)
 		out = append(out, ResponsesInputItem{
 			Type:   "function_call_output",
 			CallID: toResponsesCallID(b.ToolUseID),
-			Output: outputText,
+			Output: jsonRawString(outputText),
 		})
 		toolResultImageParts = append(toolResultImageParts, imageParts...)
 	}
@@ -302,7 +302,7 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e
 			Type:      "function_call",
 			CallID:    fcID,
 			Name:      b.Name,
-			Arguments: args,
+			Arguments: jsonRawString(args),
 		})
 	}
 
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
index de8ab78df89..d706339340d 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
@@ -5,6 +5,7 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
+	"strings"
 	"time"
 )
 
@@ -151,10 +152,20 @@ type AnthropicEventToResponsesState struct {
 
 	// For message output: accumulate text parts
 	ContentIndex int
+	// CurrentText accumulates the message's output_text so the terminal
+	// output_item.done can carry the full content. codex collects final text
+	// from OutputItemDone items, not from output_text.delta events, so the
+	// message item MUST include content:[{type:output_text,text:...}].
+	CurrentText string
 
 	// For function_call: track per-output info
 	CurrentCallID string
 	CurrentName   string
+	// CurrentArguments accumulates the function_call's argument JSON so the
+	// terminal output_item.done (and arguments.done) can carry the full args.
+	// codex reads the tool call from the OutputItemDone item; without
+	// call_id/name/arguments it cannot execute the tool and stalls.
+	CurrentArguments string
 
 	// Usage from message_start / message_delta. InputTokens here follows
 	// Anthropic semantics (excludes cached tokens); they are added back when
@@ -278,6 +289,7 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi
 			state.CurrentItemID = generateItemID()
 			state.CurrentItemType = "message"
 			state.ContentIndex = 0
+			state.CurrentText = ""
 
 			events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
 				OutputIndex: state.OutputIndex,
@@ -288,6 +300,21 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi
 					Status: "in_progress",
 				},
 			}))
+
+			// Emit response.content_part.added so clients (e.g. codex) know a
+			// text content part is starting. Without it the subsequent
+			// output_text.delta events have no part to attach to and the client
+			// renders nothing. Paired with the content_part.done emitted by
+			// anthToResHandleContentBlockStop.
+			events = append(events, makeResponsesEvent(state, "response.content_part.added", &ResponsesStreamEvent{
+				OutputIndex:  state.OutputIndex,
+				ContentIndex: state.ContentIndex,
+				ItemID:       state.CurrentItemID,
+				Part: &ResponsesContentPart{
+					Type: "output_text",
+					Text: "",
+				},
+			}))
 		}
 
 	case "tool_use":
@@ -298,6 +325,7 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi
 		state.CurrentItemType = "function_call"
 		state.CurrentCallID = toResponsesCallID(evt.ContentBlock.ID)
 		state.CurrentName = evt.ContentBlock.Name
+		state.CurrentArguments = ""
 
 		events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
 			OutputIndex: state.OutputIndex,
@@ -324,6 +352,7 @@ func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *Anthropi
 		if evt.Delta.Text == "" {
 			return nil
 		}
+		state.CurrentText += evt.Delta.Text
 		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{
 			OutputIndex:  state.OutputIndex,
 			ContentIndex: state.ContentIndex,
@@ -346,6 +375,7 @@ func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *Anthropi
 		if evt.Delta.PartialJSON == "" {
 			return nil
 		}
+		state.CurrentArguments += evt.Delta.PartialJSON
 		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.function_call_arguments.delta", &ResponsesStreamEvent{
 			OutputIndex: state.OutputIndex,
 			Delta:       evt.Delta.PartialJSON,
@@ -384,18 +414,32 @@ func anthToResHandleContentBlockStop(evt *AnthropicStreamEvent, state *Anthropic
 				ItemID:      state.CurrentItemID,
 				CallID:      state.CurrentCallID,
 				Name:        state.CurrentName,
+				Arguments:   nonEmptyArguments(state.CurrentArguments),
 			}),
 		}
 		events = append(events, closeCurrentResponsesItem(state)...)
 		return events
 
 	case "message":
-		// Emit output_text.done (text block is done, but message item stays open for potential more blocks)
+		// Text block done: emit output_text.done then content_part.done.
+		// The message item stays open for potential more blocks; it is closed
+		// later by closeCurrentResponsesItem. content_part.done mirrors the
+		// content_part.added emitted in anthToResHandleContentBlockStart.
 		return []ResponsesStreamEvent{
 			makeResponsesEvent(state, "response.output_text.done", &ResponsesStreamEvent{
 				OutputIndex:  state.OutputIndex,
 				ContentIndex: state.ContentIndex,
 				ItemID:       state.CurrentItemID,
+				Text:         state.CurrentText,
+			}),
+			makeResponsesEvent(state, "response.content_part.done", &ResponsesStreamEvent{
+				OutputIndex:  state.OutputIndex,
+				ContentIndex: state.ContentIndex,
+				ItemID:       state.CurrentItemID,
+				Part: &ResponsesContentPart{
+					Type: "output_text",
+					Text: state.CurrentText,
+				},
 			}),
 		}
 	}
@@ -450,25 +494,57 @@ func closeCurrentResponsesItem(state *AnthropicEventToResponsesState) []Response
 
 	itemType := state.CurrentItemType
 	itemID := state.CurrentItemID
+	currentText := state.CurrentText
+	currentCallID := state.CurrentCallID
+	currentName := state.CurrentName
+	currentArgs := state.CurrentArguments
 
 	// Reset
 	state.CurrentItemType = ""
 	state.CurrentItemID = ""
 	state.CurrentCallID = ""
 	state.CurrentName = ""
+	state.CurrentText = ""
+	state.CurrentArguments = ""
 	state.OutputIndex++
 	state.ContentIndex = 0
 
+	// The terminal item carries its full content. codex collects final output
+	// from OutputItemDone items (not from the delta events), so an item missing
+	// its content/arguments renders blank or cannot be executed as a tool call.
+	doneItem := &ResponsesOutput{
+		Type:   itemType,
+		ID:     itemID,
+		Status: "completed",
+	}
+	switch itemType {
+	case "message":
+		doneItem.Role = "assistant"
+		doneItem.Content = []ResponsesContentPart{{
+			Type: "output_text",
+			Text: currentText,
+		}}
+	case "function_call":
+		doneItem.CallID = currentCallID
+		doneItem.Name = currentName
+		doneItem.Arguments = nonEmptyArguments(currentArgs)
+	}
+
 	return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
 		OutputIndex: state.OutputIndex - 1, // Use the index before increment
-		Item: &ResponsesOutput{
-			Type:   itemType,
-			ID:     itemID,
-			Status: "completed",
-		},
+		Item:        doneItem,
 	})}
 }
 
+// nonEmptyArguments returns "{}" when args is empty or whitespace-only (an Anthropic tool_use
+// with no input yields no JSON; codex expects at least "{}"); other values pass through unvalidated.
+func nonEmptyArguments(args string) string {
+	if strings.TrimSpace(args) == "" {
+		return "{}"
+	}
+	return args
+}
+
 func makeResponsesCreatedEvent(state *AnthropicEventToResponsesState) ResponsesStreamEvent {
 	seq := state.SequenceNumber
 	state.SequenceNumber++
diff --git a/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
new file mode 100644
index 00000000000..d2aa04bbafa
--- /dev/null
+++ b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go
@@ -0,0 +1,201 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// strptr returns a pointer to a copy of s, for filling *string fields in test fixtures.
+func strptr(s string) *string { return &s }
+
+// Reproduces the mimo "thinking done, nothing shown" bug: the upstream emits a
+// leading {"content":""} chunk (non-nil, empty). The bridge must NOT emit a
+// response.output_text.delta for it (the delta would serialize empty and a
+// premature message item would be created), and must still stream the real
+// content that follows.
+func TestChatChunkToResponses_SkipsEmptyContentDelta(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+
+	// chunk 1: empty content (some upstreams send a leading empty chunk) — no text delta
+	c1 := &ChatCompletionsChunk{
+		ID:      "c1",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{Role: "assistant", Content: strptr("")}}},
+	}
+	ev1 := ChatCompletionsChunkToResponsesEvents(c1, state)
+	for _, e := range ev1 {
+		assert.NotEqual(t, "response.output_text.delta", e.Type,
+			"empty content must not emit an output_text delta")
+	}
+
+	// chunk 2: real content — must emit a delta carrying the text
+	c2 := &ChatCompletionsChunk{
+		ID:      "c1",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hello")}}},
+	}
+	ev2 := ChatCompletionsChunkToResponsesEvents(c2, state)
+	var sawDelta bool
+	for _, e := range ev2 {
+		if e.Type == "response.output_text.delta" {
+			sawDelta = true
+			assert.Equal(t, "Hello", e.Delta)
+		}
+	}
+	assert.True(t, sawDelta, "real content must emit an output_text delta")
+}
+
+func TestChatChunkToResponses_SkipsEmptyReasoningDelta(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	c := &ChatCompletionsChunk{
+		ID:      "c1",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{ReasoningContent: strptr("")}}},
+	}
+	ev := ChatCompletionsChunkToResponsesEvents(c, state)
+	for _, e := range ev {
+		assert.NotEqual(t, "response.reasoning_summary_text.delta", e.Type,
+			"empty reasoning_content must not emit a reasoning delta")
+	}
+}
+
+// Full mimo-shaped stream: empty content → reasoning → real content. Every
+// output_text delta that reaches the client must be non-empty, and the deltas
+// joined together must equal exactly the real content ("Hi!").
+func TestChatChunkToResponses_MimoShapedStream(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	chunks := []*ChatCompletionsChunk{
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Role: "assistant", Content: strptr("")}}}},
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{ReasoningContent: strptr("thinking...")}}}},
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hi")}}}},
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("!")}}}},
+	}
+	var textDeltas []string
+	for _, c := range chunks {
+		for _, e := range ChatCompletionsChunkToResponsesEvents(c, state) {
+			if e.Type == "response.output_text.delta" {
+				textDeltas = append(textDeltas, e.Delta)
+			}
+		}
+	}
+	// every emitted text delta is non-empty
+	for _, d := range textDeltas {
+		assert.NotEqual(t, "", d)
+	}
+	assert.Equal(t, "Hi!", strings.Join(textDeltas, ""))
+}
+
+// codex requires response.content_part.added before output_text deltas and
+// content_part.done at the end; without them it renders nothing.
+func TestChatChunkToResponses_EmitsContentPartEvents(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	var types []string
+	for _, c := range []*ChatCompletionsChunk{
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hi")}}}},
+	} {
+		for _, e := range ChatCompletionsChunkToResponsesEvents(c, state) {
+			types = append(types, e.Type)
+		}
+	}
+	for _, e := range FinalizeChatCompletionsResponsesStream(state) {
+		types = append(types, e.Type)
+	}
+	assert.Contains(t, types, "response.content_part.added")
+	assert.Contains(t, types, "response.content_part.done")
+	// content_part.added must come before the first output_text.delta
+	iAdded, iDelta := -1, -1
+	for i, ty := range types {
+		if ty == "response.content_part.added" && iAdded < 0 {
+			iAdded = i
+		}
+		if ty == "response.output_text.delta" && iDelta < 0 {
+			iDelta = i
+		}
+	}
+	assert.GreaterOrEqual(t, iDelta, 0)
+	assert.GreaterOrEqual(t, iAdded, 0)
+	assert.Less(t, iAdded, iDelta, "content_part.added must precede output_text.delta")
+}
+
+// codex collects final text from OutputItemDone items, so the message item in
+// response.output_item.done must carry content with the accumulated text.
+func TestChatChunkToResponses_OutputItemDoneCarriesContent(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("mimo-v2.5")
+	for _, c := range []*ChatCompletionsChunk{
+		{ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hello world")}}}},
+	} {
+		ChatCompletionsChunkToResponsesEvents(c, state)
+	}
+	var found bool
+	for _, e := range FinalizeChatCompletionsResponsesStream(state) {
+		if e.Type == "response.output_item.done" && e.Item != nil && e.Item.Type == "message" {
+			found = true
+			require.Len(t, e.Item.Content, 1)
+			assert.Equal(t, "output_text", e.Item.Content[0].Type)
+			assert.Equal(t, "Hello world", e.Item.Content[0].Text)
+		}
+	}
+	assert.True(t, found, "must emit message output_item.done with content")
+}
+
+// Some chat/completions upstreams reject reasoning_effort "xhigh"
+// (only low/medium/high allowed). It must be normalized to high.
+func TestResponsesToChatCompletions_XhighReasoningNormalized(t *testing.T) {
+	body := []byte(`{"model":"gpt-5.5","reasoning":{"effort":"xhigh"},"input":[{"role":"user","content":[{"type":"input_text","text":"hi"}]}]}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+	cc, err := ResponsesToChatCompletionsRequest(&req)
+	require.NoError(t, err)
+	assert.Equal(t, "high", cc.ReasoningEffort, "xhigh must be normalized to high for chat/completions")
+}
+
+func TestNormalizeChatReasoningEffort(t *testing.T) {
+	assert.Equal(t, "high", normalizeChatReasoningEffort("xhigh"))
+	assert.Equal(t, "high", normalizeChatReasoningEffort("high"))
+	assert.Equal(t, "high", normalizeChatReasoningEffort("max"))
+	assert.Equal(t, "medium", normalizeChatReasoningEffort("medium"))
+	assert.Equal(t, "low", normalizeChatReasoningEffort("low"))
+	assert.Equal(t, "low", normalizeChatReasoningEffort("minimal"))
+	assert.Equal(t, "", normalizeChatReasoningEffort(""))
+	assert.Equal(t, "", normalizeChatReasoningEffort("bogus"))
+}
+
+// mimo and other chat/completions upstreams stream tool calls; the bridge must
+// emit terminal function_call_arguments.done + output_item.done (with
+// call_id/name/arguments) at stream end, or codex receives an unterminated
+// tool call and stalls/renders blank.
+func TestChatChunkToResponses_StreamedToolCallFinalized(t *testing.T) {
+	state := NewChatCompletionsToResponsesStreamState("test-reasoning-model")
+	idx := 0
+	chunk := &ChatCompletionsChunk{
+		ID: "x",
+		Choices: []ChatChunkChoice{{Delta: ChatDelta{ToolCalls: []ChatToolCall{{
+			Index:    &idx,
+			ID:       "call_abc",
+			Type:     "function",
+			Function: ChatFunctionCall{Name: "open_browser", Arguments: `{"url":"google.com"}`},
+		}}}}},
+	}
+	ChatCompletionsChunkToResponsesEvents(chunk, state)
+	final := FinalizeChatCompletionsResponsesStream(state)
+
+	var argsDone, itemDone *ResponsesStreamEvent
+	for i := range final {
+		switch final[i].Type {
+		case "response.function_call_arguments.done":
+			argsDone = &final[i]
+		case "response.output_item.done":
+			if final[i].Item != nil && final[i].Item.Type == "function_call" {
+				itemDone = &final[i]
+			}
+		}
+	}
+	require.NotNil(t, argsDone, "must emit function_call_arguments.done")
+	assert.Equal(t, "call_abc", argsDone.CallID)
+	assert.JSONEq(t, `{"url":"google.com"}`, argsDone.Arguments)
+	require.NotNil(t, itemDone, "must emit function_call output_item.done")
+	assert.Equal(t, "call_abc", itemDone.Item.CallID)
+	assert.Equal(t, "open_browser", itemDone.Item.Name)
+	assert.JSONEq(t, `{"url":"google.com"}`, itemDone.Item.Arguments)
+}
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
index 09b680c7c73..b261519e335 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go
@@ -30,7 +30,7 @@ func ResponsesToChatCompletionsRequest(req *ResponsesRequest) (*ChatCompletionsR
 		ServiceTier:         req.ServiceTier,
 	}
 	if req.Reasoning != nil {
-		out.ReasoningEffort = req.Reasoning.Effort
+		out.ReasoningEffort = normalizeChatReasoningEffort(req.Reasoning.Effort)
 	}
 	if len(req.Tools) > 0 {
 		out.Tools = responsesToolsToChatTools(req.Tools)
@@ -93,7 +93,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage)
 		itemType := rawString(item["type"])
 		switch itemType {
 		case "function_call":
-			arguments := rawString(item["arguments"])
+			arguments := responsesArgumentsToChatString(item["arguments"])
 			if strings.TrimSpace(arguments) == "" {
 				arguments = "{}"
 			}
@@ -110,7 +110,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage)
 			})
 			continue
 		case "function_call_output":
-			content, _ := json.Marshal(rawString(item["output"]))
+			content, _ := json.Marshal(extractResponsesOutputText(item["output"]))
 			messages = append(messages, ChatMessage{
 				Role:       "tool",
 				ToolCallID: rawString(item["call_id"]),
@@ -490,7 +490,12 @@ func ChatCompletionsChunkToResponsesEvents(
 	events = append(events, ensureChatToResponsesCreated(state)...)
 
 	for _, choice := range chunk.Choices {
-		if choice.Delta.Content != nil {
+		// Skip empty-string content deltas. Some upstreams emit a
+		// leading {"content":""} chunk; it is non-nil but carries no text, and
+		// emitting it produces a response.output_text.delta with an empty delta
+		// (omitempty drops the field entirely) plus a premature message item —
+		// codex then shows "thinking" with no visible output.
+		if choice.Delta.Content != nil && *choice.Delta.Content != "" {
 			events = append(events, ensureChatToResponsesMessageItem(state)...)
 			_, _ = state.Text.WriteString(*choice.Delta.Content)
 			events = append(events, chatToResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{
@@ -500,7 +505,7 @@ func ChatCompletionsChunkToResponsesEvents(
 				ItemID:       state.MessageItemID,
 			}))
 		}
-		if choice.Delta.ReasoningContent != nil {
+		if choice.Delta.ReasoningContent != nil && *choice.Delta.ReasoningContent != "" {
 			_, _ = state.Reasoning.WriteString(*choice.Delta.ReasoningContent)
 			events = append(events, chatToResponsesEvent(state, "response.reasoning_summary_text.delta", &ResponsesStreamEvent{
 				OutputIndex:  0,
@@ -520,6 +525,10 @@ func ChatCompletionsChunkToResponsesEvents(
 					copyCall.ID = generateItemID()
 				}
 				copyCall.Type = "function"
+				// Arguments are accumulated below via stored.Function.Arguments
+				// += ...; clear the copy here so the first chunk's arguments are
+				// not appended twice (which produced duplicated JSON like `{...}{...}`).
+				copyCall.Function.Arguments = ""
 				state.ToolCalls[idx] = &copyCall
 				stored = &copyCall
 				events = append(events, chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
@@ -572,6 +581,16 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr
 			Text:         state.Text.String(),
 			ItemID:       state.MessageItemID,
 		}))
+		// content_part.done mirrors content_part.added from ensureChatToResponsesMessageItem.
+		events = append(events, chatToResponsesEvent(state, "response.content_part.done", &ResponsesStreamEvent{
+			OutputIndex:  0,
+			ContentIndex: 0,
+			ItemID:       state.MessageItemID,
+			Part: &ResponsesContentPart{
+				Type: "output_text",
+				Text: state.Text.String(),
+			},
+		}))
 		events = append(events, chatToResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
 			OutputIndex: 0,
 			Item: &ResponsesOutput{
@@ -579,6 +598,12 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr
 				ID:     state.MessageItemID,
 				Role:   "assistant",
 				Status: "completed",
+				// codex collects final text from OutputItemDone items, so the
+				// message item must carry its full content, not just status.
+				Content: []ResponsesContentPart{{
+					Type: "output_text",
+					Text: state.Text.String(),
+				}},
 			},
 		}))
 	}
@@ -590,6 +615,39 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr
 		incompleteDetails = &ResponsesIncompleteDetails{Reason: "max_output_tokens"}
 	}
 
+	// Finalize streamed tool calls. The streaming loop emits
+	// output_item.added + function_call_arguments.delta per tool call but never
+	// their terminal events; without function_call_arguments.done and
+	// output_item.done (carrying call_id/name/arguments) codex receives an
+	// unterminated tool call, cannot execute it, and renders nothing.
+	for i := 0; i < len(state.ToolCalls); i++ {
+		toolCall, ok := state.ToolCalls[i]
+		if !ok || toolCall == nil {
+			continue
+		}
+		arguments := toolCall.Function.Arguments
+		if strings.TrimSpace(arguments) == "" {
+			arguments = "{}"
+		}
+		events = append(events, chatToResponsesEvent(state, "response.function_call_arguments.done", &ResponsesStreamEvent{
+			OutputIndex: i + 1,
+			CallID:      toolCall.ID,
+			Name:        toolCall.Function.Name,
+			Arguments:   arguments,
+		}))
+		events = append(events, chatToResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
+			OutputIndex: i + 1,
+			Item: &ResponsesOutput{
+				Type:      "function_call",
+				ID:        generateItemID(),
+				CallID:    toolCall.ID,
+				Name:      toolCall.Function.Name,
+				Arguments: arguments,
+				Status:    "completed",
+			},
+		}))
+	}
+
 	state.CompletedSent = true
 	events = append(events, chatToResponsesEvent(state, "response.completed", &ResponsesStreamEvent{
 		Response: &ResponsesResponse{
@@ -626,15 +684,28 @@ func ensureChatToResponsesMessageItem(state *ChatCompletionsToResponsesStreamSta
 		return nil
 	}
 	state.MessageItemID = generateItemID()
-	return []ResponsesStreamEvent{chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
-		OutputIndex: 0,
-		Item: &ResponsesOutput{
-			Type:   "message",
-			ID:     state.MessageItemID,
-			Role:   "assistant",
-			Status: "in_progress",
-		},
-	})}
+	return []ResponsesStreamEvent{
+		chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
+			OutputIndex: 0,
+			Item: &ResponsesOutput{
+				Type:   "message",
+				ID:     state.MessageItemID,
+				Role:   "assistant",
+				Status: "in_progress",
+			},
+		}),
+		// content_part.added must precede output_text.delta or strict clients
+		// (codex) have no part to attach text to and render nothing.
+		chatToResponsesEvent(state, "response.content_part.added", &ResponsesStreamEvent{
+			OutputIndex:  0,
+			ContentIndex: 0,
+			ItemID:       state.MessageItemID,
+			Part: &ResponsesContentPart{
+				Type: "output_text",
+				Text: "",
+			},
+		}),
+	}
 }
 
 func (state *ChatCompletionsToResponsesStreamState) chatOutput() []ResponsesOutput {
@@ -695,6 +766,45 @@ func chatToResponsesEvent(
 	return evt
 }
 
+// normalizeChatReasoningEffort maps a Responses reasoning effort onto the
+// low/medium/high set used for Chat Completions requests; chat/completions
+// upstreams may reject other tiers such as "xhigh" with a 400. Matching is
+// case-insensitive and ignores surrounding whitespace: xhigh/extrahigh/max/high
+// → high, medium → medium, low/minimal/none → low, anything else → "".
+func normalizeChatReasoningEffort(effort string) string {
+	switch strings.ToLower(strings.TrimSpace(effort)) {
+	case "xhigh", "extrahigh", "max", "high":
+		return "high"
+	case "medium":
+		return "medium"
+	case "low", "minimal", "none": // NOTE(review): "none" is raised to "low" rather than omitted — confirm intended
+		return "low"
+	default:
+		return "" // omit unknown/empty so the upstream uses its default
+	}
+}
+
+// responsesArgumentsToChatString converts a Responses function_call.arguments
+// field into the stringified-JSON form required by Chat Completions
+// (ChatFunctionCall.Arguments is a string).
+//
+//   - stringified JSON: "{\"x\":1}" → the decoded inner string, as-is
+//   - raw JSON value:    {"x":1}     → its whitespace-trimmed source text
+//   - empty/absent/null              → ""
+func responsesArgumentsToChatString(raw json.RawMessage) string {
+	trimmed := json.RawMessage(strings.TrimSpace(string(raw)))
+	if len(trimmed) == 0 || string(trimmed) == "null" {
+		return ""
+	}
+	// A JSON string decodes cleanly into s — return the inner value verbatim.
+	var s string
+	if err := json.Unmarshal(trimmed, &s); err == nil {
+		return s
+	}
+	// Object/array/other JSON — return the trimmed raw text (not re-compacted).
+	return string(trimmed)
+}
+
 func rawString(raw json.RawMessage) string {
 	raw = bytesTrimSpace(raw)
 	if len(raw) == 0 || string(raw) == "null" {
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
index b03b012fc7a..c0c7384b1b2 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -105,7 +105,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) {
 	// Check function_call_output item
 	assert.Equal(t, "function_call_output", items[2].Type)
 	assert.Equal(t, "call_1", items[2].CallID)
-	assert.Equal(t, "pong", items[2].Output)
+	assert.Equal(t, `"pong"`, string(items[2].Output))
 
 	// Check tools
 	require.Len(t, resp.Tools, 1)
@@ -614,7 +614,7 @@ func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) {
 	require.Len(t, items, 3)
 	assert.Equal(t, "function_call_output", items[2].Type)
 	assert.Equal(t, "call_1", items[2].CallID)
-	assert.Equal(t, "image width: 100; image height: 200", items[2].Output)
+	assert.Equal(t, `"image width: 100; image height: 200"`, string(items[2].Output))
 }
 
 func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
index 463bdd0d15d..a7459bdeb45 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -194,7 +194,7 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 			Type:      "function_call",
 			CallID:    tc.ID,
 			Name:      tc.Function.Name,
-			Arguments: args,
+			Arguments: jsonRawString(args),
 		})
 	}
 
@@ -284,7 +284,7 @@ func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 	return []ResponsesInputItem{{
 		Type:   "function_call_output",
 		CallID: m.ToolCallID,
-		Output: output,
+		Output: jsonRawString(output),
 	}}, nil
 }
 
@@ -302,7 +302,7 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 	return []ResponsesInputItem{{
 		Type:   "function_call_output",
 		CallID: m.Name,
-		Output: output,
+		Output: jsonRawString(output),
 	}}, nil
 }
 
diff --git a/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go b/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go
new file mode 100644
index 00000000000..e0108512de4
--- /dev/null
+++ b/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go
@@ -0,0 +1,191 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// These tests cover the fix for codex (and newer Responses clients) sending
+// function_call.arguments as a JSON object and function_call_output.output as
+// a JSON array. Before the fix, ResponsesInputItem.Arguments / .Output were
+// typed `string`, so json.Unmarshal failed:
+//   - Responses→Anthropic path (ResponsesToAnthropicRequest): HTTP 502
+//   - Responses→ChatCompletions path (ResponsesToChatCompletionsRequest):
+//     silent data loss (rawString returned "" for non-string values)
+
+// --- helper-level tests ---------------------------------------------------
+
+func TestNormalizeResponsesArguments(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{"object", `{"x":1}`, `{"x":1}`},
+		{"stringified", `"{\"x\":1}"`, `{"x":1}`},
+		{"empty string", `""`, `{}`},
+		{"empty raw", ``, `{}`},
+		{"null", `null`, `{}`},
+		{"non-json string", `"not json"`, `{}`},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := normalizeResponsesArguments(json.RawMessage(tc.in))
+			assert.JSONEq(t, tc.want, string(got))
+		})
+	}
+}
+
+func TestExtractResponsesOutputText(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{"plain string", `"result"`, "result"},
+		{"array one part", `[{"type":"output_text","text":"result"}]`, "result"},
+		{"array two parts", `[{"type":"output_text","text":"a"},{"type":"output_text","text":"b"}]`, "a\n\nb"},
+		{"empty raw", ``, ""},
+		{"null", `null`, ""},
+		{"empty array", `[]`, ""},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := extractResponsesOutputText(json.RawMessage(tc.in))
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// --- Responses→Anthropic path: must not 502 ----------------------------
+
+func TestResponsesToAnthropicRequest_FunctionCallObjectArguments(t *testing.T) {
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call", "call_id": "c1", "name": "foo", "arguments": {"x": 1}}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err) // before fix: "cannot unmarshal object ... arguments of type string"
+	require.NotNil(t, anth)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.Equal(t, "tool_use", blocks[0].Type)
+	assert.Equal(t, "foo", blocks[0].Name)
+	assert.JSONEq(t, `{"x":1}`, string(blocks[0].Input))
+}
+
+func TestResponsesToAnthropicRequest_FunctionCallStringifiedArguments(t *testing.T) {
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call", "call_id": "c1", "name": "foo", "arguments": "{\"x\":1}"}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.JSONEq(t, `{"x":1}`, string(blocks[0].Input))
+}
+
+func TestResponsesToAnthropicRequest_FunctionCallOutputArray(t *testing.T) {
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call_output", "call_id": "c1",
+			 "output": [{"type": "output_text", "text": "result"}]}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err) // before fix: "cannot unmarshal array ... output of type string"
+	require.NotNil(t, anth)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.Equal(t, "tool_result", blocks[0].Type)
+	assert.Equal(t, "toolu_c1", blocks[0].ToolUseID) // call_id is namespaced for Anthropic
+	assert.JSONEq(t, `"result"`, string(blocks[0].Content))
+}
+
+func TestResponsesToAnthropicRequest_FunctionCallOutputString(t *testing.T) {
+	// Backward compatibility: older clients send output as a plain string.
+	body := []byte(`{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"type": "function_call_output", "call_id": "c1", "output": "result"}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	anth, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err)
+
+	require.Len(t, anth.Messages, 1)
+	var blocks []AnthropicContentBlock
+	require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks))
+	require.Len(t, blocks, 1)
+	assert.JSONEq(t, `"result"`, string(blocks[0].Content))
+}
+
+// --- Responses→ChatCompletions path: must not drop data ----------------
+
+func TestResponsesToChatCompletionsRequest_FunctionCallObjectArguments(t *testing.T) {
+	body := []byte(`{
+		"model": "gpt-5.4",
+		"input": [
+			{"type": "function_call", "call_id": "c1", "name": "foo", "arguments": {"x": 1}}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	cc, err := ResponsesToChatCompletionsRequest(&req)
+	require.NoError(t, err)
+	require.Len(t, cc.Messages, 1)
+	require.Len(t, cc.Messages[0].ToolCalls, 1)
+	// Chat Completions requires arguments to be a stringified JSON object;
+	// before the fix rawString returned "" and it degraded to "{}".
+	assert.JSONEq(t, `{"x":1}`, cc.Messages[0].ToolCalls[0].Function.Arguments)
+}
+
+func TestResponsesToChatCompletionsRequest_FunctionCallOutputArray(t *testing.T) {
+	body := []byte(`{
+		"model": "gpt-5.4",
+		"input": [
+			{"type": "function_call_output", "call_id": "c1",
+			 "output": [{"type": "output_text", "text": "result"}]}
+		]
+	}`)
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal(body, &req))
+
+	cc, err := ResponsesToChatCompletionsRequest(&req)
+	require.NoError(t, err)
+	require.Len(t, cc.Messages, 1)
+	assert.Equal(t, "tool", cc.Messages[0].Role)
+	// before the fix rawString returned "" → tool result content lost.
+	assert.JSONEq(t, `"result"`, string(cc.Messages[0].Content))
+}
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
index 8fa652f2bd1..37c8f258e13 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
@@ -11,7 +11,7 @@ import (
 // enables Anthropic platform groups to accept OpenAI Responses API requests
 // by converting them to the native /v1/messages format before forwarding upstream.
 func ResponsesToAnthropicRequest(req *ResponsesRequest) (*AnthropicRequest, error) {
-	system, messages, err := convertResponsesInputToAnthropic(req.Input)
+	system, messages, err := convertResponsesInputToAnthropic(req.Instructions, req.Input)
 	if err != nil {
 		return nil, err
 	}
@@ -98,14 +98,27 @@ func mapResponsesEffortToAnthropic(effort string) string {
 }
 
 // convertResponsesInputToAnthropic extracts system prompt and messages from
-// a Responses API input array. Returns the system as raw JSON (for Anthropic's
-// polymorphic system field) and a list of Anthropic messages.
-func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) {
+// a Responses API request. The system prompt is sourced from (in priority
+// order, concatenated): the top-level `instructions` field (codex's primary
+// system prompt) and any system/developer role items in the input array.
+// Returns the system as raw JSON (for Anthropic's polymorphic system field)
+// and a list of Anthropic messages.
+//
+// codex sends its ~20KB system prompt in `instructions` and additional context
+// in `developer` role items; both must map to Anthropic's system field, not be
+// dropped (the old code ignored both, leaving claude without instructions) nor
+// leaked into a user message as raw input_text blocks (which caused 422).
+func convertResponsesInputToAnthropic(instructions string, inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) {
+	var systemParts []string
+	if s := strings.TrimSpace(instructions); s != "" {
+		systemParts = append(systemParts, s)
+	}
+
 	// Try as plain string input.
 	var inputStr string
 	if err := json.Unmarshal(inputRaw, &inputStr); err == nil {
 		content, _ := json.Marshal(inputStr)
-		return nil, []AnthropicMessage{{Role: "user", Content: content}}, nil
+		return buildSystemJSON(systemParts), []AnthropicMessage{{Role: "user", Content: content}}, nil
 	}
 
 	var items []ResponsesInputItem
@@ -113,29 +126,23 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage
 		return nil, nil, fmt.Errorf("parse responses input: %w", err)
 	}
 
-	var system json.RawMessage
 	var messages []AnthropicMessage
 
 	for _, item := range items {
 		switch {
-		case item.Role == "system":
-			// System prompt → Anthropic system field
-			text := extractTextFromContent(item.Content)
-			if text != "" {
-				system, _ = json.Marshal(text)
+		case item.Role == "system" || item.Role == "developer":
+			// system / developer → Anthropic system field
+			if text := strings.TrimSpace(extractTextFromContent(item.Content)); text != "" {
+				systemParts = append(systemParts, text)
 			}
 
 		case item.Type == "function_call":
 			// function_call → assistant message with tool_use block
-			input := json.RawMessage("{}")
-			if item.Arguments != "" {
-				input = json.RawMessage(item.Arguments)
-			}
 			block := AnthropicContentBlock{
 				Type:  "tool_use",
 				ID:    fromResponsesCallIDToAnthropic(item.CallID),
 				Name:  item.Name,
-				Input: input,
+				Input: normalizeResponsesArguments(item.Arguments),
 			}
 			blockJSON, _ := json.Marshal([]AnthropicContentBlock{block})
 			messages = append(messages, AnthropicMessage{
@@ -145,7 +152,7 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage
 
 		case item.Type == "function_call_output":
 			// function_call_output → user message with tool_result block
-			outputContent := item.Output
+			outputContent := extractResponsesOutputText(item.Output)
 			if outputContent == "" {
 				outputContent = "(empty)"
 			}
@@ -195,7 +202,31 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage
 	// Merge consecutive same-role messages (Anthropic requires alternating roles)
 	messages = mergeConsecutiveMessages(messages)
 
-	return system, messages, nil
+	return buildSystemJSON(systemParts), messages, nil
+}
+
+// buildSystemJSON assembles the Anthropic `system` field from the prompt
+// fragments gathered during conversion. Fragments are joined with a blank line
+// and the result is trimmed; if nothing but whitespace remains the function
+// returns nil so the field is left out of the request altogether (Anthropic
+// answers 422 to an empty or whitespace-only system).
+//
+// Output is always the ARRAY form, [{"type":"text","text":...}], never a bare
+// JSON string. Both are valid per the Anthropic spec and the official Claude
+// Code client uses the array form, but some third-party Anthropic-compatible
+// upstreams return 422 for a string-form system combined with tools.
+func buildSystemJSON(parts []string) json.RawMessage {
+	text := strings.TrimSpace(strings.Join(parts, "\n\n"))
+	if text == "" {
+		return nil
+	}
+	block := map[string]string{"type": "text", "text": text}
+	encoded, err := json.Marshal([]map[string]string{block})
+	if err != nil {
+		// Not expected for string-only data; treat as "no system prompt".
+		return nil
+	}
+	return encoded
 }
 
 // extractTextFromContent extracts text from a content field that may be a
@@ -386,30 +417,29 @@ func parseContentBlocks(raw json.RawMessage) []AnthropicContentBlock {
 
 // convertResponsesToAnthropicTools maps Responses API tools to Anthropic format.
 // Reverse of convertAnthropicToolsToResponses.
+//
+// Every emitted tool must carry a valid input_schema: Anthropic rejects the
+// whole request with 422 if any tool has a null/missing schema. Responses tools
+// of type "namespace" (codex MCP/agent tools) and bare "web_search" carry no
+// `parameters`, so they must be backfilled with an empty object schema.
+//
+// Web-search tools (web_search, google_search, web_search_20250305) are
+// intentionally NOT translated to the Anthropic server-side tool: some
+// third-party Anthropic-compatible upstreams do not implement server tools and
+// return 422. They are emitted as a regular function tool instead, and since
+// they usually carry no name they default to "web_search".
 func convertResponsesToAnthropicTools(tools []ResponsesTool) []AnthropicTool {
 	var out []AnthropicTool
 	for _, t := range tools {
-		switch t.Type {
-		case "web_search", "google_search", "web_search_20250305":
-			out = append(out, AnthropicTool{
-				Type: "web_search_20250305",
-				Name: "web_search",
-			})
-		case "function":
-			out = append(out, AnthropicTool{
-				Name:        t.Name,
-				Description: t.Description,
-				InputSchema: normalizeAnthropicInputSchema(t.Parameters),
-			})
-		default:
-			// Pass through unknown tool types
-			out = append(out, AnthropicTool{
-				Type:        t.Type,
-				Name:        t.Name,
-				Description: t.Description,
-				InputSchema: t.Parameters,
-			})
+		name := t.Name
+		if name == "" && (t.Type == "web_search" || t.Type == "google_search" || t.Type == "web_search_20250305") {
+			name = "web_search"
 		}
+		out = append(out, AnthropicTool{
+			Name:        name,
+			Description: t.Description,
+			InputSchema: normalizeAnthropicInputSchema(t.Parameters),
+		})
 	}
 	return out
 }
@@ -471,3 +501,71 @@ func convertResponsesToAnthropicToolChoice(raw json.RawMessage) (json.RawMessage
 	// Pass through unknown
 	return raw, nil
 }
+
+// normalizeResponsesArguments converts a Responses function_call.arguments
+// field into a JSON object suitable for Anthropic's tool_use.input.
+//
+// The arguments field has three observed shapes:
+//   - stringified JSON: "{\"x\":1}"  → unwrap one layer → {"x":1}
+//   - raw JSON object:   {"x":1}      → use as-is
+//   - empty/absent/null               → {}
+//
+// Anything that does not resolve to a JSON object (arrays, numbers, booleans,
+// non-JSON or truncated strings) falls back to {} so the upstream always
+// receives a valid tool_use.input.
+func normalizeResponsesArguments(raw json.RawMessage) json.RawMessage {
+	candidate := strings.TrimSpace(string(raw))
+
+	// A JSON string means stringified arguments: unwrap exactly one layer.
+	// A literal null also decodes without error and leaves s empty, which
+	// then takes the {} fallback below.
+	var s string
+	if err := json.Unmarshal([]byte(candidate), &s); err == nil {
+		candidate = strings.TrimSpace(s)
+	}
+
+	// Forward only a syntactically valid JSON object. The prefix check alone
+	// is not enough because the unwrapped string is arbitrary client text
+	// (e.g. `{"x":` or `{not json`), hence json.Valid.
+	if strings.HasPrefix(candidate, "{") && json.Valid([]byte(candidate)) {
+		return json.RawMessage(candidate)
+	}
+
+	// Empty, null, non-JSON text, or a JSON value that is not an object.
+	return json.RawMessage("{}")
+}
+
+// extractResponsesOutputText flattens a Responses function_call_output.output
+// value into the plain string used for Anthropic's tool_result.content.
+//
+// Shapes handled, in the order they are tried:
+//   - empty, absent or null                                   → ""
+//   - JSON string: "result"                                   → the string
+//   - array of content parts: [{"type":"output_text",...}]    → non-empty
+//     text fields joined with a blank line
+//   - anything else                                           → raw JSON text
+func extractResponsesOutputText(raw json.RawMessage) string {
+	payload := strings.TrimSpace(string(raw))
+	switch payload {
+	case "", "null":
+		return ""
+	}
+	// Plain string output (older clients).
+	var text string
+	if json.Unmarshal([]byte(payload), &text) == nil {
+		return text
+	}
+	// Not a list of content parts: hand the raw JSON back so nothing is lost.
+	var parts []ResponsesContentPart
+	if json.Unmarshal([]byte(payload), &parts) != nil {
+		return payload
+	}
+	// Content-part array (newer clients such as codex): keep non-empty texts.
+	texts := make([]string, 0, len(parts))
+	for i := range parts {
+		if parts[i].Text != "" {
+			texts = append(texts, parts[i].Text)
+		}
+	}
+	return strings.Join(texts, "\n\n")
+}
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go b/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go
new file mode 100644
index 00000000000..5b270f8ffde
--- /dev/null
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go
@@ -0,0 +1,201 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// These tests cover the codex → Responses → Anthropic conversion fixes that
+// eliminated upstream 422s:
+//  1. tools with no parameters (type "namespace"/"web_search") must get a
+//     valid input_schema, never null
+//  2. web_search must be a regular function tool, not an Anthropic server tool
+//     (some third-party upstreams do not implement server tools → 422)
+//  3. codex's top-level `instructions` must map to the Anthropic system field
+//  4. `developer` role items must map to system, not leak as user input_text
+//  5. an empty/whitespace system must be omitted (Anthropic 422s on empty system)
+
+func anthReqFrom(t *testing.T, body string) *AnthropicRequest {
+	t.Helper()
+	var req ResponsesRequest
+	require.NoError(t, json.Unmarshal([]byte(body), &req))
+	out, err := ResponsesToAnthropicRequest(&req)
+	require.NoError(t, err)
+	return out
+}
+
+// systemText extracts the concatenated text from an Anthropic system field,
+// which buildSystemJSON emits in array form ([{"type":"text","text":...}]).
+func systemText(t *testing.T, raw json.RawMessage) string {
+	t.Helper()
+	if len(raw) == 0 {
+		return ""
+	}
+	// array form
+	var parts []struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	}
+	if err := json.Unmarshal(raw, &parts); err == nil {
+		var sb []string
+		for _, p := range parts {
+			sb = append(sb, p.Text)
+		}
+		return strings.Join(sb, "\n\n")
+	}
+	// string form (fallback)
+	var s string
+	require.NoError(t, json.Unmarshal(raw, &s))
+	return s
+}
+
+func TestResponsesToAnthropic_ToolWithoutParametersGetsSchema(t *testing.T) {
+	// codex namespace tools (mcp__*, multi_agent_v1, codex_app) carry no parameters.
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
+		"tools": [
+			{"type":"namespace","name":"mcp__codegraph","description":"graph"},
+			{"type":"namespace","name":"codex_app"}
+		]
+	}`)
+	require.Len(t, out.Tools, 2)
+	for _, tool := range out.Tools {
+		require.NotEmpty(t, tool.InputSchema, "tool %s must have non-null input_schema", tool.Name)
+		assert.NotEqual(t, "null", string(tool.InputSchema))
+		// must be a valid object schema
+		var sch map[string]any
+		require.NoError(t, json.Unmarshal(tool.InputSchema, &sch))
+		assert.Equal(t, "object", sch["type"])
+	}
+}
+
+func TestResponsesToAnthropic_WebSearchIsFunctionToolNotServerTool(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
+		"tools": [{"type":"web_search"}]
+	}`)
+	require.Len(t, out.Tools, 1)
+	tool := out.Tools[0]
+	assert.Equal(t, "web_search", tool.Name)
+	// must NOT be emitted as Anthropic server tool web_search_20250305
+	assert.NotEqual(t, "web_search_20250305", tool.Type)
+	assert.Empty(t, tool.Type, "web_search must be a plain function tool, not a server tool")
+	require.NotEmpty(t, tool.InputSchema)
+	assert.NotEqual(t, "null", string(tool.InputSchema))
+}
+
+func TestResponsesToAnthropic_FunctionToolSchemaPreserved(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
+		"tools": [{"type":"function","name":"exec","description":"run","parameters":{"type":"object","properties":{"cmd":{"type":"string"}}}}]
+	}`)
+	require.Len(t, out.Tools, 1)
+	assert.Equal(t, "exec", out.Tools[0].Name)
+	var sch map[string]any
+	require.NoError(t, json.Unmarshal(out.Tools[0].InputSchema, &sch))
+	props, _ := sch["properties"].(map[string]any)
+	assert.Contains(t, props, "cmd")
+}
+
+func TestResponsesToAnthropic_InstructionsBecomeSystem(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"instructions": "You are a coding agent.",
+		"input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}]
+	}`)
+	require.NotEmpty(t, out.System)
+	sys := systemText(t, out.System)
+	assert.Contains(t, sys, "You are a coding agent.")
+}
+
+func TestResponsesToAnthropic_DeveloperRoleBecomesSystem(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"input": [
+			{"role":"developer","content":[{"type":"input_text","text":"Follow the rules."}]},
+			{"role":"user","content":[{"type":"input_text","text":"hi"}]}
+		]
+	}`)
+	// developer content must be in system, not leaked into a user message
+	require.NotEmpty(t, out.System)
+	sys := systemText(t, out.System)
+	assert.Contains(t, sys, "Follow the rules.")
+
+	// no message content may carry input_text (Anthropic only knows "text")
+	for _, m := range out.Messages {
+		assert.NotContains(t, string(m.Content), "input_text",
+			"input_text must not leak into Anthropic messages")
+	}
+}
+
+func TestResponsesToAnthropic_InstructionsAndDeveloperConcatenated(t *testing.T) {
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"instructions": "Primary prompt.",
+		"input": [
+			{"role":"developer","content":[{"type":"input_text","text":"Extra context."}]},
+			{"role":"user","content":[{"type":"input_text","text":"hi"}]}
+		]
+	}`)
+	sys := systemText(t, out.System)
+	assert.Contains(t, sys, "Primary prompt.")
+	assert.Contains(t, sys, "Extra context.")
+}
+
+func TestResponsesToAnthropic_EmptySystemOmitted(t *testing.T) {
+	// Whitespace-only instructions and developer content → System must be
+	// nil/absent, never an empty or blank value (Anthropic 422s on empty system).
+	out := anthReqFrom(t, `{
+		"model": "claude-opus-4-8",
+		"instructions": "   ",
+		"input": [
+			{"role":"developer","content":[{"type":"input_text","text":"  "}]},
+			{"role":"user","content":[{"type":"input_text","text":"hi"}]}
+		]
+	}`)
+	if len(out.System) > 0 {
+		sys := systemText(t, out.System)
+		assert.NotEqual(t, "", strings.TrimSpace(sys), "system must never be empty/whitespace")
+	}
+}
+
+// codex reads the tool call from the OutputItemDone item, so a streamed
+// function_call's output_item.done must carry call_id, name and arguments —
+// without them codex cannot execute the tool and stalls.
+func TestAnthropicStream_FunctionCallDoneCarriesCallFields(t *testing.T) {
+	state := &AnthropicEventToResponsesState{}
+	idx := 0
+	var all []ResponsesStreamEvent
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "message_start", Message: &AnthropicResponse{ID: "msg_1", Model: "claude-opus-4-8"},
+	}, state)...)
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "content_block_start", Index: &idx,
+		ContentBlock: &AnthropicContentBlock{Type: "tool_use", ID: "tu_1", Name: "exec"},
+	}, state)...)
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "content_block_delta", Index: &idx,
+		Delta: &AnthropicDelta{Type: "input_json_delta", PartialJSON: `{"cmd":"ls"}`},
+	}, state)...)
+	all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{
+		Type: "content_block_stop", Index: &idx,
+	}, state)...)
+
+	var fcDone *ResponsesOutput
+	for _, e := range all {
+		if e.Type == "response.output_item.done" && e.Item != nil && e.Item.Type == "function_call" {
+			fcDone = e.Item
+		}
+	}
+	require.NotNil(t, fcDone, "must emit function_call output_item.done")
+	assert.NotEmpty(t, fcDone.CallID, "call_id required")
+	assert.Equal(t, "exec", fcDone.Name)
+	assert.JSONEq(t, `{"cmd":"ls"}`, fcDone.Arguments)
+}
diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go
index b4451f235bb..d046f560bfc 100644
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -230,13 +230,31 @@ type ResponsesInputItem struct {
 	Content json.RawMessage `json:"content,omitempty"` // string or []ResponsesContentPart
 
 	// type=function_call
-	CallID    string `json:"call_id,omitempty"`
-	Name      string `json:"name,omitempty"`
-	Arguments string `json:"arguments,omitempty"`
-	ID        string `json:"id,omitempty"`
+	CallID string `json:"call_id,omitempty"`
+	Name   string `json:"name,omitempty"`
+	// Arguments is stringified JSON per the OpenAI spec, but codex / newer
+	// clients may send a raw JSON object. RawMessage accepts both; callers
+	// normalize via normalizeResponsesArguments.
+	Arguments json.RawMessage `json:"arguments,omitempty"`
+	ID        string          `json:"id,omitempty"`
 
 	// type=function_call_output
-	Output string `json:"output,omitempty"`
+	// Output is a plain string in older clients, but newer Responses clients
+	// (codex) send an array like [{"type":"output_text","text":"..."}].
+	// RawMessage accepts both; callers normalize via extractResponsesOutputText.
+	Output json.RawMessage `json:"output,omitempty"`
+}
+
+// jsonRawString encodes a Go string as a JSON string literal (quoted and
+// escaped) and returns it as a RawMessage. It is used wherever
+// ResponsesInputItem.Arguments / .Output are filled from a string source, so
+// those fields keep the OpenAI wire format of being emitted as JSON strings.
+func jsonRawString(s string) json.RawMessage {
+	encoded, err := json.Marshal(s)
+	if err == nil {
+		return json.RawMessage(encoded)
+	}
+	return json.RawMessage(`""`)
 }
 
 // ResponsesContentPart is a typed content part in a Responses message.
@@ -390,6 +408,9 @@ type ResponsesStreamEvent struct {
 	// response.output_item.added / response.output_item.done
 	Item *ResponsesOutput `json:"item,omitempty"`
 
+	// response.content_part.added / response.content_part.done
+	Part *ResponsesContentPart `json:"part,omitempty"`
+
 	// response.output_text.delta / response.output_text.done
 	OutputIndex  int    `json:"output_index,omitempty"`
 	ContentIndex int    `json:"content_index,omitempty"`