diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go index 8997835c2aa..d8bcf5229b7 100644 --- a/backend/internal/pkg/apicompat/anthropic_responses_test.go +++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go @@ -143,7 +143,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) { assert.Empty(t, items[2].ID) assert.Equal(t, "function_call_output", items[3].Type) assert.Equal(t, "call_1", items[3].CallID) - assert.Equal(t, "Sunny, 72°F", items[3].Output) + assert.Equal(t, `"Sunny, 72°F"`, string(items[3].Output)) } func TestAnthropicToResponses_ThinkingIgnored(t *testing.T) { @@ -1340,7 +1340,7 @@ func TestAnthropicToResponses_ToolResultWithImage(t *testing.T) { // function_call_output should have text-only output (no image). assert.Equal(t, "function_call_output", items[2].Type) assert.Equal(t, "toolu_1", items[2].CallID) - assert.Equal(t, "(empty)", items[2].Output) + assert.Equal(t, `"(empty)"`, string(items[2].Output)) // Image should be in a separate user message. assert.Equal(t, "user", items[3].Role) @@ -1377,7 +1377,7 @@ func TestAnthropicToResponses_ToolResultMixed(t *testing.T) { // function_call_output should have text-only output. assert.Equal(t, "function_call_output", items[2].Type) - assert.Equal(t, "File metadata: 800x600 PNG", items[2].Output) + assert.Equal(t, `"File metadata: 800x600 PNG"`, string(items[2].Output)) // Image should be in a separate user message. assert.Equal(t, "user", items[3].Role) @@ -1412,7 +1412,7 @@ func TestAnthropicToResponses_TextOnlyToolResultBackwardCompat(t *testing.T) { require.Len(t, items, 3) // Text-only tool_result should produce a plain string. 
- assert.Equal(t, "Sunny, 72°F", items[2].Output) + assert.Equal(t, `"Sunny, 72°F"`, string(items[2].Output)) } func TestAnthropicToResponses_ImageEmptyMediaType(t *testing.T) { diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go index e2011bee0bf..bc29da07dd5 100644 --- a/backend/internal/pkg/apicompat/anthropic_to_responses.go +++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go @@ -221,7 +221,7 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error) out = append(out, ResponsesInputItem{ Type: "function_call_output", CallID: toResponsesCallID(b.ToolUseID), - Output: outputText, + Output: jsonRawString(outputText), }) toolResultImageParts = append(toolResultImageParts, imageParts...) } @@ -302,7 +302,7 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e Type: "function_call", CallID: fcID, Name: b.Name, - Arguments: args, + Arguments: jsonRawString(args), }) } diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go index de8ab78df89..d706339340d 100644 --- a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go +++ b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go @@ -5,6 +5,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "strings" "time" ) @@ -151,10 +152,20 @@ type AnthropicEventToResponsesState struct { // For message output: accumulate text parts ContentIndex int + // CurrentText accumulates the message's output_text so the terminal + // output_item.done can carry the full content. codex collects final text + // from OutputItemDone items, not from output_text.delta events, so the + // message item MUST include content:[{type:output_text,text:...}]. 
+ CurrentText string // For function_call: track per-output info CurrentCallID string CurrentName string + // CurrentArguments accumulates the function_call's argument JSON so the + // terminal output_item.done (and arguments.done) can carry the full args. + // codex reads the tool call from the OutputItemDone item; without + // call_id/name/arguments it cannot execute the tool and stalls. + CurrentArguments string // Usage from message_start / message_delta. InputTokens here follows // Anthropic semantics (excludes cached tokens); they are added back when @@ -278,6 +289,7 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi state.CurrentItemID = generateItemID() state.CurrentItemType = "message" state.ContentIndex = 0 + state.CurrentText = "" events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{ OutputIndex: state.OutputIndex, @@ -288,6 +300,21 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi Status: "in_progress", }, })) + + // Emit response.content_part.added so clients (e.g. codex) know a + // text content part is starting. Without it the subsequent + // output_text.delta events have no part to attach to and the client + // renders nothing. Reverse of anthToResHandleContentBlockStop's + // content_part.done. 
+ events = append(events, makeResponsesEvent(state, "response.content_part.added", &ResponsesStreamEvent{ + OutputIndex: state.OutputIndex, + ContentIndex: state.ContentIndex, + ItemID: state.CurrentItemID, + Part: &ResponsesContentPart{ + Type: "output_text", + Text: "", + }, + })) } case "tool_use": @@ -298,6 +325,7 @@ func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *Anthropi state.CurrentItemType = "function_call" state.CurrentCallID = toResponsesCallID(evt.ContentBlock.ID) state.CurrentName = evt.ContentBlock.Name + state.CurrentArguments = "" events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{ OutputIndex: state.OutputIndex, @@ -324,6 +352,7 @@ func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *Anthropi if evt.Delta.Text == "" { return nil } + state.CurrentText += evt.Delta.Text return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{ OutputIndex: state.OutputIndex, ContentIndex: state.ContentIndex, @@ -346,6 +375,7 @@ func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *Anthropi if evt.Delta.PartialJSON == "" { return nil } + state.CurrentArguments += evt.Delta.PartialJSON return []ResponsesStreamEvent{makeResponsesEvent(state, "response.function_call_arguments.delta", &ResponsesStreamEvent{ OutputIndex: state.OutputIndex, Delta: evt.Delta.PartialJSON, @@ -384,18 +414,32 @@ func anthToResHandleContentBlockStop(evt *AnthropicStreamEvent, state *Anthropic ItemID: state.CurrentItemID, CallID: state.CurrentCallID, Name: state.CurrentName, + Arguments: nonEmptyArguments(state.CurrentArguments), }), } events = append(events, closeCurrentResponsesItem(state)...) return events case "message": - // Emit output_text.done (text block is done, but message item stays open for potential more blocks) + // Text block done: emit output_text.done then content_part.done. 
+ // The message item stays open for potential more blocks; it is closed + // later by closeCurrentResponsesItem. content_part.done mirrors the + // content_part.added emitted in anthToResHandleContentBlockStart. return []ResponsesStreamEvent{ makeResponsesEvent(state, "response.output_text.done", &ResponsesStreamEvent{ OutputIndex: state.OutputIndex, ContentIndex: state.ContentIndex, ItemID: state.CurrentItemID, + Text: state.CurrentText, + }), + makeResponsesEvent(state, "response.content_part.done", &ResponsesStreamEvent{ + OutputIndex: state.OutputIndex, + ContentIndex: state.ContentIndex, + ItemID: state.CurrentItemID, + Part: &ResponsesContentPart{ + Type: "output_text", + Text: state.CurrentText, + }, }), } } @@ -450,25 +494,57 @@ func closeCurrentResponsesItem(state *AnthropicEventToResponsesState) []Response itemType := state.CurrentItemType itemID := state.CurrentItemID + currentText := state.CurrentText + currentCallID := state.CurrentCallID + currentName := state.CurrentName + currentArgs := state.CurrentArguments // Reset state.CurrentItemType = "" state.CurrentItemID = "" state.CurrentCallID = "" state.CurrentName = "" + state.CurrentText = "" + state.CurrentArguments = "" state.OutputIndex++ state.ContentIndex = 0 + // The terminal item carries its full content. codex collects final output + // from OutputItemDone items (not from the delta events), so an item missing + // its content/arguments renders blank or cannot be executed as a tool call. 
+ doneItem := &ResponsesOutput{ + Type: itemType, + ID: itemID, + Status: "completed", + } + switch itemType { + case "message": + doneItem.Role = "assistant" + doneItem.Content = []ResponsesContentPart{{ + Type: "output_text", + Text: currentText, + }} + case "function_call": + doneItem.CallID = currentCallID + doneItem.Name = currentName + doneItem.Arguments = nonEmptyArguments(currentArgs) + } + return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{ OutputIndex: state.OutputIndex - 1, // Use the index before increment - Item: &ResponsesOutput{ - Type: itemType, - ID: itemID, - Status: "completed", - }, + Item: doneItem, })} } +// nonEmptyArguments replaces blank function_call arguments with "{}"; non-blank input is returned as-is and is not validated as JSON. Anthropic +// tool_use with no input produces an empty string; codex expects at least "{}". +func nonEmptyArguments(args string) string { + if strings.TrimSpace(args) == "" { + return "{}" + } + return args +} + func makeResponsesCreatedEvent(state *AnthropicEventToResponsesState) ResponsesStreamEvent { seq := state.SequenceNumber state.SequenceNumber++ diff --git a/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go new file mode 100644 index 00000000000..d2aa04bbafa --- /dev/null +++ b/backend/internal/pkg/apicompat/chatcompletions_empty_delta_test.go @@ -0,0 +1,201 @@ +package apicompat + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// strptr is a local helper for *string fields. +func strptr(s string) *string { return &s } + +// Reproduces the mimo "thinking done, nothing shown" bug: the upstream emits a +// leading {"content":""} chunk (non-nil, empty).
The bridge must NOT emit a +// response.output_text.delta for it (the delta would serialize empty and a +// premature message item would be created), and must still stream the real +// content that follows. +func TestChatChunkToResponses_SkipsEmptyContentDelta(t *testing.T) { + state := NewChatCompletionsToResponsesStreamState("mimo-v2.5") + + // chunk 1: empty content (some upstreams send a leading empty chunk) — no text delta + c1 := &ChatCompletionsChunk{ + ID: "c1", + Choices: []ChatChunkChoice{{Delta: ChatDelta{Role: "assistant", Content: strptr("")}}}, + } + ev1 := ChatCompletionsChunkToResponsesEvents(c1, state) + for _, e := range ev1 { + assert.NotEqual(t, "response.output_text.delta", e.Type, + "empty content must not emit an output_text delta") + } + + // chunk 2: real content — must emit a delta carrying the text + c2 := &ChatCompletionsChunk{ + ID: "c1", + Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hello")}}}, + } + ev2 := ChatCompletionsChunkToResponsesEvents(c2, state) + var sawDelta bool + for _, e := range ev2 { + if e.Type == "response.output_text.delta" { + sawDelta = true + assert.Equal(t, "Hello", e.Delta) + } + } + assert.True(t, sawDelta, "real content must emit an output_text delta") +} + +func TestChatChunkToResponses_SkipsEmptyReasoningDelta(t *testing.T) { + state := NewChatCompletionsToResponsesStreamState("mimo-v2.5") + c := &ChatCompletionsChunk{ + ID: "c1", + Choices: []ChatChunkChoice{{Delta: ChatDelta{ReasoningContent: strptr("")}}}, + } + ev := ChatCompletionsChunkToResponsesEvents(c, state) + for _, e := range ev { + assert.NotEqual(t, "response.reasoning_summary_text.delta", e.Type, + "empty reasoning_content must not emit a reasoning delta") + } +} + +// Full mimo-shaped stream: empty content → reasoning → real content. The final +// visible text must be exactly the real content, and at least one non-empty +// output_text delta must reach the client. 
+func TestChatChunkToResponses_MimoShapedStream(t *testing.T) { + state := NewChatCompletionsToResponsesStreamState("mimo-v2.5") + chunks := []*ChatCompletionsChunk{ + {ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Role: "assistant", Content: strptr("")}}}}, + {ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{ReasoningContent: strptr("thinking...")}}}}, + {ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hi")}}}}, + {ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("!")}}}}, + } + var textDeltas []string + for _, c := range chunks { + for _, e := range ChatCompletionsChunkToResponsesEvents(c, state) { + if e.Type == "response.output_text.delta" { + textDeltas = append(textDeltas, e.Delta) + } + } + } + // every emitted text delta is non-empty + for _, d := range textDeltas { + assert.NotEqual(t, "", d) + } + assert.Equal(t, "Hi!", strings.Join(textDeltas, "")) +} + +// codex requires response.content_part.added before output_text deltas and +// content_part.done at the end; without them it renders nothing. 
+func TestChatChunkToResponses_EmitsContentPartEvents(t *testing.T) { + state := NewChatCompletionsToResponsesStreamState("mimo-v2.5") + var types []string + for _, c := range []*ChatCompletionsChunk{ + {ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hi")}}}}, + } { + for _, e := range ChatCompletionsChunkToResponsesEvents(c, state) { + types = append(types, e.Type) + } + } + for _, e := range FinalizeChatCompletionsResponsesStream(state) { + types = append(types, e.Type) + } + assert.Contains(t, types, "response.content_part.added") + assert.Contains(t, types, "response.content_part.done") + // content_part.added must come before the first output_text.delta + iAdded, iDelta := -1, -1 + for i, ty := range types { + if ty == "response.content_part.added" && iAdded < 0 { + iAdded = i + } + if ty == "response.output_text.delta" && iDelta < 0 { + iDelta = i + } + } + assert.GreaterOrEqual(t, iDelta, 0) + assert.GreaterOrEqual(t, iAdded, 0) + assert.Less(t, iAdded, iDelta, "content_part.added must precede output_text.delta") +} + +// codex collects final text from OutputItemDone items, so the message item in +// response.output_item.done must carry content with the accumulated text. 
+func TestChatChunkToResponses_OutputItemDoneCarriesContent(t *testing.T) { + state := NewChatCompletionsToResponsesStreamState("mimo-v2.5") + for _, c := range []*ChatCompletionsChunk{ + {ID: "x", Choices: []ChatChunkChoice{{Delta: ChatDelta{Content: strptr("Hello world")}}}}, + } { + ChatCompletionsChunkToResponsesEvents(c, state) + } + var found bool + for _, e := range FinalizeChatCompletionsResponsesStream(state) { + if e.Type == "response.output_item.done" && e.Item != nil && e.Item.Type == "message" { + found = true + require.Len(t, e.Item.Content, 1) + assert.Equal(t, "output_text", e.Item.Content[0].Type) + assert.Equal(t, "Hello world", e.Item.Content[0].Text) + } + } + assert.True(t, found, "must emit message output_item.done with content") +} + +// Some chat/completions upstreams reject reasoning_effort "xhigh" +// (only low/medium/high allowed). It must be normalized to high. +func TestResponsesToChatCompletions_XhighReasoningNormalized(t *testing.T) { + body := []byte(`{"model":"gpt-5.5","reasoning":{"effort":"xhigh"},"input":[{"role":"user","content":[{"type":"input_text","text":"hi"}]}]}`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + cc, err := ResponsesToChatCompletionsRequest(&req) + require.NoError(t, err) + assert.Equal(t, "high", cc.ReasoningEffort, "xhigh must be normalized to high for chat/completions") +} + +func TestNormalizeChatReasoningEffort(t *testing.T) { + assert.Equal(t, "high", normalizeChatReasoningEffort("xhigh")) + assert.Equal(t, "high", normalizeChatReasoningEffort("high")) + assert.Equal(t, "high", normalizeChatReasoningEffort("max")) + assert.Equal(t, "medium", normalizeChatReasoningEffort("medium")) + assert.Equal(t, "low", normalizeChatReasoningEffort("low")) + assert.Equal(t, "low", normalizeChatReasoningEffort("minimal")) + assert.Equal(t, "", normalizeChatReasoningEffort("")) + assert.Equal(t, "", normalizeChatReasoningEffort("bogus")) +} + +// mimo and other chat/completions upstreams 
stream tool calls; the bridge must +// emit terminal function_call_arguments.done + output_item.done (with +// call_id/name/arguments) at stream end, or codex receives an unterminated +// tool call and stalls/renders blank. +func TestChatChunkToResponses_StreamedToolCallFinalized(t *testing.T) { + state := NewChatCompletionsToResponsesStreamState("test-reasoning-model") + idx := 0 + chunk := &ChatCompletionsChunk{ + ID: "x", + Choices: []ChatChunkChoice{{Delta: ChatDelta{ToolCalls: []ChatToolCall{{ + Index: &idx, + ID: "call_abc", + Type: "function", + Function: ChatFunctionCall{Name: "open_browser", Arguments: `{"url":"google.com"}`}, + }}}}}, + } + ChatCompletionsChunkToResponsesEvents(chunk, state) + final := FinalizeChatCompletionsResponsesStream(state) + + var argsDone, itemDone *ResponsesStreamEvent + for i := range final { + switch final[i].Type { + case "response.function_call_arguments.done": + argsDone = &final[i] + case "response.output_item.done": + if final[i].Item != nil && final[i].Item.Type == "function_call" { + itemDone = &final[i] + } + } + } + require.NotNil(t, argsDone, "must emit function_call_arguments.done") + assert.Equal(t, "call_abc", argsDone.CallID) + assert.JSONEq(t, `{"url":"google.com"}`, argsDone.Arguments) + require.NotNil(t, itemDone, "must emit function_call output_item.done") + assert.Equal(t, "call_abc", itemDone.Item.CallID) + assert.Equal(t, "open_browser", itemDone.Item.Name) + assert.JSONEq(t, `{"url":"google.com"}`, itemDone.Item.Arguments) +} diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go index 09b680c7c73..b261519e335 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go @@ -30,7 +30,7 @@ func ResponsesToChatCompletionsRequest(req *ResponsesRequest) (*ChatCompletionsR ServiceTier: req.ServiceTier, } if req.Reasoning != nil 
{ - out.ReasoningEffort = req.Reasoning.Effort + out.ReasoningEffort = normalizeChatReasoningEffort(req.Reasoning.Effort) } if len(req.Tools) > 0 { out.Tools = responsesToolsToChatTools(req.Tools) @@ -93,7 +93,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage) itemType := rawString(item["type"]) switch itemType { case "function_call": - arguments := rawString(item["arguments"]) + arguments := responsesArgumentsToChatString(item["arguments"]) if strings.TrimSpace(arguments) == "" { arguments = "{}" } @@ -110,7 +110,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage) }) continue case "function_call_output": - content, _ := json.Marshal(rawString(item["output"])) + content, _ := json.Marshal(extractResponsesOutputText(item["output"])) messages = append(messages, ChatMessage{ Role: "tool", ToolCallID: rawString(item["call_id"]), @@ -490,7 +490,12 @@ func ChatCompletionsChunkToResponsesEvents( events = append(events, ensureChatToResponsesCreated(state)...) for _, choice := range chunk.Choices { - if choice.Delta.Content != nil { + // Skip empty-string content deltas. Some upstreams emit a + // leading {"content":""} chunk; it is non-nil but carries no text, and + // emitting it produces a response.output_text.delta with an empty delta + // (omitempty drops the field entirely) plus a premature message item — + // codex then shows "thinking" with no visible output. + if choice.Delta.Content != nil && *choice.Delta.Content != "" { events = append(events, ensureChatToResponsesMessageItem(state)...) 
_, _ = state.Text.WriteString(*choice.Delta.Content) events = append(events, chatToResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{ @@ -500,7 +505,7 @@ func ChatCompletionsChunkToResponsesEvents( ItemID: state.MessageItemID, })) } - if choice.Delta.ReasoningContent != nil { + if choice.Delta.ReasoningContent != nil && *choice.Delta.ReasoningContent != "" { _, _ = state.Reasoning.WriteString(*choice.Delta.ReasoningContent) events = append(events, chatToResponsesEvent(state, "response.reasoning_summary_text.delta", &ResponsesStreamEvent{ OutputIndex: 0, @@ -520,6 +525,10 @@ func ChatCompletionsChunkToResponsesEvents( copyCall.ID = generateItemID() } copyCall.Type = "function" + // Arguments are accumulated below (line: stored.Function.Arguments + // += ...). Clear them here so the first chunk's arguments are not + // counted twice (which produced duplicated JSON like `{...}{...}`). + copyCall.Function.Arguments = "" state.ToolCalls[idx] = &copyCall stored = &copyCall events = append(events, chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{ @@ -572,6 +581,16 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr Text: state.Text.String(), ItemID: state.MessageItemID, })) + // content_part.done mirrors content_part.added from ensureChatToResponsesMessageItem.
+ events = append(events, chatToResponsesEvent(state, "response.content_part.done", &ResponsesStreamEvent{ + OutputIndex: 0, + ContentIndex: 0, + ItemID: state.MessageItemID, + Part: &ResponsesContentPart{ + Type: "output_text", + Text: state.Text.String(), + }, + })) events = append(events, chatToResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{ OutputIndex: 0, Item: &ResponsesOutput{ @@ -579,6 +598,12 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr ID: state.MessageItemID, Role: "assistant", Status: "completed", + // codex collects final text from OutputItemDone items, so the + // message item must carry its full content, not just status. + Content: []ResponsesContentPart{{ + Type: "output_text", + Text: state.Text.String(), + }}, }, })) } @@ -590,6 +615,39 @@ func FinalizeChatCompletionsResponsesStream(state *ChatCompletionsToResponsesStr incompleteDetails = &ResponsesIncompleteDetails{Reason: "max_output_tokens"} } + // Finalize streamed tool calls. The streaming loop emits + // output_item.added + function_call_arguments.delta per tool call but never + // their terminal events; without function_call_arguments.done and + // output_item.done (carrying call_id/name/arguments) codex receives an + // unterminated tool call, cannot execute it, and renders nothing. 
+ for i := 0; i < len(state.ToolCalls); i++ { + toolCall, ok := state.ToolCalls[i] + if !ok || toolCall == nil { + continue + } + arguments := toolCall.Function.Arguments + if strings.TrimSpace(arguments) == "" { + arguments = "{}" + } + events = append(events, chatToResponsesEvent(state, "response.function_call_arguments.done", &ResponsesStreamEvent{ + OutputIndex: i + 1, + CallID: toolCall.ID, + Name: toolCall.Function.Name, + Arguments: arguments, + })) + events = append(events, chatToResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{ + OutputIndex: i + 1, + Item: &ResponsesOutput{ + Type: "function_call", + ID: generateItemID(), + CallID: toolCall.ID, + Name: toolCall.Function.Name, + Arguments: arguments, + Status: "completed", + }, + })) + } + state.CompletedSent = true events = append(events, chatToResponsesEvent(state, "response.completed", &ResponsesStreamEvent{ Response: &ResponsesResponse{ @@ -626,15 +684,28 @@ func ensureChatToResponsesMessageItem(state *ChatCompletionsToResponsesStreamSta return nil } state.MessageItemID = generateItemID() - return []ResponsesStreamEvent{chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{ - OutputIndex: 0, - Item: &ResponsesOutput{ - Type: "message", - ID: state.MessageItemID, - Role: "assistant", - Status: "in_progress", - }, - })} + return []ResponsesStreamEvent{ + chatToResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{ + OutputIndex: 0, + Item: &ResponsesOutput{ + Type: "message", + ID: state.MessageItemID, + Role: "assistant", + Status: "in_progress", + }, + }), + // content_part.added must precede output_text.delta or strict clients + // (codex) have no part to attach text to and render nothing. 
+ chatToResponsesEvent(state, "response.content_part.added", &ResponsesStreamEvent{ + OutputIndex: 0, + ContentIndex: 0, + ItemID: state.MessageItemID, + Part: &ResponsesContentPart{ + Type: "output_text", + Text: "", + }, + }), + } } func (state *ChatCompletionsToResponsesStreamState) chatOutput() []ResponsesOutput { @@ -695,6 +766,45 @@ func chatToResponsesEvent( return evt } +// normalizeChatReasoningEffort maps a Responses reasoning effort to a value the +// Chat Completions protocol accepts. The Responses API allows "xhigh" (codex's +// highest tier for gpt-5.5 etc.), but chat/completions upstreams (and the +// OpenAI chat/completions schema) only accept low/medium/high and reject +// "xhigh" with HTTP 400. Map xhigh/extrahigh/max→high and minimal/none→low; pass through low/medium/high; drop unknown/empty. +func normalizeChatReasoningEffort(effort string) string { + switch strings.ToLower(strings.TrimSpace(effort)) { + case "xhigh", "extrahigh", "max", "high": + return "high" + case "medium": + return "medium" + case "low", "minimal", "none": + return "low" + default: + return "" // omit unknown/empty so the upstream uses its default + } +} + +// responsesArgumentsToChatString converts a Responses function_call.arguments +// field into the stringified-JSON form required by Chat Completions +// (ChatFunctionCall.Arguments is a string). +// +// - stringified JSON: "{\"x\":1}" → use the inner string as-is +// - raw JSON object: {"x":1} → serialize to its string form +// - empty/absent → "" +func responsesArgumentsToChatString(raw json.RawMessage) string { + trimmed := json.RawMessage(strings.TrimSpace(string(raw))) + if len(trimmed) == 0 || string(trimmed) == "null" { + return "" + } + // Already a JSON string — return the inner value verbatim. + var s string + if err := json.Unmarshal(trimmed, &s); err == nil { + return s + } + // Object/array/other JSON — return the whitespace-trimmed raw JSON text unchanged (not re-compacted).
+ return string(trimmed) +} + func rawString(raw json.RawMessage) string { raw = bytesTrimSpace(raw) if len(raw) == 0 || string(raw) == "null" { diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go index b03b012fc7a..c0c7384b1b2 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go @@ -105,7 +105,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) { // Check function_call_output item assert.Equal(t, "function_call_output", items[2].Type) assert.Equal(t, "call_1", items[2].CallID) - assert.Equal(t, "pong", items[2].Output) + assert.Equal(t, `"pong"`, string(items[2].Output)) // Check tools require.Len(t, resp.Tools, 1) @@ -614,7 +614,7 @@ func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) { require.Len(t, items, 3) assert.Equal(t, "function_call_output", items[2].Type) assert.Equal(t, "call_1", items[2].CallID) - assert.Equal(t, "image width: 100; image height: 200", items[2].Output) + assert.Equal(t, `"image width: 100; image height: 200"`, string(items[2].Output)) } func TestResponsesToChatCompletions_Incomplete(t *testing.T) { diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go index 463bdd0d15d..a7459bdeb45 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go +++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go @@ -194,7 +194,7 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) { Type: "function_call", CallID: tc.ID, Name: tc.Function.Name, - Arguments: args, + Arguments: jsonRawString(args), }) } @@ -284,7 +284,7 @@ func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) { return []ResponsesInputItem{{ Type: "function_call_output", CallID: m.ToolCallID, - Output: output, + Output: 
jsonRawString(output), }}, nil } @@ -302,7 +302,7 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) { return []ResponsesInputItem{{ Type: "function_call_output", CallID: m.Name, - Output: output, + Output: jsonRawString(output), }}, nil } diff --git a/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go b/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go new file mode 100644 index 00000000000..e0108512de4 --- /dev/null +++ b/backend/internal/pkg/apicompat/responses_input_item_polymorphic_test.go @@ -0,0 +1,191 @@ +package apicompat + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// These tests cover the fix for codex (and newer Responses clients) sending +// function_call.arguments as a JSON object and function_call_output.output as +// a JSON array. Before the fix, ResponsesInputItem.Arguments / .Output were +// typed `string`, so json.Unmarshal failed: +// - Responses→Anthropic path (ResponsesToAnthropicRequest): HTTP 502 +// - Responses→ChatCompletions path (ResponsesToChatCompletionsRequest): +// silent data loss (rawString returned "" for non-string values) + +// --- helper-level tests --------------------------------------------------- + +func TestNormalizeResponsesArguments(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + {"object", `{"x":1}`, `{"x":1}`}, + {"stringified", `"{\"x\":1}"`, `{"x":1}`}, + {"empty string", `""`, `{}`}, + {"empty raw", ``, `{}`}, + {"null", `null`, `{}`}, + {"non-json string", `"not json"`, `{}`}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := normalizeResponsesArguments(json.RawMessage(tc.in)) + assert.JSONEq(t, tc.want, string(got)) + }) + } +} + +func TestExtractResponsesOutputText(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + {"plain string", `"result"`, "result"}, + {"array 
one part", `[{"type":"output_text","text":"result"}]`, "result"}, + {"array two parts", `[{"type":"output_text","text":"a"},{"type":"output_text","text":"b"}]`, "a\n\nb"}, + {"empty raw", ``, ""}, + {"null", `null`, ""}, + {"empty array", `[]`, ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := extractResponsesOutputText(json.RawMessage(tc.in)) + assert.Equal(t, tc.want, got) + }) + } +} + +// --- Responses→Anthropic path: must not 502 ---------------------------- + +func TestResponsesToAnthropicRequest_FunctionCallObjectArguments(t *testing.T) { + body := []byte(`{ + "model": "claude-opus-4-8", + "input": [ + {"type": "function_call", "call_id": "c1", "name": "foo", "arguments": {"x": 1}} + ] + }`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + + anth, err := ResponsesToAnthropicRequest(&req) + require.NoError(t, err) // before fix: "cannot unmarshal object ... arguments of type string" + require.NotNil(t, anth) + + require.Len(t, anth.Messages, 1) + var blocks []AnthropicContentBlock + require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks)) + require.Len(t, blocks, 1) + assert.Equal(t, "tool_use", blocks[0].Type) + assert.Equal(t, "foo", blocks[0].Name) + assert.JSONEq(t, `{"x":1}`, string(blocks[0].Input)) +} + +func TestResponsesToAnthropicRequest_FunctionCallStringifiedArguments(t *testing.T) { + body := []byte(`{ + "model": "claude-opus-4-8", + "input": [ + {"type": "function_call", "call_id": "c1", "name": "foo", "arguments": "{\"x\":1}"} + ] + }`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + + anth, err := ResponsesToAnthropicRequest(&req) + require.NoError(t, err) + + require.Len(t, anth.Messages, 1) + var blocks []AnthropicContentBlock + require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks)) + require.Len(t, blocks, 1) + assert.JSONEq(t, `{"x":1}`, string(blocks[0].Input)) +} + +func 
TestResponsesToAnthropicRequest_FunctionCallOutputArray(t *testing.T) { + body := []byte(`{ + "model": "claude-opus-4-8", + "input": [ + {"type": "function_call_output", "call_id": "c1", + "output": [{"type": "output_text", "text": "result"}]} + ] + }`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + + anth, err := ResponsesToAnthropicRequest(&req) + require.NoError(t, err) // before fix: "cannot unmarshal array ... output of type string" + require.NotNil(t, anth) + + require.Len(t, anth.Messages, 1) + var blocks []AnthropicContentBlock + require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks)) + require.Len(t, blocks, 1) + assert.Equal(t, "tool_result", blocks[0].Type) + assert.Equal(t, "toolu_c1", blocks[0].ToolUseID) // call_id is namespaced for Anthropic + assert.JSONEq(t, `"result"`, string(blocks[0].Content)) +} + +func TestResponsesToAnthropicRequest_FunctionCallOutputString(t *testing.T) { + // Backward compatibility: older clients send output as a plain string. 
+ body := []byte(`{ + "model": "claude-opus-4-8", + "input": [ + {"type": "function_call_output", "call_id": "c1", "output": "result"} + ] + }`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + + anth, err := ResponsesToAnthropicRequest(&req) + require.NoError(t, err) + + require.Len(t, anth.Messages, 1) + var blocks []AnthropicContentBlock + require.NoError(t, json.Unmarshal(anth.Messages[0].Content, &blocks)) + require.Len(t, blocks, 1) + assert.JSONEq(t, `"result"`, string(blocks[0].Content)) +} + +// --- Responses→ChatCompletions path: must not drop data ---------------- + +func TestResponsesToChatCompletionsRequest_FunctionCallObjectArguments(t *testing.T) { + body := []byte(`{ + "model": "gpt-5.4", + "input": [ + {"type": "function_call", "call_id": "c1", "name": "foo", "arguments": {"x": 1}} + ] + }`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + + cc, err := ResponsesToChatCompletionsRequest(&req) + require.NoError(t, err) + require.Len(t, cc.Messages, 1) + require.Len(t, cc.Messages[0].ToolCalls, 1) + // Chat Completions requires arguments to be a stringified JSON object; + // before the fix rawString returned "" and it degraded to "{}". + assert.JSONEq(t, `{"x":1}`, cc.Messages[0].ToolCalls[0].Function.Arguments) +} + +func TestResponsesToChatCompletionsRequest_FunctionCallOutputArray(t *testing.T) { + body := []byte(`{ + "model": "gpt-5.4", + "input": [ + {"type": "function_call_output", "call_id": "c1", + "output": [{"type": "output_text", "text": "result"}]} + ] + }`) + var req ResponsesRequest + require.NoError(t, json.Unmarshal(body, &req)) + + cc, err := ResponsesToChatCompletionsRequest(&req) + require.NoError(t, err) + require.Len(t, cc.Messages, 1) + assert.Equal(t, "tool", cc.Messages[0].Role) + // before the fix rawString returned "" → tool result content lost. 
+ assert.JSONEq(t, `"result"`, string(cc.Messages[0].Content)) +} diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go index 8fa652f2bd1..37c8f258e13 100644 --- a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go +++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go @@ -11,7 +11,7 @@ import ( // enables Anthropic platform groups to accept OpenAI Responses API requests // by converting them to the native /v1/messages format before forwarding upstream. func ResponsesToAnthropicRequest(req *ResponsesRequest) (*AnthropicRequest, error) { - system, messages, err := convertResponsesInputToAnthropic(req.Input) + system, messages, err := convertResponsesInputToAnthropic(req.Instructions, req.Input) if err != nil { return nil, err } @@ -98,14 +98,27 @@ func mapResponsesEffortToAnthropic(effort string) string { } // convertResponsesInputToAnthropic extracts system prompt and messages from -// a Responses API input array. Returns the system as raw JSON (for Anthropic's -// polymorphic system field) and a list of Anthropic messages. -func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) { +// a Responses API request. The system prompt is sourced from (in priority +// order, concatenated): the top-level `instructions` field (codex's primary +// system prompt) and any system/developer role items in the input array. +// Returns the system as raw JSON (for Anthropic's polymorphic system field) +// and a list of Anthropic messages. +// +// codex sends its ~20KB system prompt in `instructions` and additional context +// in `developer` role items; both must map to Anthropic's system field, not be +// dropped (the old code ignored both, leaving claude without instructions) nor +// leaked into a user message as raw input_text blocks (which caused 422). 
+func convertResponsesInputToAnthropic(instructions string, inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) { + var systemParts []string + if s := strings.TrimSpace(instructions); s != "" { + systemParts = append(systemParts, s) + } + // Try as plain string input. var inputStr string if err := json.Unmarshal(inputRaw, &inputStr); err == nil { content, _ := json.Marshal(inputStr) - return nil, []AnthropicMessage{{Role: "user", Content: content}}, nil + return buildSystemJSON(systemParts), []AnthropicMessage{{Role: "user", Content: content}}, nil } var items []ResponsesInputItem @@ -113,29 +126,23 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage return nil, nil, fmt.Errorf("parse responses input: %w", err) } - var system json.RawMessage var messages []AnthropicMessage for _, item := range items { switch { - case item.Role == "system": - // System prompt → Anthropic system field - text := extractTextFromContent(item.Content) - if text != "" { - system, _ = json.Marshal(text) + case item.Role == "system" || item.Role == "developer": + // system / developer → Anthropic system field + if text := strings.TrimSpace(extractTextFromContent(item.Content)); text != "" { + systemParts = append(systemParts, text) } case item.Type == "function_call": // function_call → assistant message with tool_use block - input := json.RawMessage("{}") - if item.Arguments != "" { - input = json.RawMessage(item.Arguments) - } block := AnthropicContentBlock{ Type: "tool_use", ID: fromResponsesCallIDToAnthropic(item.CallID), Name: item.Name, - Input: input, + Input: normalizeResponsesArguments(item.Arguments), } blockJSON, _ := json.Marshal([]AnthropicContentBlock{block}) messages = append(messages, AnthropicMessage{ @@ -145,7 +152,7 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage case item.Type == "function_call_output": // function_call_output → user message with tool_result block - outputContent := 
item.Output + outputContent := extractResponsesOutputText(item.Output) if outputContent == "" { outputContent = "(empty)" } @@ -195,7 +202,31 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage // Merge consecutive same-role messages (Anthropic requires alternating roles) messages = mergeConsecutiveMessages(messages) - return system, messages, nil + return buildSystemJSON(systemParts), messages, nil +} + +// buildSystemJSON joins collected system prompt fragments into Anthropic's +// system field. Returns nil when there is no non-empty content, so the system +// field is omitted entirely — Anthropic returns 422 for an empty or +// whitespace-only system. +// +// The system is emitted in ARRAY form ([{"type":"text","text":...}]), not as a +// bare JSON string. Both are valid per the Anthropic spec and the official +// Claude Code client uses the array form, but some third-party Anthropic- +// compatible upstreams return 422 when a string-form system is +// combined with tools. The array form works in every case. +func buildSystemJSON(parts []string) json.RawMessage { + joined := strings.TrimSpace(strings.Join(parts, "\n\n")) + if joined == "" { + return nil + } + out, err := json.Marshal([]map[string]string{ + {"type": "text", "text": joined}, + }) + if err != nil { + return nil + } + return out } // extractTextFromContent extracts text from a content field that may be a @@ -386,30 +417,29 @@ func parseContentBlocks(raw json.RawMessage) []AnthropicContentBlock { // convertResponsesToAnthropicTools maps Responses API tools to Anthropic format. // Reverse of convertAnthropicToolsToResponses. +// +// Every emitted tool must carry a valid input_schema: Anthropic rejects the +// whole request with 422 if any tool has a null/missing schema. Responses tools +// of type "namespace" (codex MCP/agent tools) and bare "web_search" carry no +// `parameters`, so they must be backfilled with an empty object schema. 
+// +// web_search is intentionally NOT translated to the Anthropic server-side +// web_search_20250305 tool: some third-party Anthropic-compatible upstreams do +// not implement server tools and return 422. Emitting it as a regular function +// tool keeps the request valid; the upstream model simply sees a callable +// named web_search. func convertResponsesToAnthropicTools(tools []ResponsesTool) []AnthropicTool { var out []AnthropicTool for _, t := range tools { - switch t.Type { - case "web_search", "google_search", "web_search_20250305": - out = append(out, AnthropicTool{ - Type: "web_search_20250305", - Name: "web_search", - }) - case "function": - out = append(out, AnthropicTool{ - Name: t.Name, - Description: t.Description, - InputSchema: normalizeAnthropicInputSchema(t.Parameters), - }) - default: - // Pass through unknown tool types - out = append(out, AnthropicTool{ - Type: t.Type, - Name: t.Name, - Description: t.Description, - InputSchema: t.Parameters, - }) + name := t.Name + if name == "" && t.Type == "web_search" { + name = "web_search" } + out = append(out, AnthropicTool{ + Name: name, + Description: t.Description, + InputSchema: normalizeAnthropicInputSchema(t.Parameters), + }) } return out } @@ -471,3 +501,71 @@ func convertResponsesToAnthropicToolChoice(raw json.RawMessage) (json.RawMessage // Pass through unknown return raw, nil } + +// normalizeResponsesArguments converts a Responses function_call.arguments +// field into a JSON object suitable for Anthropic's tool_use.input. +// +// The arguments field has three observed shapes: +// - stringified JSON: "{\"x\":1}" → unwrap one layer → {"x":1} +// - raw JSON object: {"x":1} → use as-is +// - empty/absent → {} +// +// Anything that does not resolve to a JSON object falls back to {} so the +// upstream always receives a valid tool_use.input. 
+func normalizeResponsesArguments(raw json.RawMessage) json.RawMessage { + trimmed := json.RawMessage(strings.TrimSpace(string(raw))) + if len(trimmed) == 0 || string(trimmed) == "null" { + return json.RawMessage("{}") + } + + // Case 1: stringified JSON — unwrap one layer. + var s string + if err := json.Unmarshal(trimmed, &s); err == nil { + inner := strings.TrimSpace(s) + if inner == "" { + return json.RawMessage("{}") + } + if json.Valid([]byte(inner)) { + return json.RawMessage(inner) + } + return json.RawMessage("{}") + } + + // Case 2: already a JSON object/value — use as-is. + return trimmed +} + +// extractResponsesOutputText converts a Responses function_call_output.output +// field into a plain string for Anthropic's tool_result.content. +// +// The output field has three observed shapes: +// - plain string: "result" → use as-is +// - array of content parts: [{"type":"output_text",...}] → join the text +// - empty/absent → "" +func extractResponsesOutputText(raw json.RawMessage) string { + trimmed := json.RawMessage(strings.TrimSpace(string(raw))) + if len(trimmed) == 0 || string(trimmed) == "null" { + return "" + } + + // Case 1: plain string. + var s string + if err := json.Unmarshal(trimmed, &s); err == nil { + return s + } + + // Case 2: array of content parts. + var parts []ResponsesContentPart + if err := json.Unmarshal(trimmed, &parts); err == nil { + var texts []string + for _, p := range parts { + if p.Text != "" { + texts = append(texts, p.Text) + } + } + return strings.Join(texts, "\n\n") + } + + // Case 3: unknown structure — pass through raw JSON so content is not lost. 
+ return string(trimmed) +} diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go b/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go new file mode 100644 index 00000000000..5b270f8ffde --- /dev/null +++ b/backend/internal/pkg/apicompat/responses_to_anthropic_tools_system_test.go @@ -0,0 +1,201 @@ +package apicompat + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// These tests cover the codex → Responses → Anthropic conversion fixes that +// eliminated upstream 422s: +// 1. tools with no parameters (type "namespace"/"web_search") must get a +// valid input_schema, never null +// 2. web_search must be a regular function tool, not an Anthropic server tool +// (some third-party upstreams do not implement server tools → 422) +// 3. codex's top-level `instructions` must map to the Anthropic system field +// 4. `developer` role items must map to system, not leak as user input_text +// 5. an empty/whitespace system must be omitted (Anthropic 422s on empty system) + +func anthReqFrom(t *testing.T, body string) *AnthropicRequest { + t.Helper() + var req ResponsesRequest + require.NoError(t, json.Unmarshal([]byte(body), &req)) + out, err := ResponsesToAnthropicRequest(&req) + require.NoError(t, err) + return out +} + +// systemText extracts the concatenated text from an Anthropic system field, +// which buildSystemJSON emits in array form ([{"type":"text","text":...}]). 
+func systemText(t *testing.T, raw json.RawMessage) string { + t.Helper() + if len(raw) == 0 { + return "" + } + // array form + var parts []struct { + Type string `json:"type"` + Text string `json:"text"` + } + if err := json.Unmarshal(raw, &parts); err == nil { + var sb []string + for _, p := range parts { + sb = append(sb, p.Text) + } + return strings.Join(sb, "\n\n") + } + // string form (fallback) + var s string + require.NoError(t, json.Unmarshal(raw, &s)) + return s +} + +func TestResponsesToAnthropic_ToolWithoutParametersGetsSchema(t *testing.T) { + // codex namespace tools (mcp__*, multi_agent_v1, codex_app) carry no parameters. + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}], + "tools": [ + {"type":"namespace","name":"mcp__codegraph","description":"graph"}, + {"type":"namespace","name":"codex_app"} + ] + }`) + require.Len(t, out.Tools, 2) + for _, tool := range out.Tools { + require.NotEmpty(t, tool.InputSchema, "tool %s must have non-null input_schema", tool.Name) + assert.NotEqual(t, "null", string(tool.InputSchema)) + // must be a valid object schema + var sch map[string]any + require.NoError(t, json.Unmarshal(tool.InputSchema, &sch)) + assert.Equal(t, "object", sch["type"]) + } +} + +func TestResponsesToAnthropic_WebSearchIsFunctionToolNotServerTool(t *testing.T) { + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}], + "tools": [{"type":"web_search"}] + }`) + require.Len(t, out.Tools, 1) + tool := out.Tools[0] + assert.Equal(t, "web_search", tool.Name) + // must NOT be emitted as Anthropic server tool web_search_20250305 + assert.NotEqual(t, "web_search_20250305", tool.Type) + assert.Empty(t, tool.Type, "web_search must be a plain function tool, not a server tool") + require.NotEmpty(t, tool.InputSchema) + assert.NotEqual(t, "null", string(tool.InputSchema)) +} + +func 
TestResponsesToAnthropic_FunctionToolSchemaPreserved(t *testing.T) { + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}], + "tools": [{"type":"function","name":"exec","description":"run","parameters":{"type":"object","properties":{"cmd":{"type":"string"}}}}] + }`) + require.Len(t, out.Tools, 1) + assert.Equal(t, "exec", out.Tools[0].Name) + var sch map[string]any + require.NoError(t, json.Unmarshal(out.Tools[0].InputSchema, &sch)) + props, _ := sch["properties"].(map[string]any) + assert.Contains(t, props, "cmd") +} + +func TestResponsesToAnthropic_InstructionsBecomeSystem(t *testing.T) { + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "instructions": "You are a coding agent.", + "input": [{"role":"user","content":[{"type":"input_text","text":"hi"}]}] + }`) + require.NotEmpty(t, out.System) + sys := systemText(t, out.System) + assert.Contains(t, sys, "You are a coding agent.") +} + +func TestResponsesToAnthropic_DeveloperRoleBecomesSystem(t *testing.T) { + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "input": [ + {"role":"developer","content":[{"type":"input_text","text":"Follow the rules."}]}, + {"role":"user","content":[{"type":"input_text","text":"hi"}]} + ] + }`) + // developer content must be in system, not leaked into a user message + require.NotEmpty(t, out.System) + sys := systemText(t, out.System) + assert.Contains(t, sys, "Follow the rules.") + + // no message content may carry input_text (Anthropic only knows "text") + for _, m := range out.Messages { + assert.NotContains(t, string(m.Content), "input_text", + "input_text must not leak into Anthropic messages") + } +} + +func TestResponsesToAnthropic_InstructionsAndDeveloperConcatenated(t *testing.T) { + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "instructions": "Primary prompt.", + "input": [ + {"role":"developer","content":[{"type":"input_text","text":"Extra context."}]}, + 
{"role":"user","content":[{"type":"input_text","text":"hi"}]} + ] + }`) + sys := systemText(t, out.System) + assert.Contains(t, sys, "Primary prompt.") + assert.Contains(t, sys, "Extra context.") +} + +func TestResponsesToAnthropic_EmptySystemOmitted(t *testing.T) { + // No instructions, no system/developer items → System must be nil/absent, + // never an empty or whitespace string (Anthropic 422s on empty system). + out := anthReqFrom(t, `{ + "model": "claude-opus-4-8", + "instructions": " ", + "input": [ + {"role":"developer","content":[{"type":"input_text","text":" "}]}, + {"role":"user","content":[{"type":"input_text","text":"hi"}]} + ] + }`) + if len(out.System) > 0 { + sys := systemText(t, out.System) + assert.NotEqual(t, "", strings.TrimSpace(sys), "system must never be empty/whitespace") + } +} + +// codex reads the tool call from the OutputItemDone item, so a streamed +// function_call's output_item.done must carry call_id, name and arguments — +// without them codex cannot execute the tool and stalls. +func TestAnthropicStream_FunctionCallDoneCarriesCallFields(t *testing.T) { + state := &AnthropicEventToResponsesState{} + idx := 0 + var all []ResponsesStreamEvent + all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{ + Type: "message_start", Message: &AnthropicResponse{ID: "msg_1", Model: "claude-opus-4-8"}, + }, state)...) + all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{ + Type: "content_block_start", Index: &idx, + ContentBlock: &AnthropicContentBlock{Type: "tool_use", ID: "tu_1", Name: "exec"}, + }, state)...) + all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{ + Type: "content_block_delta", Index: &idx, + Delta: &AnthropicDelta{Type: "input_json_delta", PartialJSON: `{"cmd":"ls"}`}, + }, state)...) + all = append(all, AnthropicEventToResponsesEvents(&AnthropicStreamEvent{ + Type: "content_block_stop", Index: &idx, + }, state)...) 
+ + var fcDone *ResponsesOutput + for _, e := range all { + if e.Type == "response.output_item.done" && e.Item != nil && e.Item.Type == "function_call" { + fcDone = e.Item + } + } + require.NotNil(t, fcDone, "must emit function_call output_item.done") + assert.NotEmpty(t, fcDone.CallID, "call_id required") + assert.Equal(t, "exec", fcDone.Name) + assert.JSONEq(t, `{"cmd":"ls"}`, fcDone.Arguments) +} diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go index b4451f235bb..d046f560bfc 100644 --- a/backend/internal/pkg/apicompat/types.go +++ b/backend/internal/pkg/apicompat/types.go @@ -230,13 +230,31 @@ type ResponsesInputItem struct { Content json.RawMessage `json:"content,omitempty"` // string or []ResponsesContentPart // type=function_call - CallID string `json:"call_id,omitempty"` - Name string `json:"name,omitempty"` - Arguments string `json:"arguments,omitempty"` - ID string `json:"id,omitempty"` + CallID string `json:"call_id,omitempty"` + Name string `json:"name,omitempty"` + // Arguments is stringified JSON per the OpenAI spec, but codex / newer + // clients may send a raw JSON object. RawMessage accepts both; callers + // normalize via normalizeResponsesArguments. + Arguments json.RawMessage `json:"arguments,omitempty"` + ID string `json:"id,omitempty"` // type=function_call_output - Output string `json:"output,omitempty"` + // Output is a plain string in older clients, but newer Responses clients + // (codex) send an array like [{"type":"output_text","text":"..."}]. + // RawMessage accepts both; callers normalize via extractResponsesOutputText. + Output json.RawMessage `json:"output,omitempty"` +} + +// jsonRawString marshals a Go string into a JSON-string RawMessage (i.e. a +// quoted value). Used when building ResponsesInputItem.Arguments / .Output from +// a string source, preserving the OpenAI wire format where these fields are +// emitted as JSON strings. 
+func jsonRawString(s string) json.RawMessage { + b, err := json.Marshal(s) + if err != nil { + return json.RawMessage(`""`) + } + return json.RawMessage(b) } // ResponsesContentPart is a typed content part in a Responses message. @@ -390,6 +408,9 @@ type ResponsesStreamEvent struct { // response.output_item.added / response.output_item.done Item *ResponsesOutput `json:"item,omitempty"` + // response.content_part.added / response.content_part.done + Part *ResponsesContentPart `json:"part,omitempty"` + // response.output_text.delta / response.output_text.done OutputIndex int `json:"output_index,omitempty"` ContentIndex int `json:"content_index,omitempty"`