From 3c9d29a123afcbab3e83bb37915fa82b5352c0cc Mon Sep 17 00:00:00 2001 From: haruka <1628615876@qq.com> Date: Sun, 31 May 2026 01:24:13 +0800 Subject: [PATCH] fix: handle responses tool input variants --- .../pkg/apicompat/anthropic_responses_test.go | 60 +++++++++++- .../pkg/apicompat/anthropic_to_responses.go | 8 +- .../chatcompletions_responses_bridge.go | 7 +- .../chatcompletions_responses_bridge_test.go | 35 +++++++ .../chatcompletions_responses_test.go | 4 +- .../apicompat/chatcompletions_to_responses.go | 6 +- .../responses_input_normalization.go | 98 +++++++++++++++++++ .../responses_to_anthropic_request.go | 8 +- .../pkg/apicompat/test_helpers_test.go | 16 +++ backend/internal/pkg/apicompat/types.go | 10 +- 10 files changed, 223 insertions(+), 29 deletions(-) create mode 100644 backend/internal/pkg/apicompat/responses_input_normalization.go create mode 100644 backend/internal/pkg/apicompat/test_helpers_test.go diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go index 8997835c2aa..0343cb452ac 100644 --- a/backend/internal/pkg/apicompat/anthropic_responses_test.go +++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go @@ -143,7 +143,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) { assert.Empty(t, items[2].ID) assert.Equal(t, "function_call_output", items[3].Type) assert.Equal(t, "call_1", items[3].CallID) - assert.Equal(t, "Sunny, 72°F", items[3].Output) + assert.Equal(t, "Sunny, 72°F", requireRawJSONString(t, items[3].Output)) } func TestAnthropicToResponses_ThinkingIgnored(t *testing.T) { @@ -1340,7 +1340,7 @@ func TestAnthropicToResponses_ToolResultWithImage(t *testing.T) { // function_call_output should have text-only output (no image). assert.Equal(t, "function_call_output", items[2].Type) assert.Equal(t, "toolu_1", items[2].CallID) - assert.Equal(t, "(empty)", items[2].Output) + assert.Equal(t, "(empty)", requireRawJSONString(t, items[2].Output)) // Image should be in a separate user message. assert.Equal(t, "user", items[3].Role) @@ -1377,7 +1377,7 @@ func TestAnthropicToResponses_ToolResultMixed(t *testing.T) { // function_call_output should have text-only output. assert.Equal(t, "function_call_output", items[2].Type) - assert.Equal(t, "File metadata: 800x600 PNG", items[2].Output) + assert.Equal(t, "File metadata: 800x600 PNG", requireRawJSONString(t, items[2].Output)) // Image should be in a separate user message. assert.Equal(t, "user", items[3].Role) @@ -1412,7 +1412,59 @@ func TestAnthropicToResponses_TextOnlyToolResultBackwardCompat(t *testing.T) { require.Len(t, items, 3) // Text-only tool_result should produce a plain string. - assert.Equal(t, "Sunny, 72°F", items[2].Output) + assert.Equal(t, "Sunny, 72°F", requireRawJSONString(t, items[2].Output)) +} + +func TestConvertResponsesInputToAnthropic_ToolCallObjectArgumentsAndArrayOutput(t *testing.T) { + input := json.RawMessage(`[ + {"role":"user","content":[{"type":"input_text","text":"hi"}]}, + {"type":"function_call","call_id":"c1","name":"foo","arguments":{"x":1}}, + {"type":"function_call_output","call_id":"c1","output":[{"type":"output_text","text":"result"}]} + ]`) + + system, messages, err := convertResponsesInputToAnthropic(input) + require.NoError(t, err) + assert.Empty(t, system) + require.Len(t, messages, 3) + + var toolUse []AnthropicContentBlock + require.NoError(t, json.Unmarshal(messages[1].Content, &toolUse)) + require.Len(t, toolUse, 1) + assert.Equal(t, "tool_use", toolUse[0].Type) + assert.Equal(t, "toolu_c1", toolUse[0].ID) + assert.Equal(t, "foo", toolUse[0].Name) + assert.JSONEq(t, `{"x":1}`, string(toolUse[0].Input)) + + var toolResult []AnthropicContentBlock + require.NoError(t, json.Unmarshal(messages[2].Content, &toolResult)) + require.Len(t, toolResult, 1) + assert.Equal(t, "tool_result", toolResult[0].Type) + assert.Equal(t, "toolu_c1", toolResult[0].ToolUseID) + + var output string + require.NoError(t, json.Unmarshal(toolResult[0].Content, &output)) + assert.Equal(t, "result", output) +} + +func TestConvertResponsesInputToAnthropic_ToolCallStringArgumentsAndStringOutput(t *testing.T) { + input := json.RawMessage(`[ + {"type":"function_call","call_id":"c1","name":"foo","arguments":"{\"x\":1}"}, + {"type":"function_call_output","call_id":"c1","output":"result"} + ]`) + + _, messages, err := convertResponsesInputToAnthropic(input) + require.NoError(t, err) + require.Len(t, messages, 2) + + var toolUse []AnthropicContentBlock + require.NoError(t, json.Unmarshal(messages[0].Content, &toolUse)) + assert.JSONEq(t, `{"x":1}`, string(toolUse[0].Input)) + + var toolResult []AnthropicContentBlock + require.NoError(t, json.Unmarshal(messages[1].Content, &toolResult)) + var output string + require.NoError(t, json.Unmarshal(toolResult[0].Content, &output)) + assert.Equal(t, "result", output) } func TestAnthropicToResponses_ImageEmptyMediaType(t *testing.T) { diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go index e2011bee0bf..f92da58023b 100644 --- a/backend/internal/pkg/apicompat/anthropic_to_responses.go +++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go @@ -211,8 +211,8 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error) var toolResultImageParts []ResponsesContentPart // Extract tool_result blocks → function_call_output items. - // Images inside tool_results are extracted separately because the - // Responses API function_call_output.output only accepts strings. + // Images inside tool_results are extracted separately so existing + // Responses input wire output remains a plain string. for _, b := range blocks { if b.Type != "tool_result" { continue @@ -221,7 +221,7 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error) out = append(out, ResponsesInputItem{ Type: "function_call_output", CallID: toResponsesCallID(b.ToolUseID), - Output: outputText, + Output: jsonStringRawMessage(outputText), }) toolResultImageParts = append(toolResultImageParts, imageParts...) } @@ -302,7 +302,7 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e Type: "function_call", CallID: fcID, Name: b.Name, - Arguments: args, + Arguments: jsonStringRawMessage(args), }) } diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go index 09b680c7c73..6ba9a8b4aac 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge.go @@ -93,10 +93,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage) itemType := rawString(item["type"]) switch itemType { case "function_call": - arguments := rawString(item["arguments"]) - if strings.TrimSpace(arguments) == "" { - arguments = "{}" - } + arguments := responsesToolArgumentsForChat(item["arguments"]) messages = append(messages, ChatMessage{ Role: "assistant", ToolCalls: []ChatToolCall{{ @@ -110,7 +107,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage) }) continue case "function_call_output": - content, _ := json.Marshal(rawString(item["output"])) + content, _ := json.Marshal(responsesToolOutputText(item["output"])) messages = append(messages, ChatMessage{ Role: "tool", ToolCallID: rawString(item["call_id"]), diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge_test.go index 3e55e23a814..8c804bd4156 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_bridge_test.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_bridge_test.go @@ -53,6 +53,41 @@ func TestResponsesInputToChatMessages_DeveloperRoleTrimAndCaseInsensitive(t *tes assert.Equal(t, []string{"system", "system"}, chatMessageRoles(messages)) } +func TestResponsesInputToChatMessages_ToolCallObjectArgumentsAndArrayOutput(t *testing.T) { + input := json.RawMessage(`[ + {"role":"user","content":[{"type":"input_text","text":"hi"}]}, + {"type":"function_call","call_id":"c1","name":"foo","arguments":{"x":1}}, + {"type":"function_call_output","call_id":"c1","output":[{"type":"output_text","text":"result"}]} + ]`) + + messages, err := responsesInputToChatMessages("", input) + require.NoError(t, err) + require.Len(t, messages, 3) + + require.Len(t, messages[1].ToolCalls, 1) + assert.Equal(t, "c1", messages[1].ToolCalls[0].ID) + assert.Equal(t, "foo", messages[1].ToolCalls[0].Function.Name) + assert.JSONEq(t, `{"x":1}`, messages[1].ToolCalls[0].Function.Arguments) + + assert.Equal(t, "tool", messages[2].Role) + assert.Equal(t, "c1", messages[2].ToolCallID) + assert.JSONEq(t, `"result"`, string(messages[2].Content)) +} + +func TestResponsesInputToChatMessages_ToolCallStringArgumentsAndStringOutput(t *testing.T) { + input := json.RawMessage(`[ + {"type":"function_call","call_id":"c1","name":"foo","arguments":"{\"x\":1}"}, + {"type":"function_call_output","call_id":"c1","output":"result"} + ]`) + + messages, err := responsesInputToChatMessages("", input) + require.NoError(t, err) + require.Len(t, messages, 2) + + assert.JSONEq(t, `{"x":1}`, messages[0].ToolCalls[0].Function.Arguments) + assert.JSONEq(t, `"result"`, string(messages[1].Content)) +} + func TestResponsesToChatCompletionsRequest_InstructionsAndInputDeveloperRole(t *testing.T) { req := &ResponsesRequest{ Model: "gpt-4o", diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go index b03b012fc7a..af043822c90 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go @@ -105,7 +105,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) { // Check function_call_output item assert.Equal(t, "function_call_output", items[2].Type) assert.Equal(t, "call_1", items[2].CallID) - assert.Equal(t, "pong", items[2].Output) + assert.Equal(t, "pong", requireRawJSONString(t, items[2].Output)) // Check tools require.Len(t, resp.Tools, 1) @@ -614,7 +614,7 @@ func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) { require.Len(t, items, 3) assert.Equal(t, "function_call_output", items[2].Type) assert.Equal(t, "call_1", items[2].CallID) - assert.Equal(t, "image width: 100; image height: 200", items[2].Output) + assert.Equal(t, "image width: 100; image height: 200", requireRawJSONString(t, items[2].Output)) } func TestResponsesToChatCompletions_Incomplete(t *testing.T) { diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go index 463bdd0d15d..2a797f4c6ae 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go +++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go @@ -194,7 +194,7 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) { Type: "function_call", CallID: tc.ID, Name: tc.Function.Name, - Arguments: args, + Arguments: jsonStringRawMessage(args), }) } @@ -284,7 +284,7 @@ func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) { return []ResponsesInputItem{{ Type: "function_call_output", CallID: m.ToolCallID, - Output: output, + Output: jsonStringRawMessage(output), }}, nil } @@ -302,7 +302,7 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) { return []ResponsesInputItem{{ Type: "function_call_output", CallID: m.Name, - Output: output, + Output: jsonStringRawMessage(output), }}, nil } diff --git a/backend/internal/pkg/apicompat/responses_input_normalization.go b/backend/internal/pkg/apicompat/responses_input_normalization.go new file mode 100644 index 00000000000..d40c62c6ccc --- /dev/null +++ b/backend/internal/pkg/apicompat/responses_input_normalization.go @@ -0,0 +1,98 @@ +package apicompat + +import ( + "bytes" + "encoding/json" + "strings" +) + +func jsonStringRawMessage(s string) json.RawMessage { + raw, _ := json.Marshal(s) + return raw +} + +func responsesToolArgumentsForAnthropic(raw json.RawMessage) json.RawMessage { + raw = bytesTrimSpace(raw) + if len(raw) == 0 || string(raw) == "null" { + return json.RawMessage("{}") + } + if rawIsJSONObject(raw) { + return compactRawJSON(raw) + } + + var s string + if err := json.Unmarshal(raw, &s); err != nil { + return json.RawMessage("{}") + } + s = strings.TrimSpace(s) + if s == "" { + return json.RawMessage("{}") + } + raw = json.RawMessage(s) + if !rawIsJSONObject(raw) { + return json.RawMessage("{}") + } + return compactRawJSON(raw) +} + +func responsesToolArgumentsForChat(raw json.RawMessage) string { + raw = bytesTrimSpace(raw) + if len(raw) == 0 || string(raw) == "null" { + return "{}" + } + if rawIsJSONObject(raw) { + return string(compactRawJSON(raw)) + } + + var s string + if err := json.Unmarshal(raw, &s); err != nil { + return "{}" + } + s = strings.TrimSpace(s) + if s == "" { + return "{}" + } + return s +} + +func responsesToolOutputText(raw json.RawMessage) string { + raw = bytesTrimSpace(raw) + if len(raw) == 0 || string(raw) == "null" { + return "" + } + + var s string + if err := json.Unmarshal(raw, &s); err == nil { + return s + } + + var parts []ResponsesContentPart + if err := json.Unmarshal(raw, &parts); err != nil { + return "" + } + + var texts []string + for _, part := range parts { + if (part.Type == "output_text" || part.Type == "text") && part.Text != "" { + texts = append(texts, part.Text) + } + } + return strings.Join(texts, "\n\n") +} + +func rawIsJSONObject(raw json.RawMessage) bool { + raw = bytesTrimSpace(raw) + if len(raw) == 0 || raw[0] != '{' { + return false + } + var obj map[string]json.RawMessage + return json.Unmarshal(raw, &obj) == nil +} + +func compactRawJSON(raw json.RawMessage) json.RawMessage { + var buf bytes.Buffer + if err := json.Compact(&buf, raw); err != nil { + return raw + } + return json.RawMessage(buf.Bytes()) +} diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go index 8fa652f2bd1..127d3e1d4a3 100644 --- a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go +++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go @@ -127,15 +127,11 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage case item.Type == "function_call": // function_call → assistant message with tool_use block - input := json.RawMessage("{}") - if item.Arguments != "" { - input = json.RawMessage(item.Arguments) - } block := AnthropicContentBlock{ Type: "tool_use", ID: fromResponsesCallIDToAnthropic(item.CallID), Name: item.Name, - Input: input, + Input: responsesToolArgumentsForAnthropic(item.Arguments), } blockJSON, _ := json.Marshal([]AnthropicContentBlock{block}) messages = append(messages, AnthropicMessage{ @@ -145,7 +141,7 @@ func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage case item.Type == "function_call_output": // function_call_output → user message with tool_result block - outputContent := item.Output + outputContent := responsesToolOutputText(item.Output) if outputContent == "" { outputContent = "(empty)" } diff --git a/backend/internal/pkg/apicompat/test_helpers_test.go b/backend/internal/pkg/apicompat/test_helpers_test.go new file mode 100644 index 00000000000..3ed90a97b6f --- /dev/null +++ b/backend/internal/pkg/apicompat/test_helpers_test.go @@ -0,0 +1,16 @@ +package apicompat + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" +) + +func requireRawJSONString(t testing.TB, raw json.RawMessage) string { + t.Helper() + + var s string + require.NoError(t, json.Unmarshal(raw, &s)) + return s +} diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go index b4451f235bb..be2701c9a0c 100644 --- a/backend/internal/pkg/apicompat/types.go +++ b/backend/internal/pkg/apicompat/types.go @@ -230,13 +230,13 @@ type ResponsesInputItem struct { Content json.RawMessage `json:"content,omitempty"` // string or []ResponsesContentPart // type=function_call - CallID string `json:"call_id,omitempty"` - Name string `json:"name,omitempty"` - Arguments string `json:"arguments,omitempty"` - ID string `json:"id,omitempty"` + CallID string `json:"call_id,omitempty"` + Name string `json:"name,omitempty"` + Arguments json.RawMessage `json:"arguments,omitempty"` + ID string `json:"id,omitempty"` // type=function_call_output - Output string `json:"output,omitempty"` + Output json.RawMessage `json:"output,omitempty"` } // ResponsesContentPart is a typed content part in a Responses message.