From d1fccb2cef990a632336615fa72e2594733a47a7 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 5 Mar 2026 14:42:43 +0200 Subject: [PATCH 1/6] feat: record model thoughts Signed-off-by: Danny Kopping --- bridge_integration_test.go | 125 +++++++++++++++++++ fixtures/anthropic/single_builtin_tool.txtar | 48 ++++++- intercept/messages/blocking.go | 42 ++++++- intercept/messages/streaming.go | 46 +++++++ internal/testutil/mock_recorder.go | 15 +++ recorder/recorder.go | 34 +++++ recorder/types.go | 10 ++ trace_integration_test.go | 2 + 8 files changed, 316 insertions(+), 6 deletions(-) diff --git a/bridge_integration_test.go b/bridge_integration_test.go index 5eed920..b6e7e63 100644 --- a/bridge_integration_test.go +++ b/bridge_integration_test.go @@ -167,6 +167,131 @@ func TestAnthropicMessages(t *testing.T) { }) } +func TestAnthropicMessagesModelThoughts(t *testing.T) { + t.Parallel() + + t.Run("thinking captured with builtin tool", func(t *testing.T) { + t.Parallel() + + cases := []struct { + streaming bool + expectedToolCallID string + expectedThinkingSubstr string + }{ + { + streaming: true, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThinkingSubstr: "Let me find and read it.", + }, + { + streaming: false, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThinkingSubstr: "Let me find and read it.", + }, + } + + for _, tc := range cases { + t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + fix := fixtures.Parse(t, fixtures.AntSingleBuiltinTool) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + + recorderClient := &testutil.MockRecorder{} + logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) + providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} + b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) + require.NoError(t, err) + + mockSrv := httptest.NewUnstartedServer(b) + t.Cleanup(mockSrv.Close) + mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { + return aibcontext.AsActor(ctx, userID, nil) + } + mockSrv.Start() + + reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) + require.NoError(t, err) + req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + + if tc.streaming { + sp := aibridge.NewSSEParser() + require.NoError(t, sp.Parse(resp.Body)) + assert.Contains(t, sp.AllEvents(), "message_start") + assert.Contains(t, sp.AllEvents(), "message_stop") + } + + // Verify model thoughts were captured and associated with the tool call. + thoughts := recorderClient.RecordedModelThoughts() + require.Len(t, thoughts, 1) + assert.Contains(t, thoughts[0].Content, "The user wants me to read") + assert.Contains(t, thoughts[0].Content, tc.expectedThinkingSubstr) + assert.NotEmpty(t, thoughts[0].InterceptionID) + assert.Equal(t, tc.expectedToolCallID, thoughts[0].ProviderToolCallID) + + // Verify tool usage was also recorded. + toolUsages := recorderClient.RecordedToolUsages() + require.Len(t, toolUsages, 1) + assert.Equal(t, "Read", toolUsages[0].Tool) + assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) + + recorderClient.VerifyAllInterceptionsEnded(t) + }) + } + }) + + t.Run("no thoughts without tool calls", func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + // Use the simple fixture which has no tool calls — any thinking blocks + // should not be persisted since they can't be associated with a tool call. + fix := fixtures.Parse(t, fixtures.AntSimple) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + + recorderClient := &testutil.MockRecorder{} + logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) + providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} + b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) + require.NoError(t, err) + + mockSrv := httptest.NewUnstartedServer(b) + t.Cleanup(mockSrv.Close) + mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { + return aibcontext.AsActor(ctx, userID, nil) + } + mockSrv.Start() + + reqBody, err := sjson.SetBytes(fix.Request(), "stream", true) + require.NoError(t, err) + req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + + sp := aibridge.NewSSEParser() + require.NoError(t, sp.Parse(resp.Body)) + + // No thoughts should be recorded when there are no tool calls. + thoughts := recorderClient.RecordedModelThoughts() + assert.Empty(t, thoughts) + + recorderClient.VerifyAllInterceptionsEnded(t) + }) +} + func TestAWSBedrockIntegration(t *testing.T) { t.Parallel() diff --git a/fixtures/anthropic/single_builtin_tool.txtar b/fixtures/anthropic/single_builtin_tool.txtar index 5df793b..6bf37ad 100644 --- a/fixtures/anthropic/single_builtin_tool.txtar +++ b/fixtures/anthropic/single_builtin_tool.txtar @@ -17,22 +17,55 @@ event: message_start data: {"type":"message_start","message":{"id":"msg_015SQewixvT9s4cABCVvUE6g","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":22,"cache_read_input_tokens":13993,"output_tokens":5,"service_tier":"standard"}} } event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01RX68weRSquLx6HUTj65iBo","name":"Read","input":{}} } +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"The user wants me to read"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" a"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" file called \""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"foo\"."} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" Let me find"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" and"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":" read it."} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01RX68weRSquLx6HUTj65iBo","name":"Read","input":{}}} event: ping data: {"type": "ping"} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"\"}"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"}"} } event: content_block_stop -data: {"type":"content_block_stop","index":0 } +data: {"type":"content_block_stop","index":1 } event: message_delta data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":61} } @@ -49,6 +82,11 @@ data: {"type":"message_stop" } "expires_at": "0001-01-01T00:00:00Z" }, "content": [ + { + "type": "thinking", + "thinking": "The user wants me to read a file called \"foo\". Let me find and read it.", + "signature": "Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, { "citations": null, "text": "I can see there's a file named `foo` in the `/tmp/blah` directory. Let me read it.", diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index e22b97f..28e6457 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -135,6 +135,23 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req accumulateUsage(&cumulativeUsage, resp.Usage) + // Capture any thinking blocks that were returned. + var thoughtRecords []*recorder.ModelThoughtRecord + if !i.isSmallFastModel() { + for _, block := range resp.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: variant.Thinking, + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue + } + } + } + // Handle tool calls for non-streaming. var pendingToolCalls []anthropic.ToolUseBlock for _, c := range resp.Content { @@ -158,10 +175,20 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Injected: false, }) + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = toolUse.ID + } } - // If no injected tool calls, we're done. + // If no injected tool calls, persist thoughts and we're done. if len(pendingToolCalls) == 0 { + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue + } + _ = i.recorder.RecordModelThought(ctx, thought) + } break } @@ -198,6 +225,11 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req InvocationError: err, }) + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = tc.ID + } + if err != nil { // Always provide a tool_result even if the tool call failed messages.Messages = append(messages.Messages, @@ -283,6 +315,14 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req } } + // Only persist thoughts that are associated to a tool call. + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue + } + _ = i.recorder.RecordModelThought(ctx, thought) + } + // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if err := i.syncPayloadMessages(messages.Messages); err != nil { diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index 4e87fd8..f9b6525 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -252,6 +252,24 @@ newStream: // Don't send message_stop until all tools have been called. case string(constant.ValueOf[constant.MessageStop]()): + + // Capture any thinking blocks that were returned. + var thoughtRecords []*recorder.ModelThoughtRecord + if !i.isSmallFastModel() { // TODO: remove. + for _, block := range message.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + InterceptionID: i.ID().String(), + Content: variant.Thinking, + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue + } + } + } + if len(pendingToolCalls) > 0 { // Append the whole message from this stream as context since we'll be sending a new request with the tool results. messages.Messages = append(messages.Messages, message.ToParam()) @@ -306,6 +324,11 @@ newStream: InvocationError: err, }) + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = id + } + if err != nil { // Always provide a tool_result even if the tool call failed messages.Messages = append(messages.Messages, @@ -390,6 +413,15 @@ newStream: } } + // Only persist thoughts that are associated to a tool call. + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue + } + + _ = i.recorder.RecordModelThought(streamCtx, thought) + } + // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if syncErr := i.syncPayloadMessages(messages.Messages); syncErr != nil { @@ -417,7 +449,21 @@ newStream: Args: variant.Input, Injected: false, }) + + // Associate the model thoughts with this tool call. + for _, thought := range thoughtRecords { + thought.ProviderToolCallID = variant.ID + } + } + } + + // Only persist thoughts that are associated to a tool call. + for _, thought := range thoughtRecords { + if thought.ProviderToolCallID == "" { + continue } + + _ = i.recorder.RecordModelThought(streamCtx, thought) } } } diff --git a/internal/testutil/mock_recorder.go b/internal/testutil/mock_recorder.go index ac39006..da29b4b 100644 --- a/internal/testutil/mock_recorder.go +++ b/internal/testutil/mock_recorder.go @@ -20,6 +20,7 @@ type MockRecorder struct { tokenUsages []*recorder.TokenUsageRecord userPrompts []*recorder.PromptUsageRecord toolUsages []*recorder.ToolUsageRecord + modelThoughts []*recorder.ModelThoughtRecord interceptionsEnd map[string]*recorder.InterceptionRecordEnded } @@ -64,6 +65,13 @@ func (m *MockRecorder) RecordToolUsage(ctx context.Context, req *recorder.ToolUs return nil } +func (m *MockRecorder) RecordModelThought(ctx context.Context, req *recorder.ModelThoughtRecord) error { + m.mu.Lock() + defer m.mu.Unlock() + m.modelThoughts = append(m.modelThoughts, req) + return nil +} + // RecordedTokenUsages returns a copy of recorded token usages in a thread-safe manner. // Note: This is a shallow clone - the slice is copied but the pointers reference the // same underlying records. This is sufficient for our test assertions which only read @@ -106,6 +114,13 @@ func (m *MockRecorder) ToolUsages() []*recorder.ToolUsageRecord { return m.toolUsages } +// RecordedModelThoughts returns a copy of recorded model thoughts in a thread-safe manner. +func (m *MockRecorder) RecordedModelThoughts() []*recorder.ModelThoughtRecord { + m.mu.Lock() + defer m.mu.Unlock() + return slices.Clone(m.modelThoughts) +} + // RecordedInterceptionEnd returns the stored InterceptionRecordEnded for the // given interception ID, or nil if not found. func (m *MockRecorder) RecordedInterceptionEnd(id string) *recorder.InterceptionRecordEnded { diff --git a/recorder/recorder.go b/recorder/recorder.go index 3814488..525c65a 100644 --- a/recorder/recorder.go +++ b/recorder/recorder.go @@ -116,6 +116,24 @@ func (r *RecorderWrapper) RecordToolUsage(ctx context.Context, req *ToolUsageRec return err } +func (r *RecorderWrapper) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) (outErr error) { + ctx, span := r.tracer.Start(ctx, "Intercept.RecordModelThought", trace.WithAttributes(tracing.InterceptionAttributesFromContext(ctx)...)) + defer tracing.EndSpanErr(span, &outErr) + + client, err := r.clientFn() + if err != nil { + return fmt.Errorf("acquire client: %w", err) + } + + req.CreatedAt = time.Now() + if err = client.RecordModelThought(ctx, req); err == nil { + return nil + } + + r.logger.Warn(ctx, "failed to record model thought", slog.Error(err), slog.F("interception_id", req.InterceptionID)) + return err +} + func NewRecorder(logger slog.Logger, tracer trace.Tracer, clientFn func() (Recorder, error)) *RecorderWrapper { return &RecorderWrapper{ logger: logger, @@ -252,6 +270,22 @@ func (a *AsyncRecorder) RecordToolUsage(ctx context.Context, req *ToolUsageRecor return nil // Caller is not interested in error. } +func (a *AsyncRecorder) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error { + a.wg.Add(1) + go func() { + defer a.wg.Done() + timedCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), a.timeout) + defer cancel() + + err := a.wrapped.RecordModelThought(timedCtx, req) + if err != nil { + a.logger.Warn(timedCtx, "failed to record model thought", slog.Error(err), slog.F("payload", req)) + } + }() + + return nil // Caller is not interested in error. +} + func (a *AsyncRecorder) Wait() { a.wg.Wait() } diff --git a/recorder/types.go b/recorder/types.go index b33494d..609e714 100644 --- a/recorder/types.go +++ b/recorder/types.go @@ -19,6 +19,8 @@ type Recorder interface { RecordPromptUsage(ctx context.Context, req *PromptUsageRecord) error // RecordToolUsage records the tools used in an interception with an upstream AI provider. RecordToolUsage(ctx context.Context, req *ToolUsageRecord) error + // RecordModelThought records the reasoning/thinking produced in an interception with an upstream AI provider. + RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error } type ToolArgs any @@ -73,3 +75,11 @@ type ToolUsageRecord struct { Metadata Metadata CreatedAt time.Time } + +type ModelThoughtRecord struct { + InterceptionID string + ProviderToolCallID string + Content string + Metadata Metadata + CreatedAt time.Time +} diff --git a/trace_integration_test.go b/trace_integration_test.go index a62e58e..5046387 100644 --- a/trace_integration_test.go +++ b/trace_integration_test.go @@ -41,6 +41,7 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 1, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } @@ -53,6 +54,7 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 2, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, + {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } From c8cb04f0f4db678a598b2375e7e26b04980b2d86 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 5 Mar 2026 17:36:44 +0200 Subject: [PATCH 2/6] fix: send model thoughts with tool usage recording Signed-off-by: Danny Kopping --- bridge_integration_test.go | 22 ++++++------ intercept/messages/blocking.go | 56 ++++++++++------------------- intercept/messages/streaming.go | 58 ++++++++++-------------------- internal/testutil/mock_recorder.go | 15 -------- recorder/recorder.go | 34 ------------------ recorder/types.go | 12 +++---- trace_integration_test.go | 2 -- 7 files changed, 52 insertions(+), 147 deletions(-) diff --git a/bridge_integration_test.go b/bridge_integration_test.go index b6e7e63..5545599 100644 --- a/bridge_integration_test.go +++ b/bridge_integration_test.go @@ -229,20 +229,17 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Contains(t, sp.AllEvents(), "message_stop") } - // Verify model thoughts were captured and associated with the tool call. - thoughts := recorderClient.RecordedModelThoughts() - require.Len(t, thoughts, 1) - assert.Contains(t, thoughts[0].Content, "The user wants me to read") - assert.Contains(t, thoughts[0].Content, tc.expectedThinkingSubstr) - assert.NotEmpty(t, thoughts[0].InterceptionID) - assert.Equal(t, tc.expectedToolCallID, thoughts[0].ProviderToolCallID) - - // Verify tool usage was also recorded. + // Verify tool usage was recorded with associated model thoughts. toolUsages := recorderClient.RecordedToolUsages() require.Len(t, toolUsages, 1) assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) + // Model thoughts should be embedded in the tool usage record. + require.Len(t, toolUsages[0].ModelThoughts, 1) + assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") + assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, tc.expectedThinkingSubstr) + recorderClient.VerifyAllInterceptionsEnded(t) }) } @@ -284,9 +281,10 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { sp := aibridge.NewSSEParser() require.NoError(t, sp.Parse(resp.Body)) - // No thoughts should be recorded when there are no tool calls. - thoughts := recorderClient.RecordedModelThoughts() - assert.Empty(t, thoughts) + // No tool usages (and therefore no thoughts) should be recorded + // when there are no tool calls. + toolUsages := recorderClient.RecordedToolUsages() + assert.Empty(t, toolUsages) recorderClient.VerifyAllInterceptionsEnded(t) }) diff --git a/intercept/messages/blocking.go b/intercept/messages/blocking.go index 28e6457..4ba7187 100644 --- a/intercept/messages/blocking.go +++ b/intercept/messages/blocking.go @@ -137,18 +137,16 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req // Capture any thinking blocks that were returned. var thoughtRecords []*recorder.ModelThoughtRecord - if !i.isSmallFastModel() { - for _, block := range resp.Content { - switch variant := block.AsAny().(type) { - case anthropic.ThinkingBlock: - thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - InterceptionID: i.ID().String(), - Content: variant.Thinking, - }) - case anthropic.RedactedThinkingBlock: - // For redacted thinking, there's nothing useful we can capture. - continue - } + for _, block := range resp.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + Content: variant.Thinking, + CreatedAt: time.Now(), + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue } } @@ -173,22 +171,15 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Tool: toolUse.Name, Args: toolUse.Input, Injected: false, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = toolUse.ID - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } - // If no injected tool calls, persist thoughts and we're done. + // If no injected tool calls, we're done. if len(pendingToolCalls) == 0 { - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - _ = i.recorder.RecordModelThought(ctx, thought) - } break } @@ -223,12 +214,11 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req Args: tc.Input, Injected: true, InvocationError: err, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = tc.ID - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil if err != nil { // Always provide a tool_result even if the tool call failed @@ -315,14 +305,6 @@ func (i *BlockingInterception) ProcessRequest(w http.ResponseWriter, r *http.Req } } - // Only persist thoughts that are associated to a tool call. - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - _ = i.recorder.RecordModelThought(ctx, thought) - } - // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if err := i.syncPayloadMessages(messages.Messages); err != nil { diff --git a/intercept/messages/streaming.go b/intercept/messages/streaming.go index f9b6525..949401f 100644 --- a/intercept/messages/streaming.go +++ b/intercept/messages/streaming.go @@ -255,18 +255,16 @@ newStream: // Capture any thinking blocks that were returned. var thoughtRecords []*recorder.ModelThoughtRecord - if !i.isSmallFastModel() { // TODO: remove. - for _, block := range message.Content { - switch variant := block.AsAny().(type) { - case anthropic.ThinkingBlock: - thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ - InterceptionID: i.ID().String(), - Content: variant.Thinking, - }) - case anthropic.RedactedThinkingBlock: - // For redacted thinking, there's nothing useful we can capture. - continue - } + for _, block := range message.Content { + switch variant := block.AsAny().(type) { + case anthropic.ThinkingBlock: + thoughtRecords = append(thoughtRecords, &recorder.ModelThoughtRecord{ + Content: variant.Thinking, + CreatedAt: time.Now(), + }) + case anthropic.RedactedThinkingBlock: + // For redacted thinking, there's nothing useful we can capture. + continue } } @@ -322,12 +320,11 @@ newStream: Args: input, Injected: true, InvocationError: err, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = id - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil if err != nil { // Always provide a tool_result even if the tool call failed @@ -413,15 +410,6 @@ newStream: } } - // Only persist thoughts that are associated to a tool call. - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - - _ = i.recorder.RecordModelThought(streamCtx, thought) - } - // Sync the raw payload with updated messages so that withBody() // sends the updated payload on the next iteration. if syncErr := i.syncPayloadMessages(messages.Messages); syncErr != nil { @@ -448,23 +436,13 @@ newStream: Tool: variant.Name, Args: variant.Input, Injected: false, + ModelThoughts: thoughtRecords, }) - - // Associate the model thoughts with this tool call. - for _, thought := range thoughtRecords { - thought.ProviderToolCallID = variant.ID - } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } } - - // Only persist thoughts that are associated to a tool call. - for _, thought := range thoughtRecords { - if thought.ProviderToolCallID == "" { - continue - } - - _ = i.recorder.RecordModelThought(streamCtx, thought) - } } } diff --git a/internal/testutil/mock_recorder.go b/internal/testutil/mock_recorder.go index da29b4b..ac39006 100644 --- a/internal/testutil/mock_recorder.go +++ b/internal/testutil/mock_recorder.go @@ -20,7 +20,6 @@ type MockRecorder struct { tokenUsages []*recorder.TokenUsageRecord userPrompts []*recorder.PromptUsageRecord toolUsages []*recorder.ToolUsageRecord - modelThoughts []*recorder.ModelThoughtRecord interceptionsEnd map[string]*recorder.InterceptionRecordEnded } @@ -65,13 +64,6 @@ func (m *MockRecorder) RecordToolUsage(ctx context.Context, req *recorder.ToolUs return nil } -func (m *MockRecorder) RecordModelThought(ctx context.Context, req *recorder.ModelThoughtRecord) error { - m.mu.Lock() - defer m.mu.Unlock() - m.modelThoughts = append(m.modelThoughts, req) - return nil -} - // RecordedTokenUsages returns a copy of recorded token usages in a thread-safe manner. // Note: This is a shallow clone - the slice is copied but the pointers reference the // same underlying records. This is sufficient for our test assertions which only read @@ -114,13 +106,6 @@ func (m *MockRecorder) ToolUsages() []*recorder.ToolUsageRecord { return m.toolUsages } -// RecordedModelThoughts returns a copy of recorded model thoughts in a thread-safe manner. -func (m *MockRecorder) RecordedModelThoughts() []*recorder.ModelThoughtRecord { - m.mu.Lock() - defer m.mu.Unlock() - return slices.Clone(m.modelThoughts) -} - // RecordedInterceptionEnd returns the stored InterceptionRecordEnded for the // given interception ID, or nil if not found. func (m *MockRecorder) RecordedInterceptionEnd(id string) *recorder.InterceptionRecordEnded { diff --git a/recorder/recorder.go b/recorder/recorder.go index 525c65a..3814488 100644 --- a/recorder/recorder.go +++ b/recorder/recorder.go @@ -116,24 +116,6 @@ func (r *RecorderWrapper) RecordToolUsage(ctx context.Context, req *ToolUsageRec return err } -func (r *RecorderWrapper) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) (outErr error) { - ctx, span := r.tracer.Start(ctx, "Intercept.RecordModelThought", trace.WithAttributes(tracing.InterceptionAttributesFromContext(ctx)...)) - defer tracing.EndSpanErr(span, &outErr) - - client, err := r.clientFn() - if err != nil { - return fmt.Errorf("acquire client: %w", err) - } - - req.CreatedAt = time.Now() - if err = client.RecordModelThought(ctx, req); err == nil { - return nil - } - - r.logger.Warn(ctx, "failed to record model thought", slog.Error(err), slog.F("interception_id", req.InterceptionID)) - return err -} - func NewRecorder(logger slog.Logger, tracer trace.Tracer, clientFn func() (Recorder, error)) *RecorderWrapper { return &RecorderWrapper{ logger: logger, @@ -270,22 +252,6 @@ func (a *AsyncRecorder) RecordToolUsage(ctx context.Context, req *ToolUsageRecor return nil // Caller is not interested in error. } -func (a *AsyncRecorder) RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error { - a.wg.Add(1) - go func() { - defer a.wg.Done() - timedCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), a.timeout) - defer cancel() - - err := a.wrapped.RecordModelThought(timedCtx, req) - if err != nil { - a.logger.Warn(timedCtx, "failed to record model thought", slog.Error(err), slog.F("payload", req)) - } - }() - - return nil // Caller is not interested in error. -} - func (a *AsyncRecorder) Wait() { a.wg.Wait() } diff --git a/recorder/types.go b/recorder/types.go index 609e714..d3cbaf7 100644 --- a/recorder/types.go +++ b/recorder/types.go @@ -18,9 +18,8 @@ type Recorder interface { // RecordPromptUsage records the prompts used in an interception with an upstream AI provider. RecordPromptUsage(ctx context.Context, req *PromptUsageRecord) error // RecordToolUsage records the tools used in an interception with an upstream AI provider. + // Any associated model thoughts should be included in the ToolUsageRecord. RecordToolUsage(ctx context.Context, req *ToolUsageRecord) error - // RecordModelThought records the reasoning/thinking produced in an interception with an upstream AI provider. - RecordModelThought(ctx context.Context, req *ModelThoughtRecord) error } type ToolArgs any @@ -74,12 +73,11 @@ type ToolUsageRecord struct { InvocationError error Metadata Metadata CreatedAt time.Time + ModelThoughts []*ModelThoughtRecord } type ModelThoughtRecord struct { - InterceptionID string - ProviderToolCallID string - Content string - Metadata Metadata - CreatedAt time.Time + Content string + Metadata Metadata + CreatedAt time.Time } diff --git a/trace_integration_test.go b/trace_integration_test.go index 5046387..a62e58e 100644 --- a/trace_integration_test.go +++ b/trace_integration_test.go @@ -41,7 +41,6 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 1, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, - {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } @@ -54,7 +53,6 @@ func TestTraceAnthropic(t *testing.T) { {"Intercept.RecordPromptUsage", 1, codes.Unset}, {"Intercept.RecordTokenUsage", 2, codes.Unset}, {"Intercept.RecordToolUsage", 1, codes.Unset}, - {"Intercept.RecordModelThought", 1, codes.Unset}, {"Intercept.ProcessRequest.Upstream", 1, codes.Unset}, } From 4068b9c948baad65ae7c0d57c5c1216703a9ce24 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 14:18:23 +0200 Subject: [PATCH 3/6] feat: capture responses reasoning Signed-off-by: Danny Kopping --- fixtures/anthropic/simple.txtar | 69 +++++++------ .../blocking/single_builtin_tool.txtar | 11 +++ .../responses/streaming/builtin_tool.txtar | 44 ++++++--- .../openai/responses/streaming/simple.txtar | 64 +++++++----- intercept/responses/base.go | 35 +++++++ intercept/responses/injected_tools.go | 11 ++- responses_integration_test.go | 97 +++++++++++++++++++ 7 files changed, 265 insertions(+), 66 deletions(-) diff --git a/fixtures/anthropic/simple.txtar b/fixtures/anthropic/simple.txtar index f1300b7..235138c 100644 --- a/fixtures/anthropic/simple.txtar +++ b/fixtures/anthropic/simple.txtar @@ -23,91 +23,100 @@ event: message_start data: {"type":"message_start","message":{"id":"msg_01Pvyf26bY17RcjmWfJsXGBn","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":18,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":1,"service_tier":"standard"}} } event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"This is a classic philosophical question about medieval scholasticism. I'll give a thoughtful answer."}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"text","text":""} } event: ping data: {"type": "ping"} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"This"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"This"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" is a famous philosophical question often used to illustrate medieval"}} +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" is a famous philosophical question often used to illustrate medieval"}} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" scholastic debates that seem pointless or ov"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" scholastic debates that seem pointless or ov"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"erly abstract. The question \"How many angels can dance on the head of"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"erly abstract. The question \"How many angels can dance on the head of"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" a pin?\" is typically cited as an example of us"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" a pin?\" is typically cited as an example of us"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"eless speculation.\n\nHistorically, medieval theolog"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"eless speculation.\n\nHistorically, medieval theolog"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ians did debate the nature of angels -"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"ians did debate the nature of angels -"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" whether they were incorporeal beings, how"}} +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" whether they were incorporeal beings, how"}} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" they occupied space, and whether multiple angels could exist"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" they occupied space, and whether multiple angels could exist"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" in the same location. However, there"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" in the same location. However, there"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s little evidence they literally"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"'s little evidence they literally"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" debated dancing angels on pinheads.\n\nThe question has"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" debated dancing angels on pinheads.\n\nThe question has"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" no factual answer since it depends on assumptions about:"}} +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" no factual answer since it depends on assumptions about:"}} event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n- The existence and nature of angels\n- Whether"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"\n- The existence and nature of angels\n- Whether"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" incorporeal beings occupy physical space\n- What"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" incorporeal beings occupy physical space\n- What"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" constitutes \"dancing\" for a spiritual"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" constitutes \"dancing\" for a spiritual"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" entity\n- The size of both the"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" entity\n- The size of both the"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" pin and the angels\n\nIt's become a metaph"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" pin and the angels\n\nIt's become a metaph"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"or for overthinking trivial matters"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"or for overthinking trivial matters"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" or getting lost in theoretical discussions disconnected from practical reality."} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" or getting lost in theoretical discussions disconnected from practical reality."} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Some use it to critique certain types of academic"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" Some use it to critique certain types of academic"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" or theological debate, while others defen"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" or theological debate, while others defen"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d the value of exploring fundamental questions about existence an"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"d the value of exploring fundamental questions about existence an"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d metaphysics.\n\nSo while u"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"d metaphysics.\n\nSo while u"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"nanswerable literally, it serves as an interesting lens"} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"nanswerable literally, it serves as an interesting lens"} } event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" for discussing the nature of philosophical inquiry itself."} } +data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":" for discussing the nature of philosophical inquiry itself."} } event: content_block_stop -data: {"type":"content_block_stop","index":0 } +data: {"type":"content_block_stop","index":1 } event: message_delta data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":240} } @@ -122,6 +131,10 @@ data: {"type":"message_stop" } "role": "assistant", "model": "claude-sonnet-4-20250514", "content": [ + { + "type": "thinking", + "thinking": "This is a classic philosophical question about medieval scholasticism. I'll give a thoughtful answer." + }, { "type": "text", "text": "This is a famous philosophical question, often called \"How many angels can dance on the head of a pin?\" It's typically used to represent pointless or overly abstract theological debates.\n\nThe question doesn't have a literal answer because:\n\n1. **Historical context**: It's often attributed to medieval scholastic philosophers, though there's little evidence they actually debated this exact question. It became a popular way to mock what some saw as useless academic arguments.\n\n2. **Philosophical purpose**: The question highlights the difficulty of discussing non-physical beings (angels) in physical terms (space on a pinhead).\n\n3. **Different interpretations**: \n - If angels are purely spiritual, they might not take up physical space at all\n - If they do occupy space, we'd need to know their \"size\"\n - The question might be asking about the nature of space, matter, and spirit\n\nSo the real answer is that it's not meant to be answered literally - it's a thought experiment about the limits of rational inquiry and the sometimes absurd directions theological speculation can take.\n\nWould you like to explore the philosophical implications behind this question, or were you thinking about it in a different context?" diff --git a/fixtures/openai/responses/blocking/single_builtin_tool.txtar b/fixtures/openai/responses/blocking/single_builtin_tool.txtar index f41bd3c..14299ff 100644 --- a/fixtures/openai/responses/blocking/single_builtin_tool.txtar +++ b/fixtures/openai/responses/blocking/single_builtin_tool.txtar @@ -50,6 +50,17 @@ "max_tool_calls": null, "model": "gpt-4.1-2025-04-14", "output": [ + { + "id": "rs_0da6045a8b68fa5200695fa23e100081a19bf68887d47ae93d", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "The user wants to add 3 and 5. Let me call the add function." + } + ] + }, { "id": "fc_0da6045a8b68fa5200695fa23e198081a19bf68887d47ae93d", "type": "function_call", diff --git a/fixtures/openai/responses/streaming/builtin_tool.txtar b/fixtures/openai/responses/streaming/builtin_tool.txtar index b6a7a0a..98793f3 100644 --- a/fixtures/openai/responses/streaming/builtin_tool.txtar +++ b/fixtures/openai/responses/streaming/builtin_tool.txtar @@ -40,41 +40,59 @@ event: response.in_progress data: {"type":"response.in_progress","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"in_progress","arguments":"","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":0,"sequence_number":2} +data: {"type":"response.output_item.added","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"delta":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"text":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"in_progress","arguments":"","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":1,"sequence_number":8} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWZHP8i4lSgQYT","output_index":0,"sequence_number":3} +data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWZHP8i4lSgQYT","output_index":1,"sequence_number":9} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"a","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"yC1iubuqc098ZSH","output_index":0,"sequence_number":4} +data: {"type":"response.function_call_arguments.delta","delta":"a","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"yC1iubuqc098ZSH","output_index":1,"sequence_number":10} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"G17nNbWUcJkqA2","output_index":0,"sequence_number":5} +data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"G17nNbWUcJkqA2","output_index":1,"sequence_number":11} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"3","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"Mj71L4eeLZbIEFU","output_index":0,"sequence_number":6} +data: {"type":"response.function_call_arguments.delta","delta":"3","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"Mj71L4eeLZbIEFU","output_index":1,"sequence_number":12} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":",\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"ZchcCauvlPtVc7","output_index":0,"sequence_number":7} +data: {"type":"response.function_call_arguments.delta","delta":",\"","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"ZchcCauvlPtVc7","output_index":1,"sequence_number":13} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"b","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWLYMrsBI3ZHKVP","output_index":0,"sequence_number":8} +data: {"type":"response.function_call_arguments.delta","delta":"b","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWLYMrsBI3ZHKVP","output_index":1,"sequence_number":14} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"n4iUzpnbPE4DnO","output_index":0,"sequence_number":9} +data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"n4iUzpnbPE4DnO","output_index":1,"sequence_number":15} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"23mO3rxkXqDOi6g","output_index":0,"sequence_number":10} +data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"23mO3rxkXqDOi6g","output_index":1,"sequence_number":16} event: response.function_call_arguments.delta -data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"AQnBsNz7GqkdylH","output_index":0,"sequence_number":11} +data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"AQnBsNz7GqkdylH","output_index":1,"sequence_number":17} event: response.function_call_arguments.done -data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","output_index":0,"sequence_number":12} +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","output_index":1,"sequence_number":18} event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":0,"sequence_number":13} +data: {"type":"response.output_item.done","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":1,"sequence_number":19} event: response.completed -data: {"type":"response.completed","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":76},"user":null,"metadata":{}},"sequence_number":14} +data: {"type":"response.completed","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."}]},{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":76},"user":null,"metadata":{}},"sequence_number":20} diff --git a/fixtures/openai/responses/streaming/simple.txtar b/fixtures/openai/responses/streaming/simple.txtar index d86aa6e..c8736f9 100644 --- a/fixtures/openai/responses/streaming/simple.txtar +++ b/fixtures/openai/responses/streaming/simple.txtar @@ -13,71 +13,89 @@ event: response.in_progress data: {"type":"response.in_progress","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} +data: {"type":"response.output_item.added","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"delta":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"text":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":1,"sequence_number":8} event: response.content_part.added -data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} +data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":9} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":0,"sequence_number":4} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":1,"sequence_number":10} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":0,"sequence_number":5} +data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":1,"sequence_number":11} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":0,"sequence_number":6} +data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":1,"sequence_number":12} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":0,"sequence_number":7} +data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":1,"sequence_number":13} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":0,"sequence_number":8} +data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":1,"sequence_number":14} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":0,"sequence_number":9} +data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":1,"sequence_number":15} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":0,"sequence_number":10} +data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":1,"sequence_number":16} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":0,"sequence_number":11} +data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":1,"sequence_number":17} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":0,"sequence_number":12} +data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":1,"sequence_number":18} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":0,"sequence_number":13} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":1,"sequence_number":19} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":0,"sequence_number":14} +data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":1,"sequence_number":20} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":0,"sequence_number":15} +data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":1,"sequence_number":21} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":0,"sequence_number":16} +data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":1,"sequence_number":22} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":0,"sequence_number":17} +data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":1,"sequence_number":23} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":0,"sequence_number":18} +data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":1,"sequence_number":24} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":0,"sequence_number":19} +data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":1,"sequence_number":25} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":0,"sequence_number":20} +data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":1,"sequence_number":26} event: response.output_text.done -data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":0,"sequence_number":21,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} +data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":1,"sequence_number":27,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} event: response.content_part.done -data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":22} +data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":28} event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":0,"sequence_number":23} +data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":1,"sequence_number":29} event: response.completed -data: {"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":24} +data: {"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":30} diff --git a/intercept/responses/base.go b/intercept/responses/base.go index 8b7c3de..21301f7 100644 --- a/intercept/responses/base.go +++ b/intercept/responses/base.go @@ -260,6 +260,9 @@ func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Conte return } + // Capture any reasoning items from the response output as model thoughts. + thoughtRecords := i.extractModelThoughts(response) + for _, item := range response.Output { var args recorder.ToolArgs @@ -280,9 +283,13 @@ func (i *responsesInterceptionBase) recordNonInjectedToolUsage(ctx context.Conte Tool: item.Name, Args: args, Injected: false, + ModelThoughts: thoughtRecords, }); err != nil { i.logger.Warn(ctx, "failed to record tool usage", slog.Error(err), slog.F("tool", item.Name)) } + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } } @@ -326,6 +333,34 @@ func (i *responsesInterceptionBase) recordTokenUsage(ctx context.Context, respon } } +// extractModelThoughts extracts reasoning summary items from response output +// and converts them to ModelThoughtRecords for association with tool usage. +func (i *responsesInterceptionBase) extractModelThoughts(response *responses.Response) []*recorder.ModelThoughtRecord { + if response == nil { + return nil + } + + var thoughts []*recorder.ModelThoughtRecord + for _, item := range response.Output { + if item.Type != string(constant.ValueOf[constant.Reasoning]()) { + continue + } + + reasoning := item.AsReasoning() + for _, summary := range reasoning.Summary { + if summary.Text == "" { + continue + } + thoughts = append(thoughts, &recorder.ModelThoughtRecord{ + Content: summary.Text, + CreatedAt: time.Now(), + }) + } + } + + return thoughts +} + // responseCopier helper struct to send original response to the client type responseCopier struct { buff deltaBuffer diff --git a/intercept/responses/injected_tools.go b/intercept/responses/injected_tools.go index 8a47801..633188d 100644 --- a/intercept/responses/injected_tools.go +++ b/intercept/responses/injected_tools.go @@ -114,9 +114,15 @@ func (i *responsesInterceptionBase) handleInjectedToolCalls(ctx context.Context, return nil, nil } + // Capture any reasoning items from the response output as model thoughts. + thoughtRecords := i.extractModelThoughts(response) + var results []responses.ResponseInputItemUnionParam for _, fc := range pending { - results = append(results, i.invokeInjectedTool(ctx, response.ID, fc)) + results = append(results, i.invokeInjectedTool(ctx, response.ID, fc, thoughtRecords)) + // Clear after first use to avoid duplicating across + // multiple tool calls in the same message. + thoughtRecords = nil } return results, nil @@ -201,7 +207,7 @@ func (i *responsesInterceptionBase) getPendingInjectedToolCalls(response *respon return calls } -func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall) responses.ResponseInputItemUnionParam { +func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, responseID string, fc responses.ResponseFunctionToolCall, thoughtRecords []*recorder.ModelThoughtRecord) responses.ResponseInputItemUnionParam { tool := i.mcpProxy.GetTool(fc.Name) if tool == nil { return responses.ResponseInputItemParamOfFunctionCallOutput(fc.CallID, fmt.Sprintf("error: unknown injected function %q", fc.ID)) @@ -218,6 +224,7 @@ func (i *responsesInterceptionBase) invokeInjectedTool(ctx context.Context, resp Args: args, Injected: true, InvocationError: err, + ModelThoughts: thoughtRecords, }) var output string diff --git a/responses_integration_test.go b/responses_integration_test.go index 7a26fff..80d2722 100644 --- a/responses_integration_test.go +++ b/responses_integration_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "net" "net/http" @@ -384,6 +385,7 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) { require.Len(t, recordedTools, 1) recordedTools[0].InterceptionID = tc.expectToolRecorded.InterceptionID // ignore interception id (interception id is not constant and response doesn't contain it) recordedTools[0].CreatedAt = tc.expectToolRecorded.CreatedAt // ignore time + recordedTools[0].ModelThoughts = tc.expectToolRecorded.ModelThoughts // ignore model thoughts (tested separately) require.Equal(t, tc.expectToolRecorded, recordedTools[0]) } else { require.Empty(t, recordedTools) @@ -941,6 +943,101 @@ func TestResponsesInjectedTool(t *testing.T) { } } +func TestResponsesModelThoughts(t *testing.T) { + t.Parallel() + + t.Run("reasoning captured with builtin tool", func(t *testing.T) { + t.Parallel() + + cases := []struct { + streaming bool + expectedToolCallID string + }{ + { + streaming: false, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + }, + { + streaming: true, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + }, + } + + for _, tc := range cases { + t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + var fix fixtures.Fixture + if tc.streaming { + fix = fixtures.Parse(t, fixtures.OaiResponsesStreamingBuiltinTool) + } else { + fix = fixtures.Parse(t, fixtures.OaiResponsesBlockingSingleBuiltinTool) + } + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + + prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) + srv, mockRecorder := newTestSrv(t, ctx, prov, nil, testTracer) + defer srv.Close() + + req := createOpenAIResponsesReq(t, srv.URL, fix.Request()) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + + _, err = io.ReadAll(resp.Body) + require.NoError(t, err) + + // Verify tool usage was recorded with associated model thoughts. + toolUsages := mockRecorder.RecordedToolUsages() + require.Len(t, toolUsages, 1) + require.Equal(t, "add", toolUsages[0].Tool) + require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) + + // Model thoughts should be embedded in the tool usage record. + require.Len(t, toolUsages[0].ModelThoughts, 1) + require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants to add 3 and 5") + require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "Let me call the add function") + }) + } + }) + + t.Run("no thoughts without tool calls", func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) + + // Use the simple fixture which has no tool calls — any reasoning + // should not be persisted since it can't be associated with a tool call. + fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingSimple) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + + prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) + srv, mockRecorder := newTestSrv(t, ctx, prov, nil, testTracer) + defer srv.Close() + + req := createOpenAIResponsesReq(t, srv.URL, fix.Request()) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() + + _, err = io.ReadAll(resp.Body) + require.NoError(t, err) + + // No tool usages (and therefore no thoughts) should be recorded + // when there are no tool calls. + toolUsages := mockRecorder.RecordedToolUsages() + require.Empty(t, toolUsages) + }) +} + func createOpenAIResponsesReq(t *testing.T, baseURL string, input []byte) *http.Request { t.Helper() From 3f484176e3c2ef800418252f359081616ecdba95 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 15:54:11 +0200 Subject: [PATCH 4/6] chore: refactor tests Signed-off-by: Danny Kopping --- bridge_integration_test.go | 16 ++--- .../openai/responses/streaming/simple.txtar | 64 +++++++------------ responses_integration_test.go | 2 +- 3 files changed, 30 insertions(+), 52 deletions(-) diff --git a/bridge_integration_test.go b/bridge_integration_test.go index 5545599..00716c0 100644 --- a/bridge_integration_test.go +++ b/bridge_integration_test.go @@ -174,19 +174,16 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { - streaming bool - expectedToolCallID string - expectedThinkingSubstr string + streaming bool + expectedToolCallID string }{ { - streaming: true, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThinkingSubstr: "Let me find and read it.", + streaming: true, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", }, { - streaming: false, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThinkingSubstr: "Let me find and read it.", + streaming: false, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", }, } @@ -238,7 +235,6 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { // Model thoughts should be embedded in the tool usage record. require.Len(t, toolUsages[0].ModelThoughts, 1) assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") - assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, tc.expectedThinkingSubstr) recorderClient.VerifyAllInterceptionsEnded(t) }) diff --git a/fixtures/openai/responses/streaming/simple.txtar b/fixtures/openai/responses/streaming/simple.txtar index c8736f9..d86aa6e 100644 --- a/fixtures/openai/responses/streaming/simple.txtar +++ b/fixtures/openai/responses/streaming/simple.txtar @@ -13,89 +13,71 @@ event: response.in_progress data: {"type":"response.in_progress","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} - -event: response.reasoning_summary_part.added -data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} - -event: response.reasoning_summary_text.delta -data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"delta":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":4} - -event: response.reasoning_summary_text.done -data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"summary_index":0,"text":"The user wants a joke. I will tell a classic scarecrow joke.","sequence_number":5} - -event: response.reasoning_summary_part.done -data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."},"summary_index":0,"sequence_number":6} - -event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},"output_index":0,"sequence_number":7} - -event: response.output_item.added -data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":1,"sequence_number":8} +data: {"type":"response.output_item.added","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} event: response.content_part.added -data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":9} +data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":1,"sequence_number":10} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Why","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N16SG5UiLncOU","output_index":0,"sequence_number":4} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":1,"sequence_number":11} +data: {"type":"response.output_text.delta","content_index":0,"delta":" did","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"OpojJ3pv0h55","output_index":0,"sequence_number":5} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":1,"sequence_number":12} +data: {"type":"response.output_text.delta","content_index":0,"delta":" the","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"11RCrnBxLo5x","output_index":0,"sequence_number":6} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":1,"sequence_number":13} +data: {"type":"response.output_text.delta","content_index":0,"delta":" scare","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"QZrRBlk6BV","output_index":0,"sequence_number":7} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":1,"sequence_number":14} +data: {"type":"response.output_text.delta","content_index":0,"delta":"crow","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"gp7F8IVupiHG","output_index":0,"sequence_number":8} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":1,"sequence_number":15} +data: {"type":"response.output_text.delta","content_index":0,"delta":" win","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"uKq4X8mT1jl9","output_index":0,"sequence_number":9} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":1,"sequence_number":16} +data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"2Ox5JzaAsJHuT","output_index":0,"sequence_number":10} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":1,"sequence_number":17} +data: {"type":"response.output_text.delta","content_index":0,"delta":" award","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"ZOQbZabNAQ","output_index":0,"sequence_number":11} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":1,"sequence_number":18} +data: {"type":"response.output_text.delta","content_index":0,"delta":"?\n\n","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"N2dSd0FHBxooR","output_index":0,"sequence_number":12} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":1,"sequence_number":19} +data: {"type":"response.output_text.delta","content_index":0,"delta":"Because","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"LZ1O4laHt","output_index":0,"sequence_number":13} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":1,"sequence_number":20} +data: {"type":"response.output_text.delta","content_index":0,"delta":" he","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dqcS6ePaMvxMD","output_index":0,"sequence_number":14} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":1,"sequence_number":21} +data: {"type":"response.output_text.delta","content_index":0,"delta":" was","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"nR6CtC7MUsWW","output_index":0,"sequence_number":15} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":1,"sequence_number":22} +data: {"type":"response.output_text.delta","content_index":0,"delta":" outstanding","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"dNVG","output_index":0,"sequence_number":16} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":1,"sequence_number":23} +data: {"type":"response.output_text.delta","content_index":0,"delta":" in","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"P7w4jjOcdVOla","output_index":0,"sequence_number":17} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":1,"sequence_number":24} +data: {"type":"response.output_text.delta","content_index":0,"delta":" his","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"u9dg4RLIld4e","output_index":0,"sequence_number":18} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":1,"sequence_number":25} +data: {"type":"response.output_text.delta","content_index":0,"delta":" field","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"qefuqzOCOy","output_index":0,"sequence_number":19} event: response.output_text.delta -data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":1,"sequence_number":26} +data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"obfuscation":"DT9j4dSh0xyJdxU","output_index":0,"sequence_number":20} event: response.output_text.done -data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":1,"sequence_number":27,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} +data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","logprobs":[],"output_index":0,"sequence_number":21,"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"} event: response.content_part.done -data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":1,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":28} +data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"},"sequence_number":22} event: response.output_item.done -data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":1,"sequence_number":29} +data: {"type":"response.output_item.done","item":{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"},"output_index":0,"sequence_number":23} event: response.completed -data: {"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"rs_0f9c4b2f224d858000695fa063a0708197af73c2f37cb0b9d3","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants a joke. I will tell a classic scarecrow joke."}]},{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":30} +data: {"type":"response.completed","response":{"id":"resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000","object":"response","created_at":1767874658,"status":"completed","background":false,"completed_at":1767874660,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"msg_0f9c4b2f224d858000695fa063d4708197af73c2f37cb0b9d3","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":11,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":29},"user":null,"metadata":{}},"sequence_number":24} diff --git a/responses_integration_test.go b/responses_integration_test.go index 80d2722..d10ddf4 100644 --- a/responses_integration_test.go +++ b/responses_integration_test.go @@ -1014,7 +1014,7 @@ func TestResponsesModelThoughts(t *testing.T) { // Use the simple fixture which has no tool calls — any reasoning // should not be persisted since it can't be associated with a tool call. - fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingSimple) + fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingCodex) upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) From 3b5ee805355a216e75e3a4c7fd158e5639e0cbaf Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 16:07:06 +0200 Subject: [PATCH 5/6] chore: test multiple thoughts Signed-off-by: Danny Kopping --- bridge_integration_test.go | 34 ++++- .../multi_thinking_builtin_tool.txtar | 136 +++++++++++++++++ fixtures/fixtures.go | 9 ++ .../multi_reasoning_builtin_tool.txtar | 142 ++++++++++++++++++ .../multi_reasoning_builtin_tool.txtar | 94 ++++++++++++ responses_integration_test.go | 42 ++++-- 6 files changed, 437 insertions(+), 20 deletions(-) create mode 100644 fixtures/anthropic/multi_thinking_builtin_tool.txtar create mode 100644 fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar create mode 100644 fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar diff --git a/bridge_integration_test.go b/bridge_integration_test.go index 00716c0..b943d04 100644 --- a/bridge_integration_test.go +++ b/bridge_integration_test.go @@ -174,27 +174,50 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { + name string streaming bool + fixture []byte expectedToolCallID string + expectedThoughts []string }{ { + name: "single thinking block/streaming", streaming: true, + fixture: fixtures.AntSingleBuiltinTool, expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read"}, }, { + name: "single thinking block/blocking", streaming: false, + fixture: fixtures.AntSingleBuiltinTool, expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "multiple thinking blocks/streaming", + streaming: true, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + }, + { + name: "multiple thinking blocks/blocking", + streaming: false, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, }, } for _, tc := range cases { - t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) t.Cleanup(cancel) - fix := fixtures.Parse(t, fixtures.AntSingleBuiltinTool) + fix := fixtures.Parse(t, tc.fixture) upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) recorderClient := &testutil.MockRecorder{} @@ -232,9 +255,10 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) - // Model thoughts should be embedded in the tool usage record. - require.Len(t, toolUsages[0].ModelThoughts, 1) - assert.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants me to read") + require.Len(t, toolUsages[0].ModelThoughts, len(tc.expectedThoughts)) + for i, expected := range tc.expectedThoughts { + assert.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) + } recorderClient.VerifyAllInterceptionsEnded(t) }) diff --git a/fixtures/anthropic/multi_thinking_builtin_tool.txtar b/fixtures/anthropic/multi_thinking_builtin_tool.txtar new file mode 100644 index 0000000..633d11d --- /dev/null +++ b/fixtures/anthropic/multi_thinking_builtin_tool.txtar @@ -0,0 +1,136 @@ +Claude Code has builtin tools to (e.g.) explore the filesystem. +This fixture has two thinking blocks before the tool_use block. + +-- request -- +{ + "model": "claude-sonnet-4-20250514", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "read the foo file" + } + ] +} + +-- streaming -- +event: message_start +data: {"type":"message_start","message":{"id":"msg_015SQewixvT9s4cABCVvUE6g","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":2,"cache_creation_input_tokens":22,"cache_read_input_tokens":13993,"output_tokens":5,"service_tier":"standard"}} } + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"The user wants me to read a file called \"foo\". Let me find and read it."}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"thinking_delta","thinking":"I should use the Read tool to access the file contents."}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"signature_delta","signature":"Aa1BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ=="}} + +event: content_block_stop +data: {"type":"content_block_stop","index":1} + +event: content_block_start +data: {"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_01RX68weRSquLx6HUTj65iBo","name":"Read","input":{}}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":""} } + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"{\"file_path\": \"/tmp/blah/foo"} } + +event: content_block_delta +data: {"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"\"}"} } + +event: content_block_stop +data: {"type":"content_block_stop","index":2 } + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":61} } + +event: message_stop +data: {"type":"message_stop" } + + +-- non-streaming -- +{ + "id": "msg_01JHKqEmh7wYuPXqUWUvusfL", + "container": { + "id": "", + "expires_at": "0001-01-01T00:00:00Z" + }, + "content": [ + { + "type": "thinking", + "thinking": "The user wants me to read a file called \"foo\". Let me find and read it.", + "signature": "Eu8BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, + { + "type": "thinking", + "thinking": "I should use the Read tool to access the file contents.", + "signature": "Aa1BCkYICxgCKkBR++kFr7Za2JhF/9OCpjEc46/EcipL75RK+MEbxJ/VBJPWQTWrNGfwb5khWYJtKEpjjkH07cR/MQvThfb7t7CkEgwU4pKwL7NuZXd1/wgaDILyd0bYMqQovWo3dyIw95Ny7yZPljNBDLsvMBdBr7w+RtbU+AlSftjBuBZHp0VzI54/W+9u6f7qfx0JXsVBKldqqOjFvewT8Xm6Qp/77g6/j0zBiuAQABj/6vS1qATjd8KSIFDg9G/tCtzwmV/T/egmzswWd5CBiAhW6lgJgEDRr+gRUrFSOB7o3hypW8FUnUrr1JtzzwMYAQ==" + }, + { + "citations": null, + "text": "", + "type": "tool_use", + "id": "toolu_01AusGgY5aKFhzWrFBv9JfHq", + "input": { + "file_path": "/tmp/blah/foo" + }, + "name": "Read", + "content": { + "OfWebSearchResultBlockArray": null, + "OfString": "", + "OfMCPToolResultBlockContent": null, + "error_code": "", + "type": "", + "content": null, + "return_code": 0, + "stderr": "", + "stdout": "" + }, + "tool_use_id": "", + "server_name": "", + "is_error": false, + "file_id": "", + "signature": "", + "thinking": "", + "data": "" + } + ], + "model": "claude-sonnet-4-20250514", + "role": "assistant", + "stop_reason": "tool_use", + "stop_sequence": "", + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 23490, + "input_tokens": 5, + "output_tokens": 84, + "server_tool_use": { + "web_search_requests": 0 + }, + "service_tier": "standard" + } +} + diff --git a/fixtures/fixtures.go b/fixtures/fixtures.go index 3c15047..cacf657 100644 --- a/fixtures/fixtures.go +++ b/fixtures/fixtures.go @@ -15,6 +15,9 @@ var ( //go:embed anthropic/single_builtin_tool.txtar AntSingleBuiltinTool []byte + //go:embed anthropic/multi_thinking_builtin_tool.txtar + AntMultiThinkingBuiltinTool []byte + //go:embed anthropic/single_injected_tool.txtar AntSingleInjectedTool []byte @@ -61,6 +64,9 @@ var ( //go:embed openai/responses/blocking/single_builtin_tool.txtar OaiResponsesBlockingSingleBuiltinTool []byte + //go:embed openai/responses/blocking/multi_reasoning_builtin_tool.txtar + OaiResponsesBlockingMultiReasoningBuiltinTool []byte + //go:embed openai/responses/blocking/cached_input_tokens.txtar OaiResponsesBlockingCachedInputTokens []byte @@ -96,6 +102,9 @@ var ( //go:embed openai/responses/streaming/builtin_tool.txtar OaiResponsesStreamingBuiltinTool []byte + //go:embed openai/responses/streaming/multi_reasoning_builtin_tool.txtar + OaiResponsesStreamingMultiReasoningBuiltinTool []byte + //go:embed openai/responses/streaming/cached_input_tokens.txtar OaiResponsesStreamingCachedInputTokens []byte diff --git a/fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar b/fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar new file mode 100644 index 0000000..022b433 --- /dev/null +++ b/fixtures/openai/responses/blocking/multi_reasoning_builtin_tool.txtar @@ -0,0 +1,142 @@ +Two reasoning output items before a function_call. + +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." + } + ], + "model": "gpt-4.1", + "stream": false, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- non-streaming -- +{ + "id": "resp_0da6045a8b68fa5200695fa23dcc2c81a19c849f627abf8a31", + "object": "response", + "created_at": 1767875133, + "status": "completed", + "background": false, + "completed_at": 1767875134, + "error": null, + "incomplete_details": null, + "instructions": null, + "max_output_tokens": null, + "max_tool_calls": null, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "id": "rs_0da6045a8b68fa5200695fa23e100081a19bf68887d47ae93d", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "The user wants to add 3 and 5. Let me call the add function." + } + ] + }, + { + "id": "rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e", + "type": "reasoning", + "status": "completed", + "summary": [ + { + "type": "summary_text", + "text": "After adding, I will check if the result is prime." + } + ] + }, + { + "id": "fc_0da6045a8b68fa5200695fa23e198081a19bf68887d47ae93d", + "type": "function_call", + "status": "completed", + "arguments": "{\"a\":3,\"b\":5}", + "call_id": "call_CJSaa2u51JG996575oVljuNq", + "name": "add" + } + ], + "parallel_tool_calls": true, + "previous_response_id": null, + "prompt_cache_key": null, + "prompt_cache_retention": null, + "reasoning": { + "effort": null, + "summary": null + }, + "safety_identifier": null, + "service_tier": "default", + "store": true, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "description": "Add two numbers together.", + "name": "add", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ], + "additionalProperties": false + }, + "strict": true + } + ], + "top_logprobs": 0, + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 58, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 18, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 76 + }, + "user": null, + "metadata": {} +} diff --git a/fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar b/fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar new file mode 100644 index 0000000..b54ebc7 --- /dev/null +++ b/fixtures/openai/responses/streaming/multi_reasoning_builtin_tool.txtar @@ -0,0 +1,94 @@ +Two reasoning output items before a function_call. + +-- request -- +{ + "input": [ + { + "role": "user", + "content": "Is 3 + 5 a prime number? Use the add function to calculate the sum." + } + ], + "model": "gpt-4.1", + "stream": true, + "tools": [ + { + "type": "function", + "name": "add", + "description": "Add two numbers together.", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number" + }, + "b": { + "type": "number" + } + }, + "required": [ + "a", + "b" + ] + } + } + ] +} + +-- streaming -- +event: response.created +data: {"type":"response.created","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + +event: response.in_progress +data: {"type":"response.in_progress","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"in_progress","background":false,"completed_at":null,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"in_progress","summary":[]},"output_index":0,"sequence_number":2} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":3} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"delta":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":4} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"summary_index":0,"text":"The user wants to add 3 and 5. Let me call the add function.","sequence_number":5} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","output_index":0,"part":{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."},"summary_index":0,"sequence_number":6} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."}]},"output_index":0,"sequence_number":7} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","type":"reasoning","status":"in_progress","summary":[]},"output_index":1,"sequence_number":8} + +event: response.reasoning_summary_part.added +data: {"type":"response.reasoning_summary_part.added","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"part":{"type":"summary_text","text":""},"summary_index":0,"sequence_number":9} + +event: response.reasoning_summary_text.delta +data: {"type":"response.reasoning_summary_text.delta","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"summary_index":0,"delta":"After adding, I will check if the result is prime.","sequence_number":10} + +event: response.reasoning_summary_text.done +data: {"type":"response.reasoning_summary_text.done","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"summary_index":0,"text":"After adding, I will check if the result is prime.","sequence_number":11} + +event: response.reasoning_summary_part.done +data: {"type":"response.reasoning_summary_part.done","item_id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","output_index":1,"part":{"type":"summary_text","text":"After adding, I will check if the result is prime."},"summary_index":0,"sequence_number":12} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"After adding, I will check if the result is prime."}]},"output_index":1,"sequence_number":13} + +event: response.output_item.added +data: {"type":"response.output_item.added","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"in_progress","arguments":"","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":2,"sequence_number":14} + +event: response.function_call_arguments.delta +data: {"type":"response.function_call_arguments.delta","delta":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","obfuscation":"gWZHP8i4lSgQYT","output_index":2,"sequence_number":15} + +event: response.function_call_arguments.done +data: {"type":"response.function_call_arguments.done","arguments":"{\"a\":3,\"b\":5}","item_id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","output_index":2,"sequence_number":16} + +event: response.output_item.done +data: {"type":"response.output_item.done","item":{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"},"output_index":2,"sequence_number":17} + +event: response.completed +data: {"type":"response.completed","response":{"id":"resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458","object":"response","created_at":1767875312,"status":"completed","background":false,"completed_at":1767875312,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4.1-2025-04-14","output":[{"id":"rs_0c3fb28cfcf463a500695fa2f0a0a881a0890103ba88b0628e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"The user wants to add 3 and 5. Let me call the add function."}]},{"id":"rs_1aa7045a8b68fa5200695fa23e200082b29cf79998e58bf94e","type":"reasoning","status":"completed","summary":[{"type":"summary_text","text":"After adding, I will check if the result is prime."}]},{"id":"fc_0c3fb28cfcf463a500695fa2f0b0a881a0890103ba88b0628e","type":"function_call","status":"completed","arguments":"{\"a\":3,\"b\":5}","call_id":"call_7VaiUXZYuuuwWwviCrckxq6t","name":"add"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Add two numbers together.","name":"add","parameters":{"type":"object","properties":{"a":{"type":"number"},"b":{"type":"number"}},"required":["a","b"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":58,"input_tokens_details":{"cached_tokens":0},"output_tokens":18,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":76},"user":null,"metadata":{}},"sequence_number":18} + diff --git a/responses_integration_test.go b/responses_integration_test.go index d10ddf4..0037612 100644 --- a/responses_integration_test.go +++ b/responses_integration_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/json" - "fmt" "io" "net" "net/http" @@ -950,32 +949,45 @@ func TestResponsesModelThoughts(t *testing.T) { t.Parallel() cases := []struct { - streaming bool + name string + fixture []byte expectedToolCallID string + expectedThoughts []string }{ { - streaming: false, + name: "single reasoning/blocking", + fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5"}, }, { - streaming: true, + name: "single reasoning/streaming", + fixture: fixtures.OaiResponsesStreamingBuiltinTool, expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5"}, + }, + { + name: "multiple reasoning items/blocking", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + }, + { + name: "multiple reasoning items/streaming", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, }, } for _, tc := range cases { - t.Run(fmt.Sprintf("streaming=%v", tc.streaming), func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) t.Cleanup(cancel) - var fix fixtures.Fixture - if tc.streaming { - fix = fixtures.Parse(t, fixtures.OaiResponsesStreamingBuiltinTool) - } else { - fix = fixtures.Parse(t, fixtures.OaiResponsesBlockingSingleBuiltinTool) - } + fix := fixtures.Parse(t, tc.fixture) upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) @@ -998,10 +1010,10 @@ func TestResponsesModelThoughts(t *testing.T) { require.Equal(t, "add", toolUsages[0].Tool) require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) - // Model thoughts should be embedded in the tool usage record. - require.Len(t, toolUsages[0].ModelThoughts, 1) - require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "The user wants to add 3 and 5") - require.Contains(t, toolUsages[0].ModelThoughts[0].Content, "Let me call the add function") + require.Len(t, toolUsages[0].ModelThoughts, len(tc.expectedThoughts)) + for i, expected := range tc.expectedThoughts { + require.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) + } }) } }) From a0ad39219bade40c61e502b2fa69fb40888a7dbb Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 6 Mar 2026 16:13:51 +0200 Subject: [PATCH 6/6] chore: refactor tests Signed-off-by: Danny Kopping --- bridge_integration_test.go | 202 ++++++++++++++-------------------- responses_integration_test.go | 148 ++++++++++--------------- 2 files changed, 141 insertions(+), 209 deletions(-) diff --git a/bridge_integration_test.go b/bridge_integration_test.go index b943d04..0c6812b 100644 --- a/bridge_integration_test.go +++ b/bridge_integration_test.go @@ -170,87 +170,91 @@ func TestAnthropicMessages(t *testing.T) { func TestAnthropicMessagesModelThoughts(t *testing.T) { t.Parallel() - t.Run("thinking captured with builtin tool", func(t *testing.T) { - t.Parallel() - - cases := []struct { - name string - streaming bool - fixture []byte - expectedToolCallID string - expectedThoughts []string - }{ - { - name: "single thinking block/streaming", - streaming: true, - fixture: fixtures.AntSingleBuiltinTool, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThoughts: []string{"The user wants me to read"}, - }, - { - name: "single thinking block/blocking", - streaming: false, - fixture: fixtures.AntSingleBuiltinTool, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThoughts: []string{"The user wants me to read"}, - }, - { - name: "multiple thinking blocks/streaming", - streaming: true, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, - }, - { - name: "multiple thinking blocks/blocking", - streaming: false, - fixture: fixtures.AntMultiThinkingBuiltinTool, - expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", - expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, - }, - } + cases := []struct { + name string + streaming bool + fixture []byte + expectedToolCallID string + expectedThoughts []string // nil means no tool usages expected at all + }{ + { + name: "single thinking block/streaming", + streaming: true, + fixture: fixtures.AntSingleBuiltinTool, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "single thinking block/blocking", + streaming: false, + fixture: fixtures.AntSingleBuiltinTool, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read"}, + }, + { + name: "multiple thinking blocks/streaming", + streaming: true, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01RX68weRSquLx6HUTj65iBo", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + }, + { + name: "multiple thinking blocks/blocking", + streaming: false, + fixture: fixtures.AntMultiThinkingBuiltinTool, + expectedToolCallID: "toolu_01AusGgY5aKFhzWrFBv9JfHq", + expectedThoughts: []string{"The user wants me to read", "I should use the Read tool"}, + }, + { + name: "no thoughts without tool calls", + streaming: true, + fixture: fixtures.AntSimple, // This fixture contains thoughts, but they're not associated with tool calls. + }, + } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) - fix := fixtures.Parse(t, tc.fixture) - upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + fix := fixtures.Parse(t, tc.fixture) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) - recorderClient := &testutil.MockRecorder{} - logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) - providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} - b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) - require.NoError(t, err) + recorderClient := &testutil.MockRecorder{} + logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) + providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} + b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) + require.NoError(t, err) - mockSrv := httptest.NewUnstartedServer(b) - t.Cleanup(mockSrv.Close) - mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { - return aibcontext.AsActor(ctx, userID, nil) - } - mockSrv.Start() + mockSrv := httptest.NewUnstartedServer(b) + t.Cleanup(mockSrv.Close) + mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { + return aibcontext.AsActor(ctx, userID, nil) + } + mockSrv.Start() - reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) - require.NoError(t, err) - req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) - client := &http.Client{} - resp, err := client.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() + reqBody, err := sjson.SetBytes(fix.Request(), "stream", tc.streaming) + require.NoError(t, err) + req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() - if tc.streaming { - sp := aibridge.NewSSEParser() - require.NoError(t, sp.Parse(resp.Body)) - assert.Contains(t, sp.AllEvents(), "message_start") - assert.Contains(t, sp.AllEvents(), "message_stop") - } + if tc.streaming { + sp := aibridge.NewSSEParser() + require.NoError(t, sp.Parse(resp.Body)) + assert.Contains(t, sp.AllEvents(), "message_start") + assert.Contains(t, sp.AllEvents(), "message_stop") + } - // Verify tool usage was recorded with associated model thoughts. - toolUsages := recorderClient.RecordedToolUsages() + toolUsages := recorderClient.RecordedToolUsages() + if tc.expectedThoughts == nil { + assert.Empty(t, toolUsages) + } else { require.Len(t, toolUsages, 1) assert.Equal(t, "Read", toolUsages[0].Tool) assert.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) @@ -259,55 +263,11 @@ func TestAnthropicMessagesModelThoughts(t *testing.T) { for i, expected := range tc.expectedThoughts { assert.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) } + } - recorderClient.VerifyAllInterceptionsEnded(t) - }) - } - }) - - t.Run("no thoughts without tool calls", func(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) - - // Use the simple fixture which has no tool calls — any thinking blocks - // should not be persisted since they can't be associated with a tool call. - fix := fixtures.Parse(t, fixtures.AntSimple) - upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) - - recorderClient := &testutil.MockRecorder{} - logger := slogtest.Make(t, &slogtest.Options{}).Leveled(slog.LevelDebug) - providers := []aibridge.Provider{provider.NewAnthropic(anthropicCfg(upstream.URL, apiKey), nil)} - b, err := aibridge.NewRequestBridge(ctx, providers, recorderClient, mcp.NewServerProxyManager(nil, testTracer), logger, nil, testTracer) - require.NoError(t, err) - - mockSrv := httptest.NewUnstartedServer(b) - t.Cleanup(mockSrv.Close) - mockSrv.Config.BaseContext = func(_ net.Listener) context.Context { - return aibcontext.AsActor(ctx, userID, nil) - } - mockSrv.Start() - - reqBody, err := sjson.SetBytes(fix.Request(), "stream", true) - require.NoError(t, err) - req := createAnthropicMessagesReq(t, mockSrv.URL, reqBody) - client := &http.Client{} - resp, err := client.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() - - sp := aibridge.NewSSEParser() - require.NoError(t, sp.Parse(resp.Body)) - - // No tool usages (and therefore no thoughts) should be recorded - // when there are no tool calls. - toolUsages := recorderClient.RecordedToolUsages() - assert.Empty(t, toolUsages) - - recorderClient.VerifyAllInterceptionsEnded(t) - }) + recorderClient.VerifyAllInterceptionsEnded(t) + }) + } } func TestAWSBedrockIntegration(t *testing.T) { diff --git a/responses_integration_test.go b/responses_integration_test.go index 0037612..06d2b29 100644 --- a/responses_integration_test.go +++ b/responses_integration_test.go @@ -945,67 +945,70 @@ func TestResponsesInjectedTool(t *testing.T) { func TestResponsesModelThoughts(t *testing.T) { t.Parallel() - t.Run("reasoning captured with builtin tool", func(t *testing.T) { - t.Parallel() - - cases := []struct { - name string - fixture []byte - expectedToolCallID string - expectedThoughts []string - }{ - { - name: "single reasoning/blocking", - fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, - expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", - expectedThoughts: []string{"The user wants to add 3 and 5"}, - }, - { - name: "single reasoning/streaming", - fixture: fixtures.OaiResponsesStreamingBuiltinTool, - expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", - expectedThoughts: []string{"The user wants to add 3 and 5"}, - }, - { - name: "multiple reasoning items/blocking", - fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, - expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", - expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, - }, - { - name: "multiple reasoning items/streaming", - fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, - expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", - expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, - }, - } + cases := []struct { + name string + fixture []byte + expectedToolCallID string + expectedThoughts []string // nil means no tool usages expected at all + }{ + { + name: "single reasoning/blocking", + fixture: fixtures.OaiResponsesBlockingSingleBuiltinTool, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5"}, + }, + { + name: "single reasoning/streaming", + fixture: fixtures.OaiResponsesStreamingBuiltinTool, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5"}, + }, + { + name: "multiple reasoning items/blocking", + fixture: fixtures.OaiResponsesBlockingMultiReasoningBuiltinTool, + expectedToolCallID: "call_CJSaa2u51JG996575oVljuNq", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + }, + { + name: "multiple reasoning items/streaming", + fixture: fixtures.OaiResponsesStreamingMultiReasoningBuiltinTool, + expectedToolCallID: "call_7VaiUXZYuuuwWwviCrckxq6t", + expectedThoughts: []string{"The user wants to add 3 and 5", "After adding, I will check if the result is prime"}, + }, + { + name: "no thoughts without tool calls", + fixture: fixtures.OaiResponsesStreamingCodex, // This fixture contains reasoning, but it's not associated with tool calls. + }, + } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) + ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) + t.Cleanup(cancel) - fix := fixtures.Parse(t, tc.fixture) - upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) + fix := fixtures.Parse(t, tc.fixture) + upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) - prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) - srv, mockRecorder := newTestSrv(t, ctx, prov, nil, testTracer) - defer srv.Close() + prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) + srv, mockRecorder := newTestSrv(t, ctx, prov, nil, testTracer) + defer srv.Close() - req := createOpenAIResponsesReq(t, srv.URL, fix.Request()) - client := &http.Client{} - resp, err := client.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() + req := createOpenAIResponsesReq(t, srv.URL, fix.Request()) + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + defer resp.Body.Close() - _, err = io.ReadAll(resp.Body) - require.NoError(t, err) + _, err = io.ReadAll(resp.Body) + require.NoError(t, err) - // Verify tool usage was recorded with associated model thoughts. - toolUsages := mockRecorder.RecordedToolUsages() + toolUsages := mockRecorder.RecordedToolUsages() + if tc.expectedThoughts == nil { + require.Empty(t, toolUsages) + } else { require.Len(t, toolUsages, 1) require.Equal(t, "add", toolUsages[0].Tool) require.Equal(t, tc.expectedToolCallID, toolUsages[0].ToolCallID) @@ -1014,40 +1017,9 @@ func TestResponsesModelThoughts(t *testing.T) { for i, expected := range tc.expectedThoughts { require.Contains(t, toolUsages[0].ModelThoughts[i].Content, expected) } - }) - } - }) - - t.Run("no thoughts without tool calls", func(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(t.Context(), time.Second*30) - t.Cleanup(cancel) - - // Use the simple fixture which has no tool calls — any reasoning - // should not be persisted since it can't be associated with a tool call. - fix := fixtures.Parse(t, fixtures.OaiResponsesStreamingCodex) - upstream := testutil.NewMockUpstream(t, ctx, testutil.NewFixtureResponse(fix)) - - prov := provider.NewOpenAI(openaiCfg(upstream.URL, apiKey)) - srv, mockRecorder := newTestSrv(t, ctx, prov, nil, testTracer) - defer srv.Close() - - req := createOpenAIResponsesReq(t, srv.URL, fix.Request()) - client := &http.Client{} - resp, err := client.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - defer resp.Body.Close() - - _, err = io.ReadAll(resp.Body) - require.NoError(t, err) - - // No tool usages (and therefore no thoughts) should be recorded - // when there are no tool calls. - toolUsages := mockRecorder.RecordedToolUsages() - require.Empty(t, toolUsages) - }) + } + }) + } } func createOpenAIResponsesReq(t *testing.T, baseURL string, input []byte) *http.Request {