|
1 | | -import { SBChatMessage, SBChatMessageMetadata } from "@/features/chat/types"; |
| 1 | +import { SBChatMessage, SBChatMessageMetadata, StepTokenUsageEntry, ToolTokenUsageEntry } from "@/features/chat/types"; |
| 2 | +import { estimateModelToolOutputTokens } from "@/ee/features/chat/tokenEstimation"; |
2 | 3 | import { getFileSource } from '@/features/git'; |
3 | 4 | import { isServiceError } from "@/lib/utils"; |
4 | 5 | import { LanguageModelV3 as AISDKLanguageModelV3 } from "@ai-sdk/provider"; |
@@ -190,19 +191,76 @@ export const createMessageStream = async ({ |
190 | 191 | }); |
191 | 192 |
|
192 | 193 | const totalUsage = await researchStream.totalUsage; |
| 194 | + const steps = await researchStream.steps; |
| 195 | + const response = await researchStream.response; |
| 196 | + |
| 197 | + // Tool output estimates are derived from `response.messages` rather |
| 198 | + // than per-step `toolResults` because the response messages cover |
| 199 | + // tool calls that never run inside a step — approval-gated tools |
| 200 | + // execute before the step loop, and thrown tool errors are recorded |
| 201 | + // as `tool-error` parts that `toolResults` excludes. Their |
| 202 | + // `tool-result` parts also carry the output in model-visible form |
| 203 | + // (`toModelOutput` already applied), which is exactly the payload |
| 204 | + // whose token footprint we want to estimate. |
| 205 | + const toolUsageByToolCallId = new Map<string, ToolTokenUsageEntry>( |
| 206 | + response.messages.flatMap((message) => |
| 207 | + message.role !== 'tool' ? [] : message.content.flatMap((part) => |
| 208 | + part.type !== 'tool-result' ? [] : [[part.toolCallId, { |
| 209 | + toolCallId: part.toolCallId, |
| 210 | + toolName: part.toolName, |
| 211 | + estimatedOutputTokens: estimateModelToolOutputTokens(part.output), |
| 212 | + }] as const] |
| 213 | + ) |
| 214 | + ) |
| 215 | + ); |
| 216 | + |
| 217 | + // One entry per step, in step order. The UI joins its step groups |
| 218 | + // to these entries by array position, so the order and count must |
| 219 | + // mirror the stream's steps exactly. Tool calls nest under the |
| 220 | + // step they ran in; `content` is matched rather than `toolResults` |
| 221 | + // so that thrown tool errors (`tool-error` parts, which |
| 222 | + // `toolResults` excludes) are still attributed to their step. |
| 223 | + const stepTokenUsage: StepTokenUsageEntry[] = steps.map(({ usage, content }) => ({ |
| 224 | + inputTokens: usage.inputTokens, |
| 225 | + outputTokens: usage.outputTokens, |
| 226 | + cacheReadTokens: usage.inputTokenDetails?.cacheReadTokens, |
| 227 | + tools: content.flatMap((part) => { |
| 228 | + if (part.type !== 'tool-result' && part.type !== 'tool-error') { |
| 229 | + return []; |
| 230 | + } |
| 231 | + const entry = toolUsageByToolCallId.get(part.toolCallId); |
| 232 | + if (!entry) { |
| 233 | + return []; |
| 234 | + } |
| 235 | + toolUsageByToolCallId.delete(part.toolCallId); |
| 236 | + return [entry]; |
| 237 | + }), |
| 238 | + })); |
| 239 | + |
| 240 | + // Any estimates left unclaimed belong to tool calls that executed |
| 241 | + // before the step loop (approval continuations). Their output |
| 242 | + // enters the context as input to this phase's first step, so nest |
| 243 | + // them under it. |
| 244 | + if (toolUsageByToolCallId.size > 0 && stepTokenUsage.length > 0) { |
| 245 | + stepTokenUsage[0].tools.unshift(...toolUsageByToolCallId.values()); |
| 246 | + } |
193 | 247 |
|
194 | 248 | writer.write({ |
195 | 249 | type: 'message-metadata', |
196 | 250 | messageMetadata: { |
| 251 | + // Spread first so the derived fields below can't be overwritten by caller metadata. |
| 252 | + ...metadata, |
197 | 253 | totalTokens: (priorMetadata?.totalTokens ?? 0) + (totalUsage.totalTokens ?? 0), |
198 | 254 | totalInputTokens: (priorMetadata?.totalInputTokens ?? 0) + (totalUsage.inputTokens ?? 0), |
199 | 255 | totalOutputTokens: (priorMetadata?.totalOutputTokens ?? 0) + (totalUsage.outputTokens ?? 0), |
200 | 256 | totalCacheReadTokens: (priorMetadata?.totalCacheReadTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheReadTokens ?? 0), |
201 | 257 | totalCacheWriteTokens: (priorMetadata?.totalCacheWriteTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheWriteTokens ?? 0), |
202 | 258 | totalResponseTimeMs: (priorMetadata?.totalResponseTimeMs ?? 0) + (new Date().getTime() - startTime.getTime()), |
| 259 | + // Concatenated (not summed) across approval-continuation |
| 260 | + // phases so earlier phases' steps are preserved in order. |
| 261 | + stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? []), ...stepTokenUsage], |
203 | 262 | modelName, |
204 | 263 | traceId, |
205 | | - ...metadata, |
206 | 264 | } |
207 | 265 | }); |
208 | 266 |
|
@@ -430,6 +488,13 @@ const createAgentStream = async ({ |
430 | 488 | logger.warn(`Tool call repair failed for "${toolCall.toolName}": ${error.message}`); |
431 | 489 | return null; |
432 | 490 | }, |
| 491 | + // Token usage collection deliberately does NOT happen here: the SDK |
| 492 | + // awaits this callback before starting the next step, so it must |
| 493 | + // stay cheap, and `toolResults` misses tool calls that never run |
| 494 | + // inside a step (approval-gated tools execute before the step loop) |
| 495 | + // as well as thrown tool errors (recorded as `tool-error` parts). |
| 496 | + // Both are instead derived post-stream in `createMessageStream` |
| 497 | + // from `steps` and `response.messages`. |
433 | 498 | onStepFinish: ({ toolResults }) => { |
434 | 499 | toolResults.forEach(({ output, dynamic }) => { |
435 | 500 | if (dynamic || isServiceError(output)) { |
|
0 commit comments