Skip to content

Commit d3474ec

Browse files
authored
fix(core): Avoid double counting cached input tokens for Vercel AI SDK v6 (#21488)
The AI SDK v6 reports `ai.usage.inputTokens` as a cache-inclusive total, but our integration adds the cache-read count on top, which is only correct for SDK versions before v6. We now distinguish v6 from pre-v6 SDKs by the presence of `ai.usage.inputTokenDetails.*` attributes and skip adding the cached count when those attributes are present. Closes: #21484
1 parent e9ae99c commit d3474ec

3 files changed

Lines changed: 71 additions & 32 deletions

File tree

packages/core/src/tracing/vercel-ai/index.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ import {
5454
AI_TOOL_CALL_RESULT_ATTRIBUTE,
5555
AI_USAGE_CACHED_INPUT_TOKENS_ATTRIBUTE,
5656
AI_USAGE_COMPLETION_TOKENS_ATTRIBUTE,
57+
AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX,
5758
AI_USAGE_PROMPT_TOKENS_ATTRIBUTE,
5859
AI_USAGE_TOKENS_ATTRIBUTE,
5960
AI_VALUES_ATTRIBUTE,
@@ -255,8 +256,13 @@ export function processVercelAiSpanAttributes(attributes: Record<string, unknown
255256
// AI SDK uses avgOutputTokensPerSecond, map to our expected attribute name
256257
renameAttributeKey(attributes, 'ai.response.avgOutputTokensPerSecond', 'ai.response.avgCompletionTokensPerSecond');
257258

258-
// Input tokens is the sum of prompt tokens and cached input tokens
259+
// v6 input tokens are cache-inclusive (marked by the presence of `inputTokenDetails.*`); only
260+
// older SDKs need the cached tokens added back.
261+
const inputTokensAreCacheInclusive = Object.keys(attributes).some(key =>
262+
key.startsWith(AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX),
263+
);
259264
if (
265+
!inputTokensAreCacheInclusive &&
260266
typeof attributes[GEN_AI_USAGE_INPUT_TOKENS_ATTRIBUTE] === 'number' &&
261267
typeof attributes[GEN_AI_USAGE_INPUT_TOKENS_CACHED_ATTRIBUTE] === 'number'
262268
) {

packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,14 @@ export const AI_RESPONSE_PROVIDER_METADATA_ATTRIBUTE = 'ai.response.providerMeta
145145
* @see https://ai-sdk.dev/docs/ai-sdk-core/telemetry#basic-llm-span-information
146146
*/
147147
export const AI_USAGE_CACHED_INPUT_TOKENS_ATTRIBUTE = 'ai.usage.cachedInputTokens';
148+
149+
/**
150+
* Prefix for the per-category breakdown of input tokens (AI SDK v6+), e.g.
151+
* `noCacheTokens`, `cacheReadTokens`, `cacheWriteTokens`. Only emitted by v6+, where
152+
* `ai.usage.inputTokens` is already cache-inclusive.
153+
*/
154+
export const AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX = 'ai.usage.inputTokenDetails.';
155+
148156
/**
149157
* Basic LLM span information
150158
* Multiple spans

packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts

Lines changed: 56 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,62 @@ import { addVercelAiProcessors } from '../../../src/tracing/vercel-ai';
33
import type { SpanJSON } from '../../../src/types/span';
44
import { getDefaultTestClientOptions, TestClient } from '../../mocks/client';
55

6+
function processSpan(data: SpanJSON['data']): SpanJSON {
7+
const options = getDefaultTestClientOptions({ tracesSampleRate: 1.0 });
8+
const client = new TestClient(options);
9+
client.init();
10+
addVercelAiProcessors(client);
11+
12+
const mockSpan: SpanJSON = {
13+
description: 'test',
14+
span_id: 'test-span-id',
15+
trace_id: 'test-trace-id',
16+
start_timestamp: 1000,
17+
timestamp: 2000,
18+
origin: 'auto.vercelai.otel',
19+
data,
20+
};
21+
22+
const event = {
23+
type: 'transaction' as const,
24+
spans: [mockSpan],
25+
};
26+
27+
const eventProcessor = client['_eventProcessors'].find(processor => processor.id === 'VercelAiEventProcessor');
28+
expect(eventProcessor).toBeDefined();
29+
30+
return eventProcessor!(event, {})!.spans![0]!;
31+
}
32+
633
describe('vercel-ai cached tokens', () => {
7-
it('should add cached input tokens to total input tokens', () => {
8-
const options = getDefaultTestClientOptions({ tracesSampleRate: 1.0 });
9-
const client = new TestClient(options);
10-
client.init();
11-
addVercelAiProcessors(client);
12-
13-
const mockSpan: SpanJSON = {
14-
description: 'test',
15-
span_id: 'test-span-id',
16-
trace_id: 'test-trace-id',
17-
start_timestamp: 1000,
18-
timestamp: 2000,
19-
origin: 'auto.vercelai.otel',
20-
data: {
21-
'ai.usage.promptTokens': 100,
22-
'ai.usage.cachedInputTokens': 50,
23-
},
24-
};
25-
26-
const event = {
27-
type: 'transaction' as const,
28-
spans: [mockSpan],
29-
};
30-
31-
const eventProcessor = client['_eventProcessors'].find(processor => processor.id === 'VercelAiEventProcessor');
32-
expect(eventProcessor).toBeDefined();
33-
34-
const processedEvent = eventProcessor!(event, {});
35-
36-
expect(processedEvent?.spans?.[0]?.data?.['gen_ai.usage.input_tokens']).toBe(150);
37-
expect(processedEvent?.spans?.[0]?.data?.['gen_ai.usage.input_tokens.cached']).toBe(50);
34+
it('adds cached input tokens to the input tokens for AI SDK <=5 (cache-exclusive input tokens)', () => {
35+
const span = processSpan({
36+
'ai.usage.promptTokens': 100,
37+
'ai.usage.cachedInputTokens': 50,
38+
});
39+
40+
expect(span.data?.['gen_ai.usage.input_tokens']).toBe(150);
41+
expect(span.data?.['gen_ai.usage.input_tokens.cached']).toBe(50);
42+
expect(span.data?.['gen_ai.usage.total_tokens']).toBe(150);
43+
});
44+
45+
it('does not double-count cached input tokens for AI SDK v6 (cache-inclusive input tokens)', () => {
46+
// AI SDK v6 reports `ai.usage.inputTokens` as a cache-inclusive total
47+
// (noCache + cacheRead + cacheWrite) and emits the breakdown under
48+
// `ai.usage.inputTokenDetails.*`. The cached tokens must not be added again.
49+
const span = processSpan({
50+
'ai.operationId': 'ai.streamText.doStream',
51+
'ai.usage.inputTokens': 9500, // 1000 noCache + 8000 cacheRead + 500 cacheWrite
52+
'ai.usage.outputTokens': 300,
53+
'ai.usage.cachedInputTokens': 8000,
54+
'ai.usage.inputTokenDetails.noCacheTokens': 1000,
55+
'ai.usage.inputTokenDetails.cacheReadTokens': 8000,
56+
'ai.usage.inputTokenDetails.cacheWriteTokens': 500,
57+
});
58+
59+
expect(span.data?.['gen_ai.usage.input_tokens']).toBe(9500);
60+
expect(span.data?.['gen_ai.usage.input_tokens.cached']).toBe(8000);
61+
expect(span.data?.['gen_ai.usage.output_tokens']).toBe(300);
62+
expect(span.data?.['gen_ai.usage.total_tokens']).toBe(9800);
3863
});
3964
});

0 commit comments

Comments
 (0)