fix(core): Avoid double counting cached input tokens for Vercel AI SDK v6 (#21488)

andreiborza · web-flow · commit d3474ecf1bbf · 2026-06-12T11:28:12.000+02:00
The AI SDK v6 reports `ai.usage.inputTokens` as a cache-inclusive total, but our integration adds the cache-read count on top of it, which is only correct for SDK versions before v6 and double counts cached tokens on v6. We now distinguish v6 from pre-v6 SDKs by the presence of `ai.usage.inputTokenDetails.*` attributes and skip adding the cached count when they are present. Closes: #21484
diff --git a/packages/core/src/tracing/vercel-ai/index.ts b/packages/core/src/tracing/vercel-ai/index.ts
@@ -54,6 +54,7 @@ import {
   AI_TOOL_CALL_RESULT_ATTRIBUTE,
   AI_USAGE_CACHED_INPUT_TOKENS_ATTRIBUTE,
   AI_USAGE_COMPLETION_TOKENS_ATTRIBUTE,
+  AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX,
   AI_USAGE_PROMPT_TOKENS_ATTRIBUTE,
   AI_USAGE_TOKENS_ATTRIBUTE,
   AI_VALUES_ATTRIBUTE,
@@ -255,8 +256,13 @@ export function processVercelAiSpanAttributes(attributes: Record<string, unknown
   // AI SDK uses avgOutputTokensPerSecond, map to our expected attribute name
   renameAttributeKey(attributes, 'ai.response.avgOutputTokensPerSecond', 'ai.response.avgCompletionTokensPerSecond');
 
-  // Input tokens is the sum of prompt tokens and cached input tokens
+  // AI SDK v6 input tokens are cache-inclusive (signalled by `ai.usage.inputTokenDetails.*` attributes);
+  // only older SDKs report cache-exclusive input tokens and need the cached tokens added on top.
+  const inputTokensAreCacheInclusive = Object.keys(attributes).some(key =>
+    key.startsWith(AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX),
+  );
   if (
+    !inputTokensAreCacheInclusive &&
     typeof attributes[GEN_AI_USAGE_INPUT_TOKENS_ATTRIBUTE] === 'number' &&
     typeof attributes[GEN_AI_USAGE_INPUT_TOKENS_CACHED_ATTRIBUTE] === 'number'
   ) {
diff --git a/packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts b/packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts
@@ -145,6 +145,14 @@ export const AI_RESPONSE_PROVIDER_METADATA_ATTRIBUTE = 'ai.response.providerMeta
  * @see https://ai-sdk.dev/docs/ai-sdk-core/telemetry#basic-llm-span-information
  */
 export const AI_USAGE_CACHED_INPUT_TOKENS_ATTRIBUTE = 'ai.usage.cachedInputTokens';
+
+/**
+ * Prefix for the per-category breakdown of input tokens (AI SDK v6+), e.g.
+ * `noCacheTokens`, `cacheReadTokens`, `cacheWriteTokens`. Only emitted by v6 and later, where
+ * `ai.usage.inputTokens` is already cache-inclusive.
+ */
+export const AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX = 'ai.usage.inputTokenDetails.';
+
 /**
  * Basic LLM span information
  * Multiple spans
diff --git a/packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts b/packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts
@@ -3,37 +3,62 @@ import { addVercelAiProcessors } from '../../../src/tracing/vercel-ai';
 import type { SpanJSON } from '../../../src/types/span';
 import { getDefaultTestClientOptions, TestClient } from '../../mocks/client';
 
+function processSpan(data: SpanJSON['data']): SpanJSON {
+  const options = getDefaultTestClientOptions({ tracesSampleRate: 1.0 });
+  const client = new TestClient(options);
+  client.init();
+  addVercelAiProcessors(client);
+
+  const mockSpan: SpanJSON = {
+    description: 'test',
+    span_id: 'test-span-id',
+    trace_id: 'test-trace-id',
+    start_timestamp: 1000,
+    timestamp: 2000,
+    origin: 'auto.vercelai.otel',
+    data,
+  };
+
+  const event = {
+    type: 'transaction' as const,
+    spans: [mockSpan],
+  };
+
+  const eventProcessor = client['_eventProcessors'].find(processor => processor.id === 'VercelAiEventProcessor');
+  expect(eventProcessor).toBeDefined();
+
+  return eventProcessor!(event, {})!.spans![0]!;
+}
+
 describe('vercel-ai cached tokens', () => {
-  it('should add cached input tokens to total input tokens', () => {
-    const options = getDefaultTestClientOptions({ tracesSampleRate: 1.0 });
-    const client = new TestClient(options);
-    client.init();
-    addVercelAiProcessors(client);
-
-    const mockSpan: SpanJSON = {
-      description: 'test',
-      span_id: 'test-span-id',
-      trace_id: 'test-trace-id',
-      start_timestamp: 1000,
-      timestamp: 2000,
-      origin: 'auto.vercelai.otel',
-      data: {
-        'ai.usage.promptTokens': 100,
-        'ai.usage.cachedInputTokens': 50,
-      },
-    };
-
-    const event = {
-      type: 'transaction' as const,
-      spans: [mockSpan],
-    };
-
-    const eventProcessor = client['_eventProcessors'].find(processor => processor.id === 'VercelAiEventProcessor');
-    expect(eventProcessor).toBeDefined();
-
-    const processedEvent = eventProcessor!(event, {});
-
-    expect(processedEvent?.spans?.[0]?.data?.['gen_ai.usage.input_tokens']).toBe(150);
-    expect(processedEvent?.spans?.[0]?.data?.['gen_ai.usage.input_tokens.cached']).toBe(50);
+  it('adds cached input tokens to the input tokens for AI SDK <=5 (cache-exclusive input tokens)', () => {
+    const span = processSpan({
+      'ai.usage.promptTokens': 100,
+      'ai.usage.cachedInputTokens': 50,
+    });
+
+    expect(span.data?.['gen_ai.usage.input_tokens']).toBe(150);
+    expect(span.data?.['gen_ai.usage.input_tokens.cached']).toBe(50);
+    expect(span.data?.['gen_ai.usage.total_tokens']).toBe(150);
+  });
+
+  it('does not double-count cached input tokens for AI SDK v6 (cache-inclusive input tokens)', () => {
+    // AI SDK v6 reports `ai.usage.inputTokens` as a cache-inclusive total
+    // (noCache + cacheRead + cacheWrite) and emits the breakdown under
+    // `ai.usage.inputTokenDetails.*`. The cached tokens must not be added again.
+    const span = processSpan({
+      'ai.operationId': 'ai.streamText.doStream',
+      'ai.usage.inputTokens': 9500, // 1000 noCache + 8000 cacheRead + 500 cacheWrite
+      'ai.usage.outputTokens': 300,
+      'ai.usage.cachedInputTokens': 8000,
+      'ai.usage.inputTokenDetails.noCacheTokens': 1000,
+      'ai.usage.inputTokenDetails.cacheReadTokens': 8000,
+      'ai.usage.inputTokenDetails.cacheWriteTokens': 500,
+    });
+
+    expect(span.data?.['gen_ai.usage.input_tokens']).toBe(9500);
+    expect(span.data?.['gen_ai.usage.input_tokens.cached']).toBe(8000);
+    expect(span.data?.['gen_ai.usage.output_tokens']).toBe(300);
+    expect(span.data?.['gen_ai.usage.total_tokens']).toBe(9800);
   });
 });