From b30e3e214d655134ec525005396ddaec67928d53 Mon Sep 17 00:00:00 2001 From: Florian Reifschneider Date: Tue, 14 Apr 2026 13:06:51 -0700 Subject: [PATCH 1/2] fix(langfuse): avoid double-counting cache and reasoning tokens Langfuse sums all keys containing "input" for Input usage and "output" for Output usage. Since inputTokens/outputTokens already included cache/reasoning tokens, they were double-counted. Now input/output report only their non-overlapping portion, and total is omitted so Langfuse derives it correctly by summing all keys. --- packages/langfuse/src/index.test.ts | 6 ------ packages/langfuse/src/index.ts | 13 +++++++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/packages/langfuse/src/index.test.ts b/packages/langfuse/src/index.test.ts index 6c11ebd..2c6c178 100644 --- a/packages/langfuse/src/index.test.ts +++ b/packages/langfuse/src/index.test.ts @@ -238,7 +238,6 @@ describe('@core-ai/langfuse', () => { usageDetails: { input: 5, output: 3, - total: 8, cache_read_input: 0, cache_creation_input: 0, }, @@ -273,7 +272,6 @@ describe('@core-ai/langfuse', () => { usageDetails: { input: 5, output: 3, - total: 8, cache_read_input: 0, cache_creation_input: 0, }, @@ -314,7 +312,6 @@ describe('@core-ai/langfuse', () => { usageDetails: { input: 5, output: 3, - total: 8, cache_read_input: 0, cache_creation_input: 0, }, @@ -364,7 +361,6 @@ describe('@core-ai/langfuse', () => { usageDetails: { input: 5, output: 3, - total: 8, cache_read_input: 0, cache_creation_input: 0, }, @@ -491,7 +487,6 @@ describe('@core-ai/langfuse', () => { usageDetails: { input: 5, output: 3, - total: 8, cache_read_input: 0, cache_creation_input: 0, }, @@ -539,7 +534,6 @@ describe('@core-ai/langfuse', () => { expect(observation.update).toHaveBeenNthCalledWith(2, { usageDetails: { input: 3, - total: 3, }, }); }); diff --git a/packages/langfuse/src/index.ts b/packages/langfuse/src/index.ts index 58063ee..3c6aee4 100644 --- a/packages/langfuse/src/index.ts +++ 
b/packages/langfuse/src/index.ts @@ -107,12 +107,14 @@ function createImageModelParameters( } function createChatUsageDetails(usage: ChatUsage): Record<string, number> { + const { cacheReadTokens, cacheWriteTokens } = usage.inputTokenDetails; + const reasoningTokens = usage.outputTokenDetails.reasoningTokens ?? 0; + return { - input: usage.inputTokens, - output: usage.outputTokens, - total: usage.inputTokens + usage.outputTokens, - cache_read_input: usage.inputTokenDetails.cacheReadTokens, - cache_creation_input: usage.inputTokenDetails.cacheWriteTokens, + input: usage.inputTokens - cacheReadTokens - cacheWriteTokens, + output: usage.outputTokens - reasoningTokens, + cache_read_input: cacheReadTokens, + cache_creation_input: cacheWriteTokens, ...(usage.outputTokenDetails.reasoningTokens !== undefined ? { reasoning_output: usage.outputTokenDetails.reasoningTokens } : {}), @@ -128,7 +130,6 @@ function createEmbeddingUsageDetails( return { input: usage.inputTokens, - total: usage.inputTokens, }; } From 90cf021f24970603439c30045e2668c0c4e18242 Mon Sep 17 00:00:00 2001 From: Florian Reifschneider Date: Tue, 14 Apr 2026 13:08:32 -0700 Subject: [PATCH 2/2] add changeset --- .changeset/fix-langfuse-usage-double-counting.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/fix-langfuse-usage-double-counting.md diff --git a/.changeset/fix-langfuse-usage-double-counting.md b/.changeset/fix-langfuse-usage-double-counting.md new file mode 100644 index 0000000..28c3944 --- /dev/null +++ b/.changeset/fix-langfuse-usage-double-counting.md @@ -0,0 +1,5 @@ +--- +'@core-ai/langfuse': patch +--- + +Fix double-counting of cache and reasoning tokens in Langfuse usage breakdown. The `input` and `output` keys now report only non-overlapping token counts so Langfuse's aggregation sums correctly.