Skip to content

Commit be80da4

Browse files
fix: 修复缓存
1 parent fce40fe commit be80da4

File tree

3 files changed

+61
-3
lines changed

3 files changed

+61
-3
lines changed

src/services/api/claude.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2907,7 +2907,12 @@ async function* queryModel(
29072907
provider: getAPIProvider(),
29082908
input: convertMessagesToLangfuse(messagesForAPI, systemPrompt),
29092909
output: convertOutputToLangfuse(newMessages),
2910-
usage: { input_tokens: usage.input_tokens, output_tokens: usage.output_tokens },
2910+
usage: {
2911+
input_tokens: usage.input_tokens,
2912+
output_tokens: usage.output_tokens,
2913+
cache_creation_input_tokens: usage.cache_creation_input_tokens,
2914+
cache_read_input_tokens: usage.cache_read_input_tokens,
2915+
},
29112916
startTime: new Date(startIncludingRetries),
29122917
endTime: new Date(),
29132918
completionStartTime: ttftMs > 0 ? new Date(start + ttftMs) : undefined,

src/services/langfuse/__tests__/langfuse.test.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,48 @@ describe('Langfuse integration', () => {
284284
}))
285285
expect(mockRootEnd).toHaveBeenCalled()
286286
})
287+
288+
test('includes cache tokens in usageDetails when provided', async () => {
289+
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
290+
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
291+
const { createTrace, recordLLMObservation } = await import('../tracing.js')
292+
const span = createTrace({ sessionId: 's1', model: 'claude-3', provider: 'firstParty' })
293+
mockStartObservation.mockClear()
294+
mockRootUpdate.mockClear()
295+
recordLLMObservation(span, {
296+
model: 'claude-3',
297+
provider: 'firstParty',
298+
input: [],
299+
output: [],
300+
usage: { input_tokens: 10000, output_tokens: 50, cache_creation_input_tokens: 2000, cache_read_input_tokens: 7000 },
301+
})
302+
expect(mockRootUpdate).toHaveBeenCalledWith(expect.objectContaining({
303+
usageDetails: {
304+
input: 19000, // 10000 + 2000 + 7000
305+
output: 50,
306+
cache_read: 7000,
307+
cache_creation: 2000,
308+
},
309+
}))
310+
})
311+
312+
test('omits cache fields when not provided', async () => {
313+
process.env.LANGFUSE_PUBLIC_KEY = 'pk-test'
314+
process.env.LANGFUSE_SECRET_KEY = 'sk-test'
315+
const { createTrace, recordLLMObservation } = await import('../tracing.js')
316+
const span = createTrace({ sessionId: 's1', model: 'claude-3', provider: 'firstParty' })
317+
mockRootUpdate.mockClear()
318+
recordLLMObservation(span, {
319+
model: 'claude-3',
320+
provider: 'firstParty',
321+
input: [],
322+
output: [],
323+
usage: { input_tokens: 100, output_tokens: 20 },
324+
})
325+
expect(mockRootUpdate).toHaveBeenCalledWith(expect.objectContaining({
326+
usageDetails: { input: 100, output: 20 },
327+
}))
328+
})
287329
})
288330

289331
describe('recordToolObservation', () => {

src/services/langfuse/tracing.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,12 @@ export function recordLLMObservation(
6868
provider: string
6969
input: unknown
7070
output: unknown
71-
usage: { input_tokens: number; output_tokens: number }
71+
usage: {
72+
input_tokens: number
73+
output_tokens: number
74+
cache_creation_input_tokens?: number
75+
cache_read_input_tokens?: number
76+
}
7277
startTime?: Date
7378
endTime?: Date
7479
completionStartTime?: Date
@@ -109,11 +114,17 @@ export function recordLLMObservation(
109114
gen.otelSpan.setAttribute(LangfuseOtelSpanAttributes.TRACE_USER_ID, userId)
110115
}
111116

117+
// Anthropic splits input into uncached + cache_read + cache_creation.
118+
// Langfuse's "input" should be the total prompt token count so that the cost calculation is correct.
119+
const cacheRead = params.usage.cache_read_input_tokens ?? 0
120+
const cacheCreation = params.usage.cache_creation_input_tokens ?? 0
112121
gen.update({
113122
output: params.output,
114123
usageDetails: {
115-
input: params.usage.input_tokens,
124+
input: params.usage.input_tokens + cacheCreation + cacheRead,
116125
output: params.usage.output_tokens,
126+
...(cacheRead > 0 && { cache_read: cacheRead }),
127+
...(cacheCreation > 0 && { cache_creation: cacheCreation }),
117128
},
118129
})
119130

0 commit comments

Comments
 (0)