Skip to content

Commit 859686a

Browse files
committed
feat(cli): split cache-creation TTL and ship per-model time buckets (rollup v3)
Anthropic prices a 1-hour ephemeral cache write at 2x the input rate, versus 1.25x for the 5-minute default, so claude-code usage now carries the cache_creation.ephemeral_5m/1h split (when reported) through MetricBag, modelRollups, and the new modelBuckets array. modelBuckets aggregates model.usage events per (15-min bucket, model) so the server can attribute cost to the time it was actually incurred instead of to the session's last_event_at. schemaVersion bumps to 3; as with v2, the resulting hash change re-uploads history on the next backfill.
1 parent 25203cc commit 859686a

5 files changed

Lines changed: 228 additions & 8 deletions

File tree

packages/cli/src/adapters/claude-code.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,18 +378,43 @@ export function claudeUsageFromMessage(message: Record<string, unknown>): Partia
378378
const outputTokens = numberField(usage, 'output_tokens') || 0
379379
const cachedInputTokens = cacheCreationInputTokens + cacheReadInputTokens
380380
const totalInputTokens = inputTokens + cachedInputTokens
381+
const cacheCreationSplit = claudeCacheCreationSplit(usage)
381382

382383
return {
383384
tokensInput: totalInputTokens || undefined,
384385
tokensCachedInput: cachedInputTokens || undefined,
385386
tokensCacheCreationInput: cacheCreationInputTokens || undefined,
387+
tokensCacheCreation5mInput: cacheCreationSplit?.fiveMinute,
388+
tokensCacheCreation1hInput: cacheCreationSplit?.oneHour,
386389
tokensCacheReadInput: cacheReadInputTokens || undefined,
387390
tokensOutput: outputTokens || undefined,
388391
tokensTotal: totalInputTokens + outputTokens || undefined,
389392
modelCalls: 1,
390393
}
391394
}
392395

396+
// Splits Anthropic's `cache_creation_input_tokens` total into 5m/1h TTL buckets
397+
// using the optional `usage.cache_creation: { ephemeral_5m_input_tokens,
398+
// ephemeral_1h_input_tokens }` breakdown. The split matters for cost: ccusage
399+
// (rust/crates/ccusage/src/cost.rs) prices the 5m portion at the cache_create
400+
// rate (~1.25x input) but the 1h portion at input * CACHE_CREATE_1H_INPUT_MULTIPLIER
401+
// (= 2.0), so collapsing them underprices 1h writes: the true cost is ~60% higher (a ~37.5% underestimate).
402+
//
403+
// Fault tolerance mirrors ccusage: the explicit ephemeral fields are trusted as
404+
// the per-TTL split (defaulting to 0), while the total stays
405+
// cache_creation_input_tokens. We only emit the split when the breakdown object
406+
// is present; otherwise the TTL split is unknown and left absent.
407+
// Returns the 5-minute / 1-hour cache-write token counts read from
// `usage.cache_creation`, or undefined when that value is not a plain object.
function claudeCacheCreationSplit(usage: Record<string, unknown>): { fiveMinute: number, oneHour: number } | undefined {
408+
const breakdown = usage.cache_creation
409+
// No breakdown object: the TTL split is unknown, so report "absent" rather than zeros.
if (!isPlainObject(breakdown)) {
410+
return undefined
411+
}
412+
// Breakdown present: a falsy numberField result (presumably a missing or
// non-numeric field — confirm against numberField) is coerced to 0.
return {
413+
fiveMinute: numberField(breakdown, 'ephemeral_5m_input_tokens') || 0,
414+
oneHour: numberField(breakdown, 'ephemeral_1h_input_tokens') || 0,
415+
}
416+
}
417+
393418
function claudeSubagentMetrics(
394419
toolResult: Record<string, unknown>,
395420
fallbackDurationMs: number | undefined,
@@ -401,6 +426,7 @@ function claudeSubagentMetrics(
401426
const outputTokens = numberField(usage, 'output_tokens') || 0
402427
const cachedInputTokens = cacheCreationInputTokens + cacheReadInputTokens
403428
const totalInputTokens = inputTokens + cachedInputTokens
429+
const cacheCreationSplit = claudeCacheCreationSplit(usage)
404430
const durationMs = numberField(toolResult, 'totalDurationMs') || fallbackDurationMs
405431

406432
return {
@@ -410,6 +436,8 @@ function claudeSubagentMetrics(
410436
tokensInput: totalInputTokens || undefined,
411437
tokensCachedInput: cachedInputTokens || undefined,
412438
tokensCacheCreationInput: cacheCreationInputTokens || undefined,
439+
tokensCacheCreation5mInput: cacheCreationSplit?.fiveMinute,
440+
tokensCacheCreation1hInput: cacheCreationSplit?.oneHour,
413441
tokensCacheReadInput: cacheReadInputTokens || undefined,
414442
tokensOutput: outputTokens || undefined,
415443
tokensTotal: numberField(toolResult, 'totalTokens') || totalInputTokens + outputTokens || undefined,

packages/cli/src/backfill/rollup.ts

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ function buildSessionRollup(rollupKey: string, events: CanonicalEvent[]): Sessio
4040
const lastEventAt = ordered.at(-1)?.ts || startedAt
4141
const timeBuckets = new Map<string, SessionRollup['timeBuckets'][number]>()
4242
const modelRollups = new Map<string, SessionRollup['modelRollups'][number]>()
43+
// Keyed by `${bucketTs}\0${model}` (v3 per-(15-min bucket, model) token buckets).
44+
const modelBuckets = new Map<string, NonNullable<SessionRollup['modelBuckets']>[number]>()
4345
const toolRollups = new Map<string, SessionRollup['toolRollups'][number]>()
4446
const fileRollups = new Map<string, SessionRollup['fileRollups'][number]>()
4547
const turnRollups = new Map<string, NonNullable<SessionRollup['turnRollups']>[number]>()
@@ -65,6 +67,9 @@ function buildSessionRollup(rollupKey: string, events: CanonicalEvent[]): Sessio
6567
const eventInputTokens = Math.max(0, event.metrics?.tokensInput || 0)
6668
const eventCachedInputTokens = Math.max(0, event.metrics?.tokensCachedInput || 0)
6769
const eventCacheCreationInputTokens = Math.max(0, event.metrics?.tokensCacheCreationInput || 0)
70+
// TTL split subsets of cacheCreation; 0 when the agent doesn't report them.
71+
const eventCacheCreation5mInputTokens = Math.max(0, event.metrics?.tokensCacheCreation5mInput || 0)
72+
const eventCacheCreation1hInputTokens = Math.max(0, event.metrics?.tokensCacheCreation1hInput || 0)
6873
const eventCacheReadInputTokens = Math.max(0, event.metrics?.tokensCacheReadInput || 0)
6974
const eventOutputTokens = Math.max(0, event.metrics?.tokensOutput || 0)
7075
const eventReasoningOutputTokens = Math.max(0, event.metrics?.tokensReasoningOutput || 0)
@@ -197,6 +202,8 @@ function buildSessionRollup(rollupKey: string, events: CanonicalEvent[]): Sessio
197202
inputTokens: 0,
198203
cachedInputTokens: 0,
199204
cacheCreationInputTokens: 0,
205+
cacheCreation5mInputTokens: 0,
206+
cacheCreation1hInputTokens: 0,
200207
cacheReadInputTokens: 0,
201208
outputTokens: 0,
202209
reasoningOutputTokens: 0,
@@ -207,12 +214,42 @@ function buildSessionRollup(rollupKey: string, events: CanonicalEvent[]): Sessio
207214
modelRollup.inputTokens += eventInputTokens
208215
modelRollup.cachedInputTokens += eventCachedInputTokens
209216
modelRollup.cacheCreationInputTokens += eventCacheCreationInputTokens
217+
modelRollup.cacheCreation5mInputTokens! += eventCacheCreation5mInputTokens
218+
modelRollup.cacheCreation1hInputTokens! += eventCacheCreation1hInputTokens
210219
modelRollup.cacheReadInputTokens += eventCacheReadInputTokens
211220
modelRollup.outputTokens += eventOutputTokens
212221
modelRollup.reasoningOutputTokens += eventReasoningOutputTokens
213222
modelRollup.totalTokens += eventTotalTokens
214223
modelRollup.estimatedCostUsd += eventCostUsd
215224
modelRollups.set(modelKey, modelRollup)
225+
226+
// Per-(15-min bucket, model) token bucket (v3).
227+
const modelBucketKey = `${bucketTs}\0${modelKey}`
228+
const modelBucket = modelBuckets.get(modelBucketKey) || {
229+
ts: bucketTs,
230+
model: modelKey,
231+
callCount: 0,
232+
inputTokens: 0,
233+
cachedInputTokens: 0,
234+
cacheCreationInputTokens: 0,
235+
cacheCreation5mInputTokens: 0,
236+
cacheCreation1hInputTokens: 0,
237+
cacheReadInputTokens: 0,
238+
outputTokens: 0,
239+
reasoningOutputTokens: 0,
240+
totalTokens: 0,
241+
}
242+
modelBucket.callCount += 1
243+
modelBucket.inputTokens += eventInputTokens
244+
modelBucket.cachedInputTokens += eventCachedInputTokens
245+
modelBucket.cacheCreationInputTokens += eventCacheCreationInputTokens
246+
modelBucket.cacheCreation5mInputTokens += eventCacheCreation5mInputTokens
247+
modelBucket.cacheCreation1hInputTokens += eventCacheCreation1hInputTokens
248+
modelBucket.cacheReadInputTokens += eventCacheReadInputTokens
249+
modelBucket.outputTokens += eventOutputTokens
250+
modelBucket.reasoningOutputTokens += eventReasoningOutputTokens
251+
modelBucket.totalTokens += eventTotalTokens
252+
modelBuckets.set(modelBucketKey, modelBucket)
216253
}
217254
if (event.type === 'tool.started') {
218255
bucket.toolCalls += 1
@@ -284,11 +321,12 @@ function buildSessionRollup(rollupKey: string, events: CanonicalEvent[]): Sessio
284321
const baseRollup: SessionRollup = {
285322
rollupKey,
286323
payloadHash: '',
287-
// v2 schema: trustworthy gap-clamped turn durations + billable-output token
288-
// convention. Set on baseRollup (not after) so it participates in payloadHash:
289-
// every historical rollup's hash changes, and a re-backfill (uploaded with
290-
// replace=true by default) cleanly refreshes all data onto the new convention.
291-
// This full-refresh churn is intentional.
324+
// v3 schema: v2 (gap-clamped turn durations + billable-output token
325+
// convention) plus per-model cache-creation TTL split and modelBuckets. Set on
326+
// baseRollup (not after) so it participates in payloadHash: every historical
327+
// rollup's hash changes, and a re-backfill (uploaded with replace=true by
328+
// default) cleanly refreshes all data onto the new convention. This
329+
// full-refresh churn is intentional.
292330
schemaVersion: AGENT_ROLLUP_SCHEMA_VERSION,
293331
source: first.source,
294332
project,
@@ -313,6 +351,8 @@ function buildSessionRollup(rollupKey: string, events: CanonicalEvent[]): Sessio
313351
durationMs: Math.max(0, Date.parse(lastEventAt) - Date.parse(startedAt)),
314352
timeBuckets: [...timeBuckets.values()].sort((a, b) => a.ts.localeCompare(b.ts)),
315353
modelRollups: [...modelRollups.values()].sort((a, b) => b.callCount - a.callCount || a.model.localeCompare(b.model)),
354+
// Sorted ts ascending, then model lexicographically (wire contract).
355+
modelBuckets: [...modelBuckets.values()].sort((a, b) => a.ts.localeCompare(b.ts) || a.model.localeCompare(b.model)),
316356
toolRollups: [...toolRollups.values()].sort((a, b) => b.callCount - a.callCount || a.tool.localeCompare(b.tool)),
317357
fileRollups: [...fileRollups.values()].sort((a, b) => b.writes - a.writes || b.reads - a.reads || a.displayPath.localeCompare(b.displayPath)),
318358
turnRollups: [...turnRollups.values()]

packages/cli/test/claude-code.test.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,40 @@ test('parity: claude usage decomposes cache and keeps tokensInput cache-inclusiv
5050
assert.equal(usages[0].metrics?.tokensCacheReadInput, 5)
5151
assert.equal(usages[0].metrics?.tokensOutput, 4)
5252
assert.equal(usages[0].metrics?.tokensTotal, 17 + 4)
53+
// No usage.cache_creation breakdown -> TTL split is left absent.
54+
assert.equal(usages[0].metrics?.tokensCacheCreation5mInput, undefined)
55+
assert.equal(usages[0].metrics?.tokensCacheCreation1hInput, undefined)
56+
})
57+
58+
// Parses one assistant message whose usage carries the cache_creation breakdown
// and checks that the 5m/1h fields are populated while the existing totals are
// unchanged.
test('claude usage splits cache creation by TTL when usage.cache_creation is present', async () => {
59+
const usages = usageEvents(await parse([
60+
{
61+
type: 'assistant',
62+
sessionId: 's1',
63+
timestamp: '2026-03-29T07:00:00.000Z',
64+
requestId: 'req-ttl',
65+
message: {
66+
id: 'msg-ttl',
67+
model: 'claude-sonnet-4-20250514',
68+
usage: {
69+
input_tokens: 10,
70+
// Total cache writes; equals the 5m + 1h breakdown on the next line (100 + 200).
cache_creation_input_tokens: 300,
71+
cache_creation: { ephemeral_5m_input_tokens: 100, ephemeral_1h_input_tokens: 200 },
72+
cache_read_input_tokens: 5,
73+
output_tokens: 4,
74+
},
75+
},
76+
},
77+
]))
78+
79+
assert.equal(usages.length, 1)
80+
// The total stays cache_creation_input_tokens; the split fills the subset fields.
81+
assert.equal(usages[0].metrics?.tokensCacheCreationInput, 300)
82+
assert.equal(usages[0].metrics?.tokensCacheCreation5mInput, 100)
83+
assert.equal(usages[0].metrics?.tokensCacheCreation1hInput, 200)
84+
// Cache-inclusive input and cached totals are unchanged by the split.
85+
assert.equal(usages[0].metrics?.tokensInput, 10 + 300 + 5)
86+
assert.equal(usages[0].metrics?.tokensCachedInput, 300 + 5)
5387
})
5488

5589
test('parity: claude cache-only usage (from ccusage propagates_sidechain_metadata fixture)', async () => {

packages/cli/test/codex.test.ts

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,98 @@ test('turn duration sums gap-clamped active intervals', () => {
109109
assert.equal(turn.lastEventAt, '2026-01-02T00:23:00.000Z')
110110
})
111111

112-
test('session rollups carry the v2 schemaVersion', () => {
112+
test('session rollups carry the v3 schemaVersion', () => {
113113
const rollup = buildSessionRollups([
114114
turnEvent('schema-session', 'schema-turn', '2026-01-02T00:00:00.000Z', 'prompt.submitted'),
115115
turnEvent('schema-session', 'schema-turn', '2026-01-02T00:00:01.000Z', 'turn.completed'),
116116
])[0]
117-
assert.equal(rollup.schemaVersion, 2)
117+
assert.equal(rollup.schemaVersion, 3)
118+
})
119+
120+
// Synthetic model.usage event with a metric bag (v3 modelBuckets / TTL split).
121+
// Test helper: builds a `model.usage` CanonicalEvent at `ts` for `model` that
// carries the given metric bag. Every other field is a fixed fixture value, so
// all events produced here share the session id 'bucket-session'.
function modelUsageEvent(
122+
ts: string,
123+
model: string,
124+
metrics: NonNullable<CanonicalEvent['metrics']>,
125+
): CanonicalEvent {
126+
return {
127+
// Same literal as AGENT_TIME_SCHEMA_VERSION in packages/shared.
schemaVersion: '2026-04-29',
128+
ts,
129+
type: 'model.usage',
130+
source: 'claude-code',
131+
agent: 'claude-code',
132+
sessionId: 'bucket-session',
133+
model,
134+
metrics,
135+
refs: { sourcePathHash: 'sha256:bucket-session' },
136+
}
137+
}
138+
139+
// Builds one session rollup from three synthetic model.usage events and checks
// the modelBuckets ordering, the per-bucket sums, and the per-model TTL totals.
test('rollup builds modelBuckets grouped by 15-min bucket and model, with TTL split', () => {
140+
// Two models across two 15-min buckets (00:00 and 00:15), with a repeated
141+
// (bucket, model) pair to exercise summing.
142+
const rollup = buildSessionRollups([
143+
modelUsageEvent('2026-01-02T00:01:00.000Z', 'model-a', {
144+
tokensInput: 10,
145+
// NOTE(review): synthetic value; cachedInputTokens is not asserted below.
tokensCachedInput: 7,
146+
tokensCacheCreationInput: 300,
147+
tokensCacheCreation5mInput: 100,
148+
tokensCacheCreation1hInput: 200,
149+
tokensCacheReadInput: 5,
150+
tokensOutput: 4,
151+
tokensReasoningOutput: 1,
152+
tokensTotal: 21,
153+
}),
154+
// 00:14 is still inside the 00:00 bucket, so this merges with the event above.
modelUsageEvent('2026-01-02T00:14:00.000Z', 'model-a', {
155+
tokensInput: 5,
156+
tokensCacheCreationInput: 50,
157+
tokensCacheCreation5mInput: 50,
158+
tokensCacheCreation1hInput: 0,
159+
tokensOutput: 2,
160+
tokensTotal: 7,
161+
}),
162+
// 00:20 floors into the 00:15 bucket; no cache or TTL fields are reported.
modelUsageEvent('2026-01-02T00:20:00.000Z', 'model-b', {
163+
tokensInput: 8,
164+
tokensOutput: 3,
165+
tokensTotal: 11,
166+
}),
167+
])[0]
168+
169+
// Sorted ts ascending, then model lexicographically.
170+
const buckets = rollup.modelBuckets!
171+
assert.equal(buckets.length, 2)
172+
assert.deepEqual(buckets.map(b => [b.ts, b.model]), [
173+
['2026-01-02T00:00:00.000Z', 'model-a'],
174+
['2026-01-02T00:15:00.000Z', 'model-b'],
175+
])
176+
177+
// First bucket merges both 00:00-window model-a events.
178+
const [a, b] = buckets
179+
assert.equal(a.callCount, 2)
180+
assert.equal(a.inputTokens, 15)
181+
assert.equal(a.cacheCreationInputTokens, 350)
182+
assert.equal(a.cacheCreation5mInputTokens, 150)
183+
assert.equal(a.cacheCreation1hInputTokens, 200)
184+
assert.equal(a.cacheReadInputTokens, 5)
185+
assert.equal(a.outputTokens, 6)
186+
assert.equal(a.reasoningOutputTokens, 1)
187+
assert.equal(a.totalTokens, 28)
188+
189+
// Second bucket is the single model-b event with no TTL split reported.
190+
assert.equal(b.callCount, 1)
191+
assert.equal(b.inputTokens, 8)
192+
assert.equal(b.cacheCreation5mInputTokens, 0)
193+
assert.equal(b.cacheCreation1hInputTokens, 0)
194+
assert.equal(b.totalTokens, 11)
195+
196+
// modelRollups accumulate the TTL split per model across all of its events
// (here both model-a events happen to share one bucket).
197+
const modelA = rollup.modelRollups.find(m => m.model === 'model-a')!
198+
assert.equal(modelA.cacheCreationInputTokens, 350)
199+
assert.equal(modelA.cacheCreation5mInputTokens, 150)
200+
assert.equal(modelA.cacheCreation1hInputTokens, 200)
201+
const modelB = rollup.modelRollups.find(m => m.model === 'model-b')!
202+
assert.equal(modelB.cacheCreation5mInputTokens, 0)
203+
assert.equal(modelB.cacheCreation1hInputTokens, 0)
118204
})
119205

120206
// ── ccusage parity ──

packages/shared/src/index.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ export const AGENT_TIME_SCHEMA_VERSION = '2026-04-29'
99
// treat the data: v1 rollups still get the legacy 15-min per-turn cap applied,
1010
// while v2 rollups are trusted as-is. Older CLIs omit the field; the server reads
1111
// a missing schemaVersion as 1.
12-
export const AGENT_ROLLUP_SCHEMA_VERSION = 2
12+
// v3 = v2 + per-model cache-creation TTL split (cacheCreation5m/1hInputTokens on
13+
// modelRollups) + modelBuckets (15-min aligned per-model token buckets).
14+
export const AGENT_ROLLUP_SCHEMA_VERSION = 3
1315

1416
export const KNOWN_AGENT_SOURCES = [
1517
'codex',
@@ -249,6 +251,11 @@ export interface MetricBag {
249251
tokensOutput?: number
250252
tokensCachedInput?: number
251253
tokensCacheCreationInput?: number
254+
// tokensCacheCreation5mInput / tokensCacheCreation1hInput are subsets of
255+
// tokensCacheCreationInput split by cache write TTL. Absent when the agent
256+
// doesn't report the TTL split (only Anthropic surfaces usage.cache_creation).
257+
tokensCacheCreation5mInput?: number
258+
tokensCacheCreation1hInput?: number
252259
tokensCacheReadInput?: number
253260
// tokensReasoningOutput is an informational subset of tokensOutput. It is NOT
254261
// added on top of tokensOutput for billing/total purposes — it only exposes how
@@ -411,13 +418,35 @@ export interface SessionModelRollup {
411418
inputTokens: number
412419
cachedInputTokens: number
413420
cacheCreationInputTokens: number
421+
// 5-minute-TTL cache writes (subset of cacheCreationInputTokens).
422+
cacheCreation5mInputTokens?: number
423+
// 1-hour-TTL cache writes (subset of cacheCreationInputTokens).
424+
cacheCreation1hInputTokens?: number
414425
cacheReadInputTokens: number
415426
outputTokens: number
416427
reasoningOutputTokens: number
417428
totalTokens: number
418429
estimatedCostUsd: number
419430
}
420431

432+
// Per-(15-min bucket, model) token aggregation. The ts field uses the same
433+
// 15-min alignment (floorRollupBucket) as SessionTimeBucketRollup. Added in v3
434+
// so the server can attribute cache-creation TTL splits per model over time.
435+
// One entry of SessionRollup.modelBuckets: token totals for a single
// (15-min bucket, model) pair. All counters are required numbers here, unlike
// the optional TTL fields on SessionModelRollup.
export interface SessionModelBucketRollup {
436+
// 15-min aligned bucket timestamp, e.g. '2026-01-02T00:15:00.000Z'.
ts: string
437+
// Model key the events in this bucket were attributed to.
model: string
438+
// Number of events aggregated into this bucket (incremented once per event).
callCount: number
439+
// Sum of the events' tokensInput.
inputTokens: number
440+
// Sum of the events' tokensCachedInput.
cachedInputTokens: number
441+
// Sum of the events' tokensCacheCreationInput (all TTLs).
cacheCreationInputTokens: number
442+
// 5-minute-TTL cache writes (subset of cacheCreationInputTokens); 0 when the
// agent doesn't report the TTL split.
cacheCreation5mInputTokens: number
443+
// 1-hour-TTL cache writes (subset of cacheCreationInputTokens); 0 when the
// agent doesn't report the TTL split.
cacheCreation1hInputTokens: number
444+
// Sum of the events' tokensCacheReadInput.
cacheReadInputTokens: number
445+
// Sum of the events' tokensOutput.
outputTokens: number
446+
// Sum of the events' tokensReasoningOutput (an informational subset of output).
reasoningOutputTokens: number
447+
// Sum of the per-event total token counts.
totalTokens: number
448+
}
449+
421450
export interface SessionToolRollup {
422451
tool: string
423452
callCount: number
@@ -480,6 +509,9 @@ export interface SessionRollup {
480509
durationMs: number
481510
timeBuckets: SessionTimeBucketRollup[]
482511
modelRollups: SessionModelRollup[]
512+
// 15-min aligned per-model token buckets (v3). Optional so the type still
513+
// describes rollups produced by older CLIs, which omit it.
514+
modelBuckets?: SessionModelBucketRollup[]
483515
toolRollups: SessionToolRollup[]
484516
fileRollups: SessionFileRollup[]
485517
turnRollups?: SessionTurnRollup[]

0 commit comments

Comments
 (0)