Skip to content

Commit b8f5cc1

Browse files
committed
fix(cli): dedup claude usage and reset backfill state schema
1 parent d28b8ec commit b8f5cc1

3 files changed

Lines changed: 52 additions & 9 deletions

File tree

packages/cli/src/adapters/claude-code.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ async function parseClaudeCodeSessionFile(
5353
const lines = text.split('\n').filter(Boolean)
5454
const projectContext = await claudeProjectContextFromLines(filePath, lines, options)
5555
const pendingTools = new Map<string, ClaudePendingTool>()
56+
// Claude Code occasionally writes the same assistant message to the
57+
// jsonl more than once (streaming flushes, retries, replays). ccusage
58+
// dedups by `${messageId}:${requestId}` — without it codetime double-
59+
// counts tokens 2-6×. Track which usage rows we've already emitted.
60+
const seenUsageKeys = new Set<string>()
5661
let sessionId = sessionIdFromFilePath(filePath, 'claude')
5762
let cwd: string | undefined
5863
let project: string | undefined = projectContext.project
@@ -221,16 +226,37 @@ async function parseClaudeCodeSessionFile(
221226
}
222227

223228
model = stringField(message, 'model') || model
229+
// Skip the entire assistant entry when (messageId, requestId) was
230+
// already processed — applies to both the usage metrics and any
231+
// tool_use items so we don't double-emit tool.started either.
232+
// When messageId is absent we cannot dedup safely — emit and accept
233+
// the risk (matches ccusage's createUniqueHash returning null). NOTE(review): a missing requestId is still folded into the key — confirm this is intended.
234+
const messageId = stringField(message, 'id')
235+
const requestId = stringField(raw, 'requestId')
236+
const usageKey = messageId ? `${messageId}:${requestId}` : null
237+
if (usageKey != null && seenUsageKeys.has(usageKey)) {
238+
continue
239+
}
240+
if (usageKey != null) {
241+
seenUsageKeys.add(usageKey)
242+
}
224243
const usage = claudeUsageFromMessage(message)
225244
if (usage) {
245+
// Anthropic surfaces fast inference via `usage.speed === 'fast'`.
246+
// Append `-fast` so the model name lines up with OpenRouter's
247+
// separate `anthropic/claude-opus-4.7-fast` pricing entry (~6×
248+
// standard). Tag only the model.usage event to keep downstream
249+
// tool events on the base model name.
250+
const speed = stringField(objectField(message, 'usage'), 'speed')
251+
const usageModel = speed === 'fast' && model ? `${model}-fast` : model
226252
push(baseClaudeEvent({
227253
ts,
228254
type: 'model.usage',
229255
sessionId,
230256
turnId: state.currentTurnId,
231257
cwd,
232258
project,
233-
model,
259+
model: usageModel,
234260
confidence: 'partial',
235261
metrics: usage,
236262
}), lineNumber, topType, 'usage')

packages/cli/src/cli.ts

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type {
77
} from '@codetime/shared'
88
import type { BackfillSourceDefinition } from './lib/backfill.js'
99
import type { BackfillImportCounts, BackfillIncrementalState, BackfillSourceFile, ParsedArgs, RunContext, SyncLocalLock, SyncLocalTriggerState, WritableLike } from './lib/types.js'
10+
import { BACKFILL_STATE_SCHEMA_VERSION } from './lib/types.js'
1011
import { spawn } from 'node:child_process'
1112
import { mkdir, rm, stat, writeFile } from 'node:fs/promises'
1213
import os from 'node:os'
@@ -973,26 +974,33 @@ function syncLocalTriggerLockPath(home: string): string {
973974

974975
async function readBackfillIncrementalState(home: string, ctx?: RunContext): Promise<BackfillIncrementalState> {
975976
// Corrupt JSON now surfaces from readJsonIfExists; a missing file
976-
// resolves to null. Anything else (wrong shape, future schema
977+
// resolves to null. Anything else (wrong shape, mismatched schema
977978
// version, manual edits that dropped `sources`) lands here and would
978979
// previously vanish silently — log via debug so the user can see
979980
// when watermarks were dropped.
981+
//
982+
// When the on-disk schema version doesn't match the CLI's current
983+
// BACKFILL_STATE_SCHEMA_VERSION we deliberately drop every watermark.
984+
// The next sync-local-runner then re-parses every jsonl from scratch
985+
// and the server upserts via `replace: true`, so a CLI upgrade that
986+
// changed parser semantics (e.g. v2's dedup fix) silently rewrites
987+
// historical rollups; the user does not need to do anything.
980988
const statePath = backfillIncrementalStatePath(home)
981989
const state = await readJsonIfExists(statePath)
982990
if (state === null) {
983-
return { version: 1, sources: {} }
991+
return { version: BACKFILL_STATE_SCHEMA_VERSION, sources: {} }
984992
}
985993
if (!isPlainObject(state) || !isPlainObject(state.sources)) {
986994
if (ctx) {
987995
debug(ctx, `backfill-state malformed at ${statePath}; ignoring watermarks\n`)
988996
}
989-
return { version: 1, sources: {} }
997+
return { version: BACKFILL_STATE_SCHEMA_VERSION, sources: {} }
990998
}
991-
if (state.version !== undefined && state.version !== 1) {
999+
if (state.version !== BACKFILL_STATE_SCHEMA_VERSION) {
9921000
if (ctx) {
993-
debug(ctx, `backfill-state version ${String(state.version)} at ${statePath} is not supported; ignoring watermarks\n`)
1001+
debug(ctx, `backfill-state version ${String(state.version)} at ${statePath} differs from current v${BACKFILL_STATE_SCHEMA_VERSION}; dropping watermarks so the next sync re-imports under the new parser\n`)
9941002
}
995-
return { version: 1, sources: {} }
1003+
return { version: BACKFILL_STATE_SCHEMA_VERSION, sources: {} }
9961004
}
9971005

9981006
const sources: BackfillIncrementalState['sources'] = {}
@@ -1003,7 +1011,7 @@ async function readBackfillIncrementalState(home: string, ctx?: RunContext): Pro
10031011
}
10041012
}
10051013

1006-
return { version: 1, sources }
1014+
return { version: BACKFILL_STATE_SCHEMA_VERSION, sources }
10071015
}
10081016

10091017
async function updateBackfillIncrementalState(

packages/cli/src/lib/types.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,17 @@ export interface BackfillSourceFile {
2727
modifiedAt: string
2828
}
2929

30+
// Bump `BACKFILL_STATE_SCHEMA_VERSION` whenever the offline parsers
31+
// change in a way that invalidates already-uploaded rollups (e.g. the
32+
// Claude assistant-message dedup added in v2). The CLI compares the
33+
// constant against the on-disk schema; on a mismatch it drops every
34+
// watermark so the next sync silently re-parses all jsonl from scratch
35+
// and upserts the deduped rollups (`replace: true` is already set).
36+
// Users get the fix transparently the next time their agent runs.
37+
/** Schema version stamped on the backfill incremental state; when the on-disk value differs, the CLI discards every stored watermark. */
export const BACKFILL_STATE_SCHEMA_VERSION = 2
38+
3039
export interface BackfillIncrementalState {
31-
version: 1
40+
version: typeof BACKFILL_STATE_SCHEMA_VERSION
3241
sources: Partial<Record<
3342
import('@codetime/shared').BackfillSourceId,
3443
{ watermarkTs: string }

0 commit comments

Comments
 (0)