Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified bin/opik-logger-darwin-amd64
Binary file not shown.
Binary file modified bin/opik-logger-darwin-arm64
Binary file not shown.
Binary file modified bin/opik-logger-linux-amd64
Binary file not shown.
Binary file modified bin/opik-logger-windows-amd64.exe
Binary file not shown.
75 changes: 62 additions & 13 deletions src/billing.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,15 @@ func computeBillingSnapshot(fullEntries, turnEntries []TranscriptEntry) map[stri
acc := map[billingKey]*billingTier{}
totals := billingTier{}
for _, call := range calls {
// After a compaction the request no longer contains the pre-compact
// conversation — only the summary entry and what follows. Replaying
// from the start would lay out content that isn't in the request,
// and the (usage-derived, never-rescaled) assistant pieces would
// overflow the positional cut into the fresh-input tier.
history := fullEntries[:call.entryIdx]
history = history[compactReplayStart(history):]
pieces := append(append([]billingPiece{}, staticPieces...),
conversationPieces(fullEntries[:call.entryIdx], skillBodyNames, toolNames)...)
conversationPieces(history, skillBodyNames, toolNames)...)
pieces = reconcileToUsage(pieces, float64(call.read+call.write+call.fresh))
cutByPosition(pieces, float64(call.read), float64(call.write), acc)

Expand All @@ -87,6 +94,22 @@ func computeBillingSnapshot(fullEntries, turnEntries []TranscriptEntry) map[stri
return renderBillingSnapshot(len(calls), totals, acc, counts)
}

// compactReplayStart reports the index within entries at which the live
// request content starts. Only the last compaction marker matters:
//   - a system entry with subtype "compact_boundary" puts the start on the
//     entry right after it;
//   - an entry flagged IsCompactSummary is itself the start, because the
//     summary is part of the request and must stay in.
//
// It returns 0 when entries holds no marker, i.e. the session never
// compacted.
func compactReplayStart(entries []TranscriptEntry) int {
	// Walk from the tail: the first marker met this way is the last one in
	// transcript order, which is the one that decides the result.
	for idx := len(entries) - 1; idx >= 0; idx-- {
		entry := entries[idx]
		switch {
		case entry.Type == "system" && entry.Subtype == "compact_boundary":
			return idx + 1
		case entry.IsCompactSummary:
			return idx
		}
	}
	return 0
}

type billingCall struct {
entryIdx int // index in fullEntries of the call's FIRST entry: its request is the prefix before it
entryEnd int // one past the call's LAST entry
Expand Down Expand Up @@ -226,6 +249,17 @@ func conversationPieces(entries []TranscriptEntry, skillBodyNames map[string]str
if e.Message == nil {
continue
}
if e.IsCompactSummary {
// The summary stands in for the compacted conversation — it
// is session-length cost, not something the user typed.
for _, c := range e.Message.Content {
if c.Type == "text" {
add("prior_assistant", "compact_summary", kindUsage,
float64(measuredOrEstimate(c.Text, "prose")), false)
}
}
continue
}
for _, c := range e.Message.Content {
switch c.Type {
case "text":
Expand Down Expand Up @@ -359,9 +393,13 @@ func toolLane(name string) (string, string) {
}
}

// reconcileToUsage makes Σ pieces == total exactly. Overshoot shrinks only
// the estimated pieces (usage-derived ones are already exact); undershoot
// appends the explicit `unattributed` tail piece.
// reconcileToUsage makes Σ pieces == total exactly. Overshoot shrinks the
// estimated pieces first (usage-derived ones are normally already exact);
// if the usage-derived pieces alone still exceed the measured total — the
// request dropped content we can't see (compaction we failed to detect,
// context editing) — they are scaled down too: the per-call exactness
// contract outranks per-piece exactness. Undershoot appends the explicit
// `unattributed` tail piece.
func reconcileToUsage(pieces []billingPiece, total float64) []billingPiece {
sum, estSum := 0.0, 0.0
for _, p := range pieces {
Expand All @@ -370,16 +408,27 @@ func reconcileToUsage(pieces []billingPiece, total float64) []billingPiece {
estSum += p.tokens
}
}
exactSum := sum - estSum
switch {
case sum > total && estSum > 0:
target := total - (sum - estSum)
if target < 0 {
target = 0
case sum > total:
if estSum > 0 {
target := total - exactSum
if target < 0 {
target = 0
}
scale := target / estSum
for i := range pieces {
if !pieces[i].exact {
pieces[i].tokens *= scale
}
}
}
scale := target / estSum
for i := range pieces {
if !pieces[i].exact {
pieces[i].tokens *= scale
if exactSum > total {
scale := total / exactSum
for i := range pieces {
if pieces[i].exact {
pieces[i].tokens *= scale
}
}
}
case sum < total:
Expand Down Expand Up @@ -447,7 +496,7 @@ func countNewEvents(turnEntries []TranscriptEntry, skillBodyNames map[string]str
for _, e := range turnEntries {
switch e.Type {
case "user":
if e.Message == nil {
if e.Message == nil || e.IsCompactSummary {
continue
}
for _, c := range e.Message.Content {
Expand Down
109 changes: 109 additions & 0 deletions src/billing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,112 @@ func TestDeferredCatalogSplitsBuiltinFromMcp(t *testing.T) {
t.Errorf("expected catalog_deltas under mcp_servers: %v", mcp["items"])
}
}

// Once a session has been compacted, a request carries only the summary
// entry plus whatever followed it. This test checks that the billing replay
// is cut at the compact boundary: nothing from before the boundary may be
// laid out, and the summary must be reported in the prior_assistant lane.
func TestBillingCompactBoundaryTruncatesReplay(t *testing.T) {
	// Pre-compact history: the prompt and one very large call. Its blocks
	// are usage-derived (exact) replay pieces; before the fix they
	// overflowed the next call's cut into the fresh-input tier.
	transcript := []TranscriptEntry{userPromptEntry("the original ask")}
	transcript = append(transcript, assistantCall(t, "m1",
		&Usage{InputTokens: 500, CacheCreationInputTokens: 10_000, OutputTokens: 80_000},
		Content{Type: "thinking", Thinking: "redacted"},
		Content{Type: "text", Text: strings.Repeat("big ", 50)},
	)...)

	// The compaction itself: boundary marker followed by the summary entry.
	transcript = append(transcript,
		TranscriptEntry{Type: "system", Subtype: "compact_boundary"},
		TranscriptEntry{
			Type:             "user",
			IsCompactSummary: true,
			Message: &Message{Content: ContentSlice{
				{Type: "text", Text: strings.Repeat("summary of prior work ", 100)},
			}},
		},
	)

	// The single post-compact call; it is also the whole current turn.
	postUsage := &Usage{InputTokens: 60, CacheReadInputTokens: 9_000,
		CacheCreationInputTokens: 400, OutputTokens: 30}
	postCall := assistantCall(t, "m2", postUsage, Content{Type: "text", Text: "continuing"})
	transcript = append(transcript, postCall...)
	currentTurn := transcript[len(transcript)-len(postCall):]

	snap := computeBillingSnapshot(transcript, currentTurn)
	if snap == nil {
		t.Fatal("expected billing snapshot")
	}

	// Column sums must match the post-compact call's usage, allowing a
	// deviation of at most the reported row count per column.
	gotRead, gotWrite, gotFresh, gotOutput, rowCount := billingColumnSums(snap)
	near := func(got, want int) bool {
		if got < want {
			got, want = want, got
		}
		return got-want <= rowCount
	}
	sumsMatch := near(gotRead, postUsage.CacheReadInputTokens) &&
		near(gotWrite, postUsage.CacheCreationInputTokens) &&
		near(gotFresh, postUsage.InputTokens) &&
		near(gotOutput, postUsage.OutputTokens)
	if !sumsMatch {
		t.Errorf("Σ lanes = read %d / write %d / fresh %d / output %d, want %d/%d/%d/%d (±%d)",
			gotRead, gotWrite, gotFresh, gotOutput,
			postUsage.CacheReadInputTokens, postUsage.CacheCreationInputTokens,
			postUsage.InputTokens, postUsage.OutputTokens, rowCount)
	}

	lanes := snap["lanes"].(map[string]interface{})
	priorAssistant, ok := lanes["prior_assistant"].(map[string]interface{})
	if !ok {
		t.Fatal("expected prior_assistant lane (compact summary)")
	}

	// The summary must show up with a non-zero total; a non-zero thinking
	// item would mean the pre-compact call was replayed.
	summarySeen := false
	for _, item := range priorAssistant["items"].([]map[string]interface{}) {
		switch item["name"] {
		case "compact_summary":
			if item["total"].(int) > 0 {
				summarySeen = true
			}
		case "thinking":
			if item["total"].(int) > 0 {
				t.Errorf("pre-compact thinking leaked into the replay: %v", item)
			}
		}
	}
	if !summarySeen {
		t.Errorf("expected compact_summary item in prior_assistant: %v", priorAssistant["items"])
	}

	// The only user prompt predates the boundary, so the user_prompts lane,
	// if present at all, must be empty.
	if prompts, ok := lanes["user_prompts"].(map[string]interface{}); ok && prompts["total"].(int) > 0 {
		t.Errorf("pre-compact user prompt leaked into the replay: %v", prompts)
	}
}

// Safety net for truncation that goes undetected: if the usage-derived
// replay pieces on their own add up to more than the call's measured prompt,
// they have to be scaled down so the per-call totals still match usage. The
// excess must never be reported in the fresh-input tier.
func TestBillingExactOvershootIsClamped(t *testing.T) {
	// Usage of the first call. Its output is far larger than the second
	// call's whole prompt, which is what produces the overshoot.
	const (
		firstInput  = 20
		firstOutput = 50_000
	)

	transcript := []TranscriptEntry{userPromptEntry("hi")}
	transcript = append(transcript, assistantCall(t, "m1",
		&Usage{InputTokens: firstInput, OutputTokens: firstOutput},
		Content{Type: "thinking", Thinking: "redacted"},
		Content{Type: "text", Text: "done"},
	)...)

	secondUsage := &Usage{InputTokens: 40, CacheReadInputTokens: 1_000, OutputTokens: 10}
	transcript = append(transcript,
		assistantCall(t, "m2", secondUsage, Content{Type: "text", Text: "ok"})...)

	// Both calls belong to the turn, so the full transcript is the turn.
	snap := computeBillingSnapshot(transcript, transcript)
	if snap == nil {
		t.Fatal("expected billing snapshot")
	}

	// Expected column sums across both calls; no cache writes were billed.
	wantRead := secondUsage.CacheReadInputTokens
	wantFresh := firstInput + secondUsage.InputTokens
	wantOut := firstOutput + secondUsage.OutputTokens

	gotRead, gotWrite, gotFresh, gotOutput, rowCount := billingColumnSums(snap)
	near := func(got, want int) bool {
		if got < want {
			got, want = want, got
		}
		return got-want <= rowCount
	}
	sumsMatch := near(gotRead, wantRead) && near(gotWrite, 0) &&
		near(gotFresh, wantFresh) && near(gotOutput, wantOut)
	if !sumsMatch {
		t.Errorf("Σ lanes = read %d / write %d / fresh %d / output %d, want %d/0/%d/%d (±%d)",
			gotRead, gotWrite, gotFresh, gotOutput, wantRead, wantFresh, wantOut, rowCount)
	}
}
22 changes: 13 additions & 9 deletions src/transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@ import (
)

// TranscriptEntry is a single transcript record as decoded from JSON. Type
// names the kind of record and Subtype refines it; the pointer fields are
// optional payloads that are omitted when absent.
type TranscriptEntry struct {
	Type      string `json:"type"`
	Subtype   string `json:"subtype,omitempty"` // e.g. "compact_boundary" on type:"system"
	UUID      string `json:"uuid"`
	Timestamp string `json:"timestamp"`
	Slug      string `json:"slug,omitempty"`
	AITitle   string `json:"aiTitle,omitempty"` // populated on type:"ai-title" events
	Version   string `json:"version,omitempty"` // Claude Code CLI version stamped on every user/assistant entry
	// Set on the user entry that carries the compaction summary — the text
	// that REPLACES the pre-compact conversation in subsequent requests.
	IsCompactSummary bool           `json:"isCompactSummary,omitempty"`
	Message          *Message       `json:"message,omitempty"`
	ToolUseResult    *ToolUseResult `json:"toolUseResult,omitempty"`
	Attachment       *Attachment    `json:"attachment,omitempty"`
}

// Attachment covers the subset of `type:"attachment"` records we extract
Expand Down
Loading