diff --git a/src/dryrun_test.go b/src/dryrun_test.go
index 09bca96..3e5a1be 100644
--- a/src/dryrun_test.go
+++ b/src/dryrun_test.go
@@ -18,7 +18,7 @@ func TestDryRunOnTestThread(t *testing.T) {
 		t.Fatal(err)
 	}
 	snaps := domainSnapshotsFromEntries(entries, entries)
-	for _, domain := range []string{"tools", "skills", "user_prompts", "tool_results", "thinking", "memory", "agents", "cc_builtin", "assistant_text", "prior_assistant", "file_attachments"} {
+	for _, domain := range []string{"tools", "skills", "user_prompts", "tool_results", "thinking", "memory", "agents", "cc_builtin", "assistant_text", "prior_assistant", "file_attachments", "output_tokens"} {
 		fmt.Printf("--- %s ---\n", domain)
 		if snaps[domain] == nil {
 			fmt.Println("(nil)")
diff --git a/src/extractors.go b/src/extractors.go
index 9aacc86..98be7bf 100644
--- a/src/extractors.go
+++ b/src/extractors.go
@@ -705,3 +705,68 @@ func extractAssistantTextSnapshot(entries []TranscriptEntry) map[string]interfac
 		},
 	}
 }
+
+// extractOutputTokensSnapshot aggregates attributed output tokens by category
+// at the trace level, so that the Sankey visualization can use
+// sum(metadata.cc.output_tokens.by_category.*) directly without span
+// aggregation. Returns "summary" and "by_category" maps, or nil if the total is 0.
+//
+// Categories (entries with any other ContentType are not counted):
+//   - thinking          — ContentType "thinking" (extended thinking blocks)
+//   - assistant_text    — ContentType "text" (visible text responses)
+//   - builtin_tool_use  — remaining tool_use blocks (CC built-ins: Bash, Read, Edit, …)
+//   - mcp_tool_use      — tool_use whose name has the prefix "mcp__"
+//   - skill_invocations — tool_use whose name is exactly "Skill"
+//
+// `parsed` should be the dedup-applied output of ParseAssistantMessages +
+// DeduplicateUsage. If it is nil, both steps are re-run here on entries.
+func extractOutputTokensSnapshot(entries []TranscriptEntry, parsed []ParsedEntry) map[string]interface{} {
+	if parsed == nil { // no pre-parsed entries supplied: derive them from entries
+		parsed = ParseAssistantMessages(entries)
+		DeduplicateUsage(parsed)
+	}
+
+	var (
+		thinking         int
+		assistantText    int
+		builtinToolUse   int
+		mcpToolUse       int
+		skillInvocations int
+	)
+
+	for _, p := range parsed {
+		tok := p.AttributedOutputTokens
+		switch p.ContentType {
+		case "thinking":
+			thinking += tok
+		case "text":
+			assistantText += tok
+		case "tool_use": // tool calls are split further by tool name
+			switch {
+			case strings.HasPrefix(p.Content.Name, "mcp__"):
+				mcpToolUse += tok
+			case p.Content.Name == "Skill":
+				skillInvocations += tok
+			default:
+				builtinToolUse += tok
+			}
+		}
+	}
+
+	total := thinking + assistantText + builtinToolUse + mcpToolUse + skillInvocations
+	if total == 0 { // nothing was attributed to any category
+		return nil
+	}
+	return map[string]interface{}{
+		"summary": map[string]interface{}{
+			"total_tokens": total,
+		},
+		"by_category": map[string]interface{}{
+			"thinking":          thinking,
+			"assistant_text":    assistantText,
+			"builtin_tool_use":  builtinToolUse,
+			"mcp_tool_use":      mcpToolUse,
+			"skill_invocations": skillInvocations,
+		},
+	}
+}
diff --git a/src/extractors_test.go b/src/extractors_test.go
index a66c57f..b7520ea 100644
--- a/src/extractors_test.go
+++ b/src/extractors_test.go
@@ -6,6 +6,81 @@ import (
 	"testing"
 )
 
+func TestExtractOutputTokensSnapshot(t *testing.T) {
+	// One assistant entry with five content blocks: thinking, text, a built-in tool (Bash), an MCP tool and a Skill call.
+	// All blocks share one message.id so DeduplicateUsage is expected to split the 1000 output tokens between them.
+	const msgID = "msg_abc123"
+	entries := []TranscriptEntry{
+		{
+			Type: "assistant",
+			UUID: "u1",
+			Message: &Message{
+				ID:    msgID,
+				Model: "claude-opus-4-8",
+				Usage: &Usage{OutputTokens: 1000},
+				Content: ContentSlice{
+					{Type: "thinking", Thinking: "..."},
+					{Type: "text", Text: "hello world"},
+					{Type: "tool_use", ID: "t1", Name: "Bash", Input: map[string]interface{}{"command": "ls"}},
+					{Type: "tool_use", ID: "t2", Name: "mcp__slack__send", Input: map[string]interface{}{}},
+					{Type: "tool_use", ID: "t3", Name: "Skill", Input: map[string]interface{}{}},
+				},
+			},
+		},
+	}
+
+	parsed := ParseAssistantMessages(entries)
+	DeduplicateUsage(parsed)
+
+	snap := extractOutputTokensSnapshot(entries, parsed)
+	if snap == nil {
+		t.Fatal("expected non-nil snapshot")
+	}
+
+	summary, _ := snap["summary"].(map[string]interface{})
+	if summary == nil {
+		t.Fatal("missing summary")
+	}
+	total, _ := summary["total_tokens"].(int)
+	if total != 1000 {
+		t.Errorf("total_tokens = %d, want 1000", total)
+	}
+
+	cat, _ := snap["by_category"].(map[string]interface{})
+	if cat == nil {
+		t.Fatal("missing by_category")
+	}
+
+	// The five categories together must account for exactly summary.total_tokens.
+	catSum := 0
+	for _, key := range []string{"thinking", "assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} {
+		v, _ := cat[key].(int)
+		catSum += v
+	}
+	if catSum != total {
+		t.Errorf("sum(by_category) = %d, want %d (total_tokens)", catSum, total)
+	}
+
+	// thinking must be > 0 (presumably the leftover after non-thinking blocks; see DeduplicateUsage).
+	if thinking, _ := cat["thinking"].(int); thinking == 0 {
+		t.Error("thinking should be > 0")
+	}
+
+	// Every non-thinking category has a block in the fixture, so each must be non-zero.
+	for _, key := range []string{"assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} {
+		if v, _ := cat[key].(int); v == 0 {
+			t.Errorf("by_category[%s] = 0, expected > 0", key)
+		}
+	}
+}
+
+func TestExtractOutputTokensSnapshotNilOnEmpty(t *testing.T) {
+	snap := extractOutputTokensSnapshot(nil, nil) // nil parsed takes the reparse path on nil entries
+	if snap != nil {
+		t.Errorf("expected nil on empty entries, got %v", snap)
+	}
+}
+
 func TestExtractAgentsSnapshotPrefersFrontmatterName(t *testing.T) {
 	home := t.TempDir()
 	cwd := t.TempDir()
diff --git a/src/main.go b/src/main.go
index 7c5bee8..8afae26 100644
--- a/src/main.go
+++ b/src/main.go
@@ -767,6 +767,7 @@ func domainSnapshotsFromEntries(fullEntries, turnEntries []TranscriptEntry) map[
 		"file_attachments": extractFileAttachmentsSnapshot(turnEntries),
 		"prior_assistant":  extractPriorAssistantSnapshot(fullEntries, turnEntries),
 		"assistant_text":   extractAssistantTextSnapshot(turnEntries),
+		"output_tokens":    extractOutputTokensSnapshot(turnEntries, parsedTurn),
 		// cc_builtin covers the bundled system-prompt + tool-catalog cost
 		// /context reports under "System prompt" / "System tools" /
 		// "System tools (deferred)". These never appear in the transcript