comet-ml · jverre · Jun 9, 2026 · Jun 9, 2026
diff --git a/src/dryrun_test.go b/src/dryrun_test.go
@@ -18,7 +18,7 @@ func TestDryRunOnTestThread(t *testing.T) {
 		t.Fatal(err)
 	}
 	snaps := domainSnapshotsFromEntries(entries, entries)
-	for _, domain := range []string{"tools", "skills", "user_prompts", "tool_results", "thinking", "memory", "agents", "cc_builtin", "assistant_text", "prior_assistant", "file_attachments"} {
+	for _, domain := range []string{"tools", "skills", "user_prompts", "tool_results", "thinking", "memory", "agents", "cc_builtin", "assistant_text", "prior_assistant", "file_attachments", "output_tokens"} {
 		fmt.Printf("--- %s ---\n", domain)
 		if snaps[domain] == nil {
 			fmt.Println("(nil)")

diff --git a/src/extractors.go b/src/extractors.go
@@ -705,3 +705,68 @@ func extractAssistantTextSnapshot(entries []TranscriptEntry) map[string]interfac
 		},
 	}
 }
+
+// extractOutputTokensSnapshot builds the trace-level `cc.output_tokens`
+// snapshot, shaped as `{summary, by_category}`. Attributed output tokens are
+// summed per category here so the Sankey visualization can read
+// sum(metadata.cc.output_tokens.by_category.*) without aggregating spans.
+//
+// Categories:
+//   - thinking          — extended thinking blocks
+//   - assistant_text    — visible text responses
+//   - builtin_tool_use  — CC built-in tools (Bash, Read, Edit, …)
+//   - mcp_tool_use      — MCP tool calls (name prefix "mcp__")
+//   - skill_invocations — Skill tool invocations
+//
+// `parsed` should be the dedup-applied output of ParseAssistantMessages +
+// DeduplicateUsage; nil means reparse from entries. Other content types are
+// ignored, and the result is nil when the attributed tokens sum to zero.
+func extractOutputTokensSnapshot(entries []TranscriptEntry, parsed []ParsedEntry) map[string]interface{} {
+	if parsed == nil {
+		parsed = ParseAssistantMessages(entries)
+		DeduplicateUsage(parsed)
+	}
+
+	// Seed every category so by_category always carries all five keys.
+	counts := map[string]int{
+		"thinking":          0,
+		"assistant_text":    0,
+		"builtin_tool_use":  0,
+		"mcp_tool_use":      0,
+		"skill_invocations": 0,
+	}
+	total := 0
+
+	for _, entry := range parsed {
+		var category string
+		switch entry.ContentType {
+		case "thinking":
+			category = "thinking"
+		case "text":
+			category = "assistant_text"
+		case "tool_use":
+			category = "builtin_tool_use"
+			if name := entry.Content.Name; strings.HasPrefix(name, "mcp__") {
+				category = "mcp_tool_use"
+			} else if name == "Skill" {
+				category = "skill_invocations"
+			}
+		default:
+			continue
+		}
+		counts[category] += entry.AttributedOutputTokens
+		total += entry.AttributedOutputTokens
+	}
+	if total == 0 {
+		return nil
+	}
+
+	byCategory := make(map[string]interface{}, len(counts))
+	for name, n := range counts {
+		byCategory[name] = n
+	}
+	return map[string]interface{}{
+		"summary":     map[string]interface{}{"total_tokens": total},
+		"by_category": byCategory,
+	}
+}
diff --git a/src/extractors_test.go b/src/extractors_test.go
@@ -6,6 +6,81 @@ import (
 	"testing"
 )
 
+func TestExtractOutputTokensSnapshot(t *testing.T) {
+	// A single LLM call carrying thinking, text, a built-in tool, an MCP tool
+	// and a Skill call, all in one message so they share one message.id.
+	const msgID = "msg_abc123"
+	blocks := ContentSlice{
+		{Type: "thinking", Thinking: "..."},
+		{Type: "text", Text: "hello world"},
+		{Type: "tool_use", ID: "t1", Name: "Bash", Input: map[string]interface{}{"command": "ls"}},
+		{Type: "tool_use", ID: "t2", Name: "mcp__slack__send", Input: map[string]interface{}{}},
+		{Type: "tool_use", ID: "t3", Name: "Skill", Input: map[string]interface{}{}},
+	}
+	entries := []TranscriptEntry{{
+		Type: "assistant",
+		UUID: "u1",
+		Message: &Message{
+			ID:      msgID,
+			Model:   "claude-opus-4-8",
+			Usage:   &Usage{OutputTokens: 1000},
+			Content: blocks,
+		},
+	}}
+
+	parsed := ParseAssistantMessages(entries)
+	DeduplicateUsage(parsed)
+	snap := extractOutputTokensSnapshot(entries, parsed)
+	if snap == nil {
+		t.Fatal("expected non-nil snapshot")
+	}
+	intAt := func(m map[string]interface{}, key string) int {
+		n, _ := m[key].(int)
+		return n
+	}
+
+	summary, _ := snap["summary"].(map[string]interface{})
+	if summary == nil {
+		t.Fatal("missing summary")
+	}
+	total := intAt(summary, "total_tokens")
+	if total != 1000 {
+		t.Errorf("total_tokens = %d, want 1000", total)
+	}
+
+	byCategory, _ := snap["by_category"].(map[string]interface{})
+	if byCategory == nil {
+		t.Fatal("missing by_category")
+	}
+
+	// The per-category counts must add up to the reported total.
+	sum := 0
+	for _, key := range []string{"thinking", "assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} {
+		sum += intAt(byCategory, key)
+	}
+	if sum != total {
+		t.Errorf("sum(by_category) = %d, want %d (total_tokens)", sum, total)
+	}
+	// thinking must be > 0 (expected to take the leftover after the other blocks).
+	if intAt(byCategory, "thinking") == 0 {
+		t.Error("thinking should be > 0")
+	}
+
+	// Every non-thinking category must have been assigned something as well.
+	for _, key := range []string{"assistant_text", "builtin_tool_use", "mcp_tool_use", "skill_invocations"} {
+		if intAt(byCategory, key) == 0 {
+			t.Errorf("by_category[%s] = 0, expected > 0", key)
+		}
+	}
+}
+
+// With nil entries and nil parsed input the snapshot is expected to be nil.
+func TestExtractOutputTokensSnapshotNilOnEmpty(t *testing.T) {
+	if got := extractOutputTokensSnapshot(nil, nil); got != nil {
+		t.Errorf("expected nil on empty entries, got %v", got)
+	}
+}
+
 func TestExtractAgentsSnapshotPrefersFrontmatterName(t *testing.T) {
 	home := t.TempDir()
 	cwd := t.TempDir()

diff --git a/src/main.go b/src/main.go
@@ -767,6 +767,7 @@ func domainSnapshotsFromEntries(fullEntries, turnEntries []TranscriptEntry) map[
 		"file_attachments": extractFileAttachmentsSnapshot(turnEntries),
 		"prior_assistant":  extractPriorAssistantSnapshot(fullEntries, turnEntries),
 		"assistant_text":   extractAssistantTextSnapshot(turnEntries),
+		"output_tokens":    extractOutputTokensSnapshot(turnEntries, parsedTurn),
 		// cc_builtin covers the bundled system-prompt + tool-catalog cost
 		// /context reports under "System prompt" / "System tools" /
 		// "System tools (deferred)". These never appear in the transcript