diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index b9db5476e..270a66af0 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -273,6 +273,11 @@ type WriteCommittedOptions struct { // CheckpointTranscriptStart is written to both CommittedMetadata.CheckpointTranscriptStart // and the deprecated CommittedMetadata.TranscriptLinesAtStart for backward compatibility. + // CompactTranscriptStart is the transcript.jsonl line offset at checkpoint start. + // V2 /main writes this to checkpoint_transcript_start; v1 continues to use + // CheckpointTranscriptStart (full.jsonl). + CompactTranscriptStart int + // TokenUsage contains the token usage for this checkpoint TokenUsage *agent.TokenUsage diff --git a/cmd/entire/cli/checkpoint/v2_committed.go b/cmd/entire/cli/checkpoint/v2_committed.go index bc712da2a..61e7f94d8 100644 --- a/cmd/entire/cli/checkpoint/v2_committed.go +++ b/cmd/entire/cli/checkpoint/v2_committed.go @@ -382,7 +382,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions, IsTask: opts.IsTask, ToolUseID: opts.ToolUseID, TranscriptIdentifierAtStart: opts.TranscriptIdentifierAtStart, - CheckpointTranscriptStart: opts.CheckpointTranscriptStart, + CheckpointTranscriptStart: opts.CompactTranscriptStart, TokenUsage: opts.TokenUsage, SessionMetrics: opts.SessionMetrics, InitialAttribution: opts.InitialAttribution, diff --git a/cmd/entire/cli/checkpoint/v2_store_test.go b/cmd/entire/cli/checkpoint/v2_store_test.go index c0762a7f6..d7e88c717 100644 --- a/cmd/entire/cli/checkpoint/v2_store_test.go +++ b/cmd/entire/cli/checkpoint/v2_store_test.go @@ -4,14 +4,13 @@ import ( "context" "encoding/json" "fmt" - "os" - "path/filepath" "strings" "testing" "github.com/entireio/cli/cmd/entire/cli/agent" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/testutil" 
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -25,18 +24,12 @@ func initTestRepo(t *testing.T) *git.Repository { t.Helper() dir := t.TempDir() - repo, err := git.PlainInit(dir, false) - require.NoError(t, err) - - wt, err := repo.Worktree() - require.NoError(t, err) + testutil.InitRepo(t, dir) + testutil.WriteFile(t, dir, "README.md", "init") + testutil.GitAdd(t, dir, "README.md") + testutil.GitCommit(t, dir, "initial") - require.NoError(t, os.WriteFile(filepath.Join(dir, "README.md"), []byte("init"), 0o644)) - _, err = wt.Add("README.md") - require.NoError(t, err) - _, err = wt.Commit("initial", &git.CommitOptions{ - Author: &object.Signature{Name: "Test", Email: "test@test.com"}, - }) + repo, err := git.PlainOpen(dir) require.NoError(t, err) return repo @@ -379,6 +372,42 @@ func TestV2GitStore_WriteCommittedMain_NoCompactTranscript_SkipsGracefully(t *te assert.Error(t, err, "transcript.jsonl should not exist when CompactTranscript is nil") } +func TestV2GitStore_WriteCommittedMain_UsesCompactTranscriptStart(t *testing.T) { + t.Parallel() + repo := initTestRepo(t) + store := NewV2GitStore(repo, "origin") + ctx := context.Background() + + cpID := id.MustCheckpointID("a1b2c3d4e5f7") + compactData := []byte("{\"v\":1,\"type\":\"user\",\"content\":\"hello\"}\n{\"v\":1,\"type\":\"assistant\",\"content\":\"hi\"}\n") + + _, err := store.writeCommittedMain(ctx, WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "test-session-compact-start", + Strategy: "manual-commit", + Transcript: []byte(`{"type":"human","message":"hello"}`), + CompactTranscript: compactData, + Prompts: []string{"hello"}, + AuthorName: "Test", + AuthorEmail: "test@test.com", + CheckpointTranscriptStart: 42, // full.jsonl offset (must not be used in v2 metadata) + CompactTranscriptStart: 15, // transcript.jsonl offset (must be used in v2 metadata) + }) + require.NoError(t, err) + + tree := v2MainTree(t, repo) + cpPath := cpID.Path() + + // Read session metadata from 
/main + metadataContent := v2ReadFile(t, tree, cpPath+"/0/"+paths.MetadataFileName) + var metadata CommittedMetadata + require.NoError(t, json.Unmarshal([]byte(metadataContent), &metadata)) + + // v2 should store the compact offset, not the full transcript offset. + assert.Equal(t, 15, metadata.CheckpointTranscriptStart, + "v2 /main metadata should use CompactTranscriptStart for checkpoint_transcript_start") +} + func TestV2GitStore_UpdateCommitted_WritesCompactTranscript(t *testing.T) { t.Parallel() repo := initTestRepo(t) diff --git a/cmd/entire/cli/migrate.go b/cmd/entire/cli/migrate.go index 38e84ecac..5a610560d 100644 --- a/cmd/entire/cli/migrate.go +++ b/cmd/entire/cli/migrate.go @@ -1,6 +1,7 @@ package cli import ( + "bytes" "context" "errors" "fmt" @@ -203,6 +204,7 @@ func migrateOneCheckpoint(ctx context.Context, repo *git.Repository, v1Store *ch compacted := tryCompactTranscript(ctx, content.Transcript, content.Metadata) if compacted != nil { opts.CompactTranscript = compacted + opts.CompactTranscriptStart = computeCompactOffset(ctx, content.Transcript, compacted, content.Metadata) } else if len(content.Transcript) > 0 { compactFailed = true } @@ -412,6 +414,10 @@ func buildMigrateWriteOpts(content *checkpoint.SessionContent, info checkpoint.C } func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.CommittedMetadata) []byte { + return compactTranscriptForStartLine(ctx, transcript, m, 0) +} + +func compactTranscriptForStartLine(ctx context.Context, transcript []byte, m checkpoint.CommittedMetadata, startLine int) []byte { if len(transcript) == 0 { return nil } @@ -425,7 +431,7 @@ func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.C compacted, err := compact.Compact(transcript, compact.MetadataFields{ Agent: string(m.Agent), CLIVersion: versioninfo.Version, - StartLine: m.GetTranscriptStart(), + StartLine: startLine, }) if err != nil { logging.Warn(ctx, "compact transcript generation failed during 
migration", @@ -446,6 +452,50 @@ func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.C return compacted } +// computeCompactOffset determines the transcript.jsonl line offset for a checkpoint +// by comparing a full compact (startLine=0) against the scoped compact. The difference +// is the number of compact lines before this checkpoint's data. +func computeCompactOffset(ctx context.Context, fullTranscript, fullCompact []byte, m checkpoint.CommittedMetadata) int { + startLine := m.GetTranscriptStart() + if startLine == 0 || len(fullTranscript) == 0 || m.Agent == "" { + return 0 + } + + if len(fullCompact) == 0 { + return 0 + } + + scopedCompact, err := compact.Compact(fullTranscript, compact.MetadataFields{ + Agent: string(m.Agent), + CLIVersion: versioninfo.Version, + StartLine: startLine, + }) + if err != nil { + logging.Warn(ctx, "compact transcript offset calculation failed during migration", + slog.String("checkpoint_id", string(m.CheckpointID)), + slog.String("agent", string(m.Agent)), + slog.String("error", err.Error()), + ) + return 0 + } + if len(scopedCompact) == 0 { + return 0 + } + + fullLines := bytes.Count(fullCompact, []byte{'\n'}) + scopedLines := bytes.Count(scopedCompact, []byte{'\n'}) + offset := fullLines - scopedLines + if offset < 0 { + logging.Warn(ctx, "compact transcript offset was negative during migration, defaulting to 0", + slog.String("checkpoint_id", string(m.CheckpointID)), + slog.Int("full_lines", fullLines), + slog.Int("scoped_lines", scopedLines), + ) + return 0 + } + return offset +} + // copyTaskMetadataToV2 copies task metadata files (subagent transcripts, checkpoint JSONs) // from the v1 branch to the v2 /full/current ref via tree surgery. 
func copyTaskMetadataToV2(repo *git.Repository, _ *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, cpID id.CheckpointID, summary *checkpoint.CheckpointSummary) error { diff --git a/cmd/entire/cli/migrate_test.go b/cmd/entire/cli/migrate_test.go index b013277ed..edd16d31a 100644 --- a/cmd/entire/cli/migrate_test.go +++ b/cmd/entire/cli/migrate_test.go @@ -3,14 +3,18 @@ package cli import ( "bytes" "context" + "encoding/json" "strconv" "strings" "testing" + "github.com/entireio/cli/cmd/entire/cli/agent" "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/testutil" + "github.com/entireio/cli/cmd/entire/cli/transcript/compact" + "github.com/entireio/cli/cmd/entire/cli/versioninfo" "github.com/go-git/go-git/v6" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/filemode" @@ -326,6 +330,73 @@ func TestMigrateCheckpointsV2_BackfillCompactTranscript(t *testing.T) { assert.NotEmpty(t, summary2.Sessions[0].Transcript, "should have compact transcript after backfill") } +func TestMigrateCheckpointsV2_UsesComputedCompactTranscriptStart(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + ctx := context.Background() + + cpID := id.MustCheckpointID("5566778899aa") + transcript := []byte( + "{\"type\":\"human\",\"message\":{\"content\":\"prompt 1\"}}\n" + + "{\"type\":\"assistant\",\"message\":{\"content\":\"reply 1\"}}\n" + + "{\"type\":\"human\",\"message\":{\"content\":\"prompt 2\"}}\n" + + "{\"type\":\"assistant\",\"message\":{\"content\":\"reply 2\"}}\n", + ) + err := v1Store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-compact-start-migrate", + Strategy: "manual-commit", + Transcript: transcript, + Prompts: []string{"prompt 2"}, + Agent: agent.AgentTypeClaudeCode, + CheckpointTranscriptStart: 2, 
// full transcript line domain + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + v1Content, err := v1Store.ReadSessionContent(ctx, cpID, 0) + require.NoError(t, err) + fullCompacted := tryCompactTranscript(ctx, v1Content.Transcript, v1Content.Metadata) + require.NotNil(t, fullCompacted) + scopedCompacted, err := compact.Compact(v1Content.Transcript, compact.MetadataFields{ + Agent: string(v1Content.Metadata.Agent), + CLIVersion: versioninfo.Version, + StartLine: v1Content.Metadata.GetTranscriptStart(), + }) + require.NoError(t, err) + require.NotNil(t, scopedCompacted) + require.Greater(t, bytes.Count(fullCompacted, []byte{'\n'}), bytes.Count(scopedCompacted, []byte{'\n'})) + expectedOffset := computeCompactOffset(ctx, v1Content.Transcript, fullCompacted, v1Content.Metadata) + require.Positive(t, expectedOffset, "expected non-zero compact transcript start") + + var stdout bytes.Buffer + result, migrateErr := migrateCheckpointsV2(ctx, repo, v1Store, v2Store, &stdout) + require.NoError(t, migrateErr) + assert.Equal(t, 1, result.migrated) + + v2MainRef, err := repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true) + require.NoError(t, err) + v2MainCommit, err := repo.CommitObject(v2MainRef.Hash()) + require.NoError(t, err) + v2MainTree, err := v2MainCommit.Tree() + require.NoError(t, err) + + metadataFile, err := v2MainTree.File(cpID.Path() + "/0/" + paths.MetadataFileName) + require.NoError(t, err) + metadataContent, err := metadataFile.Contents() + require.NoError(t, err) + + var metadata checkpoint.CommittedMetadata + require.NoError(t, json.Unmarshal([]byte(metadataContent), &metadata)) + assert.Equal(t, expectedOffset, metadata.CheckpointTranscriptStart) + + storedCompact, err := v2Store.ReadSessionCompactTranscript(ctx, cpID, 0) + require.NoError(t, err) + assert.Equal(t, fullCompacted, storedCompact, "migration should persist cumulative compact transcript") +} + func 
TestMigrateCheckpointsV2_RepairsMissingFullTranscriptBeforeBackfill(t *testing.T) { t.Parallel() repo := initMigrateTestRepo(t) diff --git a/cmd/entire/cli/session/state.go b/cmd/entire/cli/session/state.go index 79e01eb05..ec8eb2428 100644 --- a/cmd/entire/cli/session/state.go +++ b/cmd/entire/cli/session/state.go @@ -109,6 +109,11 @@ type State struct { // against this value without reading the full transcript content. CheckpointTranscriptSize int64 `json:"checkpoint_transcript_size,omitempty"` + // CompactTranscriptStart is the transcript.jsonl line offset where the current + // checkpoint cycle began. It parallels CheckpointTranscriptStart (full.jsonl) + // and is updated after each condensation. + CompactTranscriptStart int `json:"compact_transcript_start,omitempty"` + // Deprecated: CondensedTranscriptLines is replaced by CheckpointTranscriptStart. // Kept for backward compatibility with existing state files. // Use NormalizeAfterLoad() to migrate. diff --git a/cmd/entire/cli/session/state_test.go b/cmd/entire/cli/session/state_test.go index b4742c96b..dac4d5b65 100644 --- a/cmd/entire/cli/session/state_test.go +++ b/cmd/entire/cli/session/state_test.go @@ -77,36 +77,66 @@ func TestState_NormalizeAfterLoad(t *testing.T) { assert.Equal(t, 200, state.CheckpointTranscriptStart) assert.Equal(t, 0, state.TranscriptLinesAtStart) }) + + t.Run("leaves_CompactTranscriptStart_zero_when_missing", func(t *testing.T) { + t.Parallel() + state := &State{ + CheckpointTranscriptStart: 120, + } + state.NormalizeAfterLoad(context.Background()) + assert.Equal(t, 0, state.CompactTranscriptStart) + }) + + t.Run("preserves_existing_CompactTranscriptStart", func(t *testing.T) { + t.Parallel() + state := &State{ + CheckpointTranscriptStart: 120, + CompactTranscriptStart: 45, + } + state.NormalizeAfterLoad(context.Background()) + assert.Equal(t, 45, state.CompactTranscriptStart) + }) } func TestState_NormalizeAfterLoad_JSONRoundTrip(t *testing.T) { tests := []struct { - name string - 
json string - wantCTS int // CheckpointTranscriptStart - wantStep int // StepCount + name string + json string + wantCTS int // CheckpointTranscriptStart + wantCompact int // CompactTranscriptStart + wantStep int // StepCount }{ { - name: "migrates old condensed_transcript_lines", - json: `{"session_id":"s1","condensed_transcript_lines":42,"checkpoint_count":5}`, - wantCTS: 42, - wantStep: 5, + name: "migrates old condensed_transcript_lines", + json: `{"session_id":"s1","condensed_transcript_lines":42,"checkpoint_count":5}`, + wantCTS: 42, + wantCompact: 0, + wantStep: 5, + }, + { + name: "migrates old transcript_lines_at_start", + json: `{"session_id":"s1","transcript_lines_at_start":75}`, + wantCTS: 75, + wantCompact: 0, }, { - name: "migrates old transcript_lines_at_start", - json: `{"session_id":"s1","transcript_lines_at_start":75}`, - wantCTS: 75, + name: "preserves new field over old", + json: `{"session_id":"s1","condensed_transcript_lines":10,"checkpoint_transcript_start":50}`, + wantCTS: 50, + wantCompact: 0, }, { - name: "preserves new field over old", - json: `{"session_id":"s1","condensed_transcript_lines":10,"checkpoint_transcript_start":50}`, - wantCTS: 50, + name: "handles clean new format", + json: `{"session_id":"s1","checkpoint_transcript_start":25,"checkpoint_count":3}`, + wantCTS: 25, + wantCompact: 0, + wantStep: 3, }, { - name: "handles clean new format", - json: `{"session_id":"s1","checkpoint_transcript_start":25,"checkpoint_count":3}`, - wantCTS: 25, - wantStep: 3, + name: "preserves explicit compact_transcript_start", + json: `{"session_id":"s1","checkpoint_transcript_start":25,"compact_transcript_start":9}`, + wantCTS: 25, + wantCompact: 9, }, } @@ -117,6 +147,7 @@ func TestState_NormalizeAfterLoad_JSONRoundTrip(t *testing.T) { state.NormalizeAfterLoad(context.Background()) assert.Equal(t, tt.wantCTS, state.CheckpointTranscriptStart) + assert.Equal(t, tt.wantCompact, state.CompactTranscriptStart) assert.Equal(t, tt.wantStep, 
state.StepCount) assert.Equal(t, 0, state.CondensedTranscriptLines, "deprecated field should be cleared") assert.Equal(t, 0, state.TranscriptLinesAtStart, "deprecated field should be cleared") diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index 90db3bef1..2af1e6825 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -1,6 +1,7 @@ package strategy import ( + "bytes" "context" "encoding/json" "errors" @@ -118,18 +119,8 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re logCtx := logging.WithComponent(ctx, "checkpoint") condenseStart := time.Now() - // Get shadow branch — use pre-resolved ref if available, otherwise resolve from repo. shadowBranchName := getShadowBranchNameForCommit(state.BaseCommit, state.WorktreeID) - ref := o.shadowRef - var hasShadowBranch bool - if ref != nil { - hasShadowBranch = true - } else { - refName := plumbing.NewBranchReferenceName(shadowBranchName) - var err error - ref, err = repo.Reference(refName, true) - hasShadowBranch = err == nil - } + ref, hasShadowBranch := resolveShadowRef(repo, shadowBranchName, o.shadowRef) // Re-resolve transcript path before any reads — handles agents that relocate // transcripts mid-session (e.g., Cursor CLI flat → nested layout change). @@ -175,28 +166,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re state.TokenUsage = backfillUsage } - // For 1:1 checkpoint model: filter files_touched to only include files actually - // committed in this specific commit. This ensures each checkpoint represents - // exactly the files in that commit, not all files mentioned in the transcript. 
- if len(committedFiles) > 0 { - hadFilesBeforeFiltering := len(sessionData.FilesTouched) > 0 - - if hadFilesBeforeFiltering { - filtered := make([]string, 0, len(sessionData.FilesTouched)) - for _, f := range sessionData.FilesTouched { - if _, ok := committedFiles[f]; ok { - filtered = append(filtered, f) - } - } - sessionData.FilesTouched = filtered - } else { - // Mid-turn commits can happen before SaveStep records FilesTouched. - // In that case, fall back to the actual committed files, excluding - // Entire's own metadata paths, so the checkpoint still reflects the - // files captured by this commit. - sessionData.FilesTouched = committedFilesExcludingMetadata(committedFiles) - } - } + filterFilesTouched(sessionData, committedFiles) // Get checkpoint store store, err := s.getCheckpointStore() @@ -260,24 +230,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re Summary: summary, } - compactRedactStart := time.Now() - compactCtx, compactRedactSpan := perf.Start(ctx, "redact_transcript_for_compact") - redactedForCompact, compactRedactErr := redact.JSONLBytes(sessionData.Transcript) - if compactRedactErr != nil { - compactRedactSpan.RecordError(compactRedactErr) - logging.Warn(ctx, "compact transcript redaction failed, skipping transcript.jsonl on /main", - slog.String("session_id", state.SessionID), - slog.String("error", compactRedactErr.Error()), - ) - redactedForCompact = nil - } - compactRedactSpan.End() - compactRedactDuration := time.Since(compactRedactStart) - compactTranscriptStart := time.Now() - compactCtx, compactTranscriptSpan := perf.Start(compactCtx, "compact_transcript_v2") - writeOpts.CompactTranscript = compactTranscriptForV2(compactCtx, ag, redactedForCompact, state.CheckpointTranscriptStart) - compactTranscriptSpan.End() - compactTranscriptDuration := time.Since(compactTranscriptStart) + compactRedactDuration, compactTranscriptDuration := buildCompactTranscript(ctx, ag, sessionData, state, &writeOpts) // Write 
checkpoint metadata to v1 branch writeV1Start := time.Now() @@ -310,17 +263,101 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re slog.Int("transcript_lines", sessionData.FullTranscriptLines), ) + // Count scoped (new-only) compact lines, not full compact lines, + // so state.CompactTranscriptStart accumulates correctly. + compactLines := 0 + if writeOpts.CompactTranscript != nil { + fullLines := countCompactLines(writeOpts.CompactTranscript) + compactLines = fullLines - writeOpts.CompactTranscriptStart + } + return &CondenseResult{ - CheckpointID: checkpointID, - SessionID: state.SessionID, - CheckpointsCount: state.StepCount, - FilesTouched: sessionData.FilesTouched, - Prompts: sessionData.Prompts, - TotalTranscriptLines: sessionData.FullTranscriptLines, - Transcript: sessionData.Transcript, + CheckpointID: checkpointID, + SessionID: state.SessionID, + CheckpointsCount: state.StepCount, + FilesTouched: sessionData.FilesTouched, + Prompts: sessionData.Prompts, + TotalTranscriptLines: sessionData.FullTranscriptLines, + CompactTranscriptLines: compactLines, + Transcript: sessionData.Transcript, }, nil } +// resolveShadowRef returns the shadow branch reference, preferring a pre-resolved +// ref when available and falling back to a repo lookup. +func resolveShadowRef(repo *git.Repository, branchName string, preResolved *plumbing.Reference) (ref *plumbing.Reference, exists bool) { + if preResolved != nil { + return preResolved, true + } + refName := plumbing.NewBranchReferenceName(branchName) + resolved, err := repo.Reference(refName, true) + if err != nil { + return nil, false + } + return resolved, true +} + +// filterFilesTouched narrows sessionData.FilesTouched to only files present in +// committedFiles. When no prior files were recorded (mid-turn commit), it falls +// back to the committed set minus Entire metadata paths. 
+func filterFilesTouched(sessionData *ExtractedSessionData, committedFiles map[string]struct{}) { + if len(committedFiles) == 0 { + return + } + if len(sessionData.FilesTouched) > 0 { + filtered := make([]string, 0, len(sessionData.FilesTouched)) + for _, f := range sessionData.FilesTouched { + if _, ok := committedFiles[f]; ok { + filtered = append(filtered, f) + } + } + sessionData.FilesTouched = filtered + } else { + // Mid-turn commits can happen before SaveStep records FilesTouched. + // In that case, fall back to the actual committed files, excluding + // Entire's own metadata paths, so the checkpoint still reflects the + // files captured by this commit. + sessionData.FilesTouched = committedFilesExcludingMetadata(committedFiles) + } +} + +// buildCompactTranscript redacts the transcript and produces compact (v2) forms +// when v2 checkpoints are enabled. Returns per-phase durations for timing logs. +func buildCompactTranscript(ctx context.Context, ag agent.Agent, sessionData *ExtractedSessionData, state *SessionState, writeOpts *cpkg.WriteCommittedOptions) (redactDuration, compactDuration time.Duration) { + redactStart := time.Now() + compactCtx, redactSpan := perf.Start(ctx, "redact_transcript_for_compact") + var redacted []byte + if settings.IsCheckpointsV2Enabled(ctx) { + var err error + redacted, err = redact.JSONLBytes(sessionData.Transcript) + if err != nil { + redactSpan.RecordError(err) + logging.Warn(ctx, "compact transcript redaction failed, skipping transcript.jsonl on /main", + slog.String("session_id", state.SessionID), + slog.String("error", err.Error()), + ) + redacted = nil + } + } + redactSpan.End() + redactDuration = time.Since(redactStart) + + compactStart := time.Now() + compactCtx, compactSpan := perf.Start(compactCtx, "compact_transcript_v2") + if settings.IsCheckpointsV2Enabled(ctx) { + // Generate scoped compact (only new content) for line counting and offset calculation. 
+ scopedCompact := compactTranscriptForV2(compactCtx, ag, redacted, state.CheckpointTranscriptStart) + // Generate full compact (cumulative) for storage — v2 /main replaces + // the session's transcript.jsonl on each write, so we must include all + // prior content, not just the new portion. + writeOpts.CompactTranscript = compactTranscriptForV2(compactCtx, ag, redacted, 0) + writeOpts.CompactTranscriptStart = computeCompactTranscriptStart(compactCtx, ag, state, redacted, scopedCompact) + } + compactSpan.End() + compactDuration = time.Since(compactStart) + return redactDuration, compactDuration +} + // generateSummary produces an LLM-generated summary of the session transcript. // Returns nil if the scoped transcript is empty or generation fails. func generateSummary(ctx context.Context, sessionData *ExtractedSessionData, state *SessionState) *cpkg.Summary { @@ -988,6 +1025,7 @@ func (s *ManualCommitStrategy) CondenseSessionByID(ctx context.Context, sessionI // Update session state: reset step count and transition to idle state.StepCount = 0 state.CheckpointTranscriptStart = result.TotalTranscriptLines + state.CompactTranscriptStart += result.CompactTranscriptLines state.CheckpointTranscriptSize = int64(len(result.Transcript)) state.Phase = session.PhaseIdle state.LastCheckpointID = checkpointID @@ -1096,6 +1134,7 @@ func (s *ManualCommitStrategy) CondenseAndMarkFullyCondensed(ctx context.Context // Update state — keep Phase = ENDED (unlike CondenseSessionByID which sets IDLE) state.StepCount = 0 state.CheckpointTranscriptStart = result.TotalTranscriptLines + state.CompactTranscriptStart += result.CompactTranscriptLines state.LastCheckpointID = checkpointID state.AttributionBaseCommit = state.BaseCommit state.PromptAttributions = nil @@ -1181,6 +1220,46 @@ func compactTranscriptForV2(ctx context.Context, ag agent.Agent, transcript []by return compacted } +// countCompactLines returns line count for compact transcript JSONL. 
+func countCompactLines(compactTranscript []byte) int {
+	// Counts '\n' bytes, so a final line that lacks a trailing newline is not
+	// included. Kept as a plain newline count so it agrees with the raw
+	// bytes.Count used by the migration path and the tests.
+	return bytes.Count(compactTranscript, []byte{'\n'})
+}
+
+// computeCompactTranscriptStart chooses the compact transcript start line offset
+// for v2 /main metadata.
+//
+// Preferred source is session state CompactTranscriptStart. For legacy sessions
+// that have only full-transcript offsets persisted, this recalculates the compact
+// offset from transcript bytes when possible: it compacts the whole transcript
+// (StartLine 0) and subtracts the line count of scopedCompact, leaving the number
+// of compact lines that precede the current checkpoint.
+//
+// Parameters:
+//   - ag: agent whose Name() is passed to the compactor; nil yields 0.
+//   - state: session state supplying CompactTranscriptStart,
+//     CheckpointTranscriptStart and SessionID (for logging).
+//   - transcript: transcript bytes handed to compact.Compact.
+//   - scopedCompact: compact output covering only this checkpoint's content.
+//
+// Returns the compact line offset, or 0 on any failure (fail-open). Every
+// fallback that follows an attempted recalculation is logged as a warning so a
+// zero offset can be traced back to its cause.
+//
+// NOTE(review): the caller in buildCompactTranscript also generates a
+// StartLine=0 compact for storage; passing it in would avoid compacting the
+// transcript a second time here — confirm before changing the signature.
+func computeCompactTranscriptStart(ctx context.Context, ag agent.Agent, state *SessionState, transcript []byte, scopedCompact []byte) int {
+	if state.CompactTranscriptStart > 0 {
+		return state.CompactTranscriptStart
+	}
+	// Nothing to recalculate: either this is the first checkpoint cycle or
+	// there is no input to derive an offset from.
+	if state.CheckpointTranscriptStart == 0 || ag == nil || len(transcript) == 0 || len(scopedCompact) == 0 {
+		return 0
+	}
+
+	fullCompacted, err := compact.Compact(transcript, compact.MetadataFields{
+		Agent:      string(ag.Name()),
+		CLIVersion: versioninfo.Version,
+		StartLine:  0,
+	})
+	if err != nil {
+		// Include the underlying error, matching computeCompactOffset in the
+		// migration path, so the failure is diagnosable from logs.
+		logging.Warn(ctx, "failed to recalculate compact transcript start, using 0",
+			slog.String("session_id", state.SessionID),
+			slog.String("error", err.Error()),
+		)
+		return 0
+	}
+	if len(fullCompacted) == 0 {
+		logging.Warn(ctx, "failed to recalculate compact transcript start, using 0",
+			slog.String("session_id", state.SessionID),
+		)
+		return 0
+	}
+
+	fullLines := countCompactLines(fullCompacted)
+	scopedLines := countCompactLines(scopedCompact)
+	offset := fullLines - scopedLines
+	if offset < 0 {
+		// The scoped compact should never be longer than the cumulative one;
+		// surface the inconsistency instead of clamping silently.
+		logging.Warn(ctx, "compact transcript start was negative, using 0",
+			slog.String("session_id", state.SessionID),
+			slog.Int("full_lines", fullLines),
+			slog.Int("scoped_lines", scopedLines),
+		)
+		return 0
+	}
+	return offset
+}
+
 // writeCommittedV2IfEnabled writes checkpoint data to v2 refs when checkpoints_v2
 // is enabled in settings. Failures are logged as warnings — v2 writes are
 // best-effort during the dual-write period and must not block the v1 path.
diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index e0a9e6cfe..e9ed33daf 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -1314,6 +1314,7 @@ func (s *ManualCommitStrategy) condenseAndUpdateState( state.AttributionBaseCommit = newHead state.StepCount = 0 state.CheckpointTranscriptStart = result.TotalTranscriptLines + state.CompactTranscriptStart += result.CompactTranscriptLines state.CheckpointTranscriptSize = int64(len(result.Transcript)) // Clear attribution tracking — condensation already used these values @@ -2712,6 +2713,7 @@ func (s *ManualCommitStrategy) carryForwardToNewShadowBranch( // but this would complicate checkpoint retrieval and require careful tracking of dependencies. state.StepCount = 1 state.CheckpointTranscriptStart = 0 + state.CompactTranscriptStart = 0 state.CheckpointTranscriptSize = 0 state.LastCheckpointID = "" // NOTE: TurnCheckpointIDs is intentionally NOT cleared here. 
Those checkpoint diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index ae5c08450..aa8020b9d 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -1,6 +1,7 @@ package strategy import ( + "bytes" "context" "encoding/json" "errors" @@ -15,6 +16,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/testutil" "github.com/entireio/cli/cmd/entire/cli/trailers" "github.com/go-git/go-git/v6" "github.com/go-git/go-git/v6/plumbing" @@ -4028,6 +4030,189 @@ func TestCondenseSession_V2DualWrite(t *testing.T) { require.NoError(t, err, "full.jsonl should exist on /full/current") } +// TestCondenseSession_V2CompactTranscriptStart verifies v2 /main writes +// checkpoint_transcript_start from compact transcript offset, not full.jsonl offset. 
+func TestCondenseSession_V2CompactTranscriptStart(t *testing.T) { + dir := t.TempDir() + testutil.InitRepo(t, dir) + testutil.WriteFile(t, dir, "main.go", "package main") + testutil.GitAdd(t, dir, "main.go") + testutil.GitCommit(t, dir, "Initial commit") + + repo, err := git.PlainOpen(dir) + require.NoError(t, err) + commitHash := testutil.GetHeadHash(t, dir) + + t.Chdir(dir) + + // Enable checkpoints_v2 via settings + entireDir := filepath.Join(dir, ".entire") + require.NoError(t, os.MkdirAll(entireDir, 0o755)) + settingsJSON := `{"enabled": true, "strategy": "manual-commit", "strategy_options": {"checkpoints_v2": true}}` + require.NoError(t, os.WriteFile(filepath.Join(entireDir, "settings.json"), []byte(settingsJSON), 0o644)) + + s := &ManualCommitStrategy{} + sessionID := "2025-01-15-test-v2-compact-start" + + // Create metadata directory with transcript + metadataDir := ".entire/metadata/" + sessionID + metadataDirAbs := filepath.Join(dir, metadataDir) + require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755)) + + transcript := `{"type":"human","message":{"content":"hello"}} +{"type":"assistant","message":{"content":"hi there"}} +` + require.NoError(t, os.WriteFile(filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644)) + + // SaveStep to create shadow branch + err = s.SaveStep(context.Background(), StepContext{ + SessionID: sessionID, + ModifiedFiles: []string{"main.go"}, + MetadataDir: metadataDir, + MetadataDirAbs: metadataDirAbs, + CommitMessage: "Checkpoint 1", + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + state, err := s.loadSessionState(context.Background(), sessionID) + require.NoError(t, err) + state.TranscriptPath = filepath.Join(metadataDirAbs, paths.TranscriptFileName) + state.BaseCommit = commitHash[:7] + state.AgentType = agent.AgentTypeClaudeCode + + // First condensation starts at compact offset 0. 
+ checkpointID := id.MustCheckpointID("cc11dd22ee33") + result, err := s.CondenseSession(context.Background(), repo, checkpointID, state, nil) + require.NoError(t, err) + require.NotNil(t, result) + + // v2 /main should have checkpoint_transcript_start = 0 for first checkpoint. + v2MainRef, err := repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true) + require.NoError(t, err) + v2MainCommit, err := repo.CommitObject(v2MainRef.Hash()) + require.NoError(t, err) + v2MainTree, err := v2MainCommit.Tree() + require.NoError(t, err) + + cpPath := checkpointID.Path() + sessionTree, err := v2MainTree.Tree(cpPath + "/0") + require.NoError(t, err) + metadataFile, err := sessionTree.File(paths.MetadataFileName) + require.NoError(t, err) + metadataContent, err := metadataFile.Contents() + require.NoError(t, err) + + var v2Metadata checkpoint.CommittedMetadata + require.NoError(t, json.Unmarshal([]byte(metadataContent), &v2Metadata)) + require.Equal(t, 0, v2Metadata.CheckpointTranscriptStart, + "first checkpoint v2 metadata should have checkpoint_transcript_start=0") + + // Read v1 metadata for comparison. 
+ v1Ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) + require.NoError(t, err) + v1Commit, err := repo.CommitObject(v1Ref.Hash()) + require.NoError(t, err) + v1Tree, err := v1Commit.Tree() + require.NoError(t, err) + v1SessionTree, err := v1Tree.Tree(cpPath + "/0") + require.NoError(t, err) + v1MetadataFile, err := v1SessionTree.File(paths.MetadataFileName) + require.NoError(t, err) + v1MetadataContent, err := v1MetadataFile.Contents() + require.NoError(t, err) + + var v1Metadata checkpoint.CommittedMetadata + require.NoError(t, json.Unmarshal([]byte(v1MetadataContent), &v1Metadata)) + require.Equal(t, 0, v1Metadata.CheckpointTranscriptStart, + "first checkpoint v1 metadata should also have checkpoint_transcript_start=0") + + // Verify compact transcript lines were counted in the result + require.Positive(t, result.CompactTranscriptLines, + "CondenseResult should report compact transcript lines") + + // Read compact transcript.jsonl from v2 /main for the first checkpoint. 
+ compactFile1, err := sessionTree.File(paths.CompactTranscriptFileName) + require.NoError(t, err, "transcript.jsonl should exist on v2 /main") + compactContent1, err := compactFile1.Contents() + require.NoError(t, err) + firstCompactLines := bytes.Count([]byte(compactContent1), []byte{'\n'}) + require.Positive(t, firstCompactLines, "first checkpoint compact transcript should have lines") + + // --- Second condensation: add more transcript content --- + transcript2 := transcript + `{"type":"human","message":{"content":"next question"}} +{"type":"assistant","message":{"content":"next answer"}} +` + require.NoError(t, os.WriteFile(filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript2), 0o644)) + + // Update state after first condensation (mimic what CondenseSessionByID does) + state.StepCount = 0 + state.CheckpointTranscriptStart = result.TotalTranscriptLines + state.CompactTranscriptStart += result.CompactTranscriptLines + + // SaveStep for second checkpoint + testutil.WriteFile(t, dir, "main.go", "package main\n// v2") + err = s.SaveStep(context.Background(), StepContext{ + SessionID: sessionID, + ModifiedFiles: []string{"main.go"}, + MetadataDir: metadataDir, + MetadataDirAbs: metadataDirAbs, + CommitMessage: "Checkpoint 2", + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + state2, err := s.loadSessionState(context.Background(), sessionID) + require.NoError(t, err) + state2.TranscriptPath = filepath.Join(metadataDirAbs, paths.TranscriptFileName) + state2.BaseCommit = commitHash[:7] + state2.AgentType = agent.AgentTypeClaudeCode + state2.CheckpointTranscriptStart = state.CheckpointTranscriptStart + state2.CompactTranscriptStart = state.CompactTranscriptStart + + checkpointID2 := id.MustCheckpointID("dd22ee33ff44") + result2, err := s.CondenseSession(context.Background(), repo, checkpointID2, state2, nil) + require.NoError(t, err) + require.NotNil(t, result2) + + // v2 /main metadata for second checkpoint 
should have compact start = firstCompactLines. + v2MainRef2, err := repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true) + require.NoError(t, err) + v2MainCommit2, err := repo.CommitObject(v2MainRef2.Hash()) + require.NoError(t, err) + v2MainTree2, err := v2MainCommit2.Tree() + require.NoError(t, err) + + cpPath2 := checkpointID2.Path() + sessionTree2, err := v2MainTree2.Tree(cpPath2 + "/0") + require.NoError(t, err) + metadataFile2, err := sessionTree2.File(paths.MetadataFileName) + require.NoError(t, err) + metadataContent2, err := metadataFile2.Contents() + require.NoError(t, err) + + var v2Metadata2 checkpoint.CommittedMetadata + require.NoError(t, json.Unmarshal([]byte(metadataContent2), &v2Metadata2)) + require.Equal(t, firstCompactLines, v2Metadata2.CheckpointTranscriptStart, + "second checkpoint v2 metadata should have checkpoint_transcript_start = first checkpoint's compact line count") + + // The compact transcript.jsonl for checkpoint 2 should be CUMULATIVE: + // it should contain both checkpoint 1's and checkpoint 2's compact lines. + compactFile2, err := sessionTree2.File(paths.CompactTranscriptFileName) + require.NoError(t, err, "transcript.jsonl should exist for second checkpoint") + compactContent2, err := compactFile2.Contents() + require.NoError(t, err) + secondCompactTotalLines := bytes.Count([]byte(compactContent2), []byte{'\n'}) + require.Greater(t, secondCompactTotalLines, firstCompactLines, + "second checkpoint compact transcript should include all prior content plus new content") + + // The first checkpoint's content should be a prefix of the second checkpoint's content. + require.True(t, strings.HasPrefix(compactContent2, compactContent1), + "second checkpoint compact transcript should start with first checkpoint's content") +} + // TestCondenseSession_V2Disabled_NoV2Refs verifies that when checkpoints_v2 is // not enabled, CondenseSession only writes to v1 and does not create v2 refs. 
func TestCondenseSession_V2Disabled_NoV2Refs(t *testing.T) { @@ -4086,6 +4271,7 @@ func TestCondenseSession_V2Disabled_NoV2Refs(t *testing.T) { result, err := s.CondenseSession(context.Background(), repo, checkpointID, state, nil) require.NoError(t, err) require.NotNil(t, result) + require.Equal(t, 0, result.CompactTranscriptLines, "v2-disabled condensation should not report compact transcript line deltas") // v1 should exist _, err = repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) diff --git a/cmd/entire/cli/strategy/manual_commit_types.go b/cmd/entire/cli/strategy/manual_commit_types.go index 65d490157..a9b459f06 100644 --- a/cmd/entire/cli/strategy/manual_commit_types.go +++ b/cmd/entire/cli/strategy/manual_commit_types.go @@ -48,13 +48,14 @@ type CheckpointInfo struct { // CondenseResult contains the result of a session condensation operation. type CondenseResult struct { - CheckpointID id.CheckpointID // 12-hex-char from Entire-Checkpoint trailer, used as directory path - SessionID string - CheckpointsCount int - FilesTouched []string - Prompts []string // User prompts from the condensed session - TotalTranscriptLines int // Total lines in transcript after this condensation - Transcript []byte // Raw transcript bytes for downstream consumers (trail title generation) + CheckpointID id.CheckpointID // 12-hex-char from Entire-Checkpoint trailer, used as directory path + SessionID string + CheckpointsCount int + FilesTouched []string + Prompts []string // User prompts from the condensed session + TotalTranscriptLines int // Total transcript units after this condensation (JSONL line count or message count by agent format) + CompactTranscriptLines int // Compact transcript lines newly added by this checkpoint: a per-checkpoint delta, not a running total (0 if v2 disabled); callers add it to CompactTranscriptStart + Transcript []byte // Raw transcript bytes for downstream consumers (trail title generation) } // ExtractedSessionData contains data extracted from a shadow branch.