Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,11 @@ type WriteCommittedOptions struct {
// CheckpointTranscriptStart is written to both CommittedMetadata.CheckpointTranscriptStart
// and the deprecated CommittedMetadata.TranscriptLinesAtStart for backward compatibility.

// CompactTranscriptStart is the transcript.jsonl line offset at checkpoint start.
// V2 /main writes this to checkpoint_transcript_start; v1 continues to use
// CheckpointTranscriptStart (full.jsonl).
CompactTranscriptStart int

// TokenUsage contains the token usage for this checkpoint
TokenUsage *agent.TokenUsage

Expand Down
2 changes: 1 addition & 1 deletion cmd/entire/cli/checkpoint/v2_committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions,
IsTask: opts.IsTask,
ToolUseID: opts.ToolUseID,
TranscriptIdentifierAtStart: opts.TranscriptIdentifierAtStart,
CheckpointTranscriptStart: opts.CheckpointTranscriptStart,
CheckpointTranscriptStart: opts.CompactTranscriptStart,
TokenUsage: opts.TokenUsage,
SessionMetrics: opts.SessionMetrics,
InitialAttribution: opts.InitialAttribution,
Expand Down
55 changes: 42 additions & 13 deletions cmd/entire/cli/checkpoint/v2_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"

"github.com/entireio/cli/cmd/entire/cli/agent"
"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand All @@ -25,18 +24,12 @@ func initTestRepo(t *testing.T) *git.Repository {
t.Helper()
dir := t.TempDir()

repo, err := git.PlainInit(dir, false)
require.NoError(t, err)

wt, err := repo.Worktree()
require.NoError(t, err)
testutil.InitRepo(t, dir)
testutil.WriteFile(t, dir, "README.md", "init")
testutil.GitAdd(t, dir, "README.md")
testutil.GitCommit(t, dir, "initial")

require.NoError(t, os.WriteFile(filepath.Join(dir, "README.md"), []byte("init"), 0o644))
_, err = wt.Add("README.md")
require.NoError(t, err)
_, err = wt.Commit("initial", &git.CommitOptions{
Author: &object.Signature{Name: "Test", Email: "test@test.com"},
})
repo, err := git.PlainOpen(dir)
require.NoError(t, err)

return repo
Expand Down Expand Up @@ -379,6 +372,42 @@ func TestV2GitStore_WriteCommittedMain_NoCompactTranscript_SkipsGracefully(t *te
assert.Error(t, err, "transcript.jsonl should not exist when CompactTranscript is nil")
}

// TestV2GitStore_WriteCommittedMain_UsesCompactTranscriptStart writes a checkpoint
// whose full-transcript and compact-transcript offsets differ, then checks that the
// session metadata read back from the v2 main tree carries the compact offset.
func TestV2GitStore_WriteCommittedMain_UsesCompactTranscriptStart(t *testing.T) {
	t.Parallel()

	// Two deliberately different offsets so a mix-up between them is detectable.
	const (
		fullStart    = 42 // full.jsonl offset (must not be used in v2 metadata)
		compactStart = 15 // transcript.jsonl offset (must be used in v2 metadata)
	)

	ctx := context.Background()
	repo := initTestRepo(t)
	store := NewV2GitStore(repo, "origin")
	cpID := id.MustCheckpointID("a1b2c3d4e5f7")

	writeOpts := WriteCommittedOptions{
		CheckpointID:              cpID,
		SessionID:                 "test-session-compact-start",
		Strategy:                  "manual-commit",
		Transcript:                []byte(`{"type":"human","message":"hello"}`),
		CompactTranscript:         []byte("{\"v\":1,\"type\":\"user\",\"content\":\"hello\"}\n{\"v\":1,\"type\":\"assistant\",\"content\":\"hi\"}\n"),
		Prompts:                   []string{"hello"},
		AuthorName:                "Test",
		AuthorEmail:               "test@test.com",
		CheckpointTranscriptStart: fullStart,
		CompactTranscriptStart:    compactStart,
	}
	_, err := store.writeCommittedMain(ctx, writeOpts)
	require.NoError(t, err)

	// Read the session metadata back from /main.
	metadataPath := cpID.Path() + "/0/" + paths.MetadataFileName
	rawMetadata := v2ReadFile(t, v2MainTree(t, repo), metadataPath)

	var got CommittedMetadata
	require.NoError(t, json.Unmarshal([]byte(rawMetadata), &got))

	// v2 should store the compact offset, not the full transcript offset.
	assert.Equal(t, compactStart, got.CheckpointTranscriptStart,
		"v2 /main metadata should use CompactTranscriptStart for checkpoint_transcript_start")
}

func TestV2GitStore_UpdateCommitted_WritesCompactTranscript(t *testing.T) {
t.Parallel()
repo := initTestRepo(t)
Expand Down
52 changes: 51 additions & 1 deletion cmd/entire/cli/migrate.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cli

import (
"bytes"
"context"
"errors"
"fmt"
Expand Down Expand Up @@ -203,6 +204,7 @@ func migrateOneCheckpoint(ctx context.Context, repo *git.Repository, v1Store *ch
compacted := tryCompactTranscript(ctx, content.Transcript, content.Metadata)
if compacted != nil {
opts.CompactTranscript = compacted
opts.CompactTranscriptStart = computeCompactOffset(ctx, content.Transcript, compacted, content.Metadata)
} else if len(content.Transcript) > 0 {
compactFailed = true
}
Expand Down Expand Up @@ -412,6 +414,10 @@ func buildMigrateWriteOpts(content *checkpoint.SessionContent, info checkpoint.C
}

// tryCompactTranscript produces the compact form of transcript for the agent recorded
// in m, starting from the first transcript line (start line 0). It delegates to
// compactTranscriptForStartLine and returns whatever that helper returns.
func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.CommittedMetadata) []byte {
	// The full compact always begins at line 0, regardless of the start line stored in m.
	const firstLine = 0
	return compactTranscriptForStartLine(ctx, transcript, m, firstLine)
}

func compactTranscriptForStartLine(ctx context.Context, transcript []byte, m checkpoint.CommittedMetadata, startLine int) []byte {
if len(transcript) == 0 {
return nil
}
Expand All @@ -425,7 +431,7 @@ func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.C
compacted, err := compact.Compact(transcript, compact.MetadataFields{
Agent: string(m.Agent),
CLIVersion: versioninfo.Version,
StartLine: m.GetTranscriptStart(),
StartLine: startLine,
})
if err != nil {
logging.Warn(ctx, "compact transcript generation failed during migration",
Expand All @@ -446,6 +452,50 @@ func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.C
return compacted
}

// computeCompactOffset derives the transcript.jsonl line offset at which a checkpoint
// begins. It compacts fullTranscript a second time, scoped to the checkpoint's start
// line, and subtracts the scoped line count from the line count of fullCompact (the
// startLine=0 compact). The remainder is the number of compact lines that precede
// this checkpoint's data.
//
// It returns 0 when the checkpoint starts at line 0, when m has no agent, when either
// input is empty, when the scoped compaction fails or yields nothing, or when the
// computed difference is negative. Failures and negative results are logged as warnings.
func computeCompactOffset(ctx context.Context, fullTranscript, fullCompact []byte, m checkpoint.CommittedMetadata) int {
	scopeStart := m.GetTranscriptStart()

	// Nothing to compute without a non-zero start line, an agent, and both inputs.
	switch {
	case scopeStart == 0, m.Agent == "":
		return 0
	case len(fullTranscript) == 0, len(fullCompact) == 0:
		return 0
	}

	fields := compact.MetadataFields{
		Agent:      string(m.Agent),
		CLIVersion: versioninfo.Version,
		StartLine:  scopeStart,
	}
	scoped, err := compact.Compact(fullTranscript, fields)
	if err != nil {
		logging.Warn(ctx, "compact transcript offset calculation failed during migration",
			slog.String("checkpoint_id", string(m.CheckpointID)),
			slog.String("agent", string(m.Agent)),
			slog.String("error", err.Error()),
		)
		return 0
	}
	if len(scoped) == 0 {
		return 0
	}

	// Lines are counted by newline bytes in both compact outputs.
	newline := []byte{'\n'}
	totalLines := bytes.Count(fullCompact, newline)
	tailLines := bytes.Count(scoped, newline)
	if tailLines > totalLines {
		logging.Warn(ctx, "compact transcript offset was negative during migration, defaulting to 0",
			slog.String("checkpoint_id", string(m.CheckpointID)),
			slog.Int("full_lines", totalLines),
			slog.Int("scoped_lines", tailLines),
		)
		return 0
	}
	return totalLines - tailLines
}

// copyTaskMetadataToV2 copies task metadata files (subagent transcripts, checkpoint JSONs)
// from the v1 branch to the v2 /full/current ref via tree surgery.
func copyTaskMetadataToV2(repo *git.Repository, _ *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, cpID id.CheckpointID, summary *checkpoint.CheckpointSummary) error {
Expand Down
71 changes: 71 additions & 0 deletions cmd/entire/cli/migrate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ package cli
import (
"bytes"
"context"
"encoding/json"
"strconv"
"strings"
"testing"

"github.com/entireio/cli/cmd/entire/cli/agent"
"github.com/entireio/cli/cmd/entire/cli/checkpoint"
"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/entireio/cli/cmd/entire/cli/transcript/compact"
"github.com/entireio/cli/cmd/entire/cli/versioninfo"
"github.com/go-git/go-git/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/filemode"
Expand Down Expand Up @@ -326,6 +330,73 @@ func TestMigrateCheckpointsV2_BackfillCompactTranscript(t *testing.T) {
assert.NotEmpty(t, summary2.Sessions[0].Transcript, "should have compact transcript after backfill")
}

// TestMigrateCheckpointsV2_UsesComputedCompactTranscriptStart seeds a v1 checkpoint
// whose transcript start is non-zero, runs the v2 migration, and checks two things:
// the migrated metadata stores the offset returned by computeCompactOffset, and the
// stored compact transcript equals the startLine=0 compact of the v1 transcript.
func TestMigrateCheckpointsV2_UsesComputedCompactTranscriptStart(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	repo := initMigrateTestRepo(t)
	v1Store, v2Store := newMigrateStores(repo)

	// Seed a two-turn v1 checkpoint that starts at the second turn.
	cpID := id.MustCheckpointID("5566778899aa")
	rawTranscript := []byte(
		"{\"type\":\"human\",\"message\":{\"content\":\"prompt 1\"}}\n" +
			"{\"type\":\"assistant\",\"message\":{\"content\":\"reply 1\"}}\n" +
			"{\"type\":\"human\",\"message\":{\"content\":\"prompt 2\"}}\n" +
			"{\"type\":\"assistant\",\"message\":{\"content\":\"reply 2\"}}\n",
	)
	seedOpts := checkpoint.WriteCommittedOptions{
		CheckpointID:              cpID,
		SessionID:                 "session-compact-start-migrate",
		Strategy:                  "manual-commit",
		Transcript:                rawTranscript,
		Prompts:                   []string{"prompt 2"},
		Agent:                     agent.AgentTypeClaudeCode,
		CheckpointTranscriptStart: 2, // full transcript line domain
		AuthorName:                "Test",
		AuthorEmail:               "test@test.com",
	}
	require.NoError(t, v1Store.WriteCommitted(ctx, seedOpts))

	// Work out the expected offset from what v1 actually stored.
	v1Content, err := v1Store.ReadSessionContent(ctx, cpID, 0)
	require.NoError(t, err)

	wholeCompact := tryCompactTranscript(ctx, v1Content.Transcript, v1Content.Metadata)
	require.NotNil(t, wholeCompact)

	tailCompact, err := compact.Compact(v1Content.Transcript, compact.MetadataFields{
		Agent:      string(v1Content.Metadata.Agent),
		CLIVersion: versioninfo.Version,
		StartLine:  v1Content.Metadata.GetTranscriptStart(),
	})
	require.NoError(t, err)
	require.NotNil(t, tailCompact)

	// The scoped compact must be strictly shorter, otherwise the offset would be zero.
	newline := []byte{'\n'}
	require.Greater(t, bytes.Count(wholeCompact, newline), bytes.Count(tailCompact, newline))

	wantOffset := computeCompactOffset(ctx, v1Content.Transcript, wholeCompact, v1Content.Metadata)
	require.Positive(t, wantOffset, "expected non-zero compact transcript start")

	// Run the migration.
	var stdout bytes.Buffer
	result, migrateErr := migrateCheckpointsV2(ctx, repo, v1Store, v2Store, &stdout)
	require.NoError(t, migrateErr)
	assert.Equal(t, 1, result.migrated)

	// Load the migrated session metadata from the v2 main ref.
	mainRef, err := repo.Reference(plumbing.ReferenceName(paths.V2MainRefName), true)
	require.NoError(t, err)
	mainCommit, err := repo.CommitObject(mainRef.Hash())
	require.NoError(t, err)
	mainTree, err := mainCommit.Tree()
	require.NoError(t, err)

	metaFile, err := mainTree.File(cpID.Path() + "/0/" + paths.MetadataFileName)
	require.NoError(t, err)
	metaJSON, err := metaFile.Contents()
	require.NoError(t, err)

	var migrated checkpoint.CommittedMetadata
	require.NoError(t, json.Unmarshal([]byte(metaJSON), &migrated))
	assert.Equal(t, wantOffset, migrated.CheckpointTranscriptStart)

	gotCompact, err := v2Store.ReadSessionCompactTranscript(ctx, cpID, 0)
	require.NoError(t, err)
	assert.Equal(t, wholeCompact, gotCompact, "migration should persist cumulative compact transcript")
}

func TestMigrateCheckpointsV2_RepairsMissingFullTranscriptBeforeBackfill(t *testing.T) {
t.Parallel()
repo := initMigrateTestRepo(t)
Expand Down
5 changes: 5 additions & 0 deletions cmd/entire/cli/session/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ type State struct {
// against this value without reading the full transcript content.
CheckpointTranscriptSize int64 `json:"checkpoint_transcript_size,omitempty"`

// CompactTranscriptStart is the transcript.jsonl line offset where the current
// checkpoint cycle began. It parallels CheckpointTranscriptStart (full.jsonl)
// and is updated after each condensation.
CompactTranscriptStart int `json:"compact_transcript_start,omitempty"`

// Deprecated: CondensedTranscriptLines is replaced by CheckpointTranscriptStart.
// Kept for backward compatibility with existing state files.
// Use NormalizeAfterLoad() to migrate.
Expand Down
67 changes: 49 additions & 18 deletions cmd/entire/cli/session/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,36 +77,66 @@ func TestState_NormalizeAfterLoad(t *testing.T) {
assert.Equal(t, 200, state.CheckpointTranscriptStart)
assert.Equal(t, 0, state.TranscriptLinesAtStart)
})

t.Run("leaves_CompactTranscriptStart_zero_when_missing", func(t *testing.T) {
t.Parallel()
state := &State{
CheckpointTranscriptStart: 120,
}
state.NormalizeAfterLoad(context.Background())
assert.Equal(t, 0, state.CompactTranscriptStart)
})

t.Run("preserves_existing_CompactTranscriptStart", func(t *testing.T) {
t.Parallel()
state := &State{
CheckpointTranscriptStart: 120,
CompactTranscriptStart: 45,
}
state.NormalizeAfterLoad(context.Background())
assert.Equal(t, 45, state.CompactTranscriptStart)
})
}

func TestState_NormalizeAfterLoad_JSONRoundTrip(t *testing.T) {
tests := []struct {
name string
json string
wantCTS int // CheckpointTranscriptStart
wantStep int // StepCount
name string
json string
wantCTS int // CheckpointTranscriptStart
wantCompact int // CompactTranscriptStart
wantStep int // StepCount
}{
{
name: "migrates old condensed_transcript_lines",
json: `{"session_id":"s1","condensed_transcript_lines":42,"checkpoint_count":5}`,
wantCTS: 42,
wantStep: 5,
name: "migrates old condensed_transcript_lines",
json: `{"session_id":"s1","condensed_transcript_lines":42,"checkpoint_count":5}`,
wantCTS: 42,
wantCompact: 0,
wantStep: 5,
},
{
name: "migrates old transcript_lines_at_start",
json: `{"session_id":"s1","transcript_lines_at_start":75}`,
wantCTS: 75,
wantCompact: 0,
},
{
name: "migrates old transcript_lines_at_start",
json: `{"session_id":"s1","transcript_lines_at_start":75}`,
wantCTS: 75,
name: "preserves new field over old",
json: `{"session_id":"s1","condensed_transcript_lines":10,"checkpoint_transcript_start":50}`,
wantCTS: 50,
wantCompact: 0,
},
{
name: "preserves new field over old",
json: `{"session_id":"s1","condensed_transcript_lines":10,"checkpoint_transcript_start":50}`,
wantCTS: 50,
name: "handles clean new format",
json: `{"session_id":"s1","checkpoint_transcript_start":25,"checkpoint_count":3}`,
wantCTS: 25,
wantCompact: 0,
wantStep: 3,
},
{
name: "handles clean new format",
json: `{"session_id":"s1","checkpoint_transcript_start":25,"checkpoint_count":3}`,
wantCTS: 25,
wantStep: 3,
name: "preserves explicit compact_transcript_start",
json: `{"session_id":"s1","checkpoint_transcript_start":25,"compact_transcript_start":9}`,
wantCTS: 25,
wantCompact: 9,
},
}

Expand All @@ -117,6 +147,7 @@ func TestState_NormalizeAfterLoad_JSONRoundTrip(t *testing.T) {
state.NormalizeAfterLoad(context.Background())

assert.Equal(t, tt.wantCTS, state.CheckpointTranscriptStart)
assert.Equal(t, tt.wantCompact, state.CompactTranscriptStart)
assert.Equal(t, tt.wantStep, state.StepCount)
assert.Equal(t, 0, state.CondensedTranscriptLines, "deprecated field should be cleared")
assert.Equal(t, 0, state.TranscriptLinesAtStart, "deprecated field should be cleared")
Expand Down
Loading
Loading