Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ type WriteCommittedOptions struct {
// - the checkpoint predates the summarization feature
Summary *Summary

// Linkage contains content-based signals for re-linking after history rewrites.
// Written to the root-level CheckpointSummary, not per-session metadata.
Linkage *LinkageMetadata

// CompactTranscript is the Entire Transcript Format (transcript.jsonl) bytes.
// Written to v2 /main ref alongside metadata. May be nil if compaction
// was not performed (unknown agent, compaction error, empty transcript).
Expand Down Expand Up @@ -445,6 +449,23 @@ type SessionFilePaths struct {
Prompt string `json:"prompt"`
}

// LinkageMetadata contains Git-native signals for limited fallback re-linking
// after git history rewrites (rebase, reword, amend, filter-branch).
// It is stored at the checkpoint level (root metadata.json), not per-session.
//
// The web uses a fallback chain when a commit arrives without an
// Entire-Checkpoint trailer:
//
//  1. TreeHash match - covers: reword, amend (msg-only), filter-branch (msg-only)
//  2. PatchID match - covers: clean rebase, cherry-pick to other branch
//
// Both fields are optional: an empty value is omitted from the serialized JSON.
type LinkageMetadata struct {
// TreeHash is the git tree hash of the commit (full repo snapshot).
// It survives rewrites that don't change code (reword, msg-only amend).
// Serialized as "tree_hash".
TreeHash string `json:"tree_hash,omitempty"`

// PatchID is the git patch-id of the commit's diff (parent->HEAD).
// It survives a rebase (same diff replayed on a different base).
// Serialized as "patch_id".
PatchID string `json:"patch_id,omitempty"`
}

// CheckpointSummary is the root-level metadata.json for a checkpoint.
// It contains aggregated statistics from all sessions and a map of session IDs
// to their file paths. Session-specific data (including initial_attribution)
Expand Down Expand Up @@ -473,6 +494,7 @@ type CheckpointSummary struct {
Sessions []SessionFilePaths `json:"sessions"`
TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"`
CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"`
Linkage *LinkageMetadata `json:"linkage,omitempty"`
}

// SessionMetrics contains hook-provided session metrics from agents that report
Expand Down
73 changes: 73 additions & 0 deletions cmd/entire/cli/checkpoint/checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3624,6 +3624,79 @@ func TestWriteCommitted_SubagentTranscript_JSONLFallback(t *testing.T) {
}
}

// TestWriteCommitted_IncludesLinkage verifies that linkage metadata handed to
// WriteCommitted comes back unchanged on the summary returned by ReadCommitted.
func TestWriteCommitted_IncludesLinkage(t *testing.T) {
	t.Parallel()

	repo, _ := setupBranchTestRepo(t)
	store := NewGitStore(repo)
	ctx := context.Background()
	cpID := id.MustCheckpointID("a1b2c3d4e5f6")

	want := &LinkageMetadata{
		TreeHash: "abc123def456abc123def456abc123def456abc1",
		PatchID:  "def456abc123def456abc123def456abc123def4",
	}
	writeOpts := WriteCommittedOptions{
		CheckpointID:     cpID,
		SessionID:        "linkage-test-session",
		Strategy:         "manual-commit",
		Agent:            agent.AgentTypeClaudeCode,
		Linkage:          want,
		Transcript:       []byte(`{"type":"human","message":{"content":"test"}}` + "\n"),
		FilesTouched:     []string{"file.go"},
		CheckpointsCount: 1,
		AuthorName:       "Test Author",
		AuthorEmail:      "test@example.com",
	}
	if err := store.WriteCommitted(ctx, writeOpts); err != nil {
		t.Fatalf("WriteCommitted() error = %v", err)
	}

	// Round-trip: the linkage must be present on the summary that is read back.
	summary, err := store.ReadCommitted(ctx, cpID)
	if err != nil {
		t.Fatalf("ReadCommitted() error = %v", err)
	}

	got := summary.Linkage
	if got == nil {
		t.Fatal("Linkage should be present in CheckpointSummary")
	}
	if got.TreeHash != want.TreeHash {
		t.Errorf("TreeHash = %q, want %q", got.TreeHash, want.TreeHash)
	}
	if got.PatchID != want.PatchID {
		t.Errorf("PatchID = %q, want %q", got.PatchID, want.PatchID)
	}
}

// TestWriteCommitted_NilLinkageOmitted verifies that a checkpoint written
// without linkage metadata reads back with a nil Linkage on its summary.
func TestWriteCommitted_NilLinkageOmitted(t *testing.T) {
	t.Parallel()

	repo, _ := setupBranchTestRepo(t)
	store := NewGitStore(repo)
	ctx := context.Background()
	cpID := id.MustCheckpointID("a0b1c2d3e4f5")

	// Linkage is deliberately left unset in the write options.
	writeOpts := WriteCommittedOptions{
		CheckpointID:     cpID,
		SessionID:        "no-linkage-session",
		Strategy:         "manual-commit",
		Agent:            agent.AgentTypeClaudeCode,
		Transcript:       []byte(`{"type":"human","message":{"content":"test"}}` + "\n"),
		FilesTouched:     []string{"file.go"},
		CheckpointsCount: 1,
		AuthorName:       "Test Author",
		AuthorEmail:      "test@example.com",
	}
	if err := store.WriteCommitted(ctx, writeOpts); err != nil {
		t.Fatalf("WriteCommitted() error = %v", err)
	}

	summary, err := store.ReadCommitted(ctx, cpID)
	if err != nil {
		t.Fatalf("ReadCommitted() error = %v", err)
	}
	if got := summary.Linkage; got != nil {
		t.Errorf("Linkage should be nil when not provided, got %+v", got)
	}
}

func TestWriteTemporaryTask_SubagentTranscript_RedactsSecrets(t *testing.T) {
// Cannot use t.Parallel() because t.Chdir is required for paths.WorktreeRoot()
tempDir := t.TempDir()
Expand Down
1 change: 1 addition & 0 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s
Sessions: sessions,
TokenUsage: tokenUsage,
CombinedAttribution: combinedAttribution,
Linkage: opts.Linkage,
}

metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ")
Expand Down
49 changes: 49 additions & 0 deletions cmd/entire/cli/gitops/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,52 @@ func extractStatus(statusLine string) byte {
}
return statusField[0]
}

// ComputePatchID returns the git patch-id of the diff between parentHash and
// commitHash, computed by feeding `git diff-tree -p` output to
// `git patch-id --stable`. Both commands run with repoDir as their working
// directory and are bound to ctx.
//
// A patch ID hashes the diff content rather than commit metadata, so the same
// change keeps its ID after a clean rebase onto a different base.
//
// When parentHash is empty, commitHash is treated as an initial commit and is
// diffed with --root.
//
// The result is the first space-separated field of the patch-id output (a
// 40-character hex string in SHA-1 repositories). An empty string with a nil
// error is returned when the diff, or the patch-id output, is empty. A failure
// of either git command is returned wrapped, with the command's trimmed stderr
// appended.
func ComputePatchID(ctx context.Context, repoDir, parentHash, commitHash string) (string, error) {
	diffArgs := []string{"diff-tree", "-p", parentHash, commitHash}
	if parentHash == "" {
		// No parent to compare against: diff the root commit against the empty tree.
		diffArgs = []string{"diff-tree", "--root", "-p", commitHash}
	}

	var patch, diffStderr bytes.Buffer
	diffTree := exec.CommandContext(ctx, "git", diffArgs...)
	diffTree.Dir = repoDir
	diffTree.Stdout = &patch
	diffTree.Stderr = &diffStderr
	if err := diffTree.Run(); err != nil {
		return "", fmt.Errorf("git diff-tree failed: %w: %s", err, strings.TrimSpace(diffStderr.String()))
	}

	// Nothing changed between the two commits, so there is nothing to hash.
	if patch.Len() == 0 {
		return "", nil
	}

	var idStdout, idStderr bytes.Buffer
	hasher := exec.CommandContext(ctx, "git", "patch-id", "--stable")
	hasher.Dir = repoDir
	hasher.Stdin = &patch
	hasher.Stdout = &idStdout
	hasher.Stderr = &idStderr
	if err := hasher.Run(); err != nil {
		return "", fmt.Errorf("git patch-id failed: %w: %s", err, strings.TrimSpace(idStderr.String()))
	}

	// git patch-id prints "<patch-id> <commit-id>"; keep only the first field.
	// Cutting an empty string yields "", which covers the no-output case.
	patchID, _, _ := strings.Cut(strings.TrimSpace(idStdout.String()), " ")
	return patchID, nil
}
129 changes: 129 additions & 0 deletions cmd/entire/cli/gitops/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os/exec"
"path/filepath"
"sort"
"strings"
"testing"
)

Expand All @@ -31,6 +32,8 @@ func initTestRepo(t *testing.T) string {
}

run("init", "-b", "main")
run("config", "user.name", "Test")
run("config", "user.email", "test@test.com")
run("config", "commit.gpgsign", "false")

return dir
Expand Down Expand Up @@ -84,6 +87,44 @@ func gitCommit(t *testing.T, dir, msg string) {
}
}

// revParse runs `git rev-parse ref` in dir and returns its standard output
// with surrounding whitespace trimmed. The test is failed immediately if the
// command returns an error.
func revParse(t *testing.T, dir, ref string) string {
	t.Helper()

	parse := exec.CommandContext(context.Background(), "git", "rev-parse", ref)
	parse.Dir = dir

	stdout, runErr := parse.Output()
	if runErr != nil {
		t.Fatalf("git rev-parse %s failed: %v", ref, runErr)
	}

	resolved := strings.TrimSpace(string(stdout))
	return resolved
}

// gitCheckoutBranch runs `git checkout -b branchName` in dir. On failure the
// test is aborted and the command's combined output is included in the message.
func gitCheckoutBranch(t *testing.T, dir, branchName string) {
	t.Helper()

	checkout := exec.CommandContext(context.Background(), "git", "checkout", "-b", branchName)
	checkout.Dir = dir

	combined, runErr := checkout.CombinedOutput()
	if runErr == nil {
		return
	}
	t.Fatalf("git checkout -b %s failed: %v\n%s", branchName, runErr, combined)
}

// gitCheckout runs `git checkout ref` in dir. On failure the test is aborted
// and the command's combined output is included in the message.
func gitCheckout(t *testing.T, dir, ref string) {
	t.Helper()

	checkout := exec.CommandContext(context.Background(), "git", "checkout", ref)
	checkout.Dir = dir

	combined, runErr := checkout.CombinedOutput()
	if runErr == nil {
		return
	}
	t.Fatalf("git checkout %s failed: %v\n%s", ref, runErr, combined)
}

// gitRebase runs `git rebase onto` in dir. On failure the test is aborted and
// the command's combined output is included in the message.
func gitRebase(t *testing.T, dir, onto string) {
	t.Helper()

	rebase := exec.CommandContext(context.Background(), "git", "rebase", onto)
	rebase.Dir = dir

	combined, runErr := rebase.CombinedOutput()
	if runErr == nil {
		return
	}
	t.Fatalf("git rebase %s failed: %v\n%s", onto, runErr, combined)
}

func TestDiffTreeFiles_NormalCommit(t *testing.T) {
t.Parallel()
dir := initTestRepo(t)
Expand Down Expand Up @@ -405,3 +446,91 @@ func TestExtractStatus(t *testing.T) {
})
}
}

// TestComputePatchID checks that a normal commit with a parent yields a
// non-empty, 40-character patch ID.
func TestComputePatchID(t *testing.T) {
	t.Parallel()
	dir := initTestRepo(t)

	// Two commits to the same file, so HEAD has a parent and a non-empty diff.
	commits := []struct{ content, message string }{
		{"initial", "initial"},
		{"modified", "modify file"},
	}
	for _, c := range commits {
		writeFile(t, dir, "file.txt", c.content)
		gitAdd(t, dir, "file.txt")
		gitCommit(t, dir, c.message)
	}

	head := revParse(t, dir, "HEAD")
	parent := revParse(t, dir, "HEAD~1")

	patchID, err := ComputePatchID(context.Background(), dir, parent, head)
	switch {
	case err != nil:
		t.Fatalf("unexpected error: %v", err)
	case patchID == "":
		t.Fatal("expected non-empty patch ID")
	case len(patchID) != 40:
		t.Fatalf("expected 40-char hex, got %d chars: %s", len(patchID), patchID)
	}
}

// TestComputePatchID_StableAcrossRebase checks that the patch ID of a feature
// commit is unchanged after the feature branch is cleanly rebased onto a main
// branch that gained an unrelated commit.
func TestComputePatchID_StableAcrossRebase(t *testing.T) {
	t.Parallel()
	dir := initTestRepo(t)

	writeFile(t, dir, "base.txt", "base")
	gitAdd(t, dir, "base.txt")
	gitCommit(t, dir, "base")

	gitCheckoutBranch(t, dir, "feature")
	writeFile(t, dir, "feature.txt", "feature work")
	gitAdd(t, dir, "feature.txt")
	gitCommit(t, dir, "add feature")

	featureHead := revParse(t, dir, "HEAD")
	featureParent := revParse(t, dir, "HEAD~1")

	patchIDBefore, err := ComputePatchID(context.Background(), dir, featureParent, featureHead)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// Without this guard, two empty IDs would compare equal below and the test
	// would pass without proving anything.
	if patchIDBefore == "" {
		t.Fatal("expected non-empty patch ID before rebase")
	}

	// Advance main with a change that touches a different file so the rebase
	// applies cleanly.
	gitCheckout(t, dir, "main")
	writeFile(t, dir, "other.txt", "other work")
	gitAdd(t, dir, "other.txt")
	gitCommit(t, dir, "unrelated work on main")

	gitCheckout(t, dir, "feature")
	gitRebase(t, dir, "main")

	rebasedHead := revParse(t, dir, "HEAD")
	rebasedParent := revParse(t, dir, "HEAD~1")

	// Confirm the scenario really rewrote history; otherwise the comparison
	// below would be against the very same commit.
	if rebasedHead == featureHead {
		t.Fatalf("rebase did not rewrite the commit: HEAD is still %s", featureHead)
	}

	patchIDAfter, err := ComputePatchID(context.Background(), dir, rebasedParent, rebasedHead)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if patchIDBefore != patchIDAfter {
		t.Errorf("patch ID should survive clean rebase: before=%s, after=%s", patchIDBefore, patchIDAfter)
	}
}

// TestComputePatchID_InitialCommit checks that passing an empty parent hash
// for a repository's first commit still produces a non-empty patch ID.
func TestComputePatchID_InitialCommit(t *testing.T) {
	t.Parallel()
	dir := initTestRepo(t)

	const fileName = "file.txt"
	writeFile(t, dir, fileName, "initial")
	gitAdd(t, dir, fileName)
	gitCommit(t, dir, "initial")

	rootCommit := revParse(t, dir, "HEAD")

	// The empty parent hash selects the root-commit code path.
	patchID, err := ComputePatchID(context.Background(), dir, "", rootCommit)
	switch {
	case err != nil:
		t.Fatalf("unexpected error: %v", err)
	case patchID == "":
		t.Fatal("expected non-empty patch ID for initial commit")
	}
}
16 changes: 9 additions & 7 deletions cmd/entire/cli/strategy/manual_commit_condensation.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,14 @@ func (s *ManualCommitStrategy) getCheckpointLog(ctx context.Context, checkpointI

// condenseOpts provides pre-resolved git objects to avoid redundant reads.
// Each field is declared exactly once; a struct may not repeat a field name.
type condenseOpts struct {
	shadowRef        *plumbing.Reference   // Pre-resolved shadow branch ref (nil = resolve from repo)
	headTree         *object.Tree          // Pre-resolved HEAD tree (passed through to calculateSessionAttributions)
	parentTree       *object.Tree          // Pre-resolved parent tree (nil for initial commits, for consistent non-agent line counting)
	repoDir          string                // Repository worktree path for git CLI commands
	parentCommitHash string                // HEAD's first parent hash for per-commit non-agent file detection
	headCommitHash   string                // HEAD commit hash (passed through for attribution)
	allAgentFiles    map[string]struct{}   // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session)
	linkage          *cpkg.LinkageMetadata // Content-based signals for re-linking after history rewrites
}

// CondenseSession condenses a session's shadow branch to permanent storage.
Expand Down Expand Up @@ -229,6 +230,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re
AuthorEmail: authorEmail,
Agent: state.AgentType,
Model: state.ModelName,
Linkage: o.linkage,
TurnID: state.TurnID,
TranscriptIdentifierAtStart: state.TranscriptIdentifierAtStart,
CheckpointTranscriptStart: state.CheckpointTranscriptStart,
Expand Down
Loading