From 3248cc2131b754633aaf16548686016d4194a530 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 2 Apr 2026 15:03:44 -0700 Subject: [PATCH 01/14] fix: allow checkpoint trailers on agent-initiated sequence operations When an agent runs git revert or cherry-pick as part of its work, the commit should be checkpointed. Previously prepare-commit-msg unconditionally skipped during sequence operations, making the agent's work invisible to Entire. Now checks for active sessions: if an agent session is ACTIVE, the operation is agent-initiated and gets a trailer. If no active session, it's user-initiated and is skipped as before. Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 85df9ac94bc7 --- .../cli/strategy/manual_commit_hooks.go | 16 +++- .../cli/strategy/manual_commit_session.go | 21 +++++ cmd/entire/cli/strategy/manual_commit_test.go | 93 +++++++++++++++++++ 3 files changed, 126 insertions(+), 4 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 486aa8fa0..9cc417774 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -309,14 +309,22 @@ func isGitSequenceOperation(ctx context.Context) bool { func (s *ManualCommitStrategy) PrepareCommitMsg(ctx context.Context, commitMsgFile string, source string) error { logCtx := logging.WithComponent(ctx, "checkpoint") - // Skip during rebase, cherry-pick, or revert operations - // These are replaying existing commits and should not be linked to agent sessions + // Skip during rebase, cherry-pick, or revert operations — UNLESS an agent + // session is ACTIVE. When an agent runs git revert/cherry-pick as part of + // its work, the commit should be checkpointed. When the user does it + // manually (no active session), skip as before. if isGitSequenceOperation(ctx) { - logging.Debug(logCtx, "prepare-commit-msg: skipped during git sequence operation", + if !s.hasActiveSessionInWorktree(ctx) { + logging.Debug(logCtx, "prepare-commit-msg: skipped during git sequence operation (no active session)", + slog.String("strategy", "manual-commit"), + slog.String("source", source), + ) + return nil + } + logging.Debug(logCtx, "prepare-commit-msg: sequence operation with active session, proceeding", slog.String("strategy", "manual-commit"), slog.String("source", source), ) - return nil } // Skip for merge and squash sources diff --git a/cmd/entire/cli/strategy/manual_commit_session.go b/cmd/entire/cli/strategy/manual_commit_session.go index ad4d852fd..bb7bc9f7b 100644 --- a/cmd/entire/cli/strategy/manual_commit_session.go +++ b/cmd/entire/cli/strategy/manual_commit_session.go @@ -148,6 +148,27 @@ func (s *ManualCommitStrategy) findSessionsForWorktree(ctx context.Context, work return matching, nil } +// hasActiveSessionInWorktree returns true if any session in the current worktree +// is in ACTIVE phase. Used to distinguish agent-initiated git operations (revert, +// cherry-pick) from user-initiated ones. Agent-initiated operations should be +// checkpointed; user-initiated ones should be skipped. +func (s *ManualCommitStrategy) hasActiveSessionInWorktree(ctx context.Context) bool { + worktreePath, err := paths.WorktreeRoot(ctx) + if err != nil { + return false + } + sessions, err := s.findSessionsForWorktree(ctx, worktreePath) + if err != nil { + return false + } + for _, state := range sessions { + if state.Phase.IsActive() { + return true + } + } + return false +} + // findSessionsForCommit finds all sessions where base_commit matches the given SHA. func (s *ManualCommitStrategy) findSessionsForCommit(ctx context.Context, baseCommitSHA string) ([]*SessionState, error) { allStates, err := s.listAllSessionStates(ctx) diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index ae5c08450..8a06833cf 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -15,10 +15,12 @@ import ( "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/session" "github.com/entireio/cli/cmd/entire/cli/trailers" "github.com/go-git/go-git/v6" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/object" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -863,6 +865,97 @@ func TestShadowStrategy_PrepareCommitMsg_SkipsSessionWhenContentCheckFails(t *te require.Equal(t, originalMsg, string(content)) } +// TestShadowStrategy_PrepareCommitMsg_AgentRevertGetsTrailer verifies that when an +// agent runs git revert (REVERT_HEAD exists) and the session is ACTIVE, the commit +// gets a checkpoint trailer. The agent's work should be checkpointed. +func TestShadowStrategy_PrepareCommitMsg_AgentRevertGetsTrailer(t *testing.T) { + dir := setupGitRepo(t) + t.Chdir(dir) + t.Setenv("ENTIRE_TEST_TTY", "1") + + s := &ManualCommitStrategy{} + + // Create an ACTIVE session (agent is running) + err := s.InitializeSession(context.Background(), "agent-revert-session", agent.AgentTypeClaudeCode, "", "revert the change", "") + require.NoError(t, err) + + // Save a checkpoint so there's content + metaDir := filepath.Join(".entire", "metadata", "agent-revert-session") + require.NoError(t, os.MkdirAll(filepath.Join(dir, metaDir), 0o755)) + transcript := `{"type":"human","message":{"content":"revert the change"}}` + "\n" + + `{"type":"assistant","message":{"content":"I'll revert that"}}` + "\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, metaDir, "full.jsonl"), []byte(transcript), 0o644)) + + err = s.SaveStep(context.Background(), StepContext{ + SessionID: "agent-revert-session", + MetadataDir: metaDir, + ModifiedFiles: []string{"test.txt"}, + NewFiles: []string{}, + AgentType: agent.AgentTypeClaudeCode, + }) + require.NoError(t, err) + + // Simulate REVERT_HEAD existing (git revert in progress) + gitDir, err := GetGitDir(context.Background()) + require.NoError(t, err) + revertHeadPath := filepath.Join(gitDir, "REVERT_HEAD") + require.NoError(t, os.WriteFile(revertHeadPath, []byte("fake-revert-head"), 0o644)) + defer os.Remove(revertHeadPath) + + // PrepareCommitMsg should add a trailer (active session = agent doing the revert) + commitMsgFile := filepath.Join(t.TempDir(), "COMMIT_EDITMSG") + require.NoError(t, os.WriteFile(commitMsgFile, []byte("Revert \"add feature\"\n"), 0o644)) + + err = s.PrepareCommitMsg(context.Background(), commitMsgFile, "") + require.NoError(t, err) + + content, err := os.ReadFile(commitMsgFile) + require.NoError(t, err) + + _, found := trailers.ParseCheckpoint(string(content)) + assert.True(t, found, "agent-initiated revert should get a checkpoint trailer") +} + +// TestShadowStrategy_PrepareCommitMsg_UserRevertSkipped verifies that when a user +// runs git revert manually (no ACTIVE session), the commit does NOT get a trailer. +func TestShadowStrategy_PrepareCommitMsg_UserRevertSkipped(t *testing.T) { + dir := setupGitRepo(t) + t.Chdir(dir) + t.Setenv("ENTIRE_TEST_TTY", "1") + + s := &ManualCommitStrategy{} + + // Create an IDLE session (agent finished, user is now doing manual work) + err := s.InitializeSession(context.Background(), "idle-session-revert", agent.AgentTypeClaudeCode, "", "done", "") + require.NoError(t, err) + + state, err := s.loadSessionState(context.Background(), "idle-session-revert") + require.NoError(t, err) + require.NoError(t, TransitionAndLog(context.Background(), state, session.EventTurnEnd, session.TransitionContext{}, session.NoOpActionHandler{})) + require.NoError(t, s.saveSessionState(context.Background(), state)) + + // Simulate REVERT_HEAD existing + gitDir, err := GetGitDir(context.Background()) + require.NoError(t, err) + revertHeadPath := filepath.Join(gitDir, "REVERT_HEAD") + require.NoError(t, os.WriteFile(revertHeadPath, []byte("fake-revert-head"), 0o644)) + defer os.Remove(revertHeadPath) + + // PrepareCommitMsg should skip (no ACTIVE session = user doing the revert) + commitMsgFile := filepath.Join(t.TempDir(), "COMMIT_EDITMSG") + originalMsg := "Revert \"add feature\"\n" + require.NoError(t, os.WriteFile(commitMsgFile, []byte(originalMsg), 0o644)) + + err = s.PrepareCommitMsg(context.Background(), commitMsgFile, "") + require.NoError(t, err) + + content, err := os.ReadFile(commitMsgFile) + require.NoError(t, err) + + _, found := trailers.ParseCheckpoint(string(content)) + assert.False(t, found, "user-initiated revert (no active session) should not get a trailer") +} + func TestAddCheckpointTrailer_NoComment(t *testing.T) { // Test that addCheckpointTrailer adds trailer without any comment lines message := "Test commit message\n" //nolint:goconst // already present in codebase From 3d87319592b0c38092bc482671508f6697523950 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 2 Apr 2026 15:31:14 -0700 Subject: [PATCH 02/14] feat: store tree hash in checkpoint metadata Add tree_hash field to committed checkpoint metadata. Records the git tree hash of the commit being condensed, enabling fallback checkpoint lookup by tree hash when the Entire-Checkpoint trailer is stripped by git history rewrites (rebase, filter-branch, amend). Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 77773a25069e --- cmd/entire/cli/checkpoint/checkpoint.go | 10 ++++++ cmd/entire/cli/checkpoint/checkpoint_test.go | 35 +++++++++++++++++++ cmd/entire/cli/checkpoint/committed.go | 1 + .../strategy/manual_commit_condensation.go | 2 ++ .../cli/strategy/manual_commit_hooks.go | 2 ++ 5 files changed, 50 insertions(+) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index b9db5476e..414231de2 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -263,6 +263,11 @@ type WriteCommittedOptions struct { // Model is the LLM model used during the session (e.g., "claude-sonnet-4-20250514") Model string + // TreeHash is the git tree hash of the commit this checkpoint is linked to. + // Used as a fallback to re-link checkpoints when the Entire-Checkpoint trailer + // is stripped by git history rewrites (rebase, filter-branch, amend). + TreeHash string + // TurnID correlates checkpoints from the same agent turn. TurnID string @@ -391,6 +396,11 @@ type CommittedMetadata struct { // Always written to metadata (empty string when unknown) so consumers can rely on the field's presence. Model string `json:"model"` + // TreeHash is the git tree hash of the commit this checkpoint is linked to. + // Enables fallback re-linking when the Entire-Checkpoint trailer is stripped + // by git history rewrites (rebase, filter-branch, amend). + TreeHash string `json:"tree_hash,omitempty"` + // TurnID correlates checkpoints from the same agent turn. // When a turn's work spans multiple commits, each gets its own checkpoint // but they share the same TurnID for future aggregation/deduplication. diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 711bdce54..e732e1417 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -1336,6 +1336,41 @@ func TestListCommitted_MultiSessionInfo(t *testing.T) { } } +// TestWriteCommitted_IncludesTreeHash verifies that tree_hash is stored in +// checkpoint metadata and can be read back. +func TestWriteCommitted_IncludesTreeHash(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabb11223344") + treeHash := "abc123def456abc123def456abc123def456abc1" + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "tree-hash-session", + Strategy: "manual-commit", + Agent: agent.AgentTypeClaudeCode, + TreeHash: treeHash, + Transcript: []byte(`{"type":"human","message":{"content":"test"}}`), + FilesTouched: []string{"file.go"}, + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + // Read back session metadata (session index 0) + sessionContent, err := store.ReadSessionContent(context.Background(), checkpointID, 0) + if err != nil { + t.Fatalf("ReadSessionContent() error = %v", err) + } + if sessionContent.Metadata.TreeHash != treeHash { + t.Errorf("TreeHash = %q, want %q", sessionContent.Metadata.TreeHash, treeHash) + } +} + // TestWriteCommitted_SessionWithNoPrompts verifies that a session can be // written without prompts and still be read correctly. func TestWriteCommitted_SessionWithNoPrompts(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index f2b6633f7..43008b9d6 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -374,6 +374,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom FilesTouched: opts.FilesTouched, Agent: opts.Agent, Model: opts.Model, + TreeHash: opts.TreeHash, TurnID: opts.TurnID, IsTask: opts.IsTask, ToolUseID: opts.ToolUseID, diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index b84d6458c..519b44b94 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -97,6 +97,7 @@ type condenseOpts struct { parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection headCommitHash string // HEAD commit hash (passed through for attribution) allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session) + treeHash string // Tree hash of the commit being condensed (for fallback linkage after history rewrites) } // CondenseSession condenses a session's shadow branch to permanent storage. @@ -229,6 +230,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re AuthorEmail: authorEmail, Agent: state.AgentType, Model: state.ModelName, + TreeHash: o.treeHash, TurnID: state.TurnID, TranscriptIdentifierAtStart: state.TranscriptIdentifierAtStart, CheckpointTranscriptStart: state.CheckpointTranscriptStart, diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 9cc417774..1e4f8b1b5 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -666,6 +666,7 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, + treeHash: h.commit.TreeHash.String(), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -695,6 +696,7 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, + treeHash: h.commit.TreeHash.String(), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) From 8490173a7fc33226f08215af1d79e68736be64ec Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 2 Apr 2026 18:57:58 -0700 Subject: [PATCH 03/14] fix: add debug logging + remove unrelated files - Add debug logging to hasActiveSessionInWorktree error paths - Remove unrelated files (greetings.md, agent configs) from PR Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: bacd9b68b1c0 --- cmd/entire/cli/strategy/manual_commit_session.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmd/entire/cli/strategy/manual_commit_session.go b/cmd/entire/cli/strategy/manual_commit_session.go index bb7bc9f7b..cecc4ffee 100644 --- a/cmd/entire/cli/strategy/manual_commit_session.go +++ b/cmd/entire/cli/strategy/manual_commit_session.go @@ -3,11 +3,13 @@ package strategy import ( "context" "fmt" + "log/slog" "time" "github.com/entireio/cli/cmd/entire/cli/agent/types" "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/session" "github.com/entireio/cli/cmd/entire/cli/versioninfo" @@ -153,12 +155,17 @@ func (s *ManualCommitStrategy) findSessionsForWorktree(ctx context.Context, work // cherry-pick) from user-initiated ones. Agent-initiated operations should be // checkpointed; user-initiated ones should be skipped. func (s *ManualCommitStrategy) hasActiveSessionInWorktree(ctx context.Context) bool { + logCtx := logging.WithComponent(ctx, "checkpoint") worktreePath, err := paths.WorktreeRoot(ctx) if err != nil { + logging.Debug(logCtx, "hasActiveSessionInWorktree: failed to get worktree root", + slog.String("error", err.Error())) return false } sessions, err := s.findSessionsForWorktree(ctx, worktreePath) if err != nil { + logging.Debug(logCtx, "hasActiveSessionInWorktree: failed to find sessions", + slog.String("error", err.Error())) return false } for _, state := range sessions { From 61e09655640971c1768cd683f8aafd80d9de8632 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 6 Apr 2026 21:54:02 -0700 Subject: [PATCH 04/14] feat: add LinkageMetadata struct to CheckpointSummary Define content-based linkage signals (tree_hash, patch_id, files_changed_hash, session_files_hash) for re-linking checkpoints after git history rewrites. Stored at checkpoint level, not per-session. Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 4661e8c50610 --- cmd/entire/cli/checkpoint/checkpoint.go | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 414231de2..5b6ab91db 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -302,6 +302,10 @@ type WriteCommittedOptions struct { // - the checkpoint predates the summarization feature Summary *Summary + // Linkage contains content-based signals for re-linking after history rewrites. + // Written to the root-level CheckpointSummary, not per-session metadata. + Linkage *LinkageMetadata + // CompactTranscript is the Entire Transcript Format (transcript.jsonl) bytes. // Written to v2 /main ref alongside metadata. May be nil if compaction // was not performed (unknown agent, compaction error, empty transcript). @@ -455,6 +459,33 @@ type SessionFilePaths struct { Prompt string `json:"prompt"` } +// LinkageMetadata contains content-based signals for re-linking checkpoints +// after git history rewrites (rebase, reword, amend, filter-branch). +// Stored at the checkpoint level (root metadata.json), not per-session. +// +// The web uses a fallback chain when a commit arrives without an Entire-Checkpoint trailer: +// 1. TreeHash match - covers: reword, amend (msg-only), filter-branch (msg-only) +// 2. PatchID match - covers: clean rebase, cherry-pick to other branch +// 3. FilesChangedHash - covers: rebase with conflicts in non-agent files +// 4. SessionFilesHash - covers: local squash merge (cumulative agent files) +type LinkageMetadata struct { + // TreeHash is the git tree hash of the commit (full repo snapshot). + // Survives rewrites that don't change code (reword, msg-only amend). + TreeHash string `json:"tree_hash"` + + // PatchID is the git patch-id of the commit's diff (parent->HEAD). + // Survives rebase (same diff replayed on different base). + PatchID string `json:"patch_id"` + + // FilesChangedHash is SHA256 of sorted file:blob pairs for files changed in this commit. + // Survives rebase even with conflicts in other files (only agent-file blobs matter). + FilesChangedHash string `json:"files_changed_hash"` + + // SessionFilesHash is SHA256 of sorted file:blob pairs for ALL files touched across the session. + // Survives local squash merges where individual patch IDs don't match the combined diff. + SessionFilesHash string `json:"session_files_hash,omitempty"` +} + // CheckpointSummary is the root-level metadata.json for a checkpoint. // It contains aggregated statistics from all sessions and a map of session IDs // to their file paths. Session-specific data (including initial_attribution) @@ -483,6 +514,7 @@ type CheckpointSummary struct { Sessions []SessionFilePaths `json:"sessions"` TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"` CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"` + Linkage *LinkageMetadata `json:"linkage,omitempty"` } // SessionMetrics contains hook-provided session metrics from agents that report From cce8e642fd55ef01a81338a8500f288c7a556944 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 6 Apr 2026 21:54:08 -0700 Subject: [PATCH 05/14] test: add GitRevParse, GitCheckout, GitRebase testutil helpers Needed by linkage signal tests that verify patch ID stability across rebase. Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 4129c08c80b0 --- cmd/entire/cli/testutil/testutil.go | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/cmd/entire/cli/testutil/testutil.go b/cmd/entire/cli/testutil/testutil.go index 474e6f137..e199d0d57 100644 --- a/cmd/entire/cli/testutil/testutil.go +++ b/cmd/entire/cli/testutil/testutil.go @@ -325,3 +325,38 @@ func GitIsolatedEnv() []string { "GIT_CONFIG_SYSTEM="+gitEmptyConfigPath(), // Isolate from system git config ) } + +// GitRevParse returns the full commit hash for a given ref (e.g., "HEAD", "HEAD~1"). +func GitRevParse(t *testing.T, repoDir, ref string) string { + t.Helper() + //nolint:noctx // test code, no context needed for git rev-parse + cmd := exec.Command("git", "rev-parse", ref) + cmd.Dir = repoDir + out, err := cmd.Output() + if err != nil { + t.Fatalf("git rev-parse %s failed: %v", ref, err) + } + return strings.TrimSpace(string(out)) +} + +// GitCheckout checks out an existing branch. +func GitCheckout(t *testing.T, repoDir, branchName string) { + t.Helper() + //nolint:noctx // test code, no context needed for git checkout + cmd := exec.Command("git", "checkout", branchName) + cmd.Dir = repoDir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git checkout %s failed: %v\n%s", branchName, err, string(out)) + } +} + +// GitRebase rebases the current branch onto the given base. +func GitRebase(t *testing.T, repoDir, onto string) { + t.Helper() + //nolint:noctx // test code, no context needed for git rebase + cmd := exec.Command("git", "rebase", onto) + cmd.Dir = repoDir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git rebase %s failed: %v\n%s", onto, err, string(out)) + } +} From e8242ee1434dca6183cf39681b0c5861e9e07210 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 6 Apr 2026 22:41:39 -0700 Subject: [PATCH 06/14] feat: add ComputePatchID and ComputeFilesChangedHash to gitops ComputePatchID: git patch-id of the commit diff, survives rebase. ComputeFilesChangedHash: SHA256 of sorted file:blob pairs, survives rebase even with conflicts in non-agent files. Uses single git ls-tree call for all files (O(1) subprocess). Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: c100b592e3a0 --- cmd/entire/cli/gitops/diff.go | 115 ++++++++++++++++ cmd/entire/cli/gitops/diff_test.go | 211 +++++++++++++++++++++++++++++ 2 files changed, 326 insertions(+) diff --git a/cmd/entire/cli/gitops/diff.go b/cmd/entire/cli/gitops/diff.go index 34d8e4c3a..ee0854b80 100644 --- a/cmd/entire/cli/gitops/diff.go +++ b/cmd/entire/cli/gitops/diff.go @@ -3,8 +3,10 @@ package gitops import ( "bytes" "context" + "crypto/sha256" "fmt" "os/exec" + "sort" "strings" ) @@ -125,3 +127,116 @@ func extractStatus(statusLine string) byte { } return statusField[0] } + +// ComputePatchID computes the git patch-id for the diff between two commits. +// Patch IDs are content hashes of the diff itself, independent of commit metadata +// and parent position. This means the same code change produces the same patch ID +// even after rebase (which changes the parent/base but not the diff content). +// +// For initial commits (parentHash is empty), uses --root mode. +// Returns a 40-char hex SHA1 string, or empty string for empty diffs. +func ComputePatchID(ctx context.Context, repoDir, parentHash, commitHash string) (string, error) { + var diffCmd *exec.Cmd + if parentHash == "" { + diffCmd = exec.CommandContext(ctx, "git", "diff-tree", "--root", "-p", commitHash) + } else { + diffCmd = exec.CommandContext(ctx, "git", "diff-tree", "-p", parentHash, commitHash) + } + diffCmd.Dir = repoDir + + var diffOut, diffErr bytes.Buffer + diffCmd.Stdout = &diffOut + diffCmd.Stderr = &diffErr + + if err := diffCmd.Run(); err != nil { + return "", fmt.Errorf("git diff-tree failed: %w: %s", err, strings.TrimSpace(diffErr.String())) + } + + if diffOut.Len() == 0 { + return "", nil + } + + patchIDCmd := exec.CommandContext(ctx, "git", "patch-id", "--stable") + patchIDCmd.Dir = repoDir + patchIDCmd.Stdin = &diffOut + + var patchOut, patchErr bytes.Buffer + patchIDCmd.Stdout = &patchOut + patchIDCmd.Stderr = &patchErr + + if err := patchIDCmd.Run(); err != nil { + return "", fmt.Errorf("git patch-id failed: %w: %s", err, strings.TrimSpace(patchErr.String())) + } + + output := strings.TrimSpace(patchOut.String()) + if output == "" { + return "", nil + } + fields := strings.Fields(output) + if len(fields) < 1 { + return "", fmt.Errorf("unexpected git patch-id output: %q", output) + } + return fields[0], nil +} + +// ComputeFilesChangedHash computes a SHA256 hash of the given files' blob hashes +// at the specified commit. The hash is computed from sorted "filepath:blobhash" pairs, +// making it independent of input order and stable across rebases. +// +// Uses a single git ls-tree call for all files (O(1) subprocess, not O(N)). +// Returns a 64-char hex SHA256 string, or empty string if no files. +func ComputeFilesChangedHash(ctx context.Context, repoDir, commitHash string, filePaths []string) (string, error) { + if len(filePaths) == 0 { + return "", nil + } + + args := []string{"ls-tree", commitHash, "--"} + args = append(args, filePaths...) + cmd := exec.CommandContext(ctx, "git", args...) + cmd.Dir = repoDir + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("git ls-tree failed: %w: %s", err, strings.TrimSpace(stderr.String())) + } + + // Parse ls-tree output: " \t" per line + blobMap := make(map[string]string) + for _, line := range strings.Split(strings.TrimSpace(stdout.String()), "\n") { + if line == "" { + continue + } + tabIdx := strings.IndexByte(line, '\t') + if tabIdx == -1 { + continue + } + meta := line[:tabIdx] + path := line[tabIdx+1:] + fields := strings.Fields(meta) + if len(fields) < 3 { + continue + } + blobMap[path] = fields[2] + } + + sorted := make([]string, len(filePaths)) + copy(sorted, filePaths) + sort.Strings(sorted) + + var pairs []string + for _, fp := range sorted { + if blobHash, ok := blobMap[fp]; ok { + pairs = append(pairs, fp+":"+blobHash) + } + } + + if len(pairs) == 0 { + return "", nil + } + + h := sha256.Sum256([]byte(strings.Join(pairs, "\n"))) + return fmt.Sprintf("%x", h), nil +} diff --git a/cmd/entire/cli/gitops/diff_test.go b/cmd/entire/cli/gitops/diff_test.go index 33d410662..6b52bacb6 100644 --- a/cmd/entire/cli/gitops/diff_test.go +++ b/cmd/entire/cli/gitops/diff_test.go @@ -84,6 +84,44 @@ func gitCommit(t *testing.T, dir, msg string) { } } +func revParse(t *testing.T, dir, ref string) string { + t.Helper() + cmd := exec.CommandContext(context.Background(), "git", "rev-parse", ref) + cmd.Dir = dir + out, err := cmd.Output() + if err != nil { + t.Fatalf("git rev-parse %s failed: %v", ref, err) + } + return string(out[:len(out)-1]) +} + +func gitCheckoutBranch(t *testing.T, dir, branchName string) { + t.Helper() + cmd := exec.CommandContext(context.Background(), "git", "checkout", "-b", branchName) + cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git checkout -b %s failed: %v\n%s", branchName, err, out) + } +} + +func gitCheckout(t *testing.T, dir, ref string) { + t.Helper() + cmd := exec.CommandContext(context.Background(), "git", "checkout", ref) + cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git checkout %s failed: %v\n%s", ref, err, out) + } +} + +func gitRebase(t *testing.T, dir, onto string) { + t.Helper() + cmd := exec.CommandContext(context.Background(), "git", "rebase", onto) + cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git rebase %s failed: %v\n%s", onto, err, out) + } +} + func TestDiffTreeFiles_NormalCommit(t *testing.T) { t.Parallel() dir := initTestRepo(t) @@ -405,3 +443,176 @@ func TestExtractStatus(t *testing.T) { }) } } + +func TestComputePatchID(t *testing.T) { + t.Parallel() + dir := initTestRepo(t) + + writeFile(t, dir, "file.txt", "initial") + gitAdd(t, dir, "file.txt") + gitCommit(t, dir, "initial") + + writeFile(t, dir, "file.txt", "modified") + gitAdd(t, dir, "file.txt") + gitCommit(t, dir, "modify file") + + head := revParse(t, dir, "HEAD") + parent := revParse(t, dir, "HEAD~1") + + patchID, err := ComputePatchID(context.Background(), dir, parent, head) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if patchID == "" { + t.Fatal("expected non-empty patch ID") + } + if len(patchID) != 40 { + t.Fatalf("expected 40-char hex, got %d chars: %s", len(patchID), patchID) + } +} + +func TestComputePatchID_StableAcrossRebase(t *testing.T) { + t.Parallel() + dir := initTestRepo(t) + + writeFile(t, dir, "base.txt", "base") + gitAdd(t, dir, "base.txt") + gitCommit(t, dir, "base") + + gitCheckoutBranch(t, dir, "feature") + writeFile(t, dir, "feature.txt", "feature work") + gitAdd(t, dir, "feature.txt") + gitCommit(t, dir, "add feature") + + featureHead := revParse(t, dir, "HEAD") + featureParent := revParse(t, dir, "HEAD~1") + + patchIDBefore, err := ComputePatchID(context.Background(), dir, featureParent, featureHead) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + gitCheckout(t, dir, "main") + writeFile(t, dir, "other.txt", "other work") + gitAdd(t, dir, "other.txt") + gitCommit(t, dir, "unrelated work on main") + + gitCheckout(t, dir, "feature") + gitRebase(t, dir, "main") + + rebasedHead := revParse(t, dir, "HEAD") + rebasedParent := revParse(t, dir, "HEAD~1") + + patchIDAfter, err := ComputePatchID(context.Background(), dir, rebasedParent, rebasedHead) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if patchIDBefore != patchIDAfter { + t.Errorf("patch ID should survive clean rebase: before=%s, after=%s", patchIDBefore, patchIDAfter) + } +} + +func TestComputePatchID_InitialCommit(t *testing.T) { + t.Parallel() + dir := initTestRepo(t) + + writeFile(t, dir, "file.txt", "initial") + gitAdd(t, dir, "file.txt") + gitCommit(t, dir, "initial") + + head := revParse(t, dir, "HEAD") + + patchID, err := ComputePatchID(context.Background(), dir, "", head) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if patchID == "" { + t.Fatal("expected non-empty patch ID for initial commit") + } +} + +func TestComputeFilesChangedHash(t *testing.T) { + t.Parallel() + dir := initTestRepo(t) + + writeFile(t, dir, "file.txt", "initial") + gitAdd(t, dir, "file.txt") + gitCommit(t, dir, "initial") + + writeFile(t, dir, "file.txt", "modified") + writeFile(t, dir, "new.txt", "new file") + gitAdd(t, dir, "file.txt", "new.txt") + gitCommit(t, dir, "changes") + + head := revParse(t, dir, "HEAD") + + hash, err := ComputeFilesChangedHash(context.Background(), dir, head, []string{"file.txt", "new.txt"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if hash == "" { + t.Fatal("expected non-empty hash") + } + if len(hash) != 64 { + t.Fatalf("expected 64-char SHA256 hex, got %d chars: %s", len(hash), hash) + } + + // Same inputs in different order produce same hash + hash2, err := ComputeFilesChangedHash(context.Background(), dir, head, []string{"new.txt", "file.txt"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if hash != hash2 { + t.Errorf("hash should be stable regardless of input order: %s != %s", hash, hash2) + } +} + +func TestComputeFilesChangedHash_StableAcrossRebase(t *testing.T) { + t.Parallel() + dir := initTestRepo(t) + + writeFile(t, dir, "base.txt", "base") + gitAdd(t, dir, "base.txt") + gitCommit(t, dir, "base") + + gitCheckoutBranch(t, dir, "feature") + writeFile(t, dir, "feature.txt", "feature work") + gitAdd(t, dir, "feature.txt") + gitCommit(t, dir, "add feature") + + headBefore := revParse(t, dir, "HEAD") + hashBefore, err := ComputeFilesChangedHash(context.Background(), dir, headBefore, []string{"feature.txt"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + gitCheckout(t, dir, "main") + writeFile(t, dir, "other.txt", "other") + gitAdd(t, dir, "other.txt") + gitCommit(t, dir, "other work") + + gitCheckout(t, dir, "feature") + gitRebase(t, dir, "main") + + headAfter := revParse(t, dir, "HEAD") + hashAfter, err := ComputeFilesChangedHash(context.Background(), dir, headAfter, []string{"feature.txt"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if hashBefore != hashAfter { + t.Errorf("files-changed hash should survive clean rebase: before=%s, after=%s", hashBefore, hashAfter) + } +} + +func TestComputeFilesChangedHash_EmptyFiles(t *testing.T) { + t.Parallel() + hash, err := ComputeFilesChangedHash(context.Background(), "/tmp", "HEAD", nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if hash != "" { + t.Errorf("expected empty hash for nil files, got %s", hash) + } +} From 1471da74326c5f8f94d882561fd2c58bddbf2ffc Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 6 Apr 2026 23:05:46 -0700 Subject: [PATCH 07/14] feat: wire multi-signal linkage through condensation pipeline Replace per-session TreeHash with checkpoint-level LinkageMetadata containing tree_hash, patch_id, files_changed_hash, and session_files_hash. Computed in PostCommit handlers, passed through condenseOpts to CondenseSession, written to CheckpointSummary on entire/checkpoints/v1. Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 75ce05cfe11b --- cmd/entire/cli/checkpoint/checkpoint.go | 10 ---- cmd/entire/cli/checkpoint/checkpoint_test.go | 35 ------------ cmd/entire/cli/checkpoint/committed.go | 2 +- .../strategy/manual_commit_condensation.go | 6 +- .../cli/strategy/manual_commit_hooks.go | 55 ++++++++++++++++++- 5 files changed, 57 insertions(+), 51 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 5b6ab91db..d9c920eb5 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -263,11 +263,6 @@ type WriteCommittedOptions struct { // Model is the LLM model used during the session (e.g., "claude-sonnet-4-20250514") Model string - // TreeHash is the git tree hash of the commit this checkpoint is linked to. - // Used as a fallback to re-link checkpoints when the Entire-Checkpoint trailer - // is stripped by git history rewrites (rebase, filter-branch, amend). - TreeHash string - // TurnID correlates checkpoints from the same agent turn. TurnID string @@ -400,11 +395,6 @@ type CommittedMetadata struct { // Always written to metadata (empty string when unknown) so consumers can rely on the field's presence. Model string `json:"model"` - // TreeHash is the git tree hash of the commit this checkpoint is linked to. - // Enables fallback re-linking when the Entire-Checkpoint trailer is stripped - // by git history rewrites (rebase, filter-branch, amend). - TreeHash string `json:"tree_hash,omitempty"` - // TurnID correlates checkpoints from the same agent turn. // When a turn's work spans multiple commits, each gets its own checkpoint // but they share the same TurnID for future aggregation/deduplication. diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index e732e1417..711bdce54 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -1336,41 +1336,6 @@ func TestListCommitted_MultiSessionInfo(t *testing.T) { } } -// TestWriteCommitted_IncludesTreeHash verifies that tree_hash is stored in -// checkpoint metadata and can be read back. -func TestWriteCommitted_IncludesTreeHash(t *testing.T) { - t.Parallel() - repo, _ := setupBranchTestRepo(t) - store := NewGitStore(repo) - checkpointID := id.MustCheckpointID("aabb11223344") - treeHash := "abc123def456abc123def456abc123def456abc1" - - err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: "tree-hash-session", - Strategy: "manual-commit", - Agent: agent.AgentTypeClaudeCode, - TreeHash: treeHash, - Transcript: []byte(`{"type":"human","message":{"content":"test"}}`), - FilesTouched: []string{"file.go"}, - CheckpointsCount: 1, - AuthorName: "Test Author", - AuthorEmail: "test@example.com", - }) - if err != nil { - t.Fatalf("WriteCommitted() error = %v", err) - } - - // Read back session metadata (session index 0) - sessionContent, err := store.ReadSessionContent(context.Background(), checkpointID, 0) - if err != nil { - t.Fatalf("ReadSessionContent() error = %v", err) - } - if sessionContent.Metadata.TreeHash != treeHash { - t.Errorf("TreeHash = %q, want %q", sessionContent.Metadata.TreeHash, treeHash) - } -} - // TestWriteCommitted_SessionWithNoPrompts verifies that a session can be // written without prompts and still be read correctly. func TestWriteCommitted_SessionWithNoPrompts(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 43008b9d6..f429a7ab3 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -374,7 +374,6 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom FilesTouched: opts.FilesTouched, Agent: opts.Agent, Model: opts.Model, - TreeHash: opts.TreeHash, TurnID: opts.TurnID, IsTask: opts.IsTask, ToolUseID: opts.ToolUseID, @@ -435,6 +434,7 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s Sessions: sessions, TokenUsage: tokenUsage, CombinedAttribution: combinedAttribution, + Linkage: opts.Linkage, } metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ") diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index 519b44b94..72f530760 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -96,8 +96,8 @@ type condenseOpts struct { repoDir string // Repository worktree path for git CLI commands parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection headCommitHash string // HEAD commit hash (passed through for attribution) - allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session) - treeHash string // Tree hash of the commit being condensed (for fallback linkage after history rewrites) + allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session) + linkage *cpkg.LinkageMetadata // Content-based signals for re-linking after history rewrites } // CondenseSession condenses a session's shadow branch to permanent storage. @@ -230,7 +230,7 @@ func (s *ManualCommitStrategy) CondenseSession(ctx context.Context, repo *git.Re AuthorEmail: authorEmail, Agent: state.AgentType, Model: state.ModelName, - TreeHash: o.treeHash, + Linkage: o.linkage, TurnID: state.TurnID, TranscriptIdentifierAtStart: state.TranscriptIdentifierAtStart, CheckpointTranscriptStart: state.CheckpointTranscriptStart, diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 1e4f8b1b5..a9e901a2d 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -645,6 +645,57 @@ func (h *postCommitActionHandler) parentCommitHash() string { return "" } +// computeLinkage computes content-based linkage signals for re-linking +// checkpoints after git history rewrites. Called at PostCommit time when +// all commit data is available. +func (h *postCommitActionHandler) computeLinkage(ctx context.Context, sessionFilesTouched []string) *checkpoint.LinkageMetadata { + logCtx := logging.WithComponent(ctx, "checkpoint") + linkage := &checkpoint.LinkageMetadata{ + TreeHash: h.commit.TreeHash.String(), + } + + // Compute patch ID (diff content hash — survives rebase) + patchID, err := gitops.ComputePatchID(ctx, h.repoDir, h.parentCommitHash(), h.newHead) + if err != nil { + logging.Warn(logCtx, "failed to compute patch ID for linkage", + slog.String("commit", h.newHead), + slog.String("error", err.Error()), + ) + } else { + linkage.PatchID = patchID + } + + // Compute files-changed hash (committed files' blob hashes — survives rebase + other-file conflicts) + committedFiles := make([]string, 0, len(h.committedFileSet)) + for f := range h.committedFileSet { + committedFiles = append(committedFiles, f) + } + fch, err := gitops.ComputeFilesChangedHash(ctx, h.repoDir, h.newHead, committedFiles) + if err != nil { + logging.Warn(logCtx, "failed to compute files-changed hash for linkage", + slog.String("commit", h.newHead), + slog.String("error", err.Error()), + ) + } else { + linkage.FilesChangedHash = fch + } + + // Compute session files hash (all files touched across session — survives squash merge) + if len(sessionFilesTouched) > 0 { + sfh, err := gitops.ComputeFilesChangedHash(ctx, h.repoDir, h.newHead, sessionFilesTouched) + if err != nil { + logging.Warn(logCtx, "failed to compute session files hash for linkage", + slog.String("commit", h.newHead), + slog.String("error", err.Error()), + ) + } else { + linkage.SessionFilesHash = sfh + } + } + + return linkage +} + func (h *postCommitActionHandler) HandleCondense(state *session.State) error { logCtx := logging.WithComponent(h.ctx, "checkpoint") shouldCondense := h.shouldCondenseWithOverlapCheck(state.Phase.IsActive(), state.LastInteractionTime) @@ -666,7 +717,7 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, - treeHash: h.commit.TreeHash.String(), + linkage: h.computeLinkage(h.ctx, state.FilesTouched), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -696,7 +747,7 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, - treeHash: h.commit.TreeHash.String(), + linkage: h.computeLinkage(h.ctx, state.FilesTouched), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) From 99bcd27417b4f3d2e50a49cc5f528e6d0e02524a Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 6 Apr 2026 23:08:10 -0700 Subject: [PATCH 08/14] test: add linkage round-trip tests for WriteCommitted/ReadCommitted Verify LinkageMetadata is stored in CheckpointSummary and readable. Also verify nil linkage is omitted (backward compat with old checkpoints). Part of fix for #834. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 10dae87903d7 --- cmd/entire/cli/checkpoint/checkpoint_test.go | 81 ++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 711bdce54..e741657b4 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -3624,6 +3624,87 @@ func TestWriteCommitted_SubagentTranscript_JSONLFallback(t *testing.T) { } } +func TestWriteCommitted_IncludesLinkage(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("a1b2c3d4e5f6") + + linkage := &LinkageMetadata{ + TreeHash: "abc123def456abc123def456abc123def456abc1", + PatchID: "def456abc123def456abc123def456abc123def4", + FilesChangedHash: "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069", + SessionFilesHash: "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", + } + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "linkage-test-session", + Strategy: "manual-commit", + Agent: agent.AgentTypeClaudeCode, + Linkage: linkage, + Transcript: []byte(`{"type":"human","message":{"content":"test"}}` + "\n"), + FilesTouched: []string{"file.go"}, + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + // Read back the CheckpointSummary + summary, err := store.ReadCommitted(context.Background(), checkpointID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + if summary.Linkage == nil { + t.Fatal("Linkage should be present in CheckpointSummary") + } + if summary.Linkage.TreeHash != linkage.TreeHash { + t.Errorf("TreeHash = %q, want %q", summary.Linkage.TreeHash, linkage.TreeHash) + } + if summary.Linkage.PatchID != linkage.PatchID { + t.Errorf("PatchID = %q, want %q", summary.Linkage.PatchID, linkage.PatchID) + } + if summary.Linkage.FilesChangedHash != linkage.FilesChangedHash { + t.Errorf("FilesChangedHash = %q, want %q", summary.Linkage.FilesChangedHash, linkage.FilesChangedHash) + } + if summary.Linkage.SessionFilesHash != linkage.SessionFilesHash { + t.Errorf("SessionFilesHash = %q, want %q", summary.Linkage.SessionFilesHash, linkage.SessionFilesHash) + } +} + +func TestWriteCommitted_NilLinkageOmitted(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("a0b1c2d3e4f5") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "no-linkage-session", + Strategy: "manual-commit", + Agent: agent.AgentTypeClaudeCode, + Transcript: []byte(`{"type":"human","message":{"content":"test"}}` + "\n"), + FilesTouched: []string{"file.go"}, + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + summary, err := store.ReadCommitted(context.Background(), checkpointID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + if summary.Linkage != nil { + t.Errorf("Linkage should be nil when not provided, got %+v", summary.Linkage) + } +} + func TestWriteTemporaryTask_SubagentTranscript_RedactsSecrets(t *testing.T) { // Cannot use t.Parallel() because t.Chdir is required for paths.WorktreeRoot() tempDir := t.TempDir() From b41e1f7b84d527f815fd02c3744601b7ebd33ad4 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 6 Apr 2026 23:21:51 -0700 Subject: [PATCH 09/14] fix: restore nolint:ireturn comments and add encoding/hex import gofmt stripped nolint directives from capabilities.go. Restore from main. Add encoding/hex import for ComputeFilesChangedHash. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: c26d3dce1d32 --- cmd/entire/cli/gitops/diff.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/gitops/diff.go b/cmd/entire/cli/gitops/diff.go index ee0854b80..e007f37cd 100644 --- a/cmd/entire/cli/gitops/diff.go +++ b/cmd/entire/cli/gitops/diff.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "crypto/sha256" + "encoding/hex" "fmt" "os/exec" "sort" @@ -238,5 +239,5 @@ func ComputeFilesChangedHash(ctx context.Context, repoDir, commitHash string, fi } h := sha256.Sum256([]byte(strings.Join(pairs, "\n"))) - return fmt.Sprintf("%x", h), nil + return hex.EncodeToString(h[:]), nil } From 7c3da599dc5e3dd67335d4af9bf1042ed6f56e2f Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 7 Apr 2026 10:36:52 -0700 Subject: [PATCH 10/14] fix: restore nolint:ireturn comments and add git user config for CI - Restore nolint:ireturn on capabilities.go (gofmt stripped them) - Set user.name/email in gitops initTestRepo for CI compatibility (git rebase fails without repo-level config on CI runners) Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: f1ca53b63c79 --- cmd/entire/cli/gitops/diff_test.go | 2 ++ .../cli/strategy/manual_commit_condensation.go | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cmd/entire/cli/gitops/diff_test.go b/cmd/entire/cli/gitops/diff_test.go index 6b52bacb6..56dcc9ef7 100644 --- a/cmd/entire/cli/gitops/diff_test.go +++ b/cmd/entire/cli/gitops/diff_test.go @@ -31,6 +31,8 @@ func initTestRepo(t *testing.T) string { } run("init", "-b", "main") + run("config", "user.name", "Test") + run("config", "user.email", "test@test.com") run("config", "commit.gpgsign", "false") return dir diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index 72f530760..a0ae0d3c3 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -90,13 +90,13 @@ func (s *ManualCommitStrategy) getCheckpointLog(ctx context.Context, checkpointI // condenseOpts provides pre-resolved git objects to avoid redundant reads. type condenseOpts struct { - shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) - headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) - parentTree *object.Tree // Pre-resolved parent tree (nil for initial commits, for consistent non-agent line counting) - repoDir string // Repository worktree path for git CLI commands - parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection - headCommitHash string // HEAD commit hash (passed through for attribution) - allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session) + shadowRef *plumbing.Reference // Pre-resolved shadow branch ref (nil = resolve from repo) + headTree *object.Tree // Pre-resolved HEAD tree (passed through to calculateSessionAttributions) + parentTree *object.Tree // Pre-resolved parent tree (nil for initial commits, for consistent non-agent line counting) + repoDir string // Repository worktree path for git CLI commands + parentCommitHash string // HEAD's first parent hash for per-commit non-agent file detection + headCommitHash string // HEAD commit hash (passed through for attribution) + allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session exclusion (nil = single-session) linkage *cpkg.LinkageMetadata // Content-based signals for re-linking after history rewrites } From 7e4d536596d229a5baf7ded985263f6dddae99d0 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 7 Apr 2026 16:42:28 -0700 Subject: [PATCH 11/14] fix: address review feedback for multi-signal linkage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add omitempty to all LinkageMetadata JSON tags for consistency - Return error for malformed git ls-tree lines instead of silent skip - Compute commit-level linkage once (not per-session) via baseLinkage cache; only SessionFilesHash varies per session - Add code comment explaining deferred condensation for agent reverts - Add integration test verifying full linkage pipeline (PostCommit → condensation → ReadCommitted with all four signals populated) Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: e3539c5bfa31 --- cmd/entire/cli/checkpoint/checkpoint.go | 6 +-- cmd/entire/cli/gitops/diff.go | 4 +- .../cli/strategy/manual_commit_hooks.go | 52 +++++++++++------- cmd/entire/cli/strategy/manual_commit_test.go | 54 +++++++++++++++++++ 4 files changed, 93 insertions(+), 23 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index d9c920eb5..410138199 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -461,15 +461,15 @@ type SessionFilePaths struct { type LinkageMetadata struct { // TreeHash is the git tree hash of the commit (full repo snapshot). // Survives rewrites that don't change code (reword, msg-only amend). - TreeHash string `json:"tree_hash"` + TreeHash string `json:"tree_hash,omitempty"` // PatchID is the git patch-id of the commit's diff (parent->HEAD). // Survives rebase (same diff replayed on different base). - PatchID string `json:"patch_id"` + PatchID string `json:"patch_id,omitempty"` // FilesChangedHash is SHA256 of sorted file:blob pairs for files changed in this commit. // Survives rebase even with conflicts in other files (only agent-file blobs matter). - FilesChangedHash string `json:"files_changed_hash"` + FilesChangedHash string `json:"files_changed_hash,omitempty"` // SessionFilesHash is SHA256 of sorted file:blob pairs for ALL files touched across the session. // Survives local squash merges where individual patch IDs don't match the combined diff. diff --git a/cmd/entire/cli/gitops/diff.go b/cmd/entire/cli/gitops/diff.go index e007f37cd..14d0a31a7 100644 --- a/cmd/entire/cli/gitops/diff.go +++ b/cmd/entire/cli/gitops/diff.go @@ -212,13 +212,13 @@ func ComputeFilesChangedHash(ctx context.Context, repoDir, commitHash string, fi } tabIdx := strings.IndexByte(line, '\t') if tabIdx == -1 { - continue + return "", fmt.Errorf("unexpected ls-tree output (no tab separator): %q", line) } meta := line[:tabIdx] path := line[tabIdx+1:] fields := strings.Fields(meta) if len(fields) < 3 { - continue + return "", fmt.Errorf("unexpected ls-tree output (incomplete metadata): %q", line) } blobMap[path] = fields[2] } diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index a9e901a2d..0f586b07e 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -313,6 +313,12 @@ func (s *ManualCommitStrategy) PrepareCommitMsg(ctx context.Context, commitMsgFi // session is ACTIVE. When an agent runs git revert/cherry-pick as part of // its work, the commit should be checkpointed. When the user does it // manually (no active session), skip as before. + // + // Note: The trailer is added here, but condensation is deferred. PostCommit's + // state machine skips ActionCondense when IsRebaseInProgress=true (sequence + // operation files like REVERT_HEAD still exist during post-commit). The + // checkpoint data is preserved on the shadow branch and will be condensed + // on the next normal commit or when the session ends (TurnEnd/Stop). if isGitSequenceOperation(ctx) { if !s.hasActiveSessionInWorktree(ctx) { logging.Debug(logCtx, "prepare-commit-msg: skipped during git sequence operation (no active session)", @@ -627,11 +633,12 @@ type postCommitActionHandler struct { // Cached git objects — resolved once per PostCommit invocation to avoid // redundant reads across filesOverlapWithContent, filesWithRemainingAgentChanges, // CondenseSession, and calculateSessionAttributions. - headTree *object.Tree // HEAD commit tree (shared across all sessions) - parentTree *object.Tree // HEAD's first parent tree (shared, nil for initial commits) - shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) - shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) - allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session attribution + headTree *object.Tree // HEAD commit tree (shared across all sessions) + parentTree *object.Tree // HEAD's first parent tree (shared, nil for initial commits) + shadowRef *plumbing.Reference // Per-session shadow branch ref (nil if branch doesn't exist) + shadowTree *object.Tree // Per-session shadow commit tree (nil if branch doesn't exist) + allAgentFiles map[string]struct{} // Union of all sessions' FilesTouched for cross-session attribution + baseLinkage *checkpoint.LinkageMetadata // Commit-level linkage signals (computed once, shared across sessions) // Output: set by handler methods, read by caller after TransitionAndLog. condensed bool @@ -645,12 +652,13 @@ func (h *postCommitActionHandler) parentCommitHash() string { return "" } -// computeLinkage computes content-based linkage signals for re-linking -// checkpoints after git history rewrites. Called at PostCommit time when -// all commit data is available. -func (h *postCommitActionHandler) computeLinkage(ctx context.Context, sessionFilesTouched []string) *checkpoint.LinkageMetadata { +// computeBaseLinkage computes commit-level linkage signals (tree hash, patch ID, +// files-changed hash). These are identical across sessions since they depend on +// the commit, not the session. Called once per PostCommit invocation and cached +// on the handler's baseLinkage field. +func (h *postCommitActionHandler) computeBaseLinkage(ctx context.Context) { logCtx := logging.WithComponent(ctx, "checkpoint") - linkage := &checkpoint.LinkageMetadata{ + h.baseLinkage = &checkpoint.LinkageMetadata{ TreeHash: h.commit.TreeHash.String(), } @@ -662,7 +670,7 @@ func (h *postCommitActionHandler) computeLinkage(ctx context.Context, sessionFil slog.String("error", err.Error()), ) } else { - linkage.PatchID = patchID + h.baseLinkage.PatchID = patchID } // Compute files-changed hash (committed files' blob hashes — survives rebase + other-file conflicts) @@ -677,14 +685,23 @@ func (h *postCommitActionHandler) computeLinkage(ctx context.Context, sessionFil slog.String("error", err.Error()), ) } else { - linkage.FilesChangedHash = fch + h.baseLinkage.FilesChangedHash = fch } +} - // Compute session files hash (all files touched across session — survives squash merge) +// linkageForSession returns linkage metadata for a specific session by copying +// the commit-level base linkage and adding the session-specific SessionFilesHash. +func (h *postCommitActionHandler) linkageForSession(ctx context.Context, sessionFilesTouched []string) *checkpoint.LinkageMetadata { + if h.baseLinkage == nil { + h.computeBaseLinkage(ctx) + } + + // Copy base linkage so each session gets its own SessionFilesHash + linkage := *h.baseLinkage if len(sessionFilesTouched) > 0 { sfh, err := gitops.ComputeFilesChangedHash(ctx, h.repoDir, h.newHead, sessionFilesTouched) if err != nil { - logging.Warn(logCtx, "failed to compute session files hash for linkage", + logging.Warn(logging.WithComponent(ctx, "checkpoint"), "failed to compute session files hash for linkage", slog.String("commit", h.newHead), slog.String("error", err.Error()), ) @@ -692,8 +709,7 @@ func (h *postCommitActionHandler) computeLinkage(ctx context.Context, sessionFil linkage.SessionFilesHash = sfh } } - - return linkage + return &linkage } func (h *postCommitActionHandler) HandleCondense(state *session.State) error { @@ -717,7 +733,7 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, - linkage: h.computeLinkage(h.ctx, state.FilesTouched), + linkage: h.linkageForSession(h.ctx, state.FilesTouched), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -747,7 +763,7 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, - linkage: h.computeLinkage(h.ctx, state.FilesTouched), + linkage: h.linkageForSession(h.ctx, state.FilesTouched), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 8a06833cf..ef7309acc 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -4286,6 +4286,60 @@ func TestMarshalPromptAttributionsIncludingPending_OnlyPending(t *testing.T) { require.Equal(t, 7, result[0].UserLinesAdded) } +// TestShadowStrategy_PostCommit_LinkagePopulated verifies the full linkage pipeline: +// PostCommit computes linkage signals, passes them through condensation, and the +// committed checkpoint has all four LinkageMetadata fields populated with correct +// hash lengths (tree_hash=40 hex, patch_id=40 hex, files_changed_hash=64 hex, +// session_files_hash=64 hex). +func TestShadowStrategy_PostCommit_LinkagePopulated(t *testing.T) { + dir := setupGitRepo(t) + t.Chdir(dir) + + repo, err := git.PlainOpen(dir) + require.NoError(t, err) + + s := &ManualCommitStrategy{} + sessionID := "linkage-pipeline-session" + + // Initialize session and save a checkpoint so the shadow branch has content. + // setupGitRepo creates one initial commit; commitWithCheckpointTrailer will + // create a second, giving us a parent for patch-id computation. + setupSessionWithCheckpoint(t, s, repo, dir, sessionID) + + // Create a commit WITH the Entire-Checkpoint trailer on the main branch. + // The checkpoint ID here will be used by PostCommit for condensation. + checkpointIDStr := "f1e2d3c4b5a6" + commitWithCheckpointTrailer(t, repo, dir, checkpointIDStr) + + // Trigger PostCommit — this should condense with linkage signals + err = s.PostCommit(context.Background()) + require.NoError(t, err) + + // Re-open the repo to pick up any ref changes from condensation + repo, err = git.PlainOpen(dir) + require.NoError(t, err) + + // Read back the committed checkpoint from the metadata branch + store := checkpoint.NewGitStore(repo) + cpID := id.MustCheckpointID(checkpointIDStr) + summary, err := store.ReadCommitted(context.Background(), cpID) + require.NoError(t, err) + require.NotNil(t, summary, "checkpoint should exist on metadata branch after PostCommit") + + // Verify linkage is populated with all four signals + require.NotNil(t, summary.Linkage, "Linkage should be populated after condensation") + assert.NotEmpty(t, summary.Linkage.TreeHash, "TreeHash should be set") + assert.NotEmpty(t, summary.Linkage.PatchID, "PatchID should be set") + assert.NotEmpty(t, summary.Linkage.FilesChangedHash, "FilesChangedHash should be set") + assert.NotEmpty(t, summary.Linkage.SessionFilesHash, "SessionFilesHash should be set") + + // Verify hash format and lengths + assert.Len(t, summary.Linkage.TreeHash, 40, "TreeHash should be 40-char hex (git tree hash)") + assert.Len(t, summary.Linkage.PatchID, 40, "PatchID should be 40-char hex (git patch-id)") + assert.Len(t, summary.Linkage.FilesChangedHash, 64, "FilesChangedHash should be 64-char hex (SHA256)") + assert.Len(t, summary.Linkage.SessionFilesHash, 64, "SessionFilesHash should be 64-char hex (SHA256)") +} + func TestCommittedFilesExcludingMetadata_AllMetadata(t *testing.T) { t.Parallel() From 7cfa919bb720de86303f6afbb27b0273be5193af Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 7 Apr 2026 16:53:04 -0700 Subject: [PATCH 12/14] style: simplify ComputePatchID output parsing and fix minor issues - Replace unreachable Fields/len guard with strings.Cut in ComputePatchID - Use logCtx variable in linkageForSession for logging consistency - Use strings.TrimSpace in revParse test helper instead of raw byte slice Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 91b06c4aabdb --- cmd/entire/cli/gitops/diff.go | 8 +++----- cmd/entire/cli/gitops/diff_test.go | 3 ++- cmd/entire/cli/strategy/manual_commit_hooks.go | 3 ++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/entire/cli/gitops/diff.go b/cmd/entire/cli/gitops/diff.go index 14d0a31a7..b950f612e 100644 --- a/cmd/entire/cli/gitops/diff.go +++ b/cmd/entire/cli/gitops/diff.go @@ -173,11 +173,9 @@ func ComputePatchID(ctx context.Context, repoDir, parentHash, commitHash string) if output == "" { return "", nil } - fields := strings.Fields(output) - if len(fields) < 1 { - return "", fmt.Errorf("unexpected git patch-id output: %q", output) - } - return fields[0], nil + // git patch-id outputs " "; we want the first field. + patchID, _, _ := strings.Cut(output, " ") + return patchID, nil } // ComputeFilesChangedHash computes a SHA256 hash of the given files' blob hashes diff --git a/cmd/entire/cli/gitops/diff_test.go b/cmd/entire/cli/gitops/diff_test.go index 56dcc9ef7..1ddfbf23c 100644 --- a/cmd/entire/cli/gitops/diff_test.go +++ b/cmd/entire/cli/gitops/diff_test.go @@ -6,6 +6,7 @@ import ( "os/exec" "path/filepath" "sort" + "strings" "testing" ) @@ -94,7 +95,7 @@ func revParse(t *testing.T, dir, ref string) string { if err != nil { t.Fatalf("git rev-parse %s failed: %v", ref, err) } - return string(out[:len(out)-1]) + return strings.TrimSpace(string(out)) } func gitCheckoutBranch(t *testing.T, dir, branchName string) { diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 0f586b07e..738314a88 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -697,11 +697,12 @@ func (h *postCommitActionHandler) linkageForSession(ctx context.Context, session } // Copy base linkage so each session gets its own SessionFilesHash + logCtx := logging.WithComponent(ctx, "checkpoint") linkage := *h.baseLinkage if len(sessionFilesTouched) > 0 { sfh, err := gitops.ComputeFilesChangedHash(ctx, h.repoDir, h.newHead, sessionFilesTouched) if err != nil { - logging.Warn(logging.WithComponent(ctx, "checkpoint"), "failed to compute session files hash for linkage", + logging.Warn(logCtx, "failed to compute session files hash for linkage", slog.String("commit", h.newHead), slog.String("error", err.Error()), ) From fcb4e4a3b3a36dcc96bb6debdeb32e4575e7722e Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 13 Apr 2026 15:14:02 -0700 Subject: [PATCH 13/14] Narrow checkpoint linkage to tree hash and patch ID Entire-Checkpoint: 5f47003ea308 --- cmd/entire/cli/checkpoint/checkpoint.go | 12 +-- cmd/entire/cli/checkpoint/checkpoint_test.go | 12 +-- cmd/entire/cli/gitops/diff.go | 65 -------------- cmd/entire/cli/gitops/diff_test.go | 85 ------------------- .../cli/strategy/manual_commit_hooks.go | 41 ++------- cmd/entire/cli/strategy/manual_commit_test.go | 16 ++-- 6 files changed, 13 insertions(+), 218 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 410138199..bfb863df7 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -449,15 +449,13 @@ type SessionFilePaths struct { Prompt string `json:"prompt"` } -// LinkageMetadata contains content-based signals for re-linking checkpoints +// LinkageMetadata contains Git-native signals for limited fallback re-linking // after git history rewrites (rebase, reword, amend, filter-branch). // Stored at the checkpoint level (root metadata.json), not per-session. // // The web uses a fallback chain when a commit arrives without an Entire-Checkpoint trailer: // 1. TreeHash match - covers: reword, amend (msg-only), filter-branch (msg-only) // 2. PatchID match - covers: clean rebase, cherry-pick to other branch -// 3. FilesChangedHash - covers: rebase with conflicts in non-agent files -// 4. SessionFilesHash - covers: local squash merge (cumulative agent files) type LinkageMetadata struct { // TreeHash is the git tree hash of the commit (full repo snapshot). // Survives rewrites that don't change code (reword, msg-only amend). @@ -466,14 +464,6 @@ type LinkageMetadata struct { // PatchID is the git patch-id of the commit's diff (parent->HEAD). // Survives rebase (same diff replayed on different base). PatchID string `json:"patch_id,omitempty"` - - // FilesChangedHash is SHA256 of sorted file:blob pairs for files changed in this commit. - // Survives rebase even with conflicts in other files (only agent-file blobs matter). - FilesChangedHash string `json:"files_changed_hash,omitempty"` - - // SessionFilesHash is SHA256 of sorted file:blob pairs for ALL files touched across the session. - // Survives local squash merges where individual patch IDs don't match the combined diff. - SessionFilesHash string `json:"session_files_hash,omitempty"` } // CheckpointSummary is the root-level metadata.json for a checkpoint. diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index e741657b4..8c6ddaf99 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -3631,10 +3631,8 @@ func TestWriteCommitted_IncludesLinkage(t *testing.T) { checkpointID := id.MustCheckpointID("a1b2c3d4e5f6") linkage := &LinkageMetadata{ - TreeHash: "abc123def456abc123def456abc123def456abc1", - PatchID: "def456abc123def456abc123def456abc123def4", - FilesChangedHash: "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069", - SessionFilesHash: "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", + TreeHash: "abc123def456abc123def456abc123def456abc1", + PatchID: "def456abc123def456abc123def456abc123def4", } err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ @@ -3667,12 +3665,6 @@ func TestWriteCommitted_IncludesLinkage(t *testing.T) { if summary.Linkage.PatchID != linkage.PatchID { t.Errorf("PatchID = %q, want %q", summary.Linkage.PatchID, linkage.PatchID) } - if summary.Linkage.FilesChangedHash != linkage.FilesChangedHash { - t.Errorf("FilesChangedHash = %q, want %q", summary.Linkage.FilesChangedHash, linkage.FilesChangedHash) - } - if summary.Linkage.SessionFilesHash != linkage.SessionFilesHash { - t.Errorf("SessionFilesHash = %q, want %q", summary.Linkage.SessionFilesHash, linkage.SessionFilesHash) - } } func TestWriteCommitted_NilLinkageOmitted(t *testing.T) { diff --git a/cmd/entire/cli/gitops/diff.go b/cmd/entire/cli/gitops/diff.go index b950f612e..163c571c0 100644 --- a/cmd/entire/cli/gitops/diff.go +++ b/cmd/entire/cli/gitops/diff.go @@ -3,11 +3,8 @@ package gitops import ( "bytes" "context" - "crypto/sha256" - "encoding/hex" "fmt" "os/exec" - "sort" "strings" ) @@ -177,65 +174,3 @@ func ComputePatchID(ctx context.Context, repoDir, parentHash, commitHash string) patchID, _, _ := strings.Cut(output, " ") return patchID, nil } - -// ComputeFilesChangedHash computes a SHA256 hash of the given files' blob hashes -// at the specified commit. The hash is computed from sorted "filepath:blobhash" pairs, -// making it independent of input order and stable across rebases. -// -// Uses a single git ls-tree call for all files (O(1) subprocess, not O(N)). -// Returns a 64-char hex SHA256 string, or empty string if no files. -func ComputeFilesChangedHash(ctx context.Context, repoDir, commitHash string, filePaths []string) (string, error) { - if len(filePaths) == 0 { - return "", nil - } - - args := []string{"ls-tree", commitHash, "--"} - args = append(args, filePaths...) - cmd := exec.CommandContext(ctx, "git", args...) - cmd.Dir = repoDir - - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - return "", fmt.Errorf("git ls-tree failed: %w: %s", err, strings.TrimSpace(stderr.String())) - } - - // Parse ls-tree output: " \t" per line - blobMap := make(map[string]string) - for _, line := range strings.Split(strings.TrimSpace(stdout.String()), "\n") { - if line == "" { - continue - } - tabIdx := strings.IndexByte(line, '\t') - if tabIdx == -1 { - return "", fmt.Errorf("unexpected ls-tree output (no tab separator): %q", line) - } - meta := line[:tabIdx] - path := line[tabIdx+1:] - fields := strings.Fields(meta) - if len(fields) < 3 { - return "", fmt.Errorf("unexpected ls-tree output (incomplete metadata): %q", line) - } - blobMap[path] = fields[2] - } - - sorted := make([]string, len(filePaths)) - copy(sorted, filePaths) - sort.Strings(sorted) - - var pairs []string - for _, fp := range sorted { - if blobHash, ok := blobMap[fp]; ok { - pairs = append(pairs, fp+":"+blobHash) - } - } - - if len(pairs) == 0 { - return "", nil - } - - h := sha256.Sum256([]byte(strings.Join(pairs, "\n"))) - return hex.EncodeToString(h[:]), nil -} diff --git a/cmd/entire/cli/gitops/diff_test.go b/cmd/entire/cli/gitops/diff_test.go index 1ddfbf23c..70ef5fc2d 100644 --- a/cmd/entire/cli/gitops/diff_test.go +++ b/cmd/entire/cli/gitops/diff_test.go @@ -534,88 +534,3 @@ func TestComputePatchID_InitialCommit(t *testing.T) { t.Fatal("expected non-empty patch ID for initial commit") } } - -func TestComputeFilesChangedHash(t *testing.T) { - t.Parallel() - dir := initTestRepo(t) - - writeFile(t, dir, "file.txt", "initial") - gitAdd(t, dir, "file.txt") - gitCommit(t, dir, "initial") - - writeFile(t, dir, "file.txt", "modified") - writeFile(t, dir, "new.txt", "new file") - gitAdd(t, dir, "file.txt", "new.txt") - gitCommit(t, dir, "changes") - - head := revParse(t, dir, "HEAD") - - hash, err := ComputeFilesChangedHash(context.Background(), dir, head, []string{"file.txt", "new.txt"}) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if hash == "" { - t.Fatal("expected non-empty hash") - } - if len(hash) != 64 { - t.Fatalf("expected 64-char SHA256 hex, got %d chars: %s", len(hash), hash) - } - - // Same inputs in different order produce same hash - hash2, err := ComputeFilesChangedHash(context.Background(), dir, head, []string{"new.txt", "file.txt"}) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if hash != hash2 { - t.Errorf("hash should be stable regardless of input order: %s != %s", hash, hash2) - } -} - -func TestComputeFilesChangedHash_StableAcrossRebase(t *testing.T) { - t.Parallel() - dir := initTestRepo(t) - - writeFile(t, dir, "base.txt", "base") - gitAdd(t, dir, "base.txt") - gitCommit(t, dir, "base") - - gitCheckoutBranch(t, dir, "feature") - writeFile(t, dir, "feature.txt", "feature work") - gitAdd(t, dir, "feature.txt") - gitCommit(t, dir, "add feature") - - headBefore := revParse(t, dir, "HEAD") - hashBefore, err := ComputeFilesChangedHash(context.Background(), dir, headBefore, []string{"feature.txt"}) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - gitCheckout(t, dir, "main") - writeFile(t, dir, "other.txt", "other") - gitAdd(t, dir, "other.txt") - gitCommit(t, dir, "other work") - - gitCheckout(t, dir, "feature") - gitRebase(t, dir, "main") - - headAfter := revParse(t, dir, "HEAD") - hashAfter, err := ComputeFilesChangedHash(context.Background(), dir, headAfter, []string{"feature.txt"}) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if hashBefore != hashAfter { - t.Errorf("files-changed hash should survive clean rebase: before=%s, after=%s", hashBefore, hashAfter) - } -} - -func TestComputeFilesChangedHash_EmptyFiles(t *testing.T) { - t.Parallel() - hash, err := ComputeFilesChangedHash(context.Background(), "/tmp", "HEAD", nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if hash != "" { - t.Errorf("expected empty hash for nil files, got %s", hash) - } -} diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 738314a88..82f53c266 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -652,8 +652,8 @@ func (h *postCommitActionHandler) parentCommitHash() string { return "" } -// computeBaseLinkage computes commit-level linkage signals (tree hash, patch ID, -// files-changed hash). These are identical across sessions since they depend on +// computeBaseLinkage computes commit-level linkage signals (tree hash, patch ID). +// These are identical across sessions since they depend on // the commit, not the session. Called once per PostCommit invocation and cached // on the handler's baseLinkage field. func (h *postCommitActionHandler) computeBaseLinkage(ctx context.Context) { @@ -672,45 +672,14 @@ func (h *postCommitActionHandler) computeBaseLinkage(ctx context.Context) { } else { h.baseLinkage.PatchID = patchID } - - // Compute files-changed hash (committed files' blob hashes — survives rebase + other-file conflicts) - committedFiles := make([]string, 0, len(h.committedFileSet)) - for f := range h.committedFileSet { - committedFiles = append(committedFiles, f) - } - fch, err := gitops.ComputeFilesChangedHash(ctx, h.repoDir, h.newHead, committedFiles) - if err != nil { - logging.Warn(logCtx, "failed to compute files-changed hash for linkage", - slog.String("commit", h.newHead), - slog.String("error", err.Error()), - ) - } else { - h.baseLinkage.FilesChangedHash = fch - } } -// linkageForSession returns linkage metadata for a specific session by copying -// the commit-level base linkage and adding the session-specific SessionFilesHash. -func (h *postCommitActionHandler) linkageForSession(ctx context.Context, sessionFilesTouched []string) *checkpoint.LinkageMetadata { +// linkageForSession returns the cached commit-level linkage metadata. +func (h *postCommitActionHandler) linkageForSession(ctx context.Context, _ []string) *checkpoint.LinkageMetadata { if h.baseLinkage == nil { h.computeBaseLinkage(ctx) } - - // Copy base linkage so each session gets its own SessionFilesHash - logCtx := logging.WithComponent(ctx, "checkpoint") - linkage := *h.baseLinkage - if len(sessionFilesTouched) > 0 { - sfh, err := gitops.ComputeFilesChangedHash(ctx, h.repoDir, h.newHead, sessionFilesTouched) - if err != nil { - logging.Warn(logCtx, "failed to compute session files hash for linkage", - slog.String("commit", h.newHead), - slog.String("error", err.Error()), - ) - } else { - linkage.SessionFilesHash = sfh - } - } - return &linkage + return h.baseLinkage } func (h *postCommitActionHandler) HandleCondense(state *session.State) error { diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index ef7309acc..370a3f96d 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -4286,11 +4286,9 @@ func TestMarshalPromptAttributionsIncludingPending_OnlyPending(t *testing.T) { require.Equal(t, 7, result[0].UserLinesAdded) } -// TestShadowStrategy_PostCommit_LinkagePopulated verifies the full linkage pipeline: -// PostCommit computes linkage signals, passes them through condensation, and the -// committed checkpoint has all four LinkageMetadata fields populated with correct -// hash lengths (tree_hash=40 hex, patch_id=40 hex, files_changed_hash=64 hex, -// session_files_hash=64 hex). +// TestShadowStrategy_PostCommit_LinkagePopulated verifies the linkage pipeline: +// PostCommit computes tree_hash and patch_id, passes them through condensation, +// and the committed checkpoint stores both fields. func TestShadowStrategy_PostCommit_LinkagePopulated(t *testing.T) { dir := setupGitRepo(t) t.Chdir(dir) @@ -4326,18 +4324,14 @@ func TestShadowStrategy_PostCommit_LinkagePopulated(t *testing.T) { require.NoError(t, err) require.NotNil(t, summary, "checkpoint should exist on metadata branch after PostCommit") - // Verify linkage is populated with all four signals + // Verify linkage is populated with the supported fallback signals. require.NotNil(t, summary.Linkage, "Linkage should be populated after condensation") assert.NotEmpty(t, summary.Linkage.TreeHash, "TreeHash should be set") assert.NotEmpty(t, summary.Linkage.PatchID, "PatchID should be set") - assert.NotEmpty(t, summary.Linkage.FilesChangedHash, "FilesChangedHash should be set") - assert.NotEmpty(t, summary.Linkage.SessionFilesHash, "SessionFilesHash should be set") - // Verify hash format and lengths + // Verify hash format and lengths. assert.Len(t, summary.Linkage.TreeHash, 40, "TreeHash should be 40-char hex (git tree hash)") assert.Len(t, summary.Linkage.PatchID, 40, "PatchID should be 40-char hex (git patch-id)") - assert.Len(t, summary.Linkage.FilesChangedHash, 64, "FilesChangedHash should be 64-char hex (SHA256)") - assert.Len(t, summary.Linkage.SessionFilesHash, 64, "SessionFilesHash should be 64-char hex (SHA256)") } func TestCommittedFilesExcludingMetadata_AllMetadata(t *testing.T) { From 531a4d1d5a539cb5d7830b1fb3a29bbc56395202 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 13 Apr 2026 15:28:11 -0700 Subject: [PATCH 14/14] Clarify commit-level linkage caching Entire-Checkpoint: 391cbda054cc --- cmd/entire/cli/strategy/manual_commit_hooks.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 82f53c266..7f75b14c1 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -630,8 +630,8 @@ type postCommitActionHandler struct { filesTouchedBefore []string sessionsWithCommittedFiles int // number of processable sessions that have tracked files - // Cached git objects — resolved once per PostCommit invocation to avoid - // redundant reads across filesOverlapWithContent, filesWithRemainingAgentChanges, + // Cached git objects — resolved once per session handler to avoid redundant + // reads across filesOverlapWithContent, filesWithRemainingAgentChanges, // CondenseSession, and calculateSessionAttributions. headTree *object.Tree // HEAD commit tree (shared across all sessions) parentTree *object.Tree // HEAD's first parent tree (shared, nil for initial commits) @@ -653,9 +653,9 @@ func (h *postCommitActionHandler) parentCommitHash() string { } // computeBaseLinkage computes commit-level linkage signals (tree hash, patch ID). -// These are identical across sessions since they depend on -// the commit, not the session. Called once per PostCommit invocation and cached -// on the handler's baseLinkage field. +// These are identical across sessions since they depend on the commit, not the +// session. They are cached on the per-session handler to avoid duplicate work +// within one session's PostCommit flow. func (h *postCommitActionHandler) computeBaseLinkage(ctx context.Context) { logCtx := logging.WithComponent(ctx, "checkpoint") h.baseLinkage = &checkpoint.LinkageMetadata{ @@ -674,8 +674,8 @@ func (h *postCommitActionHandler) computeBaseLinkage(ctx context.Context) { } } -// linkageForSession returns the cached commit-level linkage metadata. -func (h *postCommitActionHandler) linkageForSession(ctx context.Context, _ []string) *checkpoint.LinkageMetadata { +// linkageForCommit returns the cached commit-level linkage metadata. +func (h *postCommitActionHandler) linkageForCommit(ctx context.Context) *checkpoint.LinkageMetadata { if h.baseLinkage == nil { h.computeBaseLinkage(ctx) } @@ -703,7 +703,7 @@ func (h *postCommitActionHandler) HandleCondense(state *session.State) error { parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, - linkage: h.linkageForSession(h.ctx, state.FilesTouched), + linkage: h.linkageForCommit(h.ctx), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead) @@ -733,7 +733,7 @@ func (h *postCommitActionHandler) HandleCondenseIfFilesTouched(state *session.St parentCommitHash: h.parentCommitHash(), headCommitHash: h.newHead, allAgentFiles: h.allAgentFiles, - linkage: h.linkageForSession(h.ctx, state.FilesTouched), + linkage: h.linkageForCommit(h.ctx), }) } else { h.s.updateBaseCommitIfChanged(h.ctx, state, h.newHead)