From a7c7de954c0d7756af4dff603ecc8d58eb08d784 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Thu, 9 Apr 2026 16:20:53 +0100 Subject: [PATCH] fix: reject absolute and malformed paths in git tree writes On Windows, absolute paths (e.g., /C:/Users/...) could leak into ApplyTreeChanges, producing empty-named tree entries when split on "/". Add normalizeGitTreePath to validate tree paths are relative with no empty segments, and normalizeRepoRelativeTreePath to convert absolute paths to repo-relative before tree construction. Assisted-by: Claude Opus 4.6 Signed-off-by: Paulo Gomes --- cmd/entire/cli/checkpoint/checkpoint_test.go | 123 ++++++++++++++++ cmd/entire/cli/checkpoint/parse_tree.go | 56 +++++++- cmd/entire/cli/checkpoint/parse_tree_test.go | 144 +++++++++++++++++++ cmd/entire/cli/checkpoint/temporary.go | 47 ++++-- 4 files changed, 360 insertions(+), 10 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 06c19e9f8..1dbd4104e 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -1638,6 +1638,129 @@ func TestWriteTemporary_FirstCheckpoint_CapturesModifiedTrackedFiles(t *testing. } } +// TestWriteTemporary_PathNormalizationAndSkipping verifies that shadow branch writes +// normalize absolute in-repo paths back to repo-relative tree entries and skip invalid +// paths rather than encoding them into git trees. +func TestWriteTemporary_PathNormalizationAndSkipping(t *testing.T) { + tests := []struct { + name string + modifiedFiles func(repoRoot, mainFile string) []string + wantUpdated bool + }{ + { + name: "absolute in repo path is normalized", + modifiedFiles: func(_, mainFile string) []string { + return []string{mainFile} + }, + wantUpdated: true, + }, + { + name: "absolute outside repo path is skipped", + modifiedFiles: func(_, _ string) []string { + return []string{"C:/Users/rober/Vaults/Flowsign/main.go"} + }, + wantUpdated: false, + }, + { + name: "empty segment path is skipped", + modifiedFiles: func(_, _ string) []string { + return []string{"dir//main.go"} + }, + wantUpdated: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + mainFile := filepath.Join(tempDir, "main.go") + if err := os.WriteFile(mainFile, []byte("package main\n"), 0o644); err != nil { + t.Fatalf("failed to write main.go: %v", err) + } + if _, err := worktree.Add("main.go"); err != nil { + t.Fatalf("failed to add main.go: %v", err) + } + initialCommit, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@test.com"}, + }) + if err != nil { + t.Fatalf("failed to commit: %v", err) + } + + updatedContent := "package main\n\nfunc main() {}\n" + if err := os.WriteFile(mainFile, []byte(updatedContent), 0o644); err != nil { + t.Fatalf("failed to update main.go: %v", err) + } + + t.Chdir(tempDir) + + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + if err := os.WriteFile(filepath.Join(metadataDir, "full.jsonl"), []byte(`{"test": true}`), 0o644); err != nil { + t.Fatalf("failed to write transcript: %v", err) + } + + store := NewGitStore(repo) + result, err := store.WriteTemporary(context.Background(), WriteTemporaryOptions{ + SessionID: "test-session", + BaseCommit: initialCommit.String(), + ModifiedFiles: tt.modifiedFiles(tempDir, mainFile), + MetadataDir: ".entire/metadata/test-session", + MetadataDirAbs: metadataDir, + CommitMessage: "Checkpoint with path normalization", + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + if err != nil { + t.Fatalf("WriteTemporary() error = %v", err) + } + + commit, err := repo.CommitObject(result.CommitHash) + if err != nil { + t.Fatalf("failed to get commit object: %v", err) + } + + tree, err := commit.Tree() + if err != nil { + t.Fatalf("failed to get tree: %v", err) + } + + assertNoEmptyEntryNames(t, repo, commit.TreeHash, "") + + file, err := tree.File("main.go") + if err != nil { + t.Fatalf("main.go not found in checkpoint tree: %v", err) + } + + content, err := file.Contents() + if err != nil { + t.Fatalf("failed to read main.go content: %v", err) + } + + wantContent := "package main\n" + if tt.wantUpdated { + wantContent = updatedContent + } + if content != wantContent { + t.Errorf("unexpected main.go content\ngot:\n%s\nwant:\n%s", content, wantContent) + } + }) + } +} + // TestWriteTemporary_FirstCheckpoint_CapturesUntrackedFiles verifies that // the first checkpoint captures untracked files that exist in the working directory. func TestWriteTemporary_FirstCheckpoint_CapturesUntrackedFiles(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/parse_tree.go b/cmd/entire/cli/checkpoint/parse_tree.go index 63f97a0a4..644ba49c5 100644 --- a/cmd/entire/cli/checkpoint/parse_tree.go +++ b/cmd/entire/cli/checkpoint/parse_tree.go @@ -1,7 +1,10 @@ package checkpoint import ( + "errors" "fmt" + "log/slog" + "path/filepath" "strings" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" @@ -224,12 +227,19 @@ func ApplyTreeChanges( for i := range changes { c := changes[i] - first, rest := splitFirstSegment(c.Path) + normalizedPath, err := normalizeGitTreePath(c.Path) + if err != nil { + logInvalidGitTreePath("apply tree change", c.Path, err) + continue + } + + first, rest := splitFirstSegment(normalizedPath) if grouped[first] == nil { grouped[first] = &dirChanges{} } if rest == "" { cc := c + cc.Path = normalizedPath grouped[first].fileChange = &cc } else { grouped[first].subChanges = append(grouped[first].subChanges, TreeChange{ @@ -319,6 +329,50 @@ func WalkCheckpointShards(repo *git.Repository, tree *object.Tree, fn func(cpID return nil } +func normalizeGitTreePath(path string) (string, error) { + if path == "" { + return "", errors.New("path is empty") + } + + path = filepath.ToSlash(path) + if isAbsoluteGitTreePath(path) { + return "", errors.New("path must be relative") + } + + parts := strings.Split(path, "/") + for _, part := range parts { + if part == "" { + return "", errors.New("path contains empty segment") + } + if part == "." || part == ".." { + return "", fmt.Errorf("path contains invalid segment %q", part) + } + } + + return path, nil +} + +func isAbsoluteGitTreePath(path string) bool { + if filepath.IsAbs(path) { + return true + } + + if len(path) >= 3 && path[1] == ':' && path[2] == '/' { + drive := path[0] + return (drive >= 'a' && drive <= 'z') || (drive >= 'A' && drive <= 'Z') + } + + return false +} + +func logInvalidGitTreePath(operation, path string, err error) { + slog.Warn("skipping invalid git tree path", + slog.String("operation", operation), + slog.String("path", path), + slog.String("error", err.Error()), + ) +} + // splitFirstSegment splits "a/b/c" into ("a", "b/c"), and "file.txt" into ("file.txt", ""). func splitFirstSegment(path string) (first, rest string) { parts := strings.SplitN(path, "/", 2) diff --git a/cmd/entire/cli/checkpoint/parse_tree_test.go b/cmd/entire/cli/checkpoint/parse_tree_test.go index 90c77e471..adc16aa79 100644 --- a/cmd/entire/cli/checkpoint/parse_tree_test.go +++ b/cmd/entire/cli/checkpoint/parse_tree_test.go @@ -84,6 +84,25 @@ func flattenTreeHelper(t *testing.T, repo *git.Repository, treeHash plumbing.Has return result } +// assertNoEmptyEntryNames recursively verifies that a tree contains no empty entry names. +func assertNoEmptyEntryNames(t *testing.T, repo *git.Repository, treeHash plumbing.Hash, prefix string) { + t.Helper() + + tree := mustTreeObject(t, repo, treeHash) + for _, entry := range tree.Entries { + fullPath := entry.Name + if prefix != "" { + fullPath = prefix + "/" + entry.Name + } + if entry.Name == "" { + t.Fatalf("tree %s contains empty entry name at %q", treeHash, fullPath) + } + if entry.Mode == filemode.Dir { + assertNoEmptyEntryNames(t, repo, entry.Hash, fullPath) + } + } +} + func TestSplitFirstSegment(t *testing.T) { t.Parallel() @@ -137,6 +156,131 @@ func TestStoreTree_RoundTrip(t *testing.T) { } } +func TestApplyTreeChanges_SkipsInvalidPaths(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + path string + wantPresent string + }{ + { + name: "leading slash windows path", + path: "/C:/Users/r/Vaults/Flowsign/.entire/metadata/test-session/full.jsonl", + wantPresent: "valid.txt", + }, + { + name: "drive letter windows path", + path: "C:/Users/r/Vaults/Flowsign/.entire/metadata/test-session/full.jsonl", + wantPresent: "valid.txt", + }, + { + name: "empty segment", + path: "dir//file.txt", + wantPresent: "valid.txt", + }, + { + name: "dot segment", + path: "./dir/file.txt", + wantPresent: "valid.txt", + }, + { + name: "dot dot segment", + path: "../dir/file.txt", + wantPresent: "valid.txt", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + repo := mustInitBareRepo(t) + validBlob := storeBlob(t, repo, "valid") + invalidBlob := storeBlob(t, repo, "invalid") + + treeHash, err := ApplyTreeChanges(repo, plumbing.ZeroHash, []TreeChange{ + { + Path: "valid.txt", + Entry: &object.TreeEntry{ + Mode: filemode.Regular, + Hash: validBlob, + }, + }, + { + Path: tt.path, + Entry: &object.TreeEntry{ + Mode: filemode.Regular, + Hash: invalidBlob, + }, + }, + }) + if err != nil { + t.Fatalf("ApplyTreeChanges() error = %v", err) + } + + assertNoEmptyEntryNames(t, repo, treeHash, "") + files := flattenTreeHelper(t, repo, treeHash, "") + if len(files) != 1 { + t.Fatalf("expected 1 valid file, got %d: %v", len(files), files) + } + if files[tt.wantPresent] != validBlob { + t.Fatalf("expected valid file %q to be preserved", tt.wantPresent) + } + }) + } +} + +func TestBuildTreeFromEntries_SkipsInvalidPaths(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + path string + }{ + {name: "leading slash windows path", path: "/C:/repo/file.txt"}, + {name: "drive letter windows path", path: "C:/repo/file.txt"}, + {name: "empty segment", path: "dir//file.txt"}, + {name: "dot segment", path: "./file.txt"}, + {name: "dot dot segment", path: "../file.txt"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + repo := mustInitBareRepo(t) + validBlob := storeBlob(t, repo, "valid") + invalidBlob := storeBlob(t, repo, "invalid") + + treeHash, err := BuildTreeFromEntries(repo, map[string]object.TreeEntry{ + "valid.txt": { + Name: "valid.txt", + Mode: filemode.Regular, + Hash: validBlob, + }, + tt.path: { + Name: tt.path, + Mode: filemode.Regular, + Hash: invalidBlob, + }, + }) + if err != nil { + t.Fatalf("BuildTreeFromEntries() error = %v", err) + } + + assertNoEmptyEntryNames(t, repo, treeHash, "") + files := flattenTreeHelper(t, repo, treeHash, "") + if len(files) != 1 { + t.Fatalf("expected 1 valid file, got %d: %v", len(files), files) + } + if files["valid.txt"] != validBlob { + t.Fatal("expected valid.txt to be preserved") + } + }) + } +} + func TestUpdateSubtree_CreateFromEmpty(t *testing.T) { t.Parallel() repo := mustInitBareRepo(t) diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index b9442fe32..4b78eb20a 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -725,15 +725,26 @@ func (s *GitStore) buildTreeWithChanges( // Deleted files → nil Entry means deletion for _, file := range deletedFiles { - changes = append(changes, TreeChange{Path: file, Entry: nil}) + relPath, relErr := normalizeRepoRelativeTreePath(repoRoot, file) + if relErr != nil { + logInvalidGitTreePath("delete shadow branch entry", file, relErr) + continue + } + changes = append(changes, TreeChange{Path: relPath, Entry: nil}) } // Modified/new files → create blobs from disk for _, file := range modifiedFiles { - absPath := filepath.Join(repoRoot, file) + relPath, relErr := normalizeRepoRelativeTreePath(repoRoot, file) + if relErr != nil { + logInvalidGitTreePath("add shadow branch entry", file, relErr) + continue + } + + absPath := filepath.Join(repoRoot, filepath.FromSlash(relPath)) if !fileExists(absPath) { // File disappeared since detection — treat as deletion - changes = append(changes, TreeChange{Path: file, Entry: nil}) + changes = append(changes, TreeChange{Path: relPath, Entry: nil}) continue } @@ -744,7 +755,7 @@ func (s *GitStore) buildTreeWithChanges( } changes = append(changes, TreeChange{ - Path: file, + Path: relPath, Entry: &object.TreeEntry{ Mode: mode, Hash: blobHash, @@ -754,11 +765,16 @@ func (s *GitStore) buildTreeWithChanges( // Metadata directory files if metadataDir != "" && metadataDirAbs != "" { - metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataDir) - if metaErr != nil { - return plumbing.ZeroHash, fmt.Errorf("failed to add metadata directory: %w", metaErr) + metadataRel, relErr := normalizeRepoRelativeTreePath(repoRoot, metadataDir) + if relErr != nil { + logInvalidGitTreePath("add metadata directory", metadataDir, relErr) + } else { + metaChanges, metaErr := addDirectoryToChanges(s.repo, metadataDirAbs, metadataRel) + if metaErr != nil { + return plumbing.ZeroHash, fmt.Errorf("failed to add metadata directory: %w", metaErr) + } + changes = append(changes, metaChanges...) } - changes = append(changes, metaChanges...) } return ApplyTreeChanges(s.repo, baseTreeHash, changes) @@ -985,7 +1001,12 @@ func BuildTreeFromEntries(repo *git.Repository, entries map[string]object.TreeEn // Insert all entries into the tree structure for fullPath, entry := range entries { - parts := strings.Split(fullPath, "/") + normalizedPath, err := normalizeGitTreePath(fullPath) + if err != nil { + logInvalidGitTreePath("build tree entry", fullPath, err) + continue + } + parts := strings.Split(normalizedPath, "/") insertIntoTree(root, parts, entry) } @@ -993,6 +1014,14 @@ func BuildTreeFromEntries(repo *git.Repository, entries map[string]object.TreeEn return buildTreeObject(repo, root) } +func normalizeRepoRelativeTreePath(repoRoot, path string) (string, error) { + if rel := paths.ToRelativePath(path, repoRoot); rel != "" && rel != "." { + return normalizeGitTreePath(rel) + } + + return normalizeGitTreePath(path) +} + // insertIntoTree inserts a file entry into the tree structure. func insertIntoTree(node *treeNode, pathParts []string, entry object.TreeEntry) { if len(pathParts) == 1 {