From aa3ff6abb497324381a6e86ed7c1f95433ee034f Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Fri, 20 Mar 2026 20:26:04 +1100 Subject: [PATCH] feat: `cachew git restore` that restores snapshots and applies bundles Co-authored-by: Claude Code --- cmd/cachew/git.go | 125 ++++++++++++++++ cmd/cachew/git_test.go | 266 ++++++++++++++++++++++++++++++++++ cmd/cachew/main.go | 4 + internal/cache/remote.go | 14 +- internal/snapshot/snapshot.go | 18 ++- 5 files changed, 417 insertions(+), 10 deletions(-) create mode 100644 cmd/cachew/git.go create mode 100644 cmd/cachew/git_test.go diff --git a/cmd/cachew/git.go b/cmd/cachew/git.go new file mode 100644 index 0000000..925f22e --- /dev/null +++ b/cmd/cachew/git.go @@ -0,0 +1,125 @@ +package main + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "strings" + + "github.com/alecthomas/errors" + + "github.com/block/cachew/internal/gitclone" + "github.com/block/cachew/internal/snapshot" +) + +// GitCmd groups git-aware subcommands that talk directly to cachew's +// /git/ strategy endpoints (not the generic object-store API). +type GitCmd struct { + Restore GitRestoreCmd `cmd:"" help:"Restore a repository from a cachew git snapshot."` +} + +// GitRestoreCmd fetches a git snapshot, extracts it, and optionally applies +// a delta bundle to bring the working copy up to the mirror's current HEAD. +type GitRestoreCmd struct { + RepoURL string `arg:"" help:"Repository URL (e.g. https://github.com/org/repo)."` + Directory string `arg:"" help:"Target directory for the clone." type:"path"` + NoBundle bool `help:"Skip applying delta bundle."` + ZstdThreads int `help:"Threads for zstd decompression (0 = all CPU cores)." default:"0"` +} + +func (c *GitRestoreCmd) Run(ctx context.Context, cli *CLI, client *http.Client) error { + repoPath, err := gitclone.RepoPathFromURL(c.RepoURL) + if err != nil { + return errors.Wrap(err, "invalid repository URL") + } + + snapshotURL := fmt.Sprintf("%s/git/%s/snapshot.tar.zst", cli.URL, repoPath) + fmt.Fprintf(os.Stderr, "Fetching snapshot from %s\n", snapshotURL) //nolint:forbidigo + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, snapshotURL, nil) + if err != nil { + return errors.Wrap(err, "create snapshot request") + } + + resp, err := client.Do(req) //nolint:gosec // URL constructed from CLI flags + if err != nil { + return errors.Wrap(err, "fetch snapshot") + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + _, _ = io.Copy(io.Discard, resp.Body) //nolint:errcheck + return errors.Errorf("snapshot request failed with status %d", resp.StatusCode) + } + + fmt.Fprintf(os.Stderr, "Extracting to %s...\n", c.Directory) //nolint:forbidigo + if err := snapshot.Extract(ctx, resp.Body, c.Directory, c.ZstdThreads); err != nil { + return errors.Wrap(err, "extract snapshot") + } + fmt.Fprintf(os.Stderr, "Snapshot restored to %s\n", c.Directory) //nolint:forbidigo + + bundleURL := resp.Header.Get("X-Cachew-Bundle-Url") + if bundleURL == "" || c.NoBundle { + return nil + } + + fmt.Fprintf(os.Stderr, "Applying delta bundle...\n") //nolint:forbidigo + if err := applyBundle(ctx, cli.URL, client, bundleURL, c.Directory); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to apply delta bundle: %v\n", err) //nolint:forbidigo + return nil + } + fmt.Fprintf(os.Stderr, "Delta bundle applied\n") //nolint:forbidigo + + return nil +} + +func applyBundle(ctx context.Context, baseURL string, client *http.Client, bundlePath, directory string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+bundlePath, nil) + if err != nil { + return errors.Wrap(err, "create bundle request") + } + + resp, err := client.Do(req) //nolint:gosec // URL constructed from CLI flags + if err != nil { + return errors.Wrap(err, "fetch bundle") + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + _, _ = io.Copy(io.Discard, resp.Body) //nolint:errcheck + return errors.Errorf("bundle request failed with status %d", resp.StatusCode) + } + + tmpFile, err := os.CreateTemp("", "cachew-bundle-*.bundle") + if err != nil { + return errors.Wrap(err, "create temp bundle file") + } + defer os.Remove(tmpFile.Name()) //nolint:errcheck + + if _, err := io.Copy(tmpFile, resp.Body); err != nil { + _ = tmpFile.Close() + return errors.Wrap(err, "download bundle") + } + if err := tmpFile.Close(); err != nil { + return errors.Wrap(err, "close temp bundle file") + } + + // Determine the current branch so we can pull from the bundle. + branchCmd := exec.CommandContext(ctx, "git", "-C", directory, "symbolic-ref", "--short", "HEAD") //nolint:gosec + branchOut, err := branchCmd.Output() + if err != nil { + return errors.Wrap(err, "determine current branch") + } + branch := strings.TrimSpace(string(branchOut)) + + // Pull the bundle's branch into the working tree via fast-forward. + cmd := exec.CommandContext(ctx, "git", "-C", directory, "pull", "--ff-only", tmpFile.Name(), branch) //nolint:gosec + if output, err := cmd.CombinedOutput(); err != nil { + return errors.Wrapf(err, "git pull from bundle: %s", string(output)) + } + + return nil +} diff --git a/cmd/cachew/git_test.go b/cmd/cachew/git_test.go new file mode 100644 index 0000000..e79ef51 --- /dev/null +++ b/cmd/cachew/git_test.go @@ -0,0 +1,266 @@ +package main + +import ( + "bytes" + "context" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/alecthomas/assert/v2" +) + +func initGitRepo(t *testing.T, dir string, files map[string]string) { + t.Helper() + run := func(args ...string) { + t.Helper() + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) //nolint:gosec + cmd.Env = append(os.Environ(), + "GIT_AUTHOR_NAME=test", "GIT_AUTHOR_EMAIL=test@test.com", + "GIT_COMMITTER_NAME=test", "GIT_COMMITTER_EMAIL=test@test.com", + ) + out, err := cmd.CombinedOutput() + assert.NoError(t, err, string(out)) + } + + run("init", "-b", "main") + for name, content := range files { + path := filepath.Join(dir, name) + assert.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + assert.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + } + run("add", "-A") + run("commit", "-m", "initial") +} + +func createTarZst(t *testing.T, dir string) []byte { + t.Helper() + var buf bytes.Buffer + tarCmd := exec.Command("tar", "-cpf", "-", "-C", dir, ".") + zstdCmd := exec.Command("zstd", "-c") + + tarOut, err := tarCmd.StdoutPipe() + assert.NoError(t, err) + zstdCmd.Stdin = tarOut + zstdCmd.Stdout = &buf + + assert.NoError(t, tarCmd.Start()) + assert.NoError(t, zstdCmd.Start()) + assert.NoError(t, tarCmd.Wait()) + assert.NoError(t, zstdCmd.Wait()) + return buf.Bytes() +} + +func gitRevParse(t *testing.T, dir, ref string) string { + t.Helper() + out, err := exec.Command("git", "-C", dir, "rev-parse", ref).Output() //nolint:gosec + assert.NoError(t, err) + return strings.TrimSpace(string(out)) +} + +func createBundle(t *testing.T, dir, baseCommit string) []byte { + t.Helper() + bundlePath := filepath.Join(t.TempDir(), "delta.bundle") + // Use refs/heads/main (not HEAD) to match server behaviour. + cmd := exec.Command("git", "-C", dir, "bundle", "create", bundlePath, "refs/heads/main", "^"+baseCommit) //nolint:gosec + out, err := cmd.CombinedOutput() + assert.NoError(t, err, string(out)) + + data, err := os.ReadFile(bundlePath) + assert.NoError(t, err) + return data +} + +func TestGitRestoreSnapshot(t *testing.T) { + srcDir := t.TempDir() + initGitRepo(t, srcDir, map[string]string{ + "hello.txt": "hello world", + "subdir/nested.txt": "nested content", + }) + + snapshotData := createTarZst(t, srcDir) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/snapshot.tar.zst") { + w.Header().Set("Content-Type", "application/zstd") + w.Write(snapshotData) //nolint:errcheck + return + } + http.NotFound(w, r) + })) + defer srv.Close() + + dstDir := filepath.Join(t.TempDir(), "restored") + cmd := &GitRestoreCmd{ + RepoURL: "https://github.com/test/repo", + Directory: dstDir, + } + cli := &CLI{URL: srv.URL} + err := cmd.Run(context.Background(), cli, srv.Client()) + assert.NoError(t, err) + + content, err := os.ReadFile(filepath.Join(dstDir, "hello.txt")) + assert.NoError(t, err) + assert.Equal(t, "hello world", string(content)) + + content, err = os.ReadFile(filepath.Join(dstDir, "subdir", "nested.txt")) + assert.NoError(t, err) + assert.Equal(t, "nested content", string(content)) +} + +func TestGitRestoreWithBundle(t *testing.T) { + srcDir := t.TempDir() + initGitRepo(t, srcDir, map[string]string{"file.txt": "v1"}) + baseCommit := gitRevParse(t, srcDir, "HEAD") + + snapshotData := createTarZst(t, srcDir) + + // Add a second commit for the bundle. + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, "file.txt"), []byte("v2"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, "new.txt"), []byte("new"), 0o644)) + cmd := exec.Command("git", "-C", srcDir, "add", "-A") + cmd.Env = append(os.Environ(), "GIT_AUTHOR_NAME=test", "GIT_AUTHOR_EMAIL=test@test.com", + "GIT_COMMITTER_NAME=test", "GIT_COMMITTER_EMAIL=test@test.com") + out, err := cmd.CombinedOutput() + assert.NoError(t, err, string(out)) + + cmd = exec.Command("git", "-C", srcDir, "commit", "-m", "update") + cmd.Env = append(os.Environ(), "GIT_AUTHOR_NAME=test", "GIT_AUTHOR_EMAIL=test@test.com", + "GIT_COMMITTER_NAME=test", "GIT_COMMITTER_EMAIL=test@test.com") + out, err = cmd.CombinedOutput() + assert.NoError(t, err, string(out)) + + bundleData := createBundle(t, srcDir, baseCommit) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.HasSuffix(r.URL.Path, "/snapshot.tar.zst"): + w.Header().Set("Content-Type", "application/zstd") + w.Header().Set("X-Cachew-Bundle-Url", "/git/github.com/test/repo/snapshot.bundle?base="+baseCommit) + w.Write(snapshotData) //nolint:errcheck + + case strings.HasSuffix(r.URL.Path, "/snapshot.bundle"): + assert.Equal(t, baseCommit, r.URL.Query().Get("base")) + w.Header().Set("Content-Type", "application/x-git-bundle") + w.Write(bundleData) //nolint:errcheck + + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + dstDir := filepath.Join(t.TempDir(), "restored") + restoreCmd := &GitRestoreCmd{ + RepoURL: "https://github.com/test/repo", + Directory: dstDir, + } + cli := &CLI{URL: srv.URL} + err = restoreCmd.Run(context.Background(), cli, srv.Client()) + assert.NoError(t, err) + + content, err := os.ReadFile(filepath.Join(dstDir, "file.txt")) + assert.NoError(t, err) + assert.Equal(t, "v2", string(content)) + + content, err = os.ReadFile(filepath.Join(dstDir, "new.txt")) + assert.NoError(t, err) + assert.Equal(t, "new", string(content)) +} + +func TestGitRestoreNoBundle(t *testing.T) { + srcDir := t.TempDir() + initGitRepo(t, srcDir, map[string]string{"file.txt": "v1"}) + + snapshotData := createTarZst(t, srcDir) + + bundleRequested := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.HasSuffix(r.URL.Path, "/snapshot.tar.zst"): + w.Header().Set("Content-Type", "application/zstd") + w.Header().Set("X-Cachew-Bundle-Url", "/git/github.com/test/repo/snapshot.bundle?base=abc") + w.Write(snapshotData) //nolint:errcheck + + case strings.HasSuffix(r.URL.Path, "/snapshot.bundle"): + bundleRequested = true + http.NotFound(w, r) + + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + dstDir := filepath.Join(t.TempDir(), "restored") + restoreCmd := &GitRestoreCmd{ + RepoURL: "https://github.com/test/repo", + Directory: dstDir, + NoBundle: true, + } + cli := &CLI{URL: srv.URL} + err := restoreCmd.Run(context.Background(), cli, srv.Client()) + assert.NoError(t, err) + assert.False(t, bundleRequested) + + content, err := os.ReadFile(filepath.Join(dstDir, "file.txt")) + assert.NoError(t, err) + assert.Equal(t, "v1", string(content)) +} + +func TestGitRestoreSnapshotNotFound(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.NotFound(w, r) + })) + defer srv.Close() + + dstDir := filepath.Join(t.TempDir(), "restored") + restoreCmd := &GitRestoreCmd{ + RepoURL: "https://github.com/test/repo", + Directory: dstDir, + } + cli := &CLI{URL: srv.URL} + err := restoreCmd.Run(context.Background(), cli, srv.Client()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "status 404") +} + +func TestGitRestoreBundleFailureNonFatal(t *testing.T) { + srcDir := t.TempDir() + initGitRepo(t, srcDir, map[string]string{"file.txt": "v1"}) + + snapshotData := createTarZst(t, srcDir) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.HasSuffix(r.URL.Path, "/snapshot.tar.zst"): + w.Header().Set("Content-Type", "application/zstd") + w.Header().Set("X-Cachew-Bundle-Url", "/git/github.com/test/repo/snapshot.bundle?base=abc") + w.Write(snapshotData) //nolint:errcheck + + case strings.HasSuffix(r.URL.Path, "/snapshot.bundle"): + http.Error(w, "internal error", http.StatusInternalServerError) + + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + dstDir := filepath.Join(t.TempDir(), "restored") + restoreCmd := &GitRestoreCmd{ + RepoURL: "https://github.com/test/repo", + Directory: dstDir, + } + cli := &CLI{URL: srv.URL} + err := restoreCmd.Run(context.Background(), cli, srv.Client()) + assert.NoError(t, err) + + content, err := os.ReadFile(filepath.Join(dstDir, "file.txt")) + assert.NoError(t, err) + assert.Equal(t, "v1", string(content)) +} diff --git a/cmd/cachew/main.go b/cmd/cachew/main.go index c89fe42..903bcb4 100644 --- a/cmd/cachew/main.go +++ b/cmd/cachew/main.go @@ -35,6 +35,8 @@ type CLI struct { Snapshot SnapshotCmd `cmd:"" help:"Create compressed archive of directory and upload." group:"Snapshots:"` Restore RestoreCmd `cmd:"" help:"Download and extract archive to directory." group:"Snapshots:"` + + Git GitCmd `cmd:"" help:"Git-aware operations." group:"Git:"` } func main() { @@ -51,9 +53,11 @@ func main() { } remote := cache.NewRemote(cli.URL, headerFunc) defer remote.Close() + httpClient := cache.NewHTTPClient(headerFunc) kctx.BindTo(ctx, (*context.Context)(nil)) kctx.BindTo(remote, (*cache.Cache)(nil)) + kctx.Bind(httpClient) kctx.FatalIfErrorf(kctx.Run(ctx)) } diff --git a/internal/cache/remote.go b/internal/cache/remote.go index 053bf3b..c2e8d09 100644 --- a/internal/cache/remote.go +++ b/internal/cache/remote.go @@ -29,9 +29,10 @@ var _ Cache = (*Remote)(nil) // HeaderFunc returns headers to attach to each outgoing request. type HeaderFunc func() http.Header -// NewRemote creates a new remote cache client. If headerFunc is non-nil, -// its returned headers are added to every outgoing request. -func NewRemote(baseURL string, headerFunc HeaderFunc) *Remote { +// NewHTTPClient creates an *http.Client that attaches headerFunc headers +// to every outgoing request. Useful for callers that need to talk to +// non-API endpoints (e.g. /git/) with the same auth as the cache client. +func NewHTTPClient(headerFunc HeaderFunc) *http.Client { transport := http.DefaultTransport.(*http.Transport).Clone() //nolint:errcheck transport.MaxIdleConns = 100 transport.MaxIdleConnsPerHost = 100 @@ -40,10 +41,15 @@ func NewRemote(baseURL string, headerFunc HeaderFunc) *Remote { if headerFunc != nil { rt = &headerTransport{base: transport, headerFunc: headerFunc} } + return &http.Client{Transport: rt} +} +// NewRemote creates a new remote cache client. If headerFunc is non-nil, +// its returned headers are added to every outgoing request. +func NewRemote(baseURL string, headerFunc HeaderFunc) *Remote { return &Remote{ baseURL: baseURL + "/api/v1", - client: &http.Client{Transport: rt}, + client: NewHTTPClient(headerFunc), } } diff --git a/internal/snapshot/snapshot.go b/internal/snapshot/snapshot.go index d29ecb0..49e88ef 100644 --- a/internal/snapshot/snapshot.go +++ b/internal/snapshot/snapshot.go @@ -166,17 +166,23 @@ func StreamTo(ctx context.Context, w io.Writer, directory string, excludePattern // The operation is fully streaming - no temporary files are created. // threads controls zstd parallelism; 0 uses all available CPU cores. func Restore(ctx context.Context, remote cache.Cache, key cache.Key, directory string, threads int) error { - if threads <= 0 { - threads = runtime.NumCPU() - } - rc, _, err := remote.Open(ctx, key) if err != nil { return errors.Wrap(err, "failed to open object") } defer rc.Close() - // Create target directory if it doesn't exist + return Extract(ctx, rc, directory, threads) +} + +// Extract decompresses a zstd+tar stream into directory, preserving all file +// permissions, ownership, and symlinks. threads controls zstd parallelism; +// 0 uses all available CPU cores. +func Extract(ctx context.Context, r io.Reader, directory string, threads int) error { + if threads <= 0 { + threads = runtime.NumCPU() + } + if err := os.MkdirAll(directory, 0o750); err != nil { return errors.Wrap(err, "failed to create target directory") } @@ -184,7 +190,7 @@ func Restore(ctx context.Context, remote cache.Cache, key cache.Key, directory s zstdCmd := exec.CommandContext(ctx, "zstd", "-dc", fmt.Sprintf("-T%d", threads)) //nolint:gosec // threads is a validated integer, not user input tarCmd := exec.CommandContext(ctx, "tar", "-xpf", "-", "-C", directory) - zstdCmd.Stdin = rc + zstdCmd.Stdin = r zstdStdout, err := zstdCmd.StdoutPipe() if err != nil { return errors.Wrap(err, "failed to create zstd stdout pipe")