diff --git a/.github/workflows/macos-latest.yml b/.github/workflows/macos-latest.yml index 9a1108b..6a14ce4 100644 --- a/.github/workflows/macos-latest.yml +++ b/.github/workflows/macos-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.24, 1.25] + go-version: [1.25] runs-on: macos-latest steps: - name: Install Go diff --git a/.github/workflows/ubuntu-latest.yml b/.github/workflows/ubuntu-latest.yml index 4ec9ee4..29d7f04 100644 --- a/.github/workflows/ubuntu-latest.yml +++ b/.github/workflows/ubuntu-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.24, 1.25] + go-version: [1.25] runs-on: ubuntu-latest steps: - name: Install Go diff --git a/.github/workflows/windows-latest.yml b/.github/workflows/windows-latest.yml index fb969af..4694316 100644 --- a/.github/workflows/windows-latest.yml +++ b/.github/workflows/windows-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.24, 1.25] + go-version: [1.25] runs-on: windows-latest steps: - name: Install Go diff --git a/archives.go b/archives.go index ee15ce5..4728b07 100644 --- a/archives.go +++ b/archives.go @@ -90,7 +90,7 @@ func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[ return err } - nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive) + nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive, filepath.Separator) // this is the root folder and we are adding its contents to target rootInArchive if info.IsDir() && nameInArchive == "" { return nil @@ -148,17 +148,125 @@ func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[ return files, nil } +// FilesFromFS is an opinionated function that returns a list of [FileInfo] by +// walking the provided [fs.FS] according to the filenames map or the entire +// [fs.FS] if filenames is set to nil or empty map. The keys are the names on +// the [fs.FS], and the values become their associated names in the archive. +// +// Map keys that specify directories on FS will be walked and added to the +// archive recursively, rooted at the named directory. They should use +// slash ('/') as the path separator. For convenience, map keys that end +// in a slash will enumerate contents only, without adding the folder itself +// to the archive. +// +// Map values should use slash ('/') as the separator. For convenience, map +// values that are empty string are interpreted as the base name of the file +// (sans path) in the root of the archive; and map values that end in a slash +// will use the base name of the file in that folder of the archive. +// +// File gathering will adhere to the settings specified in options. +// +// This function is used primarily when preparing a list of files to add to +// an archive. +func FilesFromFS(ctx context.Context, fsys fs.FS, options *FromFSOptions, filenames map[string]string) ([]FileInfo, error) { + var files []FileInfo + // if filenames is nil or empty map, default to walking the entire FS as-is + if len(filenames) == 0 { + filenames = map[string]string{ + ".": "", + } + } + for rootOnFS, rootInArchive := range filenames { + if err := ctx.Err(); err != nil { + return nil, err + } + walkErr := fs.WalkDir(fsys, rootOnFS, func(filename string, d fs.DirEntry, err error) error { + if err := ctx.Err(); err != nil { + return err + } + if err != nil { + return err + } + + info, err := d.Info() + if err != nil { + return err + } + + nameInArchive := nameOnDiskToNameInArchive(filename, rootOnFS, rootInArchive, '/') + // this is the root folder and we are adding its contents to target rootInArchive + if info.IsDir() && nameInArchive == "" { + return nil + } + + // handle symbolic links + var linkTarget string + if isSymlink(info) { + if options != nil && options.FollowSymlinks { + originalFilename := filename + filename, info, err = followSymlinkFS(fsys, filename) + if err != nil { + return err + } + if info.IsDir() { + subFsys, err := fs.Sub(fsys, filename) + if err != nil { + return fmt.Errorf("getting subtree to symlink directory %s dereferenced to %s: %w", originalFilename, filename, err) + } + symlinkDirFiles, err := FilesFromFS(ctx, subFsys, options, map[string]string{filename: nameInArchive}) + if err != nil { + return fmt.Errorf("getting files from symlink directory %s dereferenced to %s: %w", originalFilename, filename, err) + } + + files = append(files, symlinkDirFiles...) + return nil + } + } else { + // preserve symlinks + linkTarget, err = fs.ReadLink(fsys, filename) + if err != nil { + return fmt.Errorf("%s: ReadLink: %w", filename, err) + } + } + } + + // handle file attributes + if options != nil && options.ClearAttributes { + info = noAttrFileInfo{info} + } + + file := FileInfo{ + FileInfo: info, + NameInArchive: nameInArchive, + LinkTarget: linkTarget, + Open: func() (fs.File, error) { + return fsys.Open(filename) + }, + } + + files = append(files, file) + + return nil + }) + if walkErr != nil { + return nil, walkErr + } + } + + return files, nil +} + // nameOnDiskToNameInArchive converts a filename from disk to a name in an archive, // respecting rules defined by FilesFromDisk. nameOnDisk is the full filename on disk // which is expected to be prefixed by rootOnDisk (according to fs.WalkDirFunc godoc) // and which will be placed into a folder rootInArchive in the archive. -func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive string) string { +func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive string, separator rune) string { // These manipulations of rootInArchive could be done just once instead of on // every walked file since they don't rely on nameOnDisk which is the only // variable that changes during the walk, but combining all the logic into this // one function is easier to reason about and test. I suspect the performance // penalty is insignificant. - if strings.HasSuffix(rootOnDisk, string(filepath.Separator)) { + if strings.HasSuffix(rootOnDisk, string(separator)) { // "map keys that end in a separator will enumerate contents only, // without adding the folder itself to the archive." rootInArchive = trimTopDir(rootInArchive) @@ -228,6 +336,18 @@ type FromDiskOptions struct { ClearAttributes bool } +// FromFSOptions specifies various options for gathering files from a [fs.FS]. +type FromFSOptions struct { + // If true, symbolic links will be dereferenced, meaning that + // the link will not be added as a link, but what the link + // points to will be added as a file. + FollowSymlinks bool + + // If true, some file attributes will not be preserved. + // Name, size, type, and permissions will still be preserved. + ClearAttributes bool +} + // FileHandler is a callback function that is used to handle files as they are read // from an archive; it is kind of like fs.WalkDirFunc. Handler functions that open // their files must not overlap or run concurrently, as files may be read from the @@ -375,3 +495,42 @@ func followSymlink(filename string) (string, os.FileInfo, error) { filename = linkPath } } + +// followSymlinkFS follows a symlink within the provided FS until it finds +// a non-symlink, returning the target path, file info, and any error that +// occurs. +// It also checks for symlink loops and maximum depth. +func followSymlinkFS(fsys fs.FS, filename string) (string, os.FileInfo, error) { + visited := make(map[string]bool) + visited[filename] = true + // While not necessarily applicable to every possible [fs.ReadLinkFS] + // implementation, this seems like a reasonable limit to impose. + const maxDepth = 40 + + for { + linkPath, err := fs.ReadLink(fsys, filename) + if err != nil { + return "", nil, fmt.Errorf("%s: ReadLink: %w", filename, err) + } + info, err := fs.Lstat(fsys, linkPath) + if err != nil { + return "", nil, fmt.Errorf("%s: statting dereferenced symlink: %w", filename, err) + } + + // Not a symlink, we've found the target, return it + if info.Mode()&os.ModeSymlink == 0 { + return linkPath, info, nil + } + + if visited[linkPath] { + return "", nil, fmt.Errorf("%s: symlink loop", filename) + } + + if len(visited) >= maxDepth { + return "", nil, fmt.Errorf("%s: maximum symlink depth (%d) exceeded", filename, maxDepth) + } + + visited[linkPath] = true + filename = linkPath + } +} diff --git a/archives_test.go b/archives_test.go index b2058e1..7b11b07 100644 --- a/archives_test.go +++ b/archives_test.go @@ -3,6 +3,7 @@ package archives import ( "context" "fmt" + "io/fs" "os" "path/filepath" "reflect" @@ -10,6 +11,8 @@ import ( "sort" "strings" "testing" + "testing/fstest" + "time" ) func TestTrimTopDir(t *testing.T) { @@ -260,7 +263,7 @@ func TestNameOnDiskToNameInArchive(t *testing.T) { continue } - actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) + actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive, filepath.Separator) if actual != tc.expect { t.Errorf("Test %d: Got '%s' but expected '%s' (nameOnDisk=%s rootOnDisk=%s rootInArchive=%s)", i, actual, tc.expect, tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) @@ -562,3 +565,150 @@ func TestFilesFromDisk_SymlinkOutsideFileNamesMap(t *testing.T) { t.Fatalf("expected file name '%s', got '%s'", testFilePath, files[1].NameInArchive) } } + +func TestFilesFromFS(t *testing.T) { + now := time.Now() + testfs := fstest.MapFS{ + "file-a.txt": &fstest.MapFile{ + Data: []byte("file-a"), + Mode: 0o644, + ModTime: now, + Sys: nil, + }, + "dir/file-b.txt": &fstest.MapFile{ + Data: []byte("file-b"), + Mode: 0o644, + ModTime: now, + Sys: nil, + }, + "dir/subdir/file-c.txt": &fstest.MapFile{ + Data: []byte("file-c"), + Mode: 0o644, + ModTime: now, + Sys: nil, + }, + } + + t.Run("maps FS as-is without arguments", func(t *testing.T) { + files, err := FilesFromFS(t.Context(), testfs, nil, nil) + if err != nil { + t.Fatal(err) + } + for filename, _ := range testfs { + found := false + for _, fi := range files { + if filename == fi.NameInArchive { + found = true + break + } + } + if !found { + t.Errorf("expected to find a file named %q, but did not find it.", filename) + } + } + }) + + t.Run("maps subset of the FS filtered based on provided filenames", func(t *testing.T) { + files, err := FilesFromFS(t.Context(), testfs, nil, map[string]string{ + "dir/subdir": "", + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 2 { + t.Errorf("expected 2 entries to be mapped, but got %d", len(files)) + } + }) + + t.Run("passing in options.ClearAttributes remove file attributes", func(t *testing.T) { + options := &FromFSOptions{ClearAttributes: true} + files, err := FilesFromFS(t.Context(), testfs, options, map[string]string{ + "file-a.txt": "file-a.txt", + }) + if err != nil { + t.Fatal(err) + } + + file := files[0] + if file.ModTime().Equal(now) { + t.Errorf("expected file.ModTime to have been cleared, but got %q instead", file.ModTime()) + } + }) + + t.Run("supports symlinks", func(t *testing.T) { + symlinkName := "symlink" + symlinkTarget := "dir/symlink-target.file" + testfs = fstest.MapFS{ + symlinkName: &fstest.MapFile{ + Mode: fs.ModeSymlink, + Data: []byte(symlinkTarget), + }, + symlinkTarget: &fstest.MapFile{ + Mode: 0o644, + Data: []byte("symlink-target"), + ModTime: time.Now(), + Sys: nil, + }, + } + + files, err := FilesFromFS(t.Context(), testfs, nil, nil) + if err != nil { + t.Fatal(err) + } + + symlinkFound := false + for _, f := range files { + if f.NameInArchive != symlinkName { + continue + } + symlinkFound = true + if f.LinkTarget != symlinkTarget { + t.Fatalf("expected file.LinkTarget to be %q, but got %q", symlinkTarget, f.LinkTarget) + } + } + + if !symlinkFound { + t.Fatal("expected a symlink to be found, but it was not") + } + }) + + t.Run("supports following symlinks", func(t *testing.T) { + symlinkName := "symlink" + symlinkTarget := "dir/symlink-target.file" + testfs = fstest.MapFS{ + symlinkName: &fstest.MapFile{ + Mode: fs.ModeSymlink, + Data: []byte(symlinkTarget), + }, + symlinkTarget: &fstest.MapFile{ + Mode: 0o644, + Data: []byte("symlink-target"), + ModTime: time.Now(), + Sys: nil, + }, + } + + options := &FromFSOptions{FollowSymlinks: true} + files, err := FilesFromFS(t.Context(), testfs, options, nil) + if err != nil { + t.Fatal(err) + } + + symlinkFound := false + for _, f := range files { + if f.NameInArchive != symlinkName { + continue + } + + symlinkFound = true + if isSymlink(f.FileInfo) { + t.Fatal("expected symlink to be deferenced, but it was not") + } + } + + if !symlinkFound { + t.Fatal("expected a symlink to be found, but it was not") + } + }) +} diff --git a/go.mod b/go.mod index efe61a9..43884ec 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/mholt/archives -go 1.24.0 +go 1.25.0 -toolchain go1.24.6 +toolchain go1.25.5 require ( github.com/andybalholm/brotli v1.2.0