From b66bd180931e9ca264553dcfa16921ef72ffbf58 Mon Sep 17 00:00:00 2001 From: Joonas Bergius Date: Sat, 15 Nov 2025 12:21:17 -0600 Subject: [PATCH 1/5] feat: add FilesFromFS Add function to map a fs.FS into a list of FileInfo entries. Fixes #61 Signed-off-by: Joonas Bergius --- archives.go | 98 ++++++++++++++++++++++++++++++++++++++++++++++-- archives_test.go | 89 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 183 insertions(+), 4 deletions(-) diff --git a/archives.go b/archives.go index ee15ce5..c00689f 100644 --- a/archives.go +++ b/archives.go @@ -90,7 +90,7 @@ func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[ return err } - nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive) + nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive, string(filepath.Separator)) // this is the root folder and we are adding its contents to target rootInArchive if info.IsDir() && nameInArchive == "" { return nil @@ -148,17 +148,102 @@ func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[ return files, nil } +// FilesFromFS is an opinionated function that returns a list of [FileInfo] by +// walking the provided [fs.FS] according to the filenames map or the entire +// [fs.FS] if filenames is set to nil or empty map. The keys are the names on +// the [fs.FS], and the values become their associated names in the archive. +// +// Map keys that specify directories on FS will be walked and added to the +// archive recursively, rooted at the named directory. They should use +// slash ('/') as the path separator. For convenience, map keys that end +// in a slash will enumerate contents only, without adding the folder itself +// to the archive. +// +// Map values should use slash ('/') as the separator regardless of +// the platform, as most archive formats standardize on that rune as the +// directory separator for filenames within an archive. For convenience, map +// values that are empty string are interpreted as the base name of the file +// (sans path) in the root of the archive; and map values that end in a slash +// will use the base name of the file in that folder of the archive. +// +// File gathering will adhere to the settings specified in options. +// +// This function is used primarily when preparing a list of files to add to +// an archive. +func FilesFromFS(ctx context.Context, fsys fs.FS, options *FromFSOptions, filenames map[string]string) ([]FileInfo, error) { + var files []FileInfo + // if filenames is nil or empty map, default to walking the entire FS as-is + if len(filenames) == 0 { + filenames = map[string]string{ + ".": "", + } + } + for rootOnFS, rootInArchive := range filenames { + if err := ctx.Err(); err != nil { + return nil, err + } + walkErr := fs.WalkDir(fsys, rootOnFS, func(filename string, d fs.DirEntry, err error) error { + if err := ctx.Err(); err != nil { + return err + } + if err != nil { + return err + } + + info, err := d.Info() + if err != nil { + return err + } + + nameInArchive := nameOnDiskToNameInArchive(filename, rootOnFS, rootInArchive, "/") + // this is the root folder and we are adding its contents to target rootInArchive + if info.IsDir() && nameInArchive == "" { + return nil + } + + // handle symbolic links + var linkTarget string + if isSymlink(info) { + return fmt.Errorf("symlinks are not supported for fs.FS") + } + + // handle file attributes + if options != nil && options.ClearAttributes { + info = noAttrFileInfo{info} + } + + file := FileInfo{ + FileInfo: info, + NameInArchive: nameInArchive, + LinkTarget: linkTarget, + Open: func() (fs.File, error) { + return fsys.Open(filename) + }, + } + + files = append(files, file) + + return nil + }) + if walkErr != nil { + return nil, walkErr + } + } + + return files, nil +} + // nameOnDiskToNameInArchive converts a filename from disk to a name in an archive, // respecting rules defined by FilesFromDisk. nameOnDisk is the full filename on disk // which is expected to be prefixed by rootOnDisk (according to fs.WalkDirFunc godoc) // and which will be placed into a folder rootInArchive in the archive. -func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive string) string { +func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive, separator string) string { // These manipulations of rootInArchive could be done just once instead of on // every walked file since they don't rely on nameOnDisk which is the only // variable that changes during the walk, but combining all the logic into this // one function is easier to reason about and test. I suspect the performance // penalty is insignificant. - if strings.HasSuffix(rootOnDisk, string(filepath.Separator)) { + if strings.HasSuffix(rootOnDisk, separator) { // "map keys that end in a separator will enumerate contents only, // without adding the folder itself to the archive." rootInArchive = trimTopDir(rootInArchive) @@ -228,6 +313,13 @@ type FromDiskOptions struct { ClearAttributes bool } +// FromFSOptions specifies various options for gathering files from a [fs.FS]. +type FromFSOptions struct { + // If true, some file attributes will not be preserved. + // Name, size, type, and permissions will still be preserved. + ClearAttributes bool +} + // FileHandler is a callback function that is used to handle files as they are read // from an archive; it is kind of like fs.WalkDirFunc. Handler functions that open // their files must not overlap or run concurrently, as files may be read from the diff --git a/archives_test.go b/archives_test.go index b2058e1..a6ce116 100644 --- a/archives_test.go +++ b/archives_test.go @@ -3,6 +3,7 @@ package archives import ( "context" "fmt" + "io/fs" "os" "path/filepath" "reflect" @@ -10,6 +11,8 @@ import ( "sort" "strings" "testing" + "testing/fstest" + "time" ) func TestTrimTopDir(t *testing.T) { @@ -260,7 +263,7 @@ func TestNameOnDiskToNameInArchive(t *testing.T) { continue } - actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) + actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive, string(filepath.Separator)) if actual != tc.expect { t.Errorf("Test %d: Got '%s' but expected '%s' (nameOnDisk=%s rootOnDisk=%s rootInArchive=%s)", i, actual, tc.expect, tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) @@ -562,3 +565,87 @@ func TestFilesFromDisk_SymlinkOutsideFileNamesMap(t *testing.T) { t.Fatalf("expected file name '%s', got '%s'", testFilePath, files[1].NameInArchive) } } + +func TestFilesFromFS(t *testing.T) { + now := time.Now() + testfs := fstest.MapFS{ + "file-a.txt": &fstest.MapFile{ + Data: []byte("file-a"), + Mode: 0o644, + ModTime: now, + Sys: nil, + }, + "dir/file-b.txt": &fstest.MapFile{ + Data: []byte("file-b"), + Mode: 0o644, + ModTime: now, + Sys: nil, + }, + "dir/subdir/file-c.txt": &fstest.MapFile{ + Data: []byte("file-c"), + Mode: 0o644, + ModTime: now, + Sys: nil, + }, + } + + t.Run("maps FS as-is without arguments", func(t *testing.T) { + files, err := FilesFromFS(t.Context(), testfs, nil, nil) + if err != nil { + t.Fatal(err) + } + for filename, _ := range testfs { + found := false + for _, fi := range files { + if filename == fi.NameInArchive { + found = true + break + } + } + if !found { + t.Errorf("expected to find a file named %q, but did not find it.", filename) + } + } + }) + + t.Run("maps subset of the FS filtered based on provided filenames", func(t *testing.T) { + files, err := FilesFromFS(t.Context(), testfs, nil, map[string]string{ + "dir/subdir": "", + }) + if err != nil { + t.Fatal(err) + } + + if len(files) != 2 { + t.Errorf("expected 2 entries to be mapped, but got %d", len(files)) + } + }) + + t.Run("passing in options.ClearAttributes remove file attributes", func(t *testing.T) { + options := &FromFSOptions{ClearAttributes: true} + files, err := FilesFromFS(t.Context(), testfs, options, map[string]string{ + "file-a.txt": "file-a.txt", + }) + if err != nil { + t.Fatal(err) + } + + file := files[0] + if file.ModTime().Equal(now) { + t.Errorf("expected file.ModTime to have been cleared, but got %q instead", file.ModTime()) + } + }) + + t.Run("returns an error when fs.FS contains a symlink", func(t *testing.T) { + testfs = fstest.MapFS{ + "symlink": &fstest.MapFile{ + Mode: fs.ModeSymlink, + }, + } + + _, err := FilesFromFS(t.Context(), testfs, nil, nil) + if err == nil { + t.Error("expected to get an error, but got nil.") + } + }) +} From 18003f4f2730ac1f9ce748e46679d9f6cb3489dd Mon Sep 17 00:00:00 2001 From: Joonas Bergius Date: Thu, 4 Dec 2025 19:07:47 -0600 Subject: [PATCH 2/5] chore: update FilesFromFS comment Signed-off-by: Joonas Bergius --- archives.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/archives.go b/archives.go index c00689f..1999afe 100644 --- a/archives.go +++ b/archives.go @@ -159,9 +159,7 @@ func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[ // in a slash will enumerate contents only, without adding the folder itself // to the archive. // -// Map values should use slash ('/') as the separator regardless of -// the platform, as most archive formats standardize on that rune as the -// directory separator for filenames within an archive. For convenience, map +// Map values should use slash ('/') as the separator. For convenience, map // values that are empty string are interpreted as the base name of the file // (sans path) in the root of the archive; and map values that end in a slash // will use the base name of the file in that folder of the archive. From 5a6e0766a9f5b65c3fa1c232faddc3b5d8837a15 Mon Sep 17 00:00:00 2001 From: Joonas Bergius Date: Thu, 4 Dec 2025 19:11:02 -0600 Subject: [PATCH 3/5] chore: maintain separator argument as rune to align with filepath.Separator values Signed-off-by: Joonas Bergius Co-authored-by: Matt Holt --- archives.go | 8 ++++---- archives_test.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/archives.go b/archives.go index 1999afe..61a59d0 100644 --- a/archives.go +++ b/archives.go @@ -90,7 +90,7 @@ func FilesFromDisk(ctx context.Context, options *FromDiskOptions, filenames map[ return err } - nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive, string(filepath.Separator)) + nameInArchive := nameOnDiskToNameInArchive(filename, rootOnDisk, rootInArchive, filepath.Separator) // this is the root folder and we are adding its contents to target rootInArchive if info.IsDir() && nameInArchive == "" { return nil @@ -193,7 +193,7 @@ func FilesFromFS(ctx context.Context, fsys fs.FS, options *FromFSOptions, filena return err } - nameInArchive := nameOnDiskToNameInArchive(filename, rootOnFS, rootInArchive, "/") + nameInArchive := nameOnDiskToNameInArchive(filename, rootOnFS, rootInArchive, '/') // this is the root folder and we are adding its contents to target rootInArchive if info.IsDir() && nameInArchive == "" { return nil @@ -235,13 +235,13 @@ func FilesFromFS(ctx context.Context, fsys fs.FS, options *FromFSOptions, filena // respecting rules defined by FilesFromDisk. nameOnDisk is the full filename on disk // which is expected to be prefixed by rootOnDisk (according to fs.WalkDirFunc godoc) // and which will be placed into a folder rootInArchive in the archive. -func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive, separator string) string { +func nameOnDiskToNameInArchive(nameOnDisk, rootOnDisk, rootInArchive string, separator rune) string { // These manipulations of rootInArchive could be done just once instead of on // every walked file since they don't rely on nameOnDisk which is the only // variable that changes during the walk, but combining all the logic into this // one function is easier to reason about and test. I suspect the performance // penalty is insignificant. - if strings.HasSuffix(rootOnDisk, separator) { + if strings.HasSuffix(rootOnDisk, string(separator)) { // "map keys that end in a separator will enumerate contents only, // without adding the folder itself to the archive." rootInArchive = trimTopDir(rootInArchive) diff --git a/archives_test.go b/archives_test.go index a6ce116..3c6af66 100644 --- a/archives_test.go +++ b/archives_test.go @@ -263,7 +263,7 @@ func TestNameOnDiskToNameInArchive(t *testing.T) { continue } - actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive, string(filepath.Separator)) + actual := nameOnDiskToNameInArchive(tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive, filepath.Separator) if actual != tc.expect { t.Errorf("Test %d: Got '%s' but expected '%s' (nameOnDisk=%s rootOnDisk=%s rootInArchive=%s)", i, actual, tc.expect, tc.nameOnDisk, tc.rootOnDisk, tc.rootInArchive) From 42e64a71785f67584883da9149d9a3f50c3e921e Mon Sep 17 00:00:00 2001 From: Joonas Bergius Date: Sat, 6 Dec 2025 10:36:43 -0600 Subject: [PATCH 4/5] chore: drop Go 1.24 Signed-off-by: Joonas Bergius --- .github/workflows/macos-latest.yml | 2 +- .github/workflows/ubuntu-latest.yml | 2 +- .github/workflows/windows-latest.yml | 2 +- go.mod | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/macos-latest.yml b/.github/workflows/macos-latest.yml index 9a1108b..6a14ce4 100644 --- a/.github/workflows/macos-latest.yml +++ b/.github/workflows/macos-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.24, 1.25] + go-version: [1.25] runs-on: macos-latest steps: - name: Install Go diff --git a/.github/workflows/ubuntu-latest.yml b/.github/workflows/ubuntu-latest.yml index 4ec9ee4..29d7f04 100644 --- a/.github/workflows/ubuntu-latest.yml +++ b/.github/workflows/ubuntu-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.24, 1.25] + go-version: [1.25] runs-on: ubuntu-latest steps: - name: Install Go diff --git a/.github/workflows/windows-latest.yml b/.github/workflows/windows-latest.yml index fb969af..4694316 100644 --- a/.github/workflows/windows-latest.yml +++ b/.github/workflows/windows-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.24, 1.25] + go-version: [1.25] runs-on: windows-latest steps: - name: Install Go diff --git a/go.mod b/go.mod index efe61a9..43884ec 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/mholt/archives -go 1.24.0 +go 1.25.0 -toolchain go1.24.6 +toolchain go1.25.5 require ( github.com/andybalholm/brotli v1.2.0 From 8290164fdcc05793ab3c834eefb67efc8d8c57f9 Mon Sep 17 00:00:00 2001 From: Joonas Bergius Date: Sat, 6 Dec 2025 13:34:34 -0600 Subject: [PATCH 5/5] feat: add option to follow symlinks in FilesFromFS Signed-off-by: Joonas Bergius --- archives.go | 71 +++++++++++++++++++++++++++++++++++++++++++++- archives_test.go | 73 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 138 insertions(+), 6 deletions(-) diff --git a/archives.go b/archives.go index 61a59d0..4728b07 100644 --- a/archives.go +++ b/archives.go @@ -202,7 +202,32 @@ func FilesFromFS(ctx context.Context, fsys fs.FS, options *FromFSOptions, filena // handle symbolic links var linkTarget string if isSymlink(info) { - return fmt.Errorf("symlinks are not supported for fs.FS") + if options != nil && options.FollowSymlinks { + originalFilename := filename + filename, info, err = followSymlinkFS(fsys, filename) + if err != nil { + return err + } + if info.IsDir() { + subFsys, err := fs.Sub(fsys, filename) + if err != nil { + return fmt.Errorf("getting subtree to symlink directory %s dereferenced to %s: %w", originalFilename, filename, err) + } + symlinkDirFiles, err := FilesFromFS(ctx, subFsys, options, map[string]string{filename: nameInArchive}) + if err != nil { + return fmt.Errorf("getting files from symlink directory %s dereferenced to %s: %w", originalFilename, filename, err) + } + + files = append(files, symlinkDirFiles...) + return nil + } + } else { + // preserve symlinks + linkTarget, err = fs.ReadLink(fsys, filename) + if err != nil { + return fmt.Errorf("%s: ReadLink: %w", filename, err) + } + } } // handle file attributes @@ -313,6 +338,11 @@ type FromDiskOptions struct { // FromFSOptions specifies various options for gathering files from a [fs.FS]. type FromFSOptions struct { + // If true, symbolic links will be dereferenced, meaning that + // the link will not be added as a link, but what the link + // points to will be added as a file. + FollowSymlinks bool + // If true, some file attributes will not be preserved. // Name, size, type, and permissions will still be preserved. ClearAttributes bool @@ -465,3 +495,42 @@ func followSymlink(filename string) (string, os.FileInfo, error) { filename = linkPath } } + +// followSymlinkFS follows a symlink within the provided FS until it finds +// a non-symlink, returning the target path, file info, and any error that +// occurs. +// It also checks for symlink loops and maximum depth. +func followSymlinkFS(fsys fs.FS, filename string) (string, os.FileInfo, error) { + visited := make(map[string]bool) + visited[filename] = true + // While not necessarily applicable to every possible [fs.ReadLinkFS] + // implementation, this seems like a reasonable limit to impose. + const maxDepth = 40 + + for { + linkPath, err := fs.ReadLink(fsys, filename) + if err != nil { + return "", nil, fmt.Errorf("%s: ReadLink: %w", filename, err) + } + info, err := fs.Lstat(fsys, linkPath) + if err != nil { + return "", nil, fmt.Errorf("%s: statting dereferenced symlink: %w", filename, err) + } + + // Not a symlink, we've found the target, return it + if info.Mode()&os.ModeSymlink == 0 { + return linkPath, info, nil + } + + if visited[linkPath] { + return "", nil, fmt.Errorf("%s: symlink loop", filename) + } + + if len(visited) >= maxDepth { + return "", nil, fmt.Errorf("%s: maximum symlink depth (%d) exceeded", filename, maxDepth) + } + + visited[linkPath] = true + filename = linkPath + } +} diff --git a/archives_test.go b/archives_test.go index 3c6af66..7b11b07 100644 --- a/archives_test.go +++ b/archives_test.go @@ -636,16 +636,79 @@ func TestFilesFromFS(t *testing.T) { } }) - t.Run("returns an error when fs.FS contains a symlink", func(t *testing.T) { + t.Run("supports symlinks", func(t *testing.T) { + symlinkName := "symlink" + symlinkTarget := "dir/symlink-target.file" testfs = fstest.MapFS{ - "symlink": &fstest.MapFile{ + symlinkName: &fstest.MapFile{ Mode: fs.ModeSymlink, + Data: []byte(symlinkTarget), + }, + symlinkTarget: &fstest.MapFile{ + Mode: 0o644, + Data: []byte("symlink-target"), + ModTime: time.Now(), + Sys: nil, }, } - _, err := FilesFromFS(t.Context(), testfs, nil, nil) - if err == nil { - t.Error("expected to get an error, but got nil.") + files, err := FilesFromFS(t.Context(), testfs, nil, nil) + if err != nil { + t.Fatal(err) + } + + symlinkFound := false + for _, f := range files { + if f.NameInArchive != symlinkName { + continue + } + symlinkFound = true + if f.LinkTarget != symlinkTarget { + t.Fatalf("expected file.LinkTarget to be %q, but got %q", symlinkTarget, f.LinkTarget) + } + } + + if !symlinkFound { + t.Fatal("expected a symlink to be found, but it was not") + } + }) + + t.Run("supports following symlinks", func(t *testing.T) { + symlinkName := "symlink" + symlinkTarget := "dir/symlink-target.file" + testfs = fstest.MapFS{ + symlinkName: &fstest.MapFile{ + Mode: fs.ModeSymlink, + Data: []byte(symlinkTarget), + }, + symlinkTarget: &fstest.MapFile{ + Mode: 0o644, + Data: []byte("symlink-target"), + ModTime: time.Now(), + Sys: nil, + }, + } + + options := &FromFSOptions{FollowSymlinks: true} + files, err := FilesFromFS(t.Context(), testfs, options, nil) + if err != nil { + t.Fatal(err) + } + + symlinkFound := false + for _, f := range files { + if f.NameInArchive != symlinkName { + continue + } + + symlinkFound = true + if isSymlink(f.FileInfo) { + t.Fatal("expected symlink to be deferenced, but it was not") + } + } + + if !symlinkFound { + t.Fatal("expected a symlink to be found, but it was not") } }) }