From 048739e1a4d510fc39a932269ca1de08942d1a93 Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Mon, 26 Jan 2026 19:13:06 +0000 Subject: [PATCH] image/docker/internal/tarfile: Fix size reporting for symlink layers When a layer path in a docker-archive points at a symlink header, prepareLayerData recorded the symlink header size (often 0) while GetBlob followed the symlink and returned the target file bytes. Resolve a single symlink when determining the layer size so the reported size matches the bytes returned by GetBlob, and add a regression test. Signed-off-by: Oleh Konko --- image/docker/internal/tarfile/reader.go | 27 +++++----- image/docker/internal/tarfile/src.go | 42 ++++++++++----- image/docker/internal/tarfile/src_test.go | 63 +++++++++++++++++++++++ 3 files changed, 106 insertions(+), 26 deletions(-) diff --git a/image/docker/internal/tarfile/reader.go b/image/docker/internal/tarfile/reader.go index 12e656af41..7e303ef83e 100644 --- a/image/docker/internal/tarfile/reader.go +++ b/image/docker/internal/tarfile/reader.go @@ -184,21 +184,21 @@ func (t *tarReadCloser) Close() error { return t.backingFile.Close() } -// openTarComponent returns a ReadCloser for the specific file within the archive. +// openTarComponent returns a ReadCloser for the specific file within the archive, and its tar header. // This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers), // and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough. // It is safe to call this method from multiple goroutines simultaneously. // The caller should call .Close() on the returned stream. -func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) { +func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, *tar.Header, error) { // This is only a sanity check; if anyone did concurrently close ra, this access is technically // racy against the write in .Close(). if r.path == "" { - return nil, errors.New("Internal error: trying to read an already closed tarfile.Reader") + return nil, nil, errors.New("Internal error: trying to read an already closed tarfile.Reader") } f, err := os.Open(r.path) if err != nil { - return nil, err + return nil, nil, err } succeeded := false defer func() { @@ -209,32 +209,31 @@ func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) { tarReader, header, err := findTarComponent(f, componentPath) if err != nil { - return nil, err + return nil, nil, err } if header == nil { - return nil, os.ErrNotExist + return nil, nil, os.ErrNotExist } - if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested - // We follow only one symlink; so no loops are possible. + if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // We follow only one symlink; so no loops are possible. if _, err := f.Seek(0, io.SeekStart); err != nil { - return nil, err + return nil, nil, err } // The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive, // so we don't care. tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname)) if err != nil { - return nil, err + return nil, nil, err } if header == nil { - return nil, os.ErrNotExist + return nil, nil, os.ErrNotExist } } if !header.FileInfo().Mode().IsRegular() { - return nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name) + return nil, nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name) } succeeded = true - return &tarReadCloser{Reader: tarReader, backingFile: f}, nil + return &tarReadCloser{Reader: tarReader, backingFile: f}, header, nil } // findTarComponent returns a header and a reader matching componentPath within inputFile, @@ -260,7 +259,7 @@ func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, * // readTarComponent returns full contents of componentPath. // It is safe to call this method from multiple goroutines simultaneously. func (r *Reader) readTarComponent(path string, limit int) ([]byte, error) { - file, err := r.openTarComponent(path) + file, _, err := r.openTarComponent(path) if err != nil { return nil, fmt.Errorf("loading tar component %q: %w", path, err) } diff --git a/image/docker/internal/tarfile/src.go b/image/docker/internal/tarfile/src.go index 56421afa13..97320ca00c 100644 --- a/image/docker/internal/tarfile/src.go +++ b/image/docker/internal/tarfile/src.go @@ -128,6 +128,33 @@ func (s *Source) TarManifest() []ManifestItem { return s.archive.Manifest } +func (s *Source) uncompressedLayerSize(layerPath string, h *tar.Header, layerReader io.Reader) (int64, error) { + underlying := io.NopCloser(layerReader) + if h.FileInfo().Mode()&os.ModeType == os.ModeSymlink { + var err error + underlying, h, err = s.archive.openTarComponent(layerPath) + if err != nil { + return 0, err + } + } + defer underlying.Close() + + uncompressedStream, isCompressed, err := compression.AutoDecompress(underlying) + if err != nil { + return 0, err + } + defer uncompressedStream.Close() + + if isCompressed { + uncompressedSize, err := io.Copy(io.Discard, uncompressedStream) + if err != nil { + return 0, err + } + return uncompressedSize, nil + } + return h.Size, nil +} + func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manifest.Schema2Image) (map[digest.Digest]*layerInfo, error) { // Collect layer data available in manifest and config. if len(tarManifest.Layers) != len(parsedConfig.RootFS.DiffIDs) { @@ -177,18 +204,9 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif // incorrectly report the size. Pretty critical, since tools like // umoci always compress layer blobs. Obviously we only bother with // the slower method of checking if it's compressed. - uncompressedStream, isCompressed, err := compression.AutoDecompress(t) + uncompressedSize, err := s.uncompressedLayerSize(layerPath, h, t) if err != nil { - return nil, fmt.Errorf("auto-decompressing %q to determine its size: %w", layerPath, err) - } - defer uncompressedStream.Close() - - uncompressedSize := h.Size - if isCompressed { - uncompressedSize, err = io.Copy(io.Discard, uncompressedStream) - if err != nil { - return nil, fmt.Errorf("reading %q to find its size: %w", layerPath, err) - } + return nil, fmt.Errorf("determining uncompressed size of %q: %w", layerPath, err) } li.size = uncompressedSize delete(unknownLayerSizes, layerPath) @@ -277,7 +295,7 @@ func (s *Source) GetBlob(ctx context.Context, info types.BlobInfo, cache types.B } if li, ok := s.knownLayers[info.Digest]; ok { // diffID is a digest of the uncompressed tarball, - underlyingStream, err := s.archive.openTarComponent(li.path) + underlyingStream, _, err := s.archive.openTarComponent(li.path) if err != nil { return nil, 0, err } diff --git a/image/docker/internal/tarfile/src_test.go b/image/docker/internal/tarfile/src_test.go index 4958037950..9de6baf1f1 100644 --- a/image/docker/internal/tarfile/src_test.go +++ b/image/docker/internal/tarfile/src_test.go @@ -1,12 +1,15 @@ package tarfile import ( + "archive/tar" "bytes" "context" + "encoding/json" "io" "strings" "testing" + digest "github.com/opencontainers/go-digest" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.podman.io/image/v5/manifest" @@ -64,3 +67,63 @@ func TestSourcePrepareLayerData(t *testing.T) { } } } + +func TestSourceGetBlobSymlinkLayerSizeMatchesBytesReturned(t *testing.T) { + ctx := context.Background() + cache := memory.New() + + layerBytes := []byte("not empty") + diffID := digest.FromBytes(layerBytes) + configBytes := []byte(`{"rootfs":{"type":"layers","diff_ids":["` + diffID.String() + `"]}}`) + + manifestBytes, err := json.Marshal([]ManifestItem{{Config: "config.json", Layers: []string{"layer-link.tar"}}}) + require.NoError(t, err) + + var tarfileBuffer bytes.Buffer + tw := tar.NewWriter(&tarfileBuffer) + for _, entry := range []struct { + name string + body []byte + }{ + {name: "manifest.json", body: manifestBytes}, + {name: "config.json", body: configBytes}, + {name: "layer.tar", body: layerBytes}, + } { + err = tw.WriteHeader(&tar.Header{ + Name: entry.name, + Mode: 0o644, + Size: int64(len(entry.body)), + }) + require.NoError(t, err) + _, err = tw.Write(entry.body) + require.NoError(t, err) + } + + err = tw.WriteHeader(&tar.Header{ + Name: "layer-link.tar", + Typeflag: tar.TypeSymlink, + Linkname: "layer.tar", + Mode: 0o777, + }) + require.NoError(t, err) + + err = tw.Close() + require.NoError(t, err) + + reader, err := NewReaderFromStream(nil, &tarfileBuffer) + require.NoError(t, err) + src := NewSource(reader, true, "transport name", nil, -1) + defer src.Close() + + layerStream, reportedSize, err := src.GetBlob(ctx, types.BlobInfo{ + Digest: diffID, + Size: -1, + }, cache) + require.NoError(t, err) + defer layerStream.Close() + + readBytes, err := io.ReadAll(layerStream) + require.NoError(t, err) + assert.Equal(t, layerBytes, readBytes) + assert.Equal(t, int64(len(layerBytes)), reportedSize) +}