27 changes: 13 additions & 14 deletions image/docker/internal/tarfile/reader.go
@@ -184,21 +184,21 @@ func (t *tarReadCloser) Close() error {
return t.backingFile.Close()
}

// openTarComponent returns a ReadCloser for the specific file within the archive.
// openTarComponent returns a ReadCloser for the specific file within the archive, and its tar header.
// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers),
// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough.
// It is safe to call this method from multiple goroutines simultaneously.
// The caller should call .Close() on the returned stream.
func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {
func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, *tar.Header, error) {
// This is only a sanity check; if anyone did concurrently close ra, this access is technically
// racy against the write in .Close().
if r.path == "" {
return nil, errors.New("Internal error: trying to read an already closed tarfile.Reader")
return nil, nil, errors.New("Internal error: trying to read an already closed tarfile.Reader")
}

f, err := os.Open(r.path)
if err != nil {
return nil, err
return nil, nil, err
}
succeeded := false
defer func() {
@@ -209,32 +209,31 @@ func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {

tarReader, header, err := findTarComponent(f, componentPath)
if err != nil {
return nil, err
return nil, nil, err
}
if header == nil {
return nil, os.ErrNotExist
return nil, nil, os.ErrNotExist
}
if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested
// We follow only one symlink; so no loops are possible.
if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // We follow only one symlink; so no loops are possible.
if _, err := f.Seek(0, io.SeekStart); err != nil {
return nil, err
return nil, nil, err
}
// The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive,
// so we don't care.
tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname))
if err != nil {
return nil, err
return nil, nil, err
}
if header == nil {
return nil, os.ErrNotExist
return nil, nil, os.ErrNotExist
}
}

if !header.FileInfo().Mode().IsRegular() {
return nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name)
return nil, nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name)
}
succeeded = true
return &tarReadCloser{Reader: tarReader, backingFile: f}, nil
return &tarReadCloser{Reader: tarReader, backingFile: f}, header, nil
}

// findTarComponent returns a header and a reader matching componentPath within inputFile,
@@ -260,7 +259,7 @@ func findTarComponent(inputFile io.Reader, componentPath string) (*tar.Reader, *
// readTarComponent returns full contents of componentPath.
// It is safe to call this method from multiple goroutines simultaneously.
func (r *Reader) readTarComponent(path string, limit int) ([]byte, error) {
file, err := r.openTarComponent(path)
file, _, err := r.openTarComponent(path)
if err != nil {
return nil, fmt.Errorf("loading tar component %q: %w", path, err)
}
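A minimal in-package sketch (not part of this diff) of how the updated three-value signature can be consumed; describeComponent is a hypothetical helper, and the returned header lets a caller read metadata such as the stored size without consuming the stream:

func describeComponent(r *Reader, componentPath string) (int64, error) {
	rc, hdr, err := r.openTarComponent(componentPath)
	if err != nil {
		return 0, err
	}
	defer rc.Close()
	// hdr describes the (possibly symlink-resolved) regular file inside the
	// archive, so hdr.Size is available without reading the stream itself.
	return hdr.Size, nil
}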
42 changes: 30 additions & 12 deletions image/docker/internal/tarfile/src.go
@@ -128,6 +128,33 @@ func (s *Source) TarManifest() []ManifestItem {
return s.archive.Manifest
}

func (s *Source) uncompressedLayerSize(layerPath string, h *tar.Header, layerReader io.Reader) (int64, error) {
underlying := io.NopCloser(layerReader)
if h.FileInfo().Mode()&os.ModeType == os.ModeSymlink {
var err error
underlying, h, err = s.archive.openTarComponent(layerPath)
if err != nil {
return 0, err
}
}
defer underlying.Close()

uncompressedStream, isCompressed, err := compression.AutoDecompress(underlying)
if err != nil {
return 0, err
}
defer uncompressedStream.Close()

if isCompressed {
uncompressedSize, err := io.Copy(io.Discard, uncompressedStream)
if err != nil {
return 0, err
}
return uncompressedSize, nil
}
return h.Size, nil
}

func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manifest.Schema2Image) (map[digest.Digest]*layerInfo, error) {
// Collect layer data available in manifest and config.
if len(tarManifest.Layers) != len(parsedConfig.RootFS.DiffIDs) {
@@ -177,18 +204,9 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif
// incorrectly report the size. Pretty critical, since tools like
// umoci always compress layer blobs. Obviously we only bother with
// the slower method of checking if it's compressed.
uncompressedStream, isCompressed, err := compression.AutoDecompress(t)
uncompressedSize, err := s.uncompressedLayerSize(layerPath, h, t)
if err != nil {
return nil, fmt.Errorf("auto-decompressing %q to determine its size: %w", layerPath, err)
}
defer uncompressedStream.Close()

uncompressedSize := h.Size
if isCompressed {
uncompressedSize, err = io.Copy(io.Discard, uncompressedStream)
if err != nil {
return nil, fmt.Errorf("reading %q to find its size: %w", layerPath, err)
}
return nil, fmt.Errorf("determining uncompressed size of %q: %w", layerPath, err)
}
li.size = uncompressedSize
delete(unknownLayerSizes, layerPath)
@@ -277,7 +295,7 @@ func (s *Source) GetBlob(ctx context.Context, info types.BlobInfo, cache types.B
}

if li, ok := s.knownLayers[info.Digest]; ok { // diffID is a digest of the uncompressed tarball,
underlyingStream, err := s.archive.openTarComponent(li.path)
underlyingStream, _, err := s.archive.openTarComponent(li.path)
if err != nil {
return nil, 0, err
}
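For context, the size-probing pattern that uncompressedLayerSize relies on, shown in isolation as a standalone sketch. probeUncompressedSize, the sample main, and the exact pkg/compression import path are assumptions for illustration; AutoDecompress returns the decompressed stream plus a flag saying whether decompression actually happened, as used in the diff above.

package main

import (
	"fmt"
	"io"
	"os"

	"go.podman.io/image/v5/pkg/compression"
)

// probeUncompressedSize mirrors the pattern used by uncompressedLayerSize:
// auto-detect compression, count decompressed bytes only when the stream is
// actually compressed, and otherwise trust the size already known upfront.
func probeUncompressedSize(blob io.Reader, knownSize int64) (int64, error) {
	stream, isCompressed, err := compression.AutoDecompress(blob)
	if err != nil {
		return 0, err
	}
	defer stream.Close()
	if !isCompressed {
		return knownSize, nil // no need to read the blob when the size is already known
	}
	return io.Copy(io.Discard, stream)
}

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: probe <blob-file>")
		os.Exit(1)
	}
	f, err := os.Open(os.Args[1]) // any layer blob, compressed or not
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()
	fi, err := f.Stat()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	size, err := probeUncompressedSize(f, fi.Size())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("uncompressed size:", size)
}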
63 changes: 63 additions & 0 deletions image/docker/internal/tarfile/src_test.go
@@ -1,12 +1,15 @@
package tarfile

import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"io"
"strings"
"testing"

digest "github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.podman.io/image/v5/manifest"
@@ -64,3 +67,63 @@ func TestSourcePrepareLayerData(t *testing.T) {
}
}
}

func TestSourceGetBlobSymlinkLayerSizeMatchesBytesReturned(t *testing.T) {
ctx := context.Background()
cache := memory.New()

layerBytes := []byte("not empty")
diffID := digest.FromBytes(layerBytes)
configBytes := []byte(`{"rootfs":{"type":"layers","diff_ids":["` + diffID.String() + `"]}}`)

manifestBytes, err := json.Marshal([]ManifestItem{{Config: "config.json", Layers: []string{"layer-link.tar"}}})
require.NoError(t, err)

var tarfileBuffer bytes.Buffer
tw := tar.NewWriter(&tarfileBuffer)
for _, entry := range []struct {
name string
body []byte
}{
{name: "manifest.json", body: manifestBytes},
{name: "config.json", body: configBytes},
{name: "layer.tar", body: layerBytes},
} {
err = tw.WriteHeader(&tar.Header{
Name: entry.name,
Mode: 0o644,
Size: int64(len(entry.body)),
})
require.NoError(t, err)
_, err = tw.Write(entry.body)
require.NoError(t, err)
}

err = tw.WriteHeader(&tar.Header{
Name: "layer-link.tar",
Typeflag: tar.TypeSymlink,
Linkname: "layer.tar",
Mode: 0o777,
})
require.NoError(t, err)

err = tw.Close()
require.NoError(t, err)

reader, err := NewReaderFromStream(nil, &tarfileBuffer)
require.NoError(t, err)
src := NewSource(reader, true, "transport name", nil, -1)
defer src.Close()

layerStream, reportedSize, err := src.GetBlob(ctx, types.BlobInfo{
Digest: diffID,
Size: -1,
}, cache)
require.NoError(t, err)
defer layerStream.Close()

readBytes, err := io.ReadAll(layerStream)
require.NoError(t, err)
assert.Equal(t, layerBytes, readBytes)
assert.Equal(t, int64(len(layerBytes)), reportedSize)
}
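To run just the new test locally, something like the following should work, assuming the repository layout implied by the file paths in this diff:

go test -v -run TestSourceGetBlobSymlinkLayerSizeMatchesBytesReturned ./image/docker/internal/tarfile/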