From 46a88cc4aa520424253f8453389dace94a53dc65 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Sun, 8 Feb 2026 18:40:54 +0000 Subject: [PATCH 1/2] drivers: Add package-level documentation comments Add doc comments to the overlay, vfs, btrfs, and zfs storage driver packages describing their implementation approach: - overlay: Uses Linux OverlayFS with composefs option for integrity - vfs: Copies directories, attempting reflinks first for efficiency - btrfs: Uses native subvolumes and snapshots with qgroup quotas - zfs: Uses datasets and clones with mountpoint=legacy Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- storage/drivers/btrfs/btrfs.go | 5 +++++ storage/drivers/overlay/overlay.go | 4 ++++ storage/drivers/vfs/driver.go | 5 +++++ storage/drivers/zfs/zfs.go | 5 +++++ 4 files changed, 19 insertions(+) diff --git a/storage/drivers/btrfs/btrfs.go b/storage/drivers/btrfs/btrfs.go index aba898ed55..d20c2141bb 100644 --- a/storage/drivers/btrfs/btrfs.go +++ b/storage/drivers/btrfs/btrfs.go @@ -1,5 +1,10 @@ //go:build linux && cgo +// Package btrfs implements the btrfs storage driver for container images. +// It uses native btrfs copy-on-write via subvolumes and snapshots, storing +// layers as subvolumes under a 'subvolumes/' directory. Child layers are +// created as snapshots for true copy-on-write semantics. Storage quotas +// are supported via btrfs qgroups. package btrfs /* diff --git a/storage/drivers/overlay/overlay.go b/storage/drivers/overlay/overlay.go index 2eb720c188..7647b53ace 100644 --- a/storage/drivers/overlay/overlay.go +++ b/storage/drivers/overlay/overlay.go @@ -1,5 +1,9 @@ //go:build linux +// Package overlay implements the overlay storage driver for container images. +// It uses Linux OverlayFS to provide copy-on-write semantics, allowing efficient +// storage sharing between image layers. When enabled, composefs can be used as +// an optional mode for enhanced integrity verification of container images. 
package overlay import ( diff --git a/storage/drivers/vfs/driver.go b/storage/drivers/vfs/driver.go index b90c2046cf..1665a4bc13 100644 --- a/storage/drivers/vfs/driver.go +++ b/storage/drivers/vfs/driver.go @@ -1,3 +1,8 @@ +// Package vfs implements the VFS storage driver for container images. +// It copies directories to create layers, attempting reflinks (FICLONE) first +// for efficient copy-on-write on supporting filesystems, then falling back to +// copy_file_range and regular copying. This provides maximum filesystem +// compatibility while achieving storage efficiency on reflink-capable filesystems. package vfs import ( diff --git a/storage/drivers/zfs/zfs.go b/storage/drivers/zfs/zfs.go index b994278bb2..0e00ced553 100644 --- a/storage/drivers/zfs/zfs.go +++ b/storage/drivers/zfs/zfs.go @@ -1,5 +1,10 @@ //go:build linux || freebsd +// Package zfs implements the ZFS storage driver for container images. +// It uses ZFS datasets and clones for copy-on-write semantics. Each layer +// is stored as a dataset under the parent specified by zfs.fsname, with +// child layers created by snapshotting and cloning. Datasets use +// mountpoint=legacy so containers-storage controls mount operations. package zfs import ( From 6050320d0cf240d1918cdcf5bc95fed91d311b9c Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 9 Feb 2026 18:51:56 +0000 Subject: [PATCH 2/2] docs: Add driver implementation documentation Create dedicated documentation files for each storage driver: - containers-storage-driver-overlay.md (includes zstd:chunked and composefs) - containers-storage-driver-vfs.md - containers-storage-driver-btrfs.md - containers-storage-driver-zfs.md The composefs and zstd:chunked documentation is consolidated into the overlay driver doc since these are overlay-specific features. 
Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- storage/docs/containers-storage-composefs.md | 72 ---------------- .../docs/containers-storage-driver-btrfs.md | 31 +++++++ .../docs/containers-storage-driver-overlay.md | 86 +++++++++++++++++++ storage/docs/containers-storage-driver-vfs.md | 31 +++++++ storage/docs/containers-storage-driver-zfs.md | 33 +++++++ .../docs/containers-storage-zstd-chunked.md | 58 ------------- 6 files changed, 181 insertions(+), 130 deletions(-) delete mode 100644 storage/docs/containers-storage-composefs.md create mode 100644 storage/docs/containers-storage-driver-btrfs.md create mode 100644 storage/docs/containers-storage-driver-overlay.md create mode 100644 storage/docs/containers-storage-driver-vfs.md create mode 100644 storage/docs/containers-storage-driver-zfs.md delete mode 100644 storage/docs/containers-storage-zstd-chunked.md diff --git a/storage/docs/containers-storage-composefs.md b/storage/docs/containers-storage-composefs.md deleted file mode 100644 index f0fba3ca00..0000000000 --- a/storage/docs/containers-storage-composefs.md +++ /dev/null @@ -1,72 +0,0 @@ -# containers-storage 1 "August 2024" - -## NAME -containers-storage-composefs - Information about composefs and containers/storage - -## DESCRIPTION - -To enable composefs at a baseline requires the following configuration in `containers-storage.conf`: - -``` -[storage.options.overlay] -use_composefs = "true" -``` - -This value must be a "string bool", it cannot be a native TOML boolean. - -However at the current time, composefs requires zstd:chunked images, so first -you must be sure that zstd:chunked is enabled. -For more, see [zstd:chunked](containers-storage-zstd-chunked.md). - -Additionally, not many images are in zstd:chunked format. In order to bridge this gap, -`convert_images = "true"` can be specified which does a dynamic conversion; this adds -latency to image pulls. 
- -Putting these things together, the following is required (in addition to the above config). - -``` -[storage.options.pull_options] -convert_images = "true" -``` - -This value must be a "string bool", it cannot be a native TOML boolean. - -## IMPLEMENTATION - -As is implied by the setting `use_composefs = "true"`, currently composefs -is implemented as an "option" for the `overlay` driver. Some file formats -remain unchanged and are inherited from the overlay driver, even when -composefs is in use. The primary differences are enumerated below. - -The `diff/` directory for each layer is no longer a plain unpacking of the -tarball, but becomes an "object hash directory", where each filename is the -sha256 of its contents. This `diff/` directory is the backing store for a -`composefs-data/composefs.blob` created for each layer which is the composefs -"superblock" containing all the non-regular-file content (i.e. metadata) from -the tarball. - -As with `zstd:chunked`, existing layers are scanned for matching objects, and reused -(via hardlink or reflink as configured) if objects with a matching "full sha256" are -found. - -There is currently no support for enforced integrity with composefs; -an attempt is made to enable fsverity for the backing files and the composefs file, -but it is not an error if unsupported. There is as of yet no defined mechanism to -verify the fsverity digest of the composefs block before mounting; some work on that is -ongoing. - -In order to mount a layer (or a full image, with all of its dependencies), any -layer that has a composefs blob is mounted and included in the "final" overlayfs -stack. This is optional - any layers that are not in "composefs format" but -in the "default overlay" (unpacked) format will be reused as is. - -## BUGS - -https://github.com/containers/storage/issues?q=is%3Aissue+is%3Aopen+label%3Aarea%2Fcomposefs - -## FOOTNOTES -The Containers Storage project is committed to inclusivity, a core value of open source. 
-The `master` and `slave` mount propagation terminology is used in this repository. -This language is problematic and divisive, and should be changed. -However, these terms are currently used within the Linux kernel and must be used as-is at this time. -When the kernel maintainers rectify this usage, Containers Storage will follow suit immediately. diff --git a/storage/docs/containers-storage-driver-btrfs.md b/storage/docs/containers-storage-driver-btrfs.md new file mode 100644 index 0000000000..93e79f3657 --- /dev/null +++ b/storage/docs/containers-storage-driver-btrfs.md @@ -0,0 +1,31 @@ +# containers-storage 1 "February 2026" + +## NAME +containers-storage-driver-btrfs - The btrfs storage driver + +## DESCRIPTION + +The btrfs driver uses native btrfs copy-on-write via subvolumes and snapshots. + +## IMPLEMENTATION + +The on-disk file layout is an internal implementation detail and may change between versions. The only stable interface is the Go library API. + +Requires a btrfs filesystem. Layers are stored as subvolumes under `btrfs/subvolumes/`. New empty layers are created as subvolumes; child layers are created as btrfs snapshots, providing true CoW semantics. Quotas are supported via btrfs qgroups. Set `btrfs.min_space` to enable quota enforcement. + +Reference: `drivers/btrfs/btrfs.go` + +## RUNTIME + +Like VFS, there is no mount involved. Btrfs subvolumes are accessible as regular directories, so `Get()` returns the subvolume path directly. If a quota was configured, the qgroup limit is applied at this point. `Put()` is a no-op. + +## BUGS + +https://github.com/containers/storage/issues?q=is%3Aissue+is%3Aopen+label%3Aarea%2Fbtrfs + +## FOOTNOTES +The Containers Storage project is committed to inclusivity, a core value of open source. +The `master` and `slave` mount propagation terminology is used in this repository. +This language is problematic and divisive, and should be changed. 
+However, these terms are currently used within the Linux kernel and must be used as-is at this time. +When the kernel maintainers rectify this usage, Containers Storage will follow suit immediately. diff --git a/storage/docs/containers-storage-driver-overlay.md b/storage/docs/containers-storage-driver-overlay.md new file mode 100644 index 0000000000..20ad329701 --- /dev/null +++ b/storage/docs/containers-storage-driver-overlay.md @@ -0,0 +1,86 @@ +# containers-storage 1 "February 2026" + +## NAME +containers-storage-driver-overlay - The overlay storage driver + +## DESCRIPTION + +The overlay driver uses Linux OverlayFS for copy-on-write semantics. This is the default and recommended driver for most use cases. See [containers-storage.conf.5.md](containers-storage.conf.5.md) for configuration options. + +## IMPLEMENTATION + +The on-disk file layout is an internal implementation detail and may change between versions. The only stable interface is the Go library API. +The description below is intended to aid debugging and recovery, but changing content directly is not supported. + +The top-level overlay directory holds layers keyed by a [chain ID](https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid) which identifies the precise sequence of parent layers leading to this one. A layer with the same DiffID can have multiple physical objects in this directory if it was created in different contexts (e.g. with or without zstd:chunked). + +Each layer has at least a `diff` directory and `link` file. If there are lower layers, the layer also has a `lower` file, `merged` directory, and `work` directory. The `diff` directory has the upper layer of the overlay and is used to capture any changes to the layer. The `lower` file contains all the lower layer mounts separated by `:` and ordered from uppermost to lowermost layers. The overlay itself is mounted in the `merged` directory, and the `work` dir is needed for overlay to work. 
+ +The `link` file for each layer contains a unique string for the layer. Under the `l/` directory at the root there will be a symbolic link with that unique string pointing to the `diff` directory for the layer. The symbolic links are used to reference lower layers in the `lower` file and on mount. The links are used to shorten the total length of a layer reference without requiring changes to the layer identifier or root directory. Mounts are always performed relative to the root and reference the symbolic links, so that the list of lower directories can fit in a single page when making the mount syscall. + +A hard upper limit of 500 lower layers is enforced. + +The `overlay-layers/` directory alongside the per-layer directories contains metadata managed by the storage library. Each layer has a `${layerid}.tar-split.gz` file preserving the original tar stream structure (without file content) so that the original archive can be reconstructed exactly from the unpacked `diff/`. The directory also contains `layers.json` with global layer metadata and `layers.lock` for concurrency control. + +The `overlay-containers/` directory holds running container state: `containers.json` for metadata and `containers.lock` for concurrency control. + +Reference: `drivers/overlay/overlay.go` + +## RUNTIME + +When a container needs its filesystem, the driver performs a `mount(2)` with type `overlay`, passing the layer's `diff` directory as the upperdir and all parent layers' `diff` directories as lowerdirs. The kernel's overlayfs merges these at access time — no data is copied, and layers remain independent on disk. Writes go to the upperdir via copy-up. The mount is placed at the layer's `merged` directory, and the `work` directory is used internally by overlayfs for atomic operations like rename. + +If a mount program is configured (e.g. `fuse-overlayfs` for rootless operation), it is invoked instead of the `mount(2)` syscall.
When the mount option string exceeds the kernel's page size limit, the driver forks a child process that `chdir`s into the storage root and uses relative paths to shorten the options. + +On `Put()`, the overlayfs mount is unmounted. + +### zstd:chunked + +`zstd:chunked` is a variant of the `application/vnd.oci.image.layer.v1.tar+zstd` media type that uses zstd skippable frames to include a table of contents with SHA-256 digests and offsets of individual file chunks. This allows a client to use HTTP range requests to fetch only the content that is not already present locally. + +Note: The zstd:chunked format is not yet standardized, although standardizing it is an eventual goal. + +Each layer has an associated big data key `chunked-manifest-cache` containing index metadata in a binary format suitable for mmap(). When pulling, existing layers are scanned for files with matching digests. Matching files are hardlinked if `use_hardlinks = "true"`, otherwise reflinked (or copied if reflinks are unsupported). + +Configuration (support is enabled by default in the code): + +``` +[storage.options.pull_options] +enable_partial_images = "true" +``` + +Configuration values must be string booleans (quoted), not native TOML booleans. + +Reference: `pkg/chunked/internal/compression.go` + +### composefs + +composefs provides an immutable filesystem layer with optional integrity verification. + +Configuration: + +``` +[storage.options.overlay] +use_composefs = "true" +``` + +Configuration values must be string booleans (quoted), not native TOML booleans. + +composefs requires zstd:chunked images. For non-zstd:chunked images, set `convert_images = "true"` in `[storage.options.pull_options]` to enable dynamic conversion during pulls. + +With composefs enabled, the `diff/` directory becomes an object hash directory where each filename is the sha256 of its contents. Each layer has a `composefs-data/composefs.blob` file containing the composefs superblock with all metadata.
+ +Existing layers are scanned for matching objects and reused via hardlink or reflink. An attempt is made to enable fsverity on backing files, but this is best-effort only; there is currently no support for enforced integrity verification. + +Layers with or without composefs format can be mixed in the same overlay stack. Layers with a composefs blob are mounted and included in the final overlayfs stack, while layers without composefs format are reused as-is. + +## BUGS + +https://github.com/containers/storage/issues?q=is%3Aissue+is%3Aopen+label%3Aarea%2Foverlay + +## FOOTNOTES +The Containers Storage project is committed to inclusivity, a core value of open source. +The `master` and `slave` mount propagation terminology is used in this repository. +This language is problematic and divisive, and should be changed. +However, these terms are currently used within the Linux kernel and must be used as-is at this time. +When the kernel maintainers rectify this usage, Containers Storage will follow suit immediately. diff --git a/storage/docs/containers-storage-driver-vfs.md b/storage/docs/containers-storage-driver-vfs.md new file mode 100644 index 0000000000..4fea1928d9 --- /dev/null +++ b/storage/docs/containers-storage-driver-vfs.md @@ -0,0 +1,31 @@ +# containers-storage 1 "February 2026" + +## NAME +containers-storage-driver-vfs - The VFS storage driver + +## DESCRIPTION + +The VFS driver copies directories to create layers. No kernel overlay filesystem support is required. + +## IMPLEMENTATION + +The on-disk file layout is an internal implementation detail and may change between versions. The only stable interface is the Go library API. + +Layers are stored under `vfs/dir/`. When creating a layer from a parent, the entire parent directory is copied. The copy uses reflinks (FICLONE) if supported by the filesystem, falling back to regular copying otherwise. The VFS driver works on any filesystem but is storage-inefficient without reflink support. 
+ +Reference: `drivers/vfs/driver.go`, `drivers/copy/copy_linux.go` + +## RUNTIME + +There is no mount involved. When a container needs its filesystem, `Get()` simply returns the layer's directory path. All layer merging happened at create time when the parent was copied, so the directory is already a complete filesystem tree. `Put()` is a no-op since there is nothing to unmount. + +## BUGS + +https://github.com/containers/storage/issues?q=is%3Aissue+is%3Aopen+label%3Aarea%2Fvfs + +## FOOTNOTES +The Containers Storage project is committed to inclusivity, a core value of open source. +The `master` and `slave` mount propagation terminology is used in this repository. +This language is problematic and divisive, and should be changed. +However, these terms are currently used within the Linux kernel and must be used as-is at this time. +When the kernel maintainers rectify this usage, Containers Storage will follow suit immediately. diff --git a/storage/docs/containers-storage-driver-zfs.md b/storage/docs/containers-storage-driver-zfs.md new file mode 100644 index 0000000000..ddec0e210b --- /dev/null +++ b/storage/docs/containers-storage-driver-zfs.md @@ -0,0 +1,33 @@ +# containers-storage 1 "February 2026" + +## NAME +containers-storage-driver-zfs - The ZFS storage driver + +## DESCRIPTION + +The ZFS driver uses ZFS datasets and clones for copy-on-write semantics. + +## IMPLEMENTATION + +The on-disk file layout is an internal implementation detail and may change between versions. The only stable interface is the Go library API. + +Requires `/dev/zfs` and the `zfs` command. Configure the parent dataset via the `zfs.fsname` option. + +Layers are stored as datasets under `zfs.fsname` (e.g., `tank/containers/storage/$id`). Mountpoints are at `zfs/graph/`. All datasets use `mountpoint=legacy` so containers-storage controls mounts directly. New root layers are created with `zfs create`. 
Child layers are created by snapshotting the parent dataset and cloning the snapshot; the snapshot is marked for deferred deletion after cloning. + +Reference: `drivers/zfs/zfs.go` + +## RUNTIME + +When a container needs its filesystem, the driver performs `mount(2)` with type `zfs` to mount the dataset at a path under `zfs/graph/`. Because all datasets use `mountpoint=legacy`, ZFS does not auto-mount them — the driver controls when and where each dataset is mounted. A reference counter tracks multiple users of the same mountpoint. On `Put()`, the last reference triggers an unmount. + +## BUGS + +https://github.com/containers/storage/issues?q=is%3Aissue+is%3Aopen+label%3Aarea%2Fzfs + +## FOOTNOTES +The Containers Storage project is committed to inclusivity, a core value of open source. +The `master` and `slave` mount propagation terminology is used in this repository. +This language is problematic and divisive, and should be changed. +However, these terms are currently used within the Linux kernel and must be used as-is at this time. +When the kernel maintainers rectify this usage, Containers Storage will follow suit immediately. diff --git a/storage/docs/containers-storage-zstd-chunked.md b/storage/docs/containers-storage-zstd-chunked.md deleted file mode 100644 index de60a20b37..0000000000 --- a/storage/docs/containers-storage-zstd-chunked.md +++ /dev/null @@ -1,58 +0,0 @@ -# containers-storage 1 "August 2024" - -## NAME -containers-storage-zstd-chunked - Information about zstd:chunked - -## DESCRIPTION - -The traditional format for container image layers is [application/vnd.oci.image.layer.v1.tar+gzip](https://github.com/opencontainers/image-spec/blob/main/layer.md#gzip-media-types). -More recently, the standard was augmented with zstd: [application/vnd.oci.image.layer.v1.tar+zstd](https://github.com/opencontainers/image-spec/blob/main/layer.md#zstd-media-types) -which is a more modern and efficient compression format. 
- -`zstd:chunked` is a variant of the `application/vnd.oci.image.layer.v1.tar+zstd` media type that -uses zstd [skippable frames](https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#skippable-frames) -to include additional metadata (especially a "table of contents") that includes the SHA-256 and offsets of individual chunks of files. -Additionally chunks are compressed separately. This allows a client to dynamically fetch only content which -it doesn't already have using HTTP range requests. - -At the time of this writing, support for this is enabled by default in the code. - -You can explicitly enable or disable zstd:chunked with following changes to `containers-storage.conf`: - -``` -[storage.options.pull_options] -enable_partial_images = "true" | "false" -``` - -Note that the value of this field must be a "string bool", it cannot be a native TOML boolean. - -## IMPLEMENTATION - -Each layer has an associated "big data" key called `chunked-manifest-cache` that -is a custom binary format suitable for mmap() that contains index metadata -for each layer with the full sha256 digest of each file plus its "chunks" (as -computed by `zstd:chunked`). - -When any image is pulled all existing other layers are scanned using `chunked-manifest-cache` to see if they contain a file with a matching digest. If one is found, the other file is hardlinked if `use_hardlinks = "true`", -otherwise it is reflinked (if supported by the filesystem, or a full physical copy -is made). There is a best-effort attempt to enable fsverity on the file if configured -(see ). - -For more information, at the current time the file with the most information is [pkg/chunked/internal/compression.go](https://github.com/containers/storage/blob/39d469c34c96db67062e25954bc9d18f2bf6dae3/pkg/chunked/internal/compression.go). -The above is a permanent link for stability, but be sure to check to see if there are newer changes too. 
- -## STANDARDIZATION - -At the current time the format is not officially standardized or documented beyond -the comments and code in the reference implementation. - -## BUGS - -- https://github.com/containers/storage/issues?q=is%3Aissue+label%3Aarea%2Fzstd%3Achunked+is%3Aopen - -## FOOTNOTES -The Containers Storage project is committed to inclusivity, a core value of open source. -The `master` and `slave` mount propagation terminology is used in this repository. -This language is problematic and divisive, and should be changed. -However, these terms are currently used within the Linux kernel and must be used as-is at this time. -When the kernel maintainers rectify this usage, Containers Storage will follow suit immediately.