diff --git a/.gitignore b/.gitignore index 66e519a..0bd4b04 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,7 @@ target docker-squash.iml **/image.tar **/tox.tar + +.cursor/* + +*.tar \ No newline at end of file diff --git a/README.rst b/README.rst index b40d3c5..a34f057 100644 --- a/README.rst +++ b/README.rst @@ -25,6 +25,8 @@ Features - Can squash from a selected layer to the end (not always possible, depends on the image) - Support for Docker 1.9 or newer (older releases may run perfectly fine too, try it!) - Squashed image can be loaded back to the Docker daemon or stored as tar archive somewhere +- Automatic detection of input type (Docker image name vs tar file path) +- Works without Docker daemon when processing tar files Installation ------------ @@ -49,34 +51,34 @@ Usage :: $ docker-squash -h - usage: cli.py [-h] [-v] [--version] [-d] [-f FROM_LAYER] [-t TAG] - [--tmp-dir TMP_DIR] [--output-path OUTPUT_PATH] - image + usage: docker-squash [-h] [-v] [--version] [-f FROM_LAYER] [-t TAG] [-m MESSAGE] [-c] [--tmp-dir TMP_DIR] + [--output-path OUTPUT_PATH] [--load-image [LOAD_IMAGE]] + image Docker layer squashing tool positional arguments: - image Image to be squashed - - optional arguments: - -h, --help show this help message and exit - -v, --verbose Verbose output - --version Show version and exit - -f FROM_LAYER, --from-layer FROM_LAYER - Number of layers to squash or ID of the layer (or image ID or image name) to squash from. - In case the provided value is an integer, specified number of layers will be squashed. - Every layer in the image will be squashed if the parameter is not provided. - -t TAG, --tag TAG Specify the tag to be used for the new image. If not specified no tag will be applied - -m MESSAGE, --message MESSAGE + image Image name or tar file path to be squashed. If a .tar file is provided, it will be processed without + requiring Docker daemon. 
+ + options: + -h, --help show this help message and exit + -v, --verbose Verbose output + --version Show version and exit + -f FROM_LAYER, --from-layer FROM_LAYER + Number of layers to squash or ID of the layer (or image ID or image name) to squash from. In case the + provided value is an integer, specified number of layers will be squashed. Every layer in the image will + be squashed if the parameter is not provided. + -t TAG, --tag TAG Specify the tag to be used for the squashed image (recommended). Without this, the squashed image will + have no repository tags to avoid overwriting the original image. + -m MESSAGE, --message MESSAGE Specify a commit message (comment) for the new image. - -c, --cleanup Remove source image from Docker after squashing - --tmp-dir TMP_DIR Temporary directory to be created and used. This will NOT be deleted afterwards for - easier debugging. - --output-path OUTPUT_PATH + -c, --cleanup Remove source image from Docker after squashing + --tmp-dir TMP_DIR Temporary directory to be created and used. This will NOT be deleted afterwards for easier debugging. + --output-path OUTPUT_PATH Path where the image may be stored after squashing. - --load-image [LOAD_IMAGE] + --load-image [LOAD_IMAGE] Whether to load the image into Docker daemon after squashing - Default: true Note that environment variables may be set as documented in `here `_. @@ -216,3 +218,122 @@ Let's confirm the image structure now: 6ee235cf4473 3 weeks ago /bin/sh -c #(nop) LABEL name=CentOS Base Imag 0 B 474c2ee77fa3 3 weeks ago /bin/sh -c #(nop) ADD file:72852fc7626d233343 196.6 MB 1544084fad81 6 months ago /bin/sh -c #(nop) MAINTAINER The CentOS Proje 0 B + +Working without Docker daemon +----------------------------- + +Sometimes you may want to squash an image without direct access to Docker daemon (e.g., in CI/CD pipelines, +air-gapped environments, or when Docker is not running). 
You can provide a tar file path directly as the ``image`` +parameter to process Docker images exported as tar files without requiring a Docker daemon connection. + +**Step 1**: Export the image to a tar file using ``docker save``: + +:: + + $ docker save -o source.tar jboss/wildfly:latest + +**Step 2**: Squash the image from the tar file. Let's squash the last 10 layers: + +Note: The tool automatically detects that ``source.tar`` is a tar file and processes it without Docker daemon. + +:: + + $ docker-squash --tag jboss/wildfly:squashed -f 10 --output-path squashed.tar --load-image false source.tar + 2025-08-20 07:58:45,338 tar_image.py:54 INFO Extracting tar image from source.tar + 2025-08-20 07:58:45,598 tar_image.py:73 INFO Detected OCI format image + 2025-08-20 07:58:45,599 tar_image.py:251 INFO Old image has 22 layers + 2025-08-20 07:58:45,599 tar_image.py:284 INFO Checking if squashing is necessary... + 2025-08-20 07:58:45,599 tar_image.py:298 INFO Attempting to squash last 10 layers... + 2025-08-20 07:58:45,599 tar_image.py:306 INFO Starting squashing process... + 2025-08-20 07:58:45,599 image.py:750 INFO Starting squashing for /tmp/docker-squash-7n3ui1ar/new/squashed/layer.tar... + 2025-08-20 07:58:47,713 image.py:775 INFO Squashing file '/tmp/docker-squash-7n3ui1ar/old/blobs/sha256/f26d32e28c292aba76defcdd67c267000d31a6ac3ebdab5c850aba90ef834927'... + 2025-08-20 07:58:49,041 image.py:923 INFO Squashing finished! 
+ 2025-08-20 07:58:49,953 tar_image.py:660 WARNING OCI output format not fully implemented - creating Docker format + 2025-08-20 07:58:49,953 tar_image.py:570 INFO Using user-specified tag: jboss/wildfly:squashed + 2025-08-20 07:58:50,028 tar_image.py:349 INFO Squashing completed successfully + 2025-08-20 07:58:50,028 tar_image.py:359 INFO Original image size: 382.24 MB + 2025-08-20 07:58:50,028 tar_image.py:360 INFO Squashed image size: 421.59 MB + 2025-08-20 07:58:50,028 tar_image.py:363 INFO If the squashed image is larger than original it means that there were no meaningful files to squash and it just added metadata. Are you sure you specified correct parameters? + 2025-08-20 07:58:50,028 cli.py:176 INFO New squashed image ID is sha256:7ebd48ca15f2e8d937a6bf3d77e0b865feddebd3ec8f11532d8a30c0000f2b67 + 2025-08-20 07:58:50,028 tar_image.py:766 INFO Exporting squashed image to squashed.tar + 2025-08-20 07:58:51,257 tar_image.py:776 INFO Export completed successfully + 2025-08-20 07:58:51,257 cli.py:191 INFO Done + +**Step 3**: Load the squashed image back into Docker: + +:: + + $ docker load -i squashed.tar + Loaded image: jboss/wildfly:squashed + +Now you can verify the squashed image structure: + +:: + + $ docker history jboss/wildfly:squashed + IMAGE CREATED CREATED BY SIZE COMMENT + a8c48d9906a7 About a minute ago 270MB Squashed layers + <missing> 4 years ago /bin/sh -c #(nop) USER jboss 0B + <missing> 4 years ago /bin/sh -c yum -y install java-11-openjdk-de… 239MB + <missing> 4 years ago /bin/sh -c #(nop) USER root 0B + <missing> 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B + <missing> 4 years ago /bin/sh -c #(nop) USER jboss 0B + <missing> 4 years ago /bin/sh -c #(nop) WORKDIR /opt/jboss 0B + <missing> 4 years ago /bin/sh -c groupadd -r jboss -g 1000 && user… 406kB + <missing> 4 years ago /bin/sh -c yum update -y && yum -y install x… 33.5MB + <missing> 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B + <missing> 5 years ago /bin/sh -c #(nop) CMD ["/bin/bash"] 0B + <missing> 5 years ago /bin/sh -c #(nop) LABEL org.label-schema.sc… 0B + <missing> 5 
years ago /bin/sh -c #(nop) ADD file:61908381d3142ffba… 222MB + +**Key advantages of tar mode:** + +- No Docker daemon required during squashing +- Works in CI/CD pipelines and restricted environments +- Supports both Docker format and OCI format images +- Maintains complete layer history compatibility +- Can process images on systems where Docker is not installed + +**Podman compatibility:** + +The squashed tar files are fully compatible with Podman. You can load them using: + +:: + + $ podman load -i squashed.tar + Getting image source signatures + Copying blob 8055a1084cfa done | + Copying blob 613be09ab3c0 done | + Copying blob 3fbe1e874b0d done | + Copying blob 869989761eb2 done | + Copying blob 115463be137a done | + Copying config 7ebd48ca15 done | + Writing manifest to image destination + Loaded image: localhost/jboss/wildfly:squashed + + $ podman history jboss/wildfly:squashed + ID CREATED CREATED BY SIZE COMMENT + 7ebd48ca15f2 5 minutes ago 268MB Squashed layers + <missing> 4 years ago /bin/sh -c #(nop) USER jboss 0B + <missing> 4 years ago /bin/sh -c yum -y install java-11-openjdk-... 237MB + <missing> 4 years ago /bin/sh -c #(nop) USER root 0B + <missing> 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldma... 0B + <missing> 4 years ago /bin/sh -c #(nop) USER jboss 0B + <missing> 4 years ago /bin/sh -c #(nop) WORKDIR /opt/jboss 0B + <missing> 4 years ago /bin/sh -c groupadd -r jboss -g 1000 && us... 374kB + <missing> 4 years ago /bin/sh -c yum update -y && yum -y install... 32.8MB + <missing> 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldma... 0B + <missing> 5 years ago /bin/sh -c #(nop) CMD ["/bin/bash"] 0B + <missing> 5 years ago /bin/sh -c #(nop) LABEL org.label-schema.... 0B + <missing> 5 years ago /bin/sh -c #(nop) ADD file:61908381d3142ff... 211MB + ... + +This enables docker-squash to work in Podman-only environments, rootless containers, and mixed container runtime scenarios. 
+ +**Important notes:** + +- Always use ``--tag`` parameter to avoid overwriting the original image name +- Set ``--load-image false`` if you only want to export the squashed tar file +- Use ``--output-path`` to specify where the squashed tar should be saved +- The tool automatically detects input type (image name vs tar file) and image format (Docker vs OCI) +- Squashed images work seamlessly with both Docker and Podman diff --git a/docker_squash/cli.py b/docker_squash/cli.py index f82d453..5d8db1e 100644 --- a/docker_squash/cli.py +++ b/docker_squash/cli.py @@ -70,7 +70,11 @@ def run(self): "--version", action="version", help="Show version and exit", version=version ) - parser.add_argument("image", help="Image to be squashed") + parser.add_argument( + "image", + help="Image name or tar file path to be squashed. If a .tar file is provided, it will be processed without requiring Docker daemon.", + ) + parser.add_argument( "-f", "--from-layer", @@ -79,7 +83,7 @@ def run(self): parser.add_argument( "-t", "--tag", - help="Specify the tag to be used for the new image. If not specified no tag will be applied", + help="Specify the tag to be used for the squashed image (recommended). 
Without this, the squashed image will have no repository tags to avoid overwriting the original image.", ) parser.add_argument( "-m", @@ -118,18 +122,16 @@ def run(self): self.log.setLevel(logging.INFO) self.log.debug("Running version %s", version) + try: - squash.Squash( - log=self.log, - image=args.image, - from_layer=args.from_layer, - tag=args.tag, - comment=args.message, - output_path=args.output_path, - load_image=args.load_image, - tmp_dir=args.tmp_dir, - cleanup=args.cleanup, - ).run() + # Auto-detect if input is tar file or image name + if self._is_tar_file(args.image): + self.log.debug(f"Detected tar file: {args.image}") + self._run_tar_mode(args) + else: + self.log.debug(f"Detected image name: {args.image}") + self._run_image_mode(args) + except KeyboardInterrupt: self.log.error("Program interrupted by user, exiting...") sys.exit(1) @@ -150,6 +152,91 @@ def run(self): sys.exit(1) + def _run_tar_mode(self, args): + from docker_squash.tar_image import TarImage + + # Provide helpful guidance about --tag parameter + if not args.tag: + self.log.info( + "💡 Tip: Consider using --tag to specify a name for your squashed image" + ) + self.log.info(" Example: --tag myimage:squashed") + + tar_image = TarImage( + log=self.log, + tar_path=args.image, # the tar path comes from the positional "image" argument + from_layer=args.from_layer, + tmp_dir=args.tmp_dir, + tag=args.tag, + comment=args.message, + ) + + try: + new_image_id = tar_image.squash() + self.log.info("New squashed image ID is %s" % new_image_id) + + if not args.output_path: # NOTE(review): the default built here is stored on self.output_path, but the export check below reads args.output_path — confirm intent + import os + + self.output_path = os.path.join( + os.path.dirname(args.image), f"squashed-{new_image_id[:12]}.tar" + ) + + if args.output_path: + tar_image.export_tar_archive(args.output_path) + + if args.load_image: + tar_image.load_squashed_image() + + self.log.info("Done") + + finally: + if not args.tmp_dir: + tar_image.cleanup() + + def _run_image_mode(self, args): + squash.Squash( + log=self.log, + image=args.image, + from_layer=args.from_layer, + tag=args.tag, + 
comment=args.message, + output_path=args.output_path, + load_image=args.load_image, + tmp_dir=args.tmp_dir, + cleanup=args.cleanup, + ).run() + + def _is_tar_file(self, input_path): + """Detect if input is a tar file or image name""" + import os + import tarfile + + # Check if it's a file path that exists + if os.path.isfile(input_path): + # Check if it's a valid tar file + try: + with tarfile.open(input_path, "r"): + return True + except (tarfile.TarError, OSError): + return False + + # Check if it ends with .tar extension + if input_path.endswith((".tar", ".tar.gz", ".tgz")): + return True + + # Check for obvious file path patterns + if ( + input_path.startswith(("/")) # Absolute path + or input_path.startswith(("./")) # Current dir + or input_path.startswith(("../")) # Parent dir + or input_path.startswith(("~/")) + ): # Home dir + return True + + # Otherwise assume it's an image name (even if it contains '/') + return False + def run(): cli = CLI() diff --git a/docker_squash/tar_image.py b/docker_squash/tar_image.py new file mode 100644 index 0000000..3ee373b --- /dev/null +++ b/docker_squash/tar_image.py @@ -0,0 +1,834 @@ +# -*- coding: utf-8 -*- + +import hashlib +import json +import os +import shutil +import tarfile +from collections import OrderedDict + +from docker_squash.errors import SquashError +from docker_squash.image import Image + + +class TarImage(Image): + """Process images from tar files without requiring Docker daemon""" + + FORMAT = "tar" + + def __init__( + self, log, tar_path, from_layer=None, tmp_dir=None, tag=None, comment="" + ): + # Call parent constructor with adapted parameters + super().__init__( + log=log, + docker=None, # TarImage doesn't need Docker client + image=tar_path, # Use tar_path as image identifier + from_layer=from_layer, + tmp_dir=tmp_dir, + tag=tag, + comment=comment, + ) + + # TarImage specific attributes + self.tar_path = tar_path + + # *** Critical: Initialize directories immediately as subsequent operations require 
them *** + self._initialize_directories() + + # Initialize TarImage specific variables + self.manifest = None + self.old_image_config = None + self.original_image_name = None + self.old_image_layers = [] # This also needs initialization + + # Process tar file (TarImage specific logic) + self._extract_tar_image() + self._detect_image_format() + self._load_image_metadata() + self.size_before = self._dir_size(self.old_image_dir) + + def _extract_tar_image(self): + """Extract tar image to temporary directory""" + self.log.info(f"Extracting tar image from {self.tar_path}") + + if not os.path.exists(self.tar_path): + raise SquashError(f"Tar file not found: {self.tar_path}") + + try: + with tarfile.open(self.tar_path, "r") as tar: + tar.extractall(self.old_image_dir) # NOTE(review): no explicit filter= or members= is passed here; confirm archives are trusted or that the interpreter's default extraction filter is acceptable + except Exception as e: + raise SquashError(f"Failed to extract tar file: {e}") + + self.log.debug(f"Tar image extracted to {self.old_image_dir}") + + def _detect_image_format(self): + """Detect if this is OCI format or Docker format""" + index_file = os.path.join(self.old_image_dir, "index.json") + manifest_file = os.path.join(self.old_image_dir, "manifest.json") + + if os.path.exists(index_file): + self.log.info("Detected OCI format image") + self.oci_format = True + elif os.path.exists(manifest_file): + self.log.info("Detected Docker format image") + self.oci_format = False + else: + raise SquashError("Unable to detect image format - missing manifest files") + + def _load_image_metadata(self): + """Load image metadata based on format""" + if self.oci_format: + self._load_oci_metadata() + else: + self._load_docker_metadata() + + def _load_oci_metadata(self): + """Load OCI format metadata""" + # Read index.json to get manifest reference + index_file = os.path.join(self.old_image_dir, "index.json") + with open(index_file, "r") as f: + index_data = json.load(f, object_pairs_hook=OrderedDict) + + # Get the first manifest (assuming single image) + if not index_data.get("manifests"): + raise SquashError("No 
manifests found in index.json") + + manifest_desc = index_data["manifests"][0] + manifest_digest = manifest_desc["digest"] + + # Read manifest from blobs + manifest_path = os.path.join( + self.old_image_dir, "blobs", "sha256", manifest_digest.split(":")[1] + ) + if not os.path.exists(manifest_path): + # Fallback to manifest.json if exists + fallback_manifest = os.path.join(self.old_image_dir, "manifest.json") + if os.path.exists(fallback_manifest): + self.log.warning("Using fallback manifest.json for OCI image") + self._load_docker_metadata() + return + else: + raise SquashError(f"Manifest blob not found: {manifest_path}") + + with open(manifest_path, "r") as f: + manifest = json.load(f, object_pairs_hook=OrderedDict) + + # Check if this is another index (nested structure) + if manifest.get("mediaType") == "application/vnd.oci.image.index.v1+json": + # This is a nested index, get the actual manifest + if not manifest.get("manifests"): + raise SquashError("No manifests found in nested index") + + nested_manifest_desc = manifest["manifests"][0] + nested_manifest_digest = nested_manifest_desc["digest"] + nested_manifest_path = os.path.join( + self.old_image_dir, + "blobs", + "sha256", + nested_manifest_digest.split(":")[1], + ) + + if not os.path.exists(nested_manifest_path): + raise SquashError( + f"Nested manifest blob not found: {nested_manifest_path}" + ) + + with open(nested_manifest_path, "r") as f: + self.manifest = json.load(f, object_pairs_hook=OrderedDict) + else: + # This is a direct manifest + self.manifest = manifest + + # Now check for config field + if "config" not in self.manifest: + raise SquashError("No config found in manifest - invalid OCI image") + + # Read config blob + config_desc = self.manifest["config"] + config_digest = config_desc["digest"] + config_path = os.path.join( + self.old_image_dir, "blobs", "sha256", config_digest.split(":")[1] + ) + + if not os.path.exists(config_path): + raise SquashError(f"Config blob not found: {config_path}") 
+ + with open(config_path, "r") as f: + self.old_image_config = json.load(f, object_pairs_hook=OrderedDict) + + # Generate image ID from config hash + self.old_image_id = f"sha256:{config_digest.split(':')[1]}" + + # Extract layer information + self._extract_oci_layers() + + def _load_docker_metadata(self): + """Load Docker format metadata""" + manifest_file = os.path.join(self.old_image_dir, "manifest.json") + with open(manifest_file, "r") as f: + manifests = json.load(f, object_pairs_hook=OrderedDict) + + if not manifests: + raise SquashError("Empty manifest.json") + + # Use the first manifest + self.manifest = manifests[0] + + # Read config file + config_path = os.path.join(self.old_image_dir, self.manifest["Config"]) + with open(config_path, "r") as f: + self.old_image_config = json.load(f, object_pairs_hook=OrderedDict) + + # Generate image ID from config hash + config_content = json.dumps( + self.old_image_config, sort_keys=True, separators=(",", ":") + ) + self.old_image_id = ( + f"sha256:{hashlib.sha256(config_content.encode()).hexdigest()}" + ) + + # Extract layer information + self._extract_docker_layers() + + def _extract_oci_layers(self): + """Extract layer information for OCI format - based on config history""" + self.old_image_layers = [] + + # Get actual layer digests from manifest (only non-empty layers) + manifest_layers = [] + for layer_desc in self.manifest.get("layers", []): + manifest_layers.append(layer_desc["digest"]) + + # Build complete layer list from config.history (includes empty layers) + manifest_layer_index = 0 + + for i, history_entry in enumerate(self.old_image_config.get("history", [])): + is_empty = history_entry.get("empty_layer", False) + + if is_empty: + # Empty layer - create a virtual layer ID + layer_id = f"" + self.old_image_layers.append(layer_id) + else: + # Real layer - use digest from manifest + if manifest_layer_index < len(manifest_layers): + layer_id = manifest_layers[manifest_layer_index] + 
self.old_image_layers.append(layer_id) + manifest_layer_index += 1 + else: + self.log.warning(f"Missing layer data for history entry {i}") + + self.log.debug( + f"Found {len(self.old_image_layers)} layers in OCI image (including empty layers)" + ) + self.log.debug(f"Manifest has {len(manifest_layers)} actual layer files") + + def _extract_docker_layers(self): + """Extract layer information for Docker format - based on config history""" + self.old_image_layers = [] + + # Get actual layer paths from manifest (only non-empty layers) + manifest_layers = self.manifest.get("Layers", []) + manifest_layer_ids = [] + for layer_path in manifest_layers: + # Extract layer ID from path (e.g., "abc123.../layer.tar" -> "abc123...") + layer_id = layer_path.split("/")[0] + manifest_layer_ids.append(f"sha256:{layer_id}") + + # Build complete layer list from config.history (includes empty layers) + manifest_layer_index = 0 + + for i, history_entry in enumerate(self.old_image_config.get("history", [])): + is_empty = history_entry.get("empty_layer", False) + + if is_empty: + # Empty layer - create a virtual layer ID + layer_id = f"" + self.old_image_layers.append(layer_id) + else: + # Real layer - use ID from manifest + if manifest_layer_index < len(manifest_layer_ids): + layer_id = manifest_layer_ids[manifest_layer_index] + self.old_image_layers.append(layer_id) + manifest_layer_index += 1 + else: + self.log.warning(f"Missing layer data for history entry {i}") + + self.log.debug( + f"Found {len(self.old_image_layers)} layers in Docker image (including empty layers)" + ) + self.log.debug(f"Manifest has {len(manifest_layer_ids)} actual layer files") + + def _before_squashing(self): + """Prepare for squashing operation""" + # No need to call _initialize_directories() as it's already called in constructor + + # Location of the tar archive with squashed layers + self.squashed_tar = os.path.join(self.squashed_dir, "layer.tar") + + # Handle tags if provided + if self.tag: + self.image_name, 
self.image_tag = self._parse_image_name(self.tag) + + # TarImage specific: Ensure we have necessary layer information + if not hasattr(self, "old_image_layers") or not self.old_image_layers: + raise SquashError("No layers found in tar image") + + # *** Important: Copy layer calculation logic from base class *** + self.log.info("Old image has %s layers", len(self.old_image_layers)) + self.log.debug("Old layers: %s", self.old_image_layers) + + # By default - squash all layers. + if self.from_layer is None: + self.from_layer = len(self.old_image_layers) + + try: + number_of_layers = int(self.from_layer) + self.log.debug( + f"We detected number of layers ({number_of_layers}) as the argument to squash" + ) + except ValueError: + # For TarImage, we need to adapt this logic + # because we don't have Docker client to check layer IDs + if self.from_layer in self.old_image_layers: + number_of_layers = ( + len(self.old_image_layers) + - self.old_image_layers.index(self.from_layer) + - 1 + ) + else: + raise SquashError( + f"The {self.from_layer} layer could not be found in the image" + ) + + self._validate_number_of_layers(number_of_layers) + + marker = len(self.old_image_layers) - number_of_layers + + self.layers_to_squash = self.old_image_layers[marker:] + self.layers_to_move = self.old_image_layers[:marker] + + self.log.info("Checking if squashing is necessary...") + + if len(self.layers_to_squash) < 1: + raise SquashError( + f"Invalid number of layers to squash: {len(self.layers_to_squash)}" + ) + + if len(self.layers_to_squash) == 1: + from docker_squash.errors import SquashUnnecessaryError + + raise SquashUnnecessaryError( + "Single layer marked to squash, no squashing is required" + ) + + self.log.info(f"Attempting to squash last {number_of_layers} layers...") + self.log.debug(f"Layers to squash: {self.layers_to_squash}") + self.log.debug(f"Layers to move: {self.layers_to_move}") + + # TarImage doesn't need _save_image as data is already extracted in constructor + + def 
_squash(self): + """Perform the actual squashing""" + self.log.info("Starting squashing process...") + + # Create squashed layer directory + os.makedirs(self.squashed_dir, exist_ok=True) + + # Set up squashed tar path for base class + self.squashed_tar = os.path.join(self.squashed_dir, "layer.tar") + + # Filter out virtual layers for actual squashing + real_layers_to_squash = [ + layer_id + for layer_id in self.layers_to_squash + if not layer_id.startswith("= size_before_mb: + self.log.info( + "If the squashed image is larger than original it means that there were no meaningful files to squash and it just added metadata. Are you sure you specified correct parameters?" + ) + + def _dir_size(self, directory): + """Calculate directory size - borrowed from base class""" + size = 0 + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + file_path = os.path.join(dirpath, filename) + if os.path.exists(file_path): + size += os.path.getsize(file_path) + return size + + def _move_preserved_layers(self): + """Move preserved layers to new image directory""" + for layer_id in self.layers_to_move: + layer_tar_path = self._get_layer_tar_path(layer_id) + + if layer_tar_path is None: + # Virtual/empty layer - skip moving + self.log.debug(f"Skipping move for virtual layer: {layer_id}") + continue + + if not os.path.exists(layer_tar_path): + self.log.warning(f"Preserved layer tar not found: {layer_tar_path}") + continue + + # Create layer directory in new image + if self.oci_format: + # For OCI format, copy the blob + layer_dir = layer_id.split(":", 1)[1] if ":" in layer_id else layer_id + dest_blob_dir = os.path.join(self.new_image_dir, "blobs", "sha256") + os.makedirs(dest_blob_dir, exist_ok=True) + dest_path = os.path.join(dest_blob_dir, layer_dir) + + # Copy the layer blob + shutil.copy2(layer_tar_path, dest_path) + else: + # For Docker format, copy to layer directory + layer_dir = layer_id.split(":", 1)[1] if ":" in layer_id else layer_id + 
dest_layer_dir = os.path.join(self.new_image_dir, layer_dir) + os.makedirs(dest_layer_dir, exist_ok=True) + dest_tar_path = os.path.join(dest_layer_dir, "layer.tar") + + # Copy the layer tar + shutil.copy2(layer_tar_path, dest_tar_path) + + # Copy the layer json metadata if it exists + source_json_path = os.path.join(self.old_image_dir, layer_dir, "json") + if os.path.exists(source_json_path): + dest_json_path = os.path.join(dest_layer_dir, "json") + shutil.copy2(source_json_path, dest_json_path) + + # Copy version file if it exists + source_version_path = os.path.join( + self.old_image_dir, layer_dir, "VERSION" + ) + if os.path.exists(source_version_path): + dest_version_path = os.path.join(dest_layer_dir, "VERSION") + shutil.copy2(source_version_path, dest_version_path) + + self.log.debug(f"Copied preserved layer {layer_id}") + + def _get_layer_tar_path(self, layer_id): + """Get the path to a layer's tar file""" + # Handle virtual/empty layers + if layer_id.startswith("