From f04a699407a205b54424022cbd2d8a6488360cf2 Mon Sep 17 00:00:00 2001 From: wuliang Date: Fri, 20 Jun 2025 08:26:02 +0000 Subject: [PATCH 1/5] support image tar --- .gitignore | 4 + README.rst | 82 ++++ docker_squash/cli.py | 87 ++++- docker_squash/tar_image.py | 782 +++++++++++++++++++++++++++++++++++++ 4 files changed, 942 insertions(+), 13 deletions(-) create mode 100644 docker_squash/tar_image.py diff --git a/.gitignore b/.gitignore index 66e519a..0bd4b04 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,7 @@ target docker-squash.iml **/image.tar **/tox.tar + +.cursor/* + +*.tar \ No newline at end of file diff --git a/README.rst b/README.rst index b40d3c5..45cedec 100644 --- a/README.rst +++ b/README.rst @@ -216,3 +216,85 @@ Let's confirm the image structure now: 6ee235cf4473 3 weeks ago /bin/sh -c #(nop) LABEL name=CentOS Base Imag 0 B 474c2ee77fa3 3 weeks ago /bin/sh -c #(nop) ADD file:72852fc7626d233343 196.6 MB 1544084fad81 6 months ago /bin/sh -c #(nop) MAINTAINER The CentOS Proje 0 B + +Working without Docker daemon +----------------------------- + +Sometimes you may want to squash an image without direct access to Docker daemon (e.g., in CI/CD pipelines, +air-gapped environments, or when Docker is not running). The ``--input-tar`` parameter allows you to process +Docker images exported as tar files without requiring a Docker daemon connection. + +**Step 1**: Export the image to a tar file using ``docker save``: + +:: + + $ docker save -o source.tar jboss/wildfly:latest + +**Step 2**: Squash the image from the tar file. Let's squash the last 8 layers: + +:: + + $ python -m docker_squash.cli --input-tar source.tar --tag jboss/wildfly:squashed -f 8 --output-path squashed.tar --load-image false + 2025-07-04 06:14:01,649 tar_image.py:83 INFO Extracting tar image from source.tar + 2025-07-04 06:14:01,918 tar_image.py:102 INFO Detected OCI format image + 2025-07-04 06:14:01,919 tar_image.py:254 INFO Preparing for squashing... 
+ 2025-07-04 06:14:01,919 tar_image.py:259 INFO Old image has 22 layers + 2025-07-04 06:14:01,919 tar_image.py:305 INFO Will squash 8 layers + 2025-07-04 06:14:01,919 tar_image.py:313 INFO Starting squashing process... + 2025-07-04 06:14:01,919 image.py:750 INFO Starting squashing for /tmp/docker-squash-1strl2rh/new/squashed/layer.tar... + 2025-07-04 06:14:04,001 image.py:775 INFO Squashing file '/tmp/docker-squash-1strl2rh/old/blobs/sha256/f26d32e28c292aba76defcdd67c267000d31a6ac3ebdab5c850aba90ef834927'... + 2025-07-04 06:14:05,284 image.py:923 INFO Squashing finished! + 2025-07-04 06:14:06,202 tar_image.py:632 WARNING OCI output format not fully implemented - creating Docker format + 2025-07-04 06:14:06,202 tar_image.py:558 INFO Using user-specified tag: jboss/wildfly:squashed + 2025-07-04 06:14:06,277 tar_image.py:352 INFO Squashing completed successfully + 2025-07-04 06:14:06,277 tar_image.py:362 INFO Original image size: 382.24 MB + 2025-07-04 06:14:06,277 tar_image.py:363 INFO Squashed image size: 421.60 MB + 2025-07-04 06:14:06,277 tar_image.py:366 INFO If the squashed image is larger than original it means that there were no meaningful files to squash and it just added metadata. Are you sure you specified correct parameters? 
+ 2025-07-04 06:14:06,277 cli.py:179 INFO New squashed image ID is sha256:dbde9a2e59a3975663b55773510f36c14b5046f4ef26a84f84445d406124772d + 2025-07-04 06:14:06,277 tar_image.py:732 INFO Exporting squashed image to squashed.tar + 2025-07-04 06:14:07,544 tar_image.py:742 INFO Export completed successfully + 2025-07-04 06:14:07,544 cli.py:195 INFO Done + +**Step 3**: Load the squashed image back into Docker: + +:: + + $ docker load -i squashed.tar + Loaded image: jboss/wildfly:squashed + +Now you can verify the squashed image structure: + +:: + + $ docker history jboss/wildfly:squashed + IMAGE CREATED CREATED BY SIZE COMMENT + 9d47ef6da59f 41 seconds ago 270MB Squashed layers + 3 years ago /bin/sh -c #(nop) ENV WILDFLY_VERSION=25.0.… 0B + 4 years ago /bin/sh -c #(nop) ENV JAVA_HOME=/usr/lib/jv… 0B + 4 years ago /bin/sh -c #(nop) USER jboss 0B + 4 years ago /bin/sh -c yum -y install java-11-openjdk-de… 239MB + 4 years ago /bin/sh -c #(nop) USER root 0B + 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B + 4 years ago /bin/sh -c #(nop) USER jboss 0B + 4 years ago /bin/sh -c #(nop) WORKDIR /opt/jboss 0B + 4 years ago /bin/sh -c groupadd -r jboss -g 1000 && user… 406kB + 4 years ago /bin/sh -c yum update -y && yum -y install x… 33.5MB + 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B + 4 years ago /bin/sh -c #(nop) CMD ["/bin/bash"] 0B + 4 years ago /bin/sh -c #(nop) LABEL org.label-schema.sc… 0B + 4 years ago /bin/sh -c #(nop) ADD file:61908381d3142ffba… 222MB + +**Key advantages of tar mode:** + +- No Docker daemon required during squashing +- Works in CI/CD pipelines and restricted environments +- Supports both Docker format and OCI format images +- Maintains complete layer history compatibility +- Can process images on systems where Docker is not installed + +**Important notes:** + +- Always use ``--tag`` parameter to avoid overwriting the original image name +- Set ``--load-image false`` if you only want to export the squashed tar file +- Use 
``--output-path`` to specify where the squashed tar should be saved +- The tool automatically detects image format (Docker vs OCI) from the input tar diff --git a/docker_squash/cli.py b/docker_squash/cli.py index f82d453..d018a74 100644 --- a/docker_squash/cli.py +++ b/docker_squash/cli.py @@ -70,7 +70,13 @@ def run(self): "--version", action="version", help="Show version and exit", version=version ) - parser.add_argument("image", help="Image to be squashed") + parser.add_argument("image", nargs='?', help="Image to be squashed") + + parser.add_argument( + "--input-tar", + help="Path to tar file created by 'docker save'. Process tar file directly without requiring Docker daemon." + ) + parser.add_argument( "-f", "--from-layer", @@ -79,7 +85,7 @@ def run(self): parser.add_argument( "-t", "--tag", - help="Specify the tag to be used for the new image. If not specified no tag will be applied", + help="Specify the tag to be used for the squashed image (recommended). Without this, the squashed image will have no repository tags to avoid overwriting the original image.", ) parser.add_argument( "-m", @@ -112,24 +118,25 @@ def run(self): args = parser.parse_args() + if not args.input_tar and not args.image: + parser.error("Either 'image' or '--input-tar' must be specified") + + if args.input_tar and args.image: + parser.error("Cannot specify both 'image' and '--input-tar' at the same time") + if args.verbose: self.log.setLevel(logging.DEBUG) else: self.log.setLevel(logging.INFO) self.log.debug("Running version %s", version) + try: - squash.Squash( - log=self.log, - image=args.image, - from_layer=args.from_layer, - tag=args.tag, - comment=args.message, - output_path=args.output_path, - load_image=args.load_image, - tmp_dir=args.tmp_dir, - cleanup=args.cleanup, - ).run() + if args.input_tar: + self._run_tar_mode(args) + else: + self._run_image_mode(args) + except KeyboardInterrupt: self.log.error("Program interrupted by user, exiting...") sys.exit(1) @@ -149,6 +156,60 @@ def 
run(self): sys.exit(e.code) sys.exit(1) + + def _run_tar_mode(self, args): + from docker_squash.tar_image import TarImage + + # Provide helpful guidance about --tag parameter + if not args.tag: + self.log.info("💡 Tip: Consider using --tag to specify a name for your squashed image") + self.log.info(" Example: --tag myimage:squashed") + + tar_image = TarImage( + log=self.log, + tar_path=args.input_tar, + from_layer=args.from_layer, + tmp_dir=args.tmp_dir, + tag=args.tag, + comment=args.message + ) + + try: + new_image_id = tar_image.squash() + self.log.info("New squashed image ID is %s" % new_image_id) + + if args.output_path: + tar_image.export_tar_archive(args.output_path) + + if args.load_image: + tar_image.load_squashed_image() + + if not args.output_path and not args.load_image: + import tempfile + import os + temp_output = os.path.join(tempfile.gettempdir(), f"squashed-{new_image_id[:12]}.tar") + tar_image.export_tar_archive(temp_output) + self.log.info("Since no output path was specified and loading to Docker was disabled, " + f"the squashed image has been saved to: {temp_output}") + + self.log.info("Done") + + finally: + if not args.tmp_dir: + tar_image.cleanup() + + def _run_image_mode(self, args): + squash.Squash( + log=self.log, + image=args.image, + from_layer=args.from_layer, + tag=args.tag, + comment=args.message, + output_path=args.output_path, + load_image=args.load_image, + tmp_dir=args.tmp_dir, + cleanup=args.cleanup, + ).run() def run(): diff --git a/docker_squash/tar_image.py b/docker_squash/tar_image.py new file mode 100644 index 0000000..58d15b8 --- /dev/null +++ b/docker_squash/tar_image.py @@ -0,0 +1,782 @@ +# -*- coding: utf-8 -*- + +import json +import os +import tarfile +import tempfile +import time +from collections import OrderedDict +from pathlib import Path +import logging +import hashlib +import shutil + +from docker_squash.errors import SquashError +from docker_squash.image import Image + + +class TarImage(Image): + """Process images 
from tar files without requiring Docker daemon""" + + FORMAT = "tar" + + def __init__(self, log, tar_path, from_layer=None, tmp_dir=None, tag=None, comment=""): + self.tar_path = tar_path + self.log = log + self.debug = self.log.isEnabledFor(logging.DEBUG) + self.from_layer = from_layer + self.tag = tag + self.comment = comment + self.tmp_dir = self._prepare_tmp_directory(tmp_dir) + self.date = self._get_current_date() + + # Initialize attributes required by base class + self.image_name = None + self.image_tag = None + self.squash_id = None + self.oci_format = False + + # Set up directory structure + self.old_image_dir = os.path.join(self.tmp_dir, "old") + self.new_image_dir = os.path.join(self.tmp_dir, "new") + self.squashed_dir = os.path.join(self.new_image_dir, "squashed") + + # Ensure directories exist + os.makedirs(self.old_image_dir, exist_ok=True) + os.makedirs(self.new_image_dir, exist_ok=True) + os.makedirs(self.squashed_dir, exist_ok=True) + + # Initialize variables + self.manifest = None + self.old_image_config = None + self.old_image_layers = [] + self.original_image_name = None + self.old_image_id = None + + # Parse image name if provided + if self.tag: + self.image_name, self.image_tag = self._parse_image_name(self.tag) + + # Process the tar file + self._extract_tar_image() + self._detect_image_format() + self._load_image_metadata() + self.size_before = self._dir_size(self.old_image_dir) + + def squash(self): + """Main squash method - follows base class pattern""" + self._before_squashing() + ret = self._squash() + self._after_squashing() + return ret + + def _get_current_date(self): + """Get current date in Docker format""" + import datetime + import re + # Workaround for Golang microsecond formatting + date = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ") + return re.sub(r"0*Z$", "Z", date) + + def _extract_tar_image(self): + """Extract tar image to temporary directory""" + self.log.info(f"Extracting tar image from {self.tar_path}") + 
+ if not os.path.exists(self.tar_path): + raise SquashError(f"Tar file not found: {self.tar_path}") + + try: + with tarfile.open(self.tar_path, 'r') as tar: + tar.extractall(self.old_image_dir) + except Exception as e: + raise SquashError(f"Failed to extract tar file: {e}") + + self.log.debug(f"Tar image extracted to {self.old_image_dir}") + + def _detect_image_format(self): + """Detect if this is OCI format or Docker format""" + index_file = os.path.join(self.old_image_dir, "index.json") + manifest_file = os.path.join(self.old_image_dir, "manifest.json") + + if os.path.exists(index_file): + self.log.info("Detected OCI format image") + self.oci_format = True + elif os.path.exists(manifest_file): + self.log.info("Detected Docker format image") + self.oci_format = False + else: + raise SquashError("Unable to detect image format - missing manifest files") + + def _load_image_metadata(self): + """Load image metadata based on format""" + if self.oci_format: + self._load_oci_metadata() + else: + self._load_docker_metadata() + + def _load_oci_metadata(self): + """Load OCI format metadata""" + # Read index.json to get manifest reference + index_file = os.path.join(self.old_image_dir, "index.json") + with open(index_file, 'r') as f: + index_data = json.load(f, object_pairs_hook=OrderedDict) + + # Get the first manifest (assuming single image) + if not index_data.get('manifests'): + raise SquashError("No manifests found in index.json") + + manifest_desc = index_data['manifests'][0] + manifest_digest = manifest_desc['digest'] + + # Read manifest from blobs + manifest_path = os.path.join(self.old_image_dir, "blobs", "sha256", manifest_digest.split(':')[1]) + if not os.path.exists(manifest_path): + # Fallback to manifest.json if exists + fallback_manifest = os.path.join(self.old_image_dir, "manifest.json") + if os.path.exists(fallback_manifest): + self.log.warning("Using fallback manifest.json for OCI image") + self._load_docker_metadata() + return + else: + raise 
SquashError(f"Manifest blob not found: {manifest_path}") + + with open(manifest_path, 'r') as f: + self.manifest = json.load(f, object_pairs_hook=OrderedDict) + + # Read config blob + config_desc = self.manifest['config'] + config_digest = config_desc['digest'] + config_path = os.path.join(self.old_image_dir, "blobs", "sha256", config_digest.split(':')[1]) + + if not os.path.exists(config_path): + raise SquashError(f"Config blob not found: {config_path}") + + with open(config_path, 'r') as f: + self.old_image_config = json.load(f, object_pairs_hook=OrderedDict) + + # Generate image ID from config hash + self.old_image_id = f"sha256:{config_digest.split(':')[1]}" + + # Extract layer information + self._extract_oci_layers() + + def _load_docker_metadata(self): + """Load Docker format metadata""" + manifest_file = os.path.join(self.old_image_dir, "manifest.json") + with open(manifest_file, 'r') as f: + manifests = json.load(f, object_pairs_hook=OrderedDict) + + if not manifests: + raise SquashError("Empty manifest.json") + + # Use the first manifest + self.manifest = manifests[0] + + # Read config file + config_path = os.path.join(self.old_image_dir, self.manifest['Config']) + with open(config_path, 'r') as f: + self.old_image_config = json.load(f, object_pairs_hook=OrderedDict) + + # Generate image ID from config hash + config_content = json.dumps(self.old_image_config, sort_keys=True, separators=(',', ':')) + self.old_image_id = f"sha256:{hashlib.sha256(config_content.encode()).hexdigest()}" + + # Extract layer information + self._extract_docker_layers() + + def _extract_oci_layers(self): + """Extract layer information for OCI format - based on config history""" + self.old_image_layers = [] + + # Get actual layer digests from manifest (only non-empty layers) + manifest_layers = [] + for layer_desc in self.manifest.get('layers', []): + manifest_layers.append(layer_desc['digest']) + + # Build complete layer list from config.history (includes empty layers) + 
manifest_layer_index = 0 + + for i, history_entry in enumerate(self.old_image_config.get('history', [])): + is_empty = history_entry.get('empty_layer', False) + + if is_empty: + # Empty layer - create a virtual layer ID + layer_id = f"" + self.old_image_layers.append(layer_id) + else: + # Real layer - use digest from manifest + if manifest_layer_index < len(manifest_layers): + layer_id = manifest_layers[manifest_layer_index] + self.old_image_layers.append(layer_id) + manifest_layer_index += 1 + else: + self.log.warning(f"Missing layer data for history entry {i}") + + self.log.debug(f"Found {len(self.old_image_layers)} layers in OCI image (including empty layers)") + self.log.debug(f"Manifest has {len(manifest_layers)} actual layer files") + + def _extract_docker_layers(self): + """Extract layer information for Docker format - based on config history""" + self.old_image_layers = [] + + # Get actual layer paths from manifest (only non-empty layers) + manifest_layers = self.manifest.get('Layers', []) + manifest_layer_ids = [] + for layer_path in manifest_layers: + # Extract layer ID from path (e.g., "abc123.../layer.tar" -> "abc123...") + layer_id = layer_path.split('/')[0] + manifest_layer_ids.append(f"sha256:{layer_id}") + + # Build complete layer list from config.history (includes empty layers) + manifest_layer_index = 0 + + for i, history_entry in enumerate(self.old_image_config.get('history', [])): + is_empty = history_entry.get('empty_layer', False) + + if is_empty: + # Empty layer - create a virtual layer ID + layer_id = f"" + self.old_image_layers.append(layer_id) + else: + # Real layer - use ID from manifest + if manifest_layer_index < len(manifest_layer_ids): + layer_id = manifest_layer_ids[manifest_layer_index] + self.old_image_layers.append(layer_id) + manifest_layer_index += 1 + else: + self.log.warning(f"Missing layer data for history entry {i}") + + self.log.debug(f"Found {len(self.old_image_layers)} layers in Docker image (including empty layers)") + 
self.log.debug(f"Manifest has {len(manifest_layer_ids)} actual layer files") + + def _before_squashing(self): + """Prepare for squashing operation""" + self.log.info("Preparing for squashing...") + + # Ensure we have image layers + if not self.old_image_layers: + raise SquashError("No layers found in image") + self.log.info("Old image has %s layers", len(self.old_image_layers)) + # Set up squashing parameters + if self.from_layer is None: + self.from_layer = len(self.old_image_layers) + + try: + number_of_layers = int(self.from_layer) + self.log.debug(f"Squashing last {number_of_layers} layers") + except ValueError: + # Handle layer ID as from_layer + if self.from_layer in self.old_image_layers: + layer_index = self.old_image_layers.index(self.from_layer) + number_of_layers = len(self.old_image_layers) - layer_index - 1 + else: + raise SquashError(f"Layer {self.from_layer} not found in image") + + if number_of_layers <= 0: + raise SquashError("Number of layers to squash must be positive") + + if number_of_layers > len(self.old_image_layers): + raise SquashError(f"Cannot squash {number_of_layers} layers from {len(self.old_image_layers)} total layers") + + marker = len(self.old_image_layers) - number_of_layers + self.layers_to_squash = self.old_image_layers[marker:] + self.layers_to_move = self.old_image_layers[:marker] + + if len(self.layers_to_squash) <= 1: + raise SquashError("Need at least 2 layers to squash") + + # Set squash_id like v2_image.py does - should be the last real (non-virtual) layer to move + self.squash_id = None + if self.layers_to_move: + # Find the last non-virtual layer in layers_to_move + for layer_id in reversed(self.layers_to_move): + if not layer_id.startswith('= size_before_mb: + self.log.info( + "If the squashed image is larger than original it means that there were no meaningful files to squash and it just added metadata. Are you sure you specified correct parameters?" 
+ ) + + def _dir_size(self, directory): + """Calculate directory size - borrowed from base class""" + size = 0 + for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + file_path = os.path.join(dirpath, filename) + if os.path.exists(file_path): + size += os.path.getsize(file_path) + return size + + def _move_preserved_layers(self): + """Move preserved layers to new image directory""" + for layer_id in self.layers_to_move: + layer_tar_path = self._get_layer_tar_path(layer_id) + + if layer_tar_path is None: + # Virtual/empty layer - skip moving + self.log.debug(f"Skipping move for virtual layer: {layer_id}") + continue + + if not os.path.exists(layer_tar_path): + self.log.warning(f"Preserved layer tar not found: {layer_tar_path}") + continue + + # Create layer directory in new image + if self.oci_format: + # For OCI format, copy the blob + layer_dir = layer_id.split(':', 1)[1] if ':' in layer_id else layer_id + dest_blob_dir = os.path.join(self.new_image_dir, "blobs", "sha256") + os.makedirs(dest_blob_dir, exist_ok=True) + dest_path = os.path.join(dest_blob_dir, layer_dir) + + # Copy the layer blob + shutil.copy2(layer_tar_path, dest_path) + else: + # For Docker format, copy to layer directory + layer_dir = layer_id.split(':', 1)[1] if ':' in layer_id else layer_id + dest_layer_dir = os.path.join(self.new_image_dir, layer_dir) + os.makedirs(dest_layer_dir, exist_ok=True) + dest_tar_path = os.path.join(dest_layer_dir, "layer.tar") + + # Copy the layer tar + shutil.copy2(layer_tar_path, dest_tar_path) + + # Copy the layer json metadata if it exists + source_json_path = os.path.join(self.old_image_dir, layer_dir, "json") + if os.path.exists(source_json_path): + dest_json_path = os.path.join(dest_layer_dir, "json") + shutil.copy2(source_json_path, dest_json_path) + + # Copy version file if it exists + source_version_path = os.path.join(self.old_image_dir, layer_dir, "VERSION") + if os.path.exists(source_version_path): + dest_version_path = 
os.path.join(dest_layer_dir, "VERSION") + shutil.copy2(source_version_path, dest_version_path) + + self.log.debug(f"Copied preserved layer {layer_id}") + + def _get_layer_tar_path(self, layer_id): + """Get the path to a layer's tar file""" + # Handle virtual/empty layers + if layer_id.startswith(' Date: Sun, 6 Jul 2025 09:40:44 +0000 Subject: [PATCH 2/5] Fix: Apply code formatting --- docker_squash/cli.py | 57 +++-- docker_squash/tar_image.py | 465 +++++++++++++++++++++---------------- 2 files changed, 294 insertions(+), 228 deletions(-) diff --git a/docker_squash/cli.py b/docker_squash/cli.py index d018a74..f80397b 100644 --- a/docker_squash/cli.py +++ b/docker_squash/cli.py @@ -70,13 +70,13 @@ def run(self): "--version", action="version", help="Show version and exit", version=version ) - parser.add_argument("image", nargs='?', help="Image to be squashed") - + parser.add_argument("image", nargs="?", help="Image to be squashed") + parser.add_argument( "--input-tar", - help="Path to tar file created by 'docker save'. Process tar file directly without requiring Docker daemon." + help="Path to tar file created by 'docker save'. 
Process tar file directly without requiring Docker daemon.", ) - + parser.add_argument( "-f", "--from-layer", @@ -120,9 +120,11 @@ def run(self): if not args.input_tar and not args.image: parser.error("Either 'image' or '--input-tar' must be specified") - + if args.input_tar and args.image: - parser.error("Cannot specify both 'image' and '--input-tar' at the same time") + parser.error( + "Cannot specify both 'image' and '--input-tar' at the same time" + ) if args.verbose: self.log.setLevel(logging.DEBUG) @@ -130,13 +132,13 @@ def run(self): self.log.setLevel(logging.INFO) self.log.debug("Running version %s", version) - + try: if args.input_tar: self._run_tar_mode(args) else: self._run_image_mode(args) - + except KeyboardInterrupt: self.log.error("Program interrupted by user, exiting...") sys.exit(1) @@ -156,48 +158,55 @@ def run(self): sys.exit(e.code) sys.exit(1) - + def _run_tar_mode(self, args): from docker_squash.tar_image import TarImage - + # Provide helpful guidance about --tag parameter if not args.tag: - self.log.info("💡 Tip: Consider using --tag to specify a name for your squashed image") + self.log.info( + "💡 Tip: Consider using --tag to specify a name for your squashed image" + ) self.log.info(" Example: --tag myimage:squashed") - + tar_image = TarImage( log=self.log, tar_path=args.input_tar, from_layer=args.from_layer, tmp_dir=args.tmp_dir, tag=args.tag, - comment=args.message + comment=args.message, ) - + try: new_image_id = tar_image.squash() self.log.info("New squashed image ID is %s" % new_image_id) - + if args.output_path: tar_image.export_tar_archive(args.output_path) - + if args.load_image: tar_image.load_squashed_image() - + if not args.output_path and not args.load_image: - import tempfile import os - temp_output = os.path.join(tempfile.gettempdir(), f"squashed-{new_image_id[:12]}.tar") + import tempfile + + temp_output = os.path.join( + tempfile.gettempdir(), f"squashed-{new_image_id[:12]}.tar" + ) tar_image.export_tar_archive(temp_output) - 
self.log.info("Since no output path was specified and loading to Docker was disabled, " - f"the squashed image has been saved to: {temp_output}") - + self.log.info( + "Since no output path was specified and loading to Docker was disabled, " + f"the squashed image has been saved to: {temp_output}" + ) + self.log.info("Done") - + finally: if not args.tmp_dir: tar_image.cleanup() - + def _run_image_mode(self, args): squash.Squash( log=self.log, diff --git a/docker_squash/tar_image.py b/docker_squash/tar_image.py index 58d15b8..e9e7560 100644 --- a/docker_squash/tar_image.py +++ b/docker_squash/tar_image.py @@ -1,15 +1,12 @@ # -*- coding: utf-8 -*- +import hashlib import json +import logging import os +import shutil import tarfile -import tempfile -import time from collections import OrderedDict -from pathlib import Path -import logging -import hashlib -import shutil from docker_squash.errors import SquashError from docker_squash.image import Image @@ -17,10 +14,12 @@ class TarImage(Image): """Process images from tar files without requiring Docker daemon""" - + FORMAT = "tar" - - def __init__(self, log, tar_path, from_layer=None, tmp_dir=None, tag=None, comment=""): + + def __init__( + self, log, tar_path, from_layer=None, tmp_dir=None, tag=None, comment="" + ): self.tar_path = tar_path self.log = log self.debug = self.log.isEnabledFor(logging.DEBUG) @@ -29,34 +28,34 @@ def __init__(self, log, tar_path, from_layer=None, tmp_dir=None, tag=None, comme self.comment = comment self.tmp_dir = self._prepare_tmp_directory(tmp_dir) self.date = self._get_current_date() - + # Initialize attributes required by base class self.image_name = None self.image_tag = None self.squash_id = None self.oci_format = False - + # Set up directory structure self.old_image_dir = os.path.join(self.tmp_dir, "old") self.new_image_dir = os.path.join(self.tmp_dir, "new") self.squashed_dir = os.path.join(self.new_image_dir, "squashed") - + # Ensure directories exist os.makedirs(self.old_image_dir, 
exist_ok=True) os.makedirs(self.new_image_dir, exist_ok=True) os.makedirs(self.squashed_dir, exist_ok=True) - + # Initialize variables self.manifest = None self.old_image_config = None self.old_image_layers = [] self.original_image_name = None self.old_image_id = None - + # Parse image name if provided if self.tag: self.image_name, self.image_tag = self._parse_image_name(self.tag) - + # Process the tar file self._extract_tar_image() self._detect_image_format() @@ -74,6 +73,7 @@ def _get_current_date(self): """Get current date in Docker format""" import datetime import re + # Workaround for Golang microsecond formatting date = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ") return re.sub(r"0*Z$", "Z", date) @@ -81,23 +81,23 @@ def _get_current_date(self): def _extract_tar_image(self): """Extract tar image to temporary directory""" self.log.info(f"Extracting tar image from {self.tar_path}") - + if not os.path.exists(self.tar_path): raise SquashError(f"Tar file not found: {self.tar_path}") - + try: - with tarfile.open(self.tar_path, 'r') as tar: + with tarfile.open(self.tar_path, "r") as tar: tar.extractall(self.old_image_dir) except Exception as e: raise SquashError(f"Failed to extract tar file: {e}") - + self.log.debug(f"Tar image extracted to {self.old_image_dir}") def _detect_image_format(self): """Detect if this is OCI format or Docker format""" index_file = os.path.join(self.old_image_dir, "index.json") manifest_file = os.path.join(self.old_image_dir, "manifest.json") - + if os.path.exists(index_file): self.log.info("Detected OCI format image") self.oci_format = True @@ -118,18 +118,20 @@ def _load_oci_metadata(self): """Load OCI format metadata""" # Read index.json to get manifest reference index_file = os.path.join(self.old_image_dir, "index.json") - with open(index_file, 'r') as f: + with open(index_file, "r") as f: index_data = json.load(f, object_pairs_hook=OrderedDict) - + # Get the first manifest (assuming single image) - if not 
index_data.get('manifests'): + if not index_data.get("manifests"): raise SquashError("No manifests found in index.json") - - manifest_desc = index_data['manifests'][0] - manifest_digest = manifest_desc['digest'] - + + manifest_desc = index_data["manifests"][0] + manifest_digest = manifest_desc["digest"] + # Read manifest from blobs - manifest_path = os.path.join(self.old_image_dir, "blobs", "sha256", manifest_digest.split(':')[1]) + manifest_path = os.path.join( + self.old_image_dir, "blobs", "sha256", manifest_digest.split(":")[1] + ) if not os.path.exists(manifest_path): # Fallback to manifest.json if exists fallback_manifest = os.path.join(self.old_image_dir, "manifest.json") @@ -139,66 +141,72 @@ def _load_oci_metadata(self): return else: raise SquashError(f"Manifest blob not found: {manifest_path}") - - with open(manifest_path, 'r') as f: + + with open(manifest_path, "r") as f: self.manifest = json.load(f, object_pairs_hook=OrderedDict) - + # Read config blob - config_desc = self.manifest['config'] - config_digest = config_desc['digest'] - config_path = os.path.join(self.old_image_dir, "blobs", "sha256", config_digest.split(':')[1]) - + config_desc = self.manifest["config"] + config_digest = config_desc["digest"] + config_path = os.path.join( + self.old_image_dir, "blobs", "sha256", config_digest.split(":")[1] + ) + if not os.path.exists(config_path): raise SquashError(f"Config blob not found: {config_path}") - - with open(config_path, 'r') as f: + + with open(config_path, "r") as f: self.old_image_config = json.load(f, object_pairs_hook=OrderedDict) - + # Generate image ID from config hash self.old_image_id = f"sha256:{config_digest.split(':')[1]}" - + # Extract layer information self._extract_oci_layers() def _load_docker_metadata(self): """Load Docker format metadata""" manifest_file = os.path.join(self.old_image_dir, "manifest.json") - with open(manifest_file, 'r') as f: + with open(manifest_file, "r") as f: manifests = json.load(f, 
object_pairs_hook=OrderedDict) - + if not manifests: raise SquashError("Empty manifest.json") - + # Use the first manifest self.manifest = manifests[0] - + # Read config file - config_path = os.path.join(self.old_image_dir, self.manifest['Config']) - with open(config_path, 'r') as f: + config_path = os.path.join(self.old_image_dir, self.manifest["Config"]) + with open(config_path, "r") as f: self.old_image_config = json.load(f, object_pairs_hook=OrderedDict) - + # Generate image ID from config hash - config_content = json.dumps(self.old_image_config, sort_keys=True, separators=(',', ':')) - self.old_image_id = f"sha256:{hashlib.sha256(config_content.encode()).hexdigest()}" - + config_content = json.dumps( + self.old_image_config, sort_keys=True, separators=(",", ":") + ) + self.old_image_id = ( + f"sha256:{hashlib.sha256(config_content.encode()).hexdigest()}" + ) + # Extract layer information self._extract_docker_layers() def _extract_oci_layers(self): """Extract layer information for OCI format - based on config history""" self.old_image_layers = [] - + # Get actual layer digests from manifest (only non-empty layers) manifest_layers = [] - for layer_desc in self.manifest.get('layers', []): - manifest_layers.append(layer_desc['digest']) - + for layer_desc in self.manifest.get("layers", []): + manifest_layers.append(layer_desc["digest"]) + # Build complete layer list from config.history (includes empty layers) manifest_layer_index = 0 - - for i, history_entry in enumerate(self.old_image_config.get('history', [])): - is_empty = history_entry.get('empty_layer', False) - + + for i, history_entry in enumerate(self.old_image_config.get("history", [])): + is_empty = history_entry.get("empty_layer", False) + if is_empty: # Empty layer - create a virtual layer ID layer_id = f"" @@ -211,28 +219,30 @@ def _extract_oci_layers(self): manifest_layer_index += 1 else: self.log.warning(f"Missing layer data for history entry {i}") - - self.log.debug(f"Found 
{len(self.old_image_layers)} layers in OCI image (including empty layers)") + + self.log.debug( + f"Found {len(self.old_image_layers)} layers in OCI image (including empty layers)" + ) self.log.debug(f"Manifest has {len(manifest_layers)} actual layer files") def _extract_docker_layers(self): """Extract layer information for Docker format - based on config history""" self.old_image_layers = [] - + # Get actual layer paths from manifest (only non-empty layers) - manifest_layers = self.manifest.get('Layers', []) + manifest_layers = self.manifest.get("Layers", []) manifest_layer_ids = [] for layer_path in manifest_layers: # Extract layer ID from path (e.g., "abc123.../layer.tar" -> "abc123...") - layer_id = layer_path.split('/')[0] + layer_id = layer_path.split("/")[0] manifest_layer_ids.append(f"sha256:{layer_id}") - + # Build complete layer list from config.history (includes empty layers) manifest_layer_index = 0 - - for i, history_entry in enumerate(self.old_image_config.get('history', [])): - is_empty = history_entry.get('empty_layer', False) - + + for i, history_entry in enumerate(self.old_image_config.get("history", [])): + is_empty = history_entry.get("empty_layer", False) + if is_empty: # Empty layer - create a virtual layer ID layer_id = f"" @@ -245,14 +255,16 @@ def _extract_docker_layers(self): manifest_layer_index += 1 else: self.log.warning(f"Missing layer data for history entry {i}") - - self.log.debug(f"Found {len(self.old_image_layers)} layers in Docker image (including empty layers)") + + self.log.debug( + f"Found {len(self.old_image_layers)} layers in Docker image (including empty layers)" + ) self.log.debug(f"Manifest has {len(manifest_layer_ids)} actual layer files") def _before_squashing(self): """Prepare for squashing operation""" self.log.info("Preparing for squashing...") - + # Ensure we have image layers if not self.old_image_layers: raise SquashError("No layers found in image") @@ -260,7 +272,7 @@ def _before_squashing(self): # Set up 
squashing parameters if self.from_layer is None: self.from_layer = len(self.old_image_layers) - + try: number_of_layers = int(self.from_layer) self.log.debug(f"Squashing last {number_of_layers} layers") @@ -271,84 +283,90 @@ def _before_squashing(self): number_of_layers = len(self.old_image_layers) - layer_index - 1 else: raise SquashError(f"Layer {self.from_layer} not found in image") - + if number_of_layers <= 0: raise SquashError("Number of layers to squash must be positive") - + if number_of_layers > len(self.old_image_layers): - raise SquashError(f"Cannot squash {number_of_layers} layers from {len(self.old_image_layers)} total layers") - + raise SquashError( + f"Cannot squash {number_of_layers} layers from {len(self.old_image_layers)} total layers" + ) + marker = len(self.old_image_layers) - number_of_layers self.layers_to_squash = self.old_image_layers[marker:] self.layers_to_move = self.old_image_layers[:marker] - + if len(self.layers_to_squash) <= 1: raise SquashError("Need at least 2 layers to squash") - + # Set squash_id like v2_image.py does - should be the last real (non-virtual) layer to move self.squash_id = None if self.layers_to_move: # Find the last non-virtual layer in layers_to_move for layer_id in reversed(self.layers_to_move): - if not layer_id.startswith(' Date: Wed, 20 Aug 2025 08:08:26 +0000 Subject: [PATCH 3/5] fix code --- README.rst | 159 +++++++++++++++++++++------------- docker_squash/cli.py | 71 +++++++++------ docker_squash/tar_image.py | 172 +++++++++++++++---------------------- 3 files changed, 212 insertions(+), 190 deletions(-) diff --git a/README.rst b/README.rst index 45cedec..ccb2719 100644 --- a/README.rst +++ b/README.rst @@ -25,6 +25,8 @@ Features - Can squash from a selected layer to the end (not always possible, depends on the image) - Support for Docker 1.9 or newer (older releases may run perfectly fine too, try it!) 
- Squashed image can be loaded back to the Docker daemon or stored as tar archive somewhere +- Automatic detection of input type (Docker image name vs tar file path) +- Works without Docker daemon when processing tar files Installation ------------ @@ -49,34 +51,34 @@ Usage :: $ docker-squash -h - usage: cli.py [-h] [-v] [--version] [-d] [-f FROM_LAYER] [-t TAG] - [--tmp-dir TMP_DIR] [--output-path OUTPUT_PATH] - image + usage: docker-squash [-h] [-v] [--version] [-f FROM_LAYER] [-t TAG] [-m MESSAGE] [-c] [--tmp-dir TMP_DIR] + [--output-path OUTPUT_PATH] [--load-image [LOAD_IMAGE]] + image Docker layer squashing tool positional arguments: - image Image to be squashed - - optional arguments: - -h, --help show this help message and exit - -v, --verbose Verbose output - --version Show version and exit - -f FROM_LAYER, --from-layer FROM_LAYER - Number of layers to squash or ID of the layer (or image ID or image name) to squash from. - In case the provided value is an integer, specified number of layers will be squashed. - Every layer in the image will be squashed if the parameter is not provided. - -t TAG, --tag TAG Specify the tag to be used for the new image. If not specified no tag will be applied - -m MESSAGE, --message MESSAGE + image Image name or tar file path to be squashed. If a .tar file is provided, it will be processed without + requiring Docker daemon. + + options: + -h, --help show this help message and exit + -v, --verbose Verbose output + --version Show version and exit + -f FROM_LAYER, --from-layer FROM_LAYER + Number of layers to squash or ID of the layer (or image ID or image name) to squash from. In case the + provided value is an integer, specified number of layers will be squashed. Every layer in the image will + be squashed if the parameter is not provided. + -t TAG, --tag TAG Specify the tag to be used for the squashed image (recommended). Without this, the squashed image will + have no repository tags to avoid overwriting the original image. 
+ -m MESSAGE, --message MESSAGE Specify a commit message (comment) for the new image. - -c, --cleanup Remove source image from Docker after squashing - --tmp-dir TMP_DIR Temporary directory to be created and used. This will NOT be deleted afterwards for - easier debugging. - --output-path OUTPUT_PATH + -c, --cleanup Remove source image from Docker after squashing + --tmp-dir TMP_DIR Temporary directory to be created and used. This will NOT be deleted afterwards for easier debugging. + --output-path OUTPUT_PATH Path where the image may be stored after squashing. - --load-image [LOAD_IMAGE] + --load-image [LOAD_IMAGE] Whether to load the image into Docker daemon after squashing - Default: true Note that environment variables may be set as documented in `here `_. @@ -221,8 +223,8 @@ Working without Docker daemon ----------------------------- Sometimes you may want to squash an image without direct access to Docker daemon (e.g., in CI/CD pipelines, -air-gapped environments, or when Docker is not running). The ``--input-tar`` parameter allows you to process -Docker images exported as tar files without requiring a Docker daemon connection. +air-gapped environments, or when Docker is not running). You can provide a tar file path directly as the ``image`` +parameter to process Docker images exported as tar files without requiring a Docker daemon connection. **Step 1**: Export the image to a tar file using ``docker save``: @@ -232,28 +234,30 @@ Docker images exported as tar files without requiring a Docker daemon connection **Step 2**: Squash the image from the tar file. Let's squash the last 8 layers: +Note: The tool automatically detects that ``source.tar`` is a tar file and processes it without Docker daemon. 
+ :: - $ python -m docker_squash.cli --input-tar source.tar --tag jboss/wildfly:squashed -f 8 --output-path squashed.tar --load-image false - 2025-07-04 06:14:01,649 tar_image.py:83 INFO Extracting tar image from source.tar - 2025-07-04 06:14:01,918 tar_image.py:102 INFO Detected OCI format image - 2025-07-04 06:14:01,919 tar_image.py:254 INFO Preparing for squashing... - 2025-07-04 06:14:01,919 tar_image.py:259 INFO Old image has 22 layers - 2025-07-04 06:14:01,919 tar_image.py:305 INFO Will squash 8 layers - 2025-07-04 06:14:01,919 tar_image.py:313 INFO Starting squashing process... - 2025-07-04 06:14:01,919 image.py:750 INFO Starting squashing for /tmp/docker-squash-1strl2rh/new/squashed/layer.tar... - 2025-07-04 06:14:04,001 image.py:775 INFO Squashing file '/tmp/docker-squash-1strl2rh/old/blobs/sha256/f26d32e28c292aba76defcdd67c267000d31a6ac3ebdab5c850aba90ef834927'... - 2025-07-04 06:14:05,284 image.py:923 INFO Squashing finished! - 2025-07-04 06:14:06,202 tar_image.py:632 WARNING OCI output format not fully implemented - creating Docker format - 2025-07-04 06:14:06,202 tar_image.py:558 INFO Using user-specified tag: jboss/wildfly:squashed - 2025-07-04 06:14:06,277 tar_image.py:352 INFO Squashing completed successfully - 2025-07-04 06:14:06,277 tar_image.py:362 INFO Original image size: 382.24 MB - 2025-07-04 06:14:06,277 tar_image.py:363 INFO Squashed image size: 421.60 MB - 2025-07-04 06:14:06,277 tar_image.py:366 INFO If the squashed image is larger than original it means that there were no meaningful files to squash and it just added metadata. Are you sure you specified correct parameters? 
- 2025-07-04 06:14:06,277 cli.py:179 INFO New squashed image ID is sha256:dbde9a2e59a3975663b55773510f36c14b5046f4ef26a84f84445d406124772d - 2025-07-04 06:14:06,277 tar_image.py:732 INFO Exporting squashed image to squashed.tar - 2025-07-04 06:14:07,544 tar_image.py:742 INFO Export completed successfully - 2025-07-04 06:14:07,544 cli.py:195 INFO Done + $ docker-squash source.tar --tag jboss/wildfly:squashed -f 10 --output-path squashed.tar --load-image false + 2025-08-20 07:58:45,338 tar_image.py:54 INFO Extracting tar image from source.tar + 2025-08-20 07:58:45,598 tar_image.py:73 INFO Detected OCI format image + 2025-08-20 07:58:45,599 tar_image.py:251 INFO Old image has 22 layers + 2025-08-20 07:58:45,599 tar_image.py:284 INFO Checking if squashing is necessary... + 2025-08-20 07:58:45,599 tar_image.py:298 INFO Attempting to squash last 10 layers... + 2025-08-20 07:58:45,599 tar_image.py:306 INFO Starting squashing process... + 2025-08-20 07:58:45,599 image.py:750 INFO Starting squashing for /tmp/docker-squash-7n3ui1ar/new/squashed/layer.tar... + 2025-08-20 07:58:47,713 image.py:775 INFO Squashing file '/tmp/docker-squash-7n3ui1ar/old/blobs/sha256/f26d32e28c292aba76defcdd67c267000d31a6ac3ebdab5c850aba90ef834927'... + 2025-08-20 07:58:49,041 image.py:923 INFO Squashing finished! + 2025-08-20 07:58:49,953 tar_image.py:660 WARNING OCI output format not fully implemented - creating Docker format + 2025-08-20 07:58:49,953 tar_image.py:570 INFO Using user-specified tag: jboss/wildfly:squashed + 2025-08-20 07:58:50,028 tar_image.py:349 INFO Squashing completed successfully + 2025-08-20 07:58:50,028 tar_image.py:359 INFO Original image size: 382.24 MB + 2025-08-20 07:58:50,028 tar_image.py:360 INFO Squashed image size: 421.59 MB + 2025-08-20 07:58:50,028 tar_image.py:363 INFO If the squashed image is larger than original it means that there were no meaningful files to squash and it just added metadata. Are you sure you specified correct parameters? 
+ 2025-08-20 07:58:50,028 cli.py:176 INFO New squashed image ID is sha256:7ebd48ca15f2e8d937a6bf3d77e0b865feddebd3ec8f11532d8a30c0000f2b67 + 2025-08-20 07:58:50,028 tar_image.py:766 INFO Exporting squashed image to squashed.tar + 2025-08-20 07:58:51,257 tar_image.py:776 INFO Export completed successfully + 2025-08-20 07:58:51,257 cli.py:191 INFO Done **Step 3**: Load the squashed image back into Docker: @@ -267,22 +271,20 @@ Now you can verify the squashed image structure: :: $ docker history jboss/wildfly:squashed - IMAGE CREATED CREATED BY SIZE COMMENT - 9d47ef6da59f 41 seconds ago 270MB Squashed layers - 3 years ago /bin/sh -c #(nop) ENV WILDFLY_VERSION=25.0.… 0B - 4 years ago /bin/sh -c #(nop) ENV JAVA_HOME=/usr/lib/jv… 0B - 4 years ago /bin/sh -c #(nop) USER jboss 0B - 4 years ago /bin/sh -c yum -y install java-11-openjdk-de… 239MB - 4 years ago /bin/sh -c #(nop) USER root 0B - 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B - 4 years ago /bin/sh -c #(nop) USER jboss 0B - 4 years ago /bin/sh -c #(nop) WORKDIR /opt/jboss 0B - 4 years ago /bin/sh -c groupadd -r jboss -g 1000 && user… 406kB - 4 years ago /bin/sh -c yum update -y && yum -y install x… 33.5MB - 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B - 4 years ago /bin/sh -c #(nop) CMD ["/bin/bash"] 0B - 4 years ago /bin/sh -c #(nop) LABEL org.label-schema.sc… 0B - 4 years ago /bin/sh -c #(nop) ADD file:61908381d3142ffba… 222MB + IMAGE CREATED CREATED BY SIZE COMMENT + a8c48d9906a7 About a minute ago 270MB Squashed layers + 4 years ago /bin/sh -c #(nop) USER jboss 0B + 4 years ago /bin/sh -c yum -y install java-11-openjdk-de… 239MB + 4 years ago /bin/sh -c #(nop) USER root 0B + 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B + 4 years ago /bin/sh -c #(nop) USER jboss 0B + 4 years ago /bin/sh -c #(nop) WORKDIR /opt/jboss 0B + 4 years ago /bin/sh -c groupadd -r jboss -g 1000 && user… 406kB + 4 years ago /bin/sh -c yum update -y && yum -y install x… 33.5MB + 4 years ago 
/bin/sh -c #(nop) MAINTAINER Marek Goldmann… 0B + 5 years ago /bin/sh -c #(nop) CMD ["/bin/bash"] 0B + 5 years ago /bin/sh -c #(nop) LABEL org.label-schema.sc… 0B + 5 years ago /bin/sh -c #(nop) ADD file:61908381d3142ffba… 222MB **Key advantages of tar mode:** @@ -292,9 +294,46 @@ Now you can verify the squashed image structure: - Maintains complete layer history compatibility - Can process images on systems where Docker is not installed +**Podman compatibility:** + +The squashed tar files are fully compatible with Podman. You can load them using: + +:: + + $ podman load -i squashed.tar + Getting image source signatures + Copying blob 8055a1084cfa done | + Copying blob 613be09ab3c0 done | + Copying blob 3fbe1e874b0d done | + Copying blob 869989761eb2 done | + Copying blob 115463be137a done | + Copying config 7ebd48ca15 done | + Writing manifest to image destination + Loaded image: localhost/jboss/wildfly:squashed + + $ podman history jboss/wildfly:squashed + ID CREATED CREATED BY SIZE COMMENT + 7ebd48ca15f2 5 minutes ago 268MB Squashed layers + 4 years ago /bin/sh -c #(nop) USER jboss 0B + 4 years ago /bin/sh -c yum -y install java-11-openjdk-... 237MB + 4 years ago /bin/sh -c #(nop) USER root 0B + 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldma... 0B + 4 years ago /bin/sh -c #(nop) USER jboss 0B + 4 years ago /bin/sh -c #(nop) WORKDIR /opt/jboss 0B + 4 years ago /bin/sh -c groupadd -r jboss -g 1000 && us... 374kB + 4 years ago /bin/sh -c yum update -y && yum -y install... 32.8MB + 4 years ago /bin/sh -c #(nop) MAINTAINER Marek Goldma... 0B + 5 years ago /bin/sh -c #(nop) CMD ["/bin/bash"] 0B + 5 years ago /bin/sh -c #(nop) LABEL org.label-schema.... 0B + 5 years ago /bin/sh -c #(nop) ADD file:61908381d3142ff... 211MB + ... + +This enables docker-squash to work in Podman-only environments, rootless containers, and mixed container runtime scenarios. 
+ **Important notes:** - Always use ``--tag`` parameter to avoid overwriting the original image name - Set ``--load-image false`` if you only want to export the squashed tar file - Use ``--output-path`` to specify where the squashed tar should be saved -- The tool automatically detects image format (Docker vs OCI) from the input tar +- The tool automatically detects input type (image name vs tar file) and image format (Docker vs OCI) +- Squashed images work seamlessly with both Docker and Podman diff --git a/docker_squash/cli.py b/docker_squash/cli.py index f80397b..5d8db1e 100644 --- a/docker_squash/cli.py +++ b/docker_squash/cli.py @@ -70,11 +70,9 @@ def run(self): "--version", action="version", help="Show version and exit", version=version ) - parser.add_argument("image", nargs="?", help="Image to be squashed") - parser.add_argument( - "--input-tar", - help="Path to tar file created by 'docker save'. Process tar file directly without requiring Docker daemon.", + "image", + help="Image name or tar file path to be squashed. 
If a .tar file is provided, it will be processed without requiring Docker daemon.", ) parser.add_argument( @@ -118,14 +116,6 @@ def run(self): args = parser.parse_args() - if not args.input_tar and not args.image: - parser.error("Either 'image' or '--input-tar' must be specified") - - if args.input_tar and args.image: - parser.error( - "Cannot specify both 'image' and '--input-tar' at the same time" - ) - if args.verbose: self.log.setLevel(logging.DEBUG) else: @@ -134,9 +124,12 @@ def run(self): self.log.debug("Running version %s", version) try: - if args.input_tar: + # Auto-detect if input is tar file or image name + if self._is_tar_file(args.image): + self.log.debug(f"Detected tar file: {args.image}") self._run_tar_mode(args) else: + self.log.debug(f"Detected image name: {args.image}") self._run_image_mode(args) except KeyboardInterrupt: @@ -171,7 +164,7 @@ def _run_tar_mode(self, args): tar_image = TarImage( log=self.log, - tar_path=args.input_tar, + tar_path=args.image, # changed to args.image from_layer=args.from_layer, tmp_dir=args.tmp_dir, tag=args.tag, @@ -182,25 +175,19 @@ def _run_tar_mode(self, args): new_image_id = tar_image.squash() self.log.info("New squashed image ID is %s" % new_image_id) + if not args.output_path: + import os + + self.output_path = os.path.join( + os.path.dirname(args.image), f"squashed-{new_image_id[:12]}.tar" + ) + if args.output_path: tar_image.export_tar_archive(args.output_path) if args.load_image: tar_image.load_squashed_image() - if not args.output_path and not args.load_image: - import os - import tempfile - - temp_output = os.path.join( - tempfile.gettempdir(), f"squashed-{new_image_id[:12]}.tar" - ) - tar_image.export_tar_archive(temp_output) - self.log.info( - "Since no output path was specified and loading to Docker was disabled, " - f"the squashed image has been saved to: {temp_output}" - ) - self.log.info("Done") finally: @@ -220,6 +207,36 @@ def _run_image_mode(self, args): cleanup=args.cleanup, ).run() + def 
_is_tar_file(self, input_path): + """Detect if input is a tar file or image name""" + import os + import tarfile + + # Check if it's a file path that exists + if os.path.isfile(input_path): + # Check if it's a valid tar file + try: + with tarfile.open(input_path, "r"): + return True + except (tarfile.TarError, OSError): + return False + + # Check if it ends with .tar extension + if input_path.endswith((".tar", ".tar.gz", ".tgz")): + return True + + # Check for obvious file path patterns + if ( + input_path.startswith(("/")) # Absolute path + or input_path.startswith(("./")) # Current dir + or input_path.startswith(("../")) # Parent dir + or input_path.startswith(("~/")) + ): # Home dir + return True + + # Otherwise assume it's an image name (even if it contains '/') + return False + def run(): cli = CLI() diff --git a/docker_squash/tar_image.py b/docker_squash/tar_image.py index e9e7560..25f14d3 100644 --- a/docker_squash/tar_image.py +++ b/docker_squash/tar_image.py @@ -20,64 +20,35 @@ class TarImage(Image): def __init__( self, log, tar_path, from_layer=None, tmp_dir=None, tag=None, comment="" ): + # Call parent constructor with adapted parameters + super().__init__( + log=log, + docker=None, # TarImage doesn't need Docker client + image=tar_path, # Use tar_path as image identifier + from_layer=from_layer, + tmp_dir=tmp_dir, + tag=tag, + comment=comment, + ) + + # TarImage specific attributes self.tar_path = tar_path - self.log = log - self.debug = self.log.isEnabledFor(logging.DEBUG) - self.from_layer = from_layer - self.tag = tag - self.comment = comment - self.tmp_dir = self._prepare_tmp_directory(tmp_dir) - self.date = self._get_current_date() - - # Initialize attributes required by base class - self.image_name = None - self.image_tag = None - self.squash_id = None - self.oci_format = False - - # Set up directory structure - self.old_image_dir = os.path.join(self.tmp_dir, "old") - self.new_image_dir = os.path.join(self.tmp_dir, "new") - self.squashed_dir = 
os.path.join(self.new_image_dir, "squashed") - - # Ensure directories exist - os.makedirs(self.old_image_dir, exist_ok=True) - os.makedirs(self.new_image_dir, exist_ok=True) - os.makedirs(self.squashed_dir, exist_ok=True) - # Initialize variables + # *** Critical: Initialize directories immediately as subsequent operations require them *** + self._initialize_directories() + + # Initialize TarImage specific variables self.manifest = None self.old_image_config = None - self.old_image_layers = [] self.original_image_name = None - self.old_image_id = None + self.old_image_layers = [] # This also needs initialization - # Parse image name if provided - if self.tag: - self.image_name, self.image_tag = self._parse_image_name(self.tag) - - # Process the tar file + # Process tar file (TarImage specific logic) self._extract_tar_image() self._detect_image_format() self._load_image_metadata() self.size_before = self._dir_size(self.old_image_dir) - def squash(self): - """Main squash method - follows base class pattern""" - self._before_squashing() - ret = self._squash() - self._after_squashing() - return ret - - def _get_current_date(self): - """Get current date in Docker format""" - import datetime - import re - - # Workaround for Golang microsecond formatting - date = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ") - return re.sub(r"0*Z$", "Z", date) - def _extract_tar_image(self): """Extract tar image to temporary directory""" self.log.info(f"Extracting tar image from {self.tar_path}") @@ -263,64 +234,72 @@ def _extract_docker_layers(self): def _before_squashing(self): """Prepare for squashing operation""" - self.log.info("Preparing for squashing...") + # No need to call _initialize_directories() as it's already called in constructor - # Ensure we have image layers - if not self.old_image_layers: - raise SquashError("No layers found in image") + # Location of the tar archive with squashed layers + self.squashed_tar = os.path.join(self.squashed_dir, "layer.tar") + 
+ # Handle tags if provided + if self.tag: + self.image_name, self.image_tag = self._parse_image_name(self.tag) + + # TarImage specific: Ensure we have necessary layer information + if not hasattr(self, "old_image_layers") or not self.old_image_layers: + raise SquashError("No layers found in tar image") + + # *** Important: Copy layer calculation logic from base class *** self.log.info("Old image has %s layers", len(self.old_image_layers)) - # Set up squashing parameters + self.log.debug("Old layers: %s", self.old_image_layers) + + # By default - squash all layers. if self.from_layer is None: self.from_layer = len(self.old_image_layers) try: number_of_layers = int(self.from_layer) - self.log.debug(f"Squashing last {number_of_layers} layers") + self.log.debug( + f"We detected number of layers ({number_of_layers}) as the argument to squash" + ) except ValueError: - # Handle layer ID as from_layer + # For TarImage, we need to adapt this logic + # because we don't have Docker client to check layer IDs if self.from_layer in self.old_image_layers: - layer_index = self.old_image_layers.index(self.from_layer) - number_of_layers = len(self.old_image_layers) - layer_index - 1 + number_of_layers = ( + len(self.old_image_layers) + - self.old_image_layers.index(self.from_layer) + - 1 + ) else: - raise SquashError(f"Layer {self.from_layer} not found in image") + raise SquashError( + f"The {self.from_layer} layer could not be found in the image" + ) - if number_of_layers <= 0: - raise SquashError("Number of layers to squash must be positive") - - if number_of_layers > len(self.old_image_layers): - raise SquashError( - f"Cannot squash {number_of_layers} layers from {len(self.old_image_layers)} total layers" - ) + self._validate_number_of_layers(number_of_layers) marker = len(self.old_image_layers) - number_of_layers + self.layers_to_squash = self.old_image_layers[marker:] self.layers_to_move = self.old_image_layers[:marker] - if len(self.layers_to_squash) <= 1: - raise 
SquashError("Need at least 2 layers to squash") + self.log.info("Checking if squashing is necessary...") - # Set squash_id like v2_image.py does - should be the last real (non-virtual) layer to move - self.squash_id = None - if self.layers_to_move: - # Find the last non-virtual layer in layers_to_move - for layer_id in reversed(self.layers_to_move): - if not layer_id.startswith(" Date: Wed, 20 Aug 2025 08:17:24 +0000 Subject: [PATCH 4/5] fix imported but unused --- docker_squash/tar_image.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docker_squash/tar_image.py b/docker_squash/tar_image.py index 25f14d3..4aa8fa4 100644 --- a/docker_squash/tar_image.py +++ b/docker_squash/tar_image.py @@ -2,7 +2,6 @@ import hashlib import json -import logging import os import shutil import tarfile From 465fcba8ff33e8466424af103867751987c7a728 Mon Sep 17 00:00:00 2001 From: wuliang Date: Thu, 21 Aug 2025 02:40:35 +0000 Subject: [PATCH 5/5] fix manifest & readme --- README.rst | 2 +- docker_squash/tar_image.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ccb2719..a34f057 100644 --- a/README.rst +++ b/README.rst @@ -238,7 +238,7 @@ Note: The tool automatically detects that ``source.tar`` is a tar file and proce :: - $ docker-squash source.tar --tag jboss/wildfly:squashed -f 10 --output-path squashed.tar --load-image false + $ docker-squash --tag jboss/wildfly:squashed -f 10 --output-path squashed.tar --load-image false source.tar 2025-08-20 07:58:45,338 tar_image.py:54 INFO Extracting tar image from source.tar 2025-08-20 07:58:45,598 tar_image.py:73 INFO Detected OCI format image 2025-08-20 07:58:45,599 tar_image.py:251 INFO Old image has 22 layers diff --git a/docker_squash/tar_image.py b/docker_squash/tar_image.py index 4aa8fa4..3ee373b 100644 --- a/docker_squash/tar_image.py +++ b/docker_squash/tar_image.py @@ -113,7 +113,37 @@ def _load_oci_metadata(self): raise SquashError(f"Manifest blob 
not found: {manifest_path}") with open(manifest_path, "r") as f: - self.manifest = json.load(f, object_pairs_hook=OrderedDict) + manifest = json.load(f, object_pairs_hook=OrderedDict) + + # Check if this is another index (nested structure) + if manifest.get("mediaType") == "application/vnd.oci.image.index.v1+json": + # This is a nested index, get the actual manifest + if not manifest.get("manifests"): + raise SquashError("No manifests found in nested index") + + nested_manifest_desc = manifest["manifests"][0] + nested_manifest_digest = nested_manifest_desc["digest"] + nested_manifest_path = os.path.join( + self.old_image_dir, + "blobs", + "sha256", + nested_manifest_digest.split(":")[1], + ) + + if not os.path.exists(nested_manifest_path): + raise SquashError( + f"Nested manifest blob not found: {nested_manifest_path}" + ) + + with open(nested_manifest_path, "r") as f: + self.manifest = json.load(f, object_pairs_hook=OrderedDict) + else: + # This is a direct manifest + self.manifest = manifest + + # Now check for config field + if "config" not in self.manifest: + raise SquashError("No config found in manifest - invalid OCI image") # Read config blob config_desc = self.manifest["config"]