From e092e9917817886eaa930e79bcb739ecd9a57c38 Mon Sep 17 00:00:00 2001
From: Technophobe01
Date: Tue, 30 Dec 2025 20:00:21 -0800
Subject: [PATCH 1/2] fix(xray): Add gzip decompression for OCI image layers

Many Docker/OCI images use gzip-compressed layers with media types like:
- application/vnd.docker.image.rootfs.diff.tar.gzip
- application/vnd.oci.image.layer.v1.tar+gzip

Previously, LoadPackage would fail with "archive/tar: invalid tar header"
or "unexpected EOF" when processing these layers because it tried to read
the gzip-compressed data directly as a tar archive.

This fix:
1. Checks OCI manifest media type for gzip indication
2. Sniffs the two-byte gzip magic number at the start of the layer and
   replays those bytes in front of the remaining data, so detection never
   consumes layer content (gzip.NewReader cannot be used as a probe: it
   reads ahead from the tar entry even when the header check fails)
3. Falls back to raw tar reading if the data is not gzip-compressed
4. Properly reports errors when the media type or the magic number
   indicates gzip but decompression fails

Fixes processing of standard Docker Hub images that use compressed layers.

Signed-off-by: Technophobe01
---
 pkg/docker/dockerimage/dockerimage.go | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/pkg/docker/dockerimage/dockerimage.go b/pkg/docker/dockerimage/dockerimage.go
index 2ff3665a..c0a025fb 100644
--- a/pkg/docker/dockerimage/dockerimage.go
+++ b/pkg/docker/dockerimage/dockerimage.go
@@ -1130,10 +1130,43 @@ func LoadPackage(archivePath string,
 					layerID = hdr.Name
 				}

+				// Handle gzip-compressed OCI image layers.
+				// Many Docker/OCI images use gzip-compressed layers with media types like:
+				// - application/vnd.docker.image.rootfs.diff.tar.gzip
+				// - application/vnd.oci.image.layer.v1.tar+gzip
+				mediaType, hasMediaType := nonLayerFileNames[hdr.Name]
+				isGzipByMediaType := hasMediaType && strings.Contains(mediaType, "gzip")
+
+				// Sniff the two-byte gzip magic number and replay it in front of the
+				// rest of the entry. Probing with gzip.NewReader(tr) is not safe: it
+				// buffers (and so consumes) data from tr even when the header check
+				// fails, which would leave the raw tar fallback mid-stream.
+				// A short read or read error is ignored on purpose: the same
+				// condition resurfaces as soon as the replayed stream is read.
+				var gzMagic [2]byte
+				sniffed, _ := io.ReadFull(tr, gzMagic[:])
+				var layerReader io.Reader = io.MultiReader(strings.NewReader(string(gzMagic[:sniffed])), tr)
+				hasGzipMagic := sniffed == len(gzMagic) && gzMagic[0] == 0x1f && gzMagic[1] == 0x8b
+
+				if hasGzipMagic || isGzipByMediaType {
+					gzReader, gzErr := gzip.NewReader(layerReader)
+					if gzErr != nil {
+						log.Errorf("dockerimage.LoadPackage: gzip decompression failed for layer(%s/%s) with mediaType '%s' - %v", archivePath, hdr.Name, mediaType, gzErr)
+						return nil, gzErr
+					}
+					defer gzReader.Close()
+					layerReader = gzReader
+					if isGzipByMediaType {
+						log.Debugf("dockerimage.LoadPackage: using gzip decompression for layer '%s' (mediaType: %s)", hdr.Name, mediaType)
+					} else {
+						log.Debugf("dockerimage.LoadPackage: auto-detected gzip compression for layer '%s'", hdr.Name)
+					}
+				}
+
 				layer, err := layerFromStream(
 					pkg,
 					hdr.Name,
-					tar.NewReader(tr),
+					tar.NewReader(layerReader),
 					layerID,
 					topChangesMax,
 					doHashData,

From 04c7d83f38b115ea0843cac3bb715550b751f5ba Mon Sep 17 00:00:00 2001
From: Technophobe01
Date: Tue, 30 Dec 2025 20:54:05 -0800
Subject: [PATCH 2/2] feat(xray): add --target-image-archive flag for analyzing saved image archives

This adds a new --target-image-archive flag to the xray command that allows
analyzing a pre-saved Docker image tar archive directly, without requiring
access to the Docker daemon.

Use case: When orchestrating multiple xray invocations (e.g., for initial
analysis followed by file extraction), the image archive from the first
invocation can be reused directly in subsequent invocations without
re-pulling from registry.

Changes: - Add FlagTargetImageArchive to cliflags.go - Register flag in xray command (cli.go) - Add GetArchiveInfo() helper to extract image ID from archive manifest - Add archive-based analysis path in handler.go Signed-off-by: Technophobe01 --- pkg/app/master/command/cliflags.go | 20 ++++-- pkg/app/master/command/xray/cli.go | 7 +- pkg/app/master/command/xray/handler.go | 98 ++++++++++++++++++++++++-- pkg/docker/dockerimage/dockerimage.go | 66 +++++++++++++++++ 4 files changed, 178 insertions(+), 13 deletions(-) diff --git a/pkg/app/master/command/cliflags.go b/pkg/app/master/command/cliflags.go index 842d908d..96997f4c 100644 --- a/pkg/app/master/command/cliflags.go +++ b/pkg/app/master/command/cliflags.go @@ -75,9 +75,10 @@ const ( // Shared command flag names const ( - FlagCommandParamsFile = "command-params-file" - FlagTarget = "target" - FlagPull = "pull" + FlagCommandParamsFile = "command-params-file" + FlagTarget = "target" + FlagTargetImageArchive = "target-image-archive" + FlagPull = "pull" FlagDockerConfigPath = "docker-config-path" FlagRegistryAccount = "registry-account" FlagRegistrySecret = "registry-secret" @@ -200,9 +201,10 @@ const ( // Shared command flag usage info const ( - FlagCommandParamsFileUsage = "JSON file with all command parameters" - FlagTargetUsage = "Target container image (name or ID)" - FlagPullUsage = "Try pulling target if it's not available locally" + FlagCommandParamsFileUsage = "JSON file with all command parameters" + FlagTargetUsage = "Target container image (name or ID)" + FlagTargetImageArchiveUsage = "Target container image archive (tar file path)" + FlagPullUsage = "Try pulling target if it's not available locally" FlagDockerConfigPathUsage = "Docker config path (used to fetch registry credentials)" FlagRegistryAccountUsage = "Target registry account used when pulling images from private registries" FlagRegistrySecretUsage = "Target registry secret used when pulling images from private registries" @@ -467,6 +469,12 @@ 
var CommonFlags = map[string]cli.Flag{ Usage: FlagTargetUsage, EnvVars: []string{"DSLIM_TARGET"}, }, + FlagTargetImageArchive: &cli.StringFlag{ + Name: FlagTargetImageArchive, + Value: "", + Usage: FlagTargetImageArchiveUsage, + EnvVars: []string{"DSLIM_TARGET_IMAGE_ARCHIVE"}, + }, FlagPull: &cli.BoolFlag{ Name: FlagPull, Value: true, //enabled by default diff --git a/pkg/app/master/command/xray/cli.go b/pkg/app/master/command/xray/cli.go index cbbb831f..530dd6bd 100644 --- a/pkg/app/master/command/xray/cli.go +++ b/pkg/app/master/command/xray/cli.go @@ -49,6 +49,7 @@ var XRayFlags = []cli.Flag{ command.Cflag(command.FlagRuntime), command.Cflag(command.FlagCommandParamsFile), command.Cflag(command.FlagTarget), + command.Cflag(command.FlagTargetImageArchive), command.Cflag(command.FlagPull), command.Cflag(command.FlagDockerConfigPath), command.Cflag(command.FlagRegistryAccount), @@ -114,9 +115,10 @@ var CLI = &cli.Command{ } targetRef := ctx.String(command.FlagTarget) - if targetRef == "" { + targetImageArchive := ctx.String(command.FlagTargetImageArchive) + if targetRef == "" && targetImageArchive == "" { if ctx.Args().Len() < 1 { - xc.Out.Error("param.target", "missing image ID/name") + xc.Out.Error("param.target", "missing image ID/name or archive path") cli.ShowCommandHelp(ctx, Name) return nil } else { @@ -345,6 +347,7 @@ var CLI = &cli.Command{ gcvalues, cparams, targetRef, + targetImageArchive, doPull, dockerConfigPath, registryAccount, diff --git a/pkg/app/master/command/xray/handler.go b/pkg/app/master/command/xray/handler.go index 258d52d1..ad53b392 100644 --- a/pkg/app/master/command/xray/handler.go +++ b/pkg/app/master/command/xray/handler.go @@ -102,6 +102,7 @@ func OnCommand( gparams *command.GenericParams, cparams *CommandParams, targetRef string, + targetImageArchive string, doPull bool, dockerConfigPath string, registryAccount string, @@ -162,13 +163,100 @@ func OnCommand( xc.Out.Info("cmd.input.params", ovars{ - "runtime": rr, - "target": 
targetRef, - "add-image-manifest": doAddImageManifest, - "add-image-config": doAddImageConfig, - "rm-file-artifacts": doRmFileArtifacts, + "runtime": rr, + "target": targetRef, + "target-image-archive": targetImageArchive, + "add-image-manifest": doAddImageManifest, + "add-image-config": doAddImageConfig, + "rm-file-artifacts": doRmFileArtifacts, }) + // Handle archive-based analysis (when --target-image-archive is provided) + if targetImageArchive != "" { + logger.Debugf("using target image archive: %s", targetImageArchive) + + if !fsutil.IsRegularFile(targetImageArchive) { + xc.Out.Error("target.image.archive", "archive file not found") + exitCode := command.ECTCommon | command.ECCImageNotFound + xc.Out.State("exited", ovars{"exit.code": exitCode}) + xc.Exit(exitCode) + } + + // Get image info from archive + archiveInfo, err := dockerimage.GetArchiveInfo(targetImageArchive) + if err != nil { + xc.Out.Error("target.image.archive", fmt.Sprintf("error reading archive info: %v", err)) + xc.Out.State("exited", ovars{"exit.code": -1}) + xc.Exit(-1) + } + + imageID := archiveInfo.ImageID + logger.Debugf("archive image ID: %s, tags: %v", imageID, archiveInfo.RepoTags) + + cmdReport.TargetReference = targetImageArchive + if len(archiveInfo.RepoTags) > 0 { + cmdReport.TargetReference = archiveInfo.RepoTags[0] + } + + xc.Out.State("image.data.inspection.start") + xc.Out.Info("image.archive", + ovars{ + "path": targetImageArchive, + "image.id": imageID, + "repo.tags": strings.Join(archiveInfo.RepoTags, ","), + }) + + pp := &dockerimage.ProcessorParams{ + DetectIdentities: &dockerimage.DetectOpParam{ + Enabled: cparams.DetectIdentities.Enabled, + DumpRaw: cparams.DetectIdentities.DumpRaw, + IsConsoleOut: cparams.DetectIdentities.IsConsoleOut, + IsDirOut: cparams.DetectIdentities.IsDirOut, + OutputPath: cparams.DetectIdentities.OutputPath, + InputParams: cparams.DetectIdentities.InputParams, + }, + DetectAllCertFiles: cparams.DetectAllCertFiles, + DetectAllCertPKFiles: 
cparams.DetectAllCertPKFiles, + } + + xc.Out.Info("image.data.inspection.process.image.start") + _, err = dockerimage.LoadPackage( + targetImageArchive, + imageID, + false, + topChangesMax, + doHashData, + doDetectDuplicates, + changeDataHashMatchers, + changePathMatchers, + changeDataMatchers, + utf8Detector, + pp) + + if err != nil { + xc.Out.Error("image.data.inspection", fmt.Sprintf("error loading package: %v", err)) + xc.Out.State("exited", ovars{"exit.code": -1}) + xc.Exit(-1) + } + xc.Out.Info("image.data.inspection.process.image.end") + + if utf8Detector != nil { + errutil.FailOn(utf8Detector.Close()) + } + + xc.Out.State("image.data.inspection.done") + + cmdReport.ImageArchiveLocation = targetImageArchive + cmdReport.State = cmd.StateCompleted + cmdReport.Save() + + vinfo := <-viChan + version.PrintCheckVersion(xc, "", vinfo) + + xc.Out.State("done") + return + } + resolved := command.ResolveAutoRuntime(cparams.Runtime) logger.Tracef("runtime.handler: rt=%s resolved=%s", cparams.Runtime, resolved) diff --git a/pkg/docker/dockerimage/dockerimage.go b/pkg/docker/dockerimage/dockerimage.go index c0a025fb..c13c2f48 100644 --- a/pkg/docker/dockerimage/dockerimage.go +++ b/pkg/docker/dockerimage/dockerimage.go @@ -516,6 +516,72 @@ const ( OCIImageManifestLocation = "ll.oci.imagemanifest" ) +// ArchiveInfo contains basic information extracted from an image archive +type ArchiveInfo struct { + ImageID string + RepoTags []string +} + +// GetArchiveInfo extracts basic image information from a Docker image archive +// by reading the manifest.json file. This is useful when you have an archive +// but don't have the image ID. 
+func GetArchiveInfo(archivePath string) (*ArchiveInfo, error) {
+	archive, err := os.Open(archivePath)
+	if err != nil {
+		log.Errorf("dockerimage.GetArchiveInfo: os.Open error - %v", err)
+		return nil, err
+	}
+	defer archive.Close()
+
+	// Walk the outer tar and stop at the first manifest.json entry.
+	entries := tar.NewReader(archive)
+	for {
+		hdr, err := entries.Next()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			log.Errorf("dockerimage.GetArchiveInfo: error reading archive - %v", err)
+			return nil, err
+		}
+
+		// Anything that is not the top-level manifest is irrelevant here.
+		if hdr == nil || hdr.Name != "manifest.json" {
+			continue
+		}
+
+		var manifests []DockerManifestObject
+		if err := json.NewDecoder(entries).Decode(&manifests); err != nil {
+			log.Errorf("dockerimage.GetArchiveInfo: error decoding manifest - %v", err)
+			return nil, err
+		}
+		if len(manifests) == 0 {
+			return nil, fmt.Errorf("no manifests found in archive")
+		}
+
+		// Only the first manifest entry is used. The image ID is derived
+		// from its config path:
+		//   OCI layout:    "blobs/sha256/DIGEST" -> "sha256:DIGEST"
+		//   Docker layout: "abc123.json"         -> "abc123"
+		first := manifests[0]
+		info := &ArchiveInfo{
+			RepoTags: first.RepoTags,
+		}
+		switch {
+		case strings.HasPrefix(first.Config, "blobs/sha256/"):
+			digest := strings.TrimPrefix(first.Config, "blobs/sha256/")
+			info.ImageID = "sha256:" + digest
+		default:
+			info.ImageID = strings.TrimSuffix(first.Config, ".json")
+		}
+
+		return info, nil
+	}
+
+	// The archive ended without a manifest.json entry.
+	return nil, fmt.Errorf("manifest.json not found in archive")
+}
+
 func LoadPackage(archivePath string,
 	imageID string,
 	skipObjects bool,