From dfc9e6a5d38833c0d593b7118d38e4a8afa15217 Mon Sep 17 00:00:00 2001
From: Cong Wang
Date: Wed, 27 May 2026 18:34:12 -0700
Subject: [PATCH] image: resolve --image via the Docker daemon API instead of the CLI

Sandlock used to shell out to the `docker` CLI on every --image
invocation: `docker create` + `docker export` to dump the container's
rootfs, `docker inspect` to read Entrypoint/Cmd, and `docker rm` to
clean up. That made the `docker` executable a hard runtime dependency
and meant parsing its textual output.

Talk to the Docker daemon directly through its HTTP API (the bollard
crate) instead. The --image flag still takes a local image reference
(python:3.12-slim, a digest, an image id); only the mechanism changes.
The `docker` binary no longer needs to be on PATH, though a running
daemon with an accessible socket is still required.

extract() and inspect_cmd() are now async (callers already run inside
the tokio runtime). Both connect to the local daemon and ping it up
front, so --image fails early with a clear message when the daemon is
unreachable rather than partway through sandbox setup. Image lookup is
local only: inspect_image never pulls, and a missing image surfaces the
daemon's 404 as "image not found in local Docker storage".

Rootfs materialization mirrors the old create+export path: a throwaway
stopped container is created from the image, its flattened filesystem
is streamed out with export_container into a temp tar (bounded memory,
never held whole), then unpacked. The container is always removed
afterward, even when the export fails. Because docker export yields an
already merged filesystem, all the per-layer, gzip, and AUFS/OCI
whiteout handling is gone; only the hard-link fixups remain, since the
tar crate resolves link targets against the process cwd rather than the
destination root.

Extracted rootfs is cached at ~/.cache/sandlock/images//rootfs/, keyed by the image content id, with a .complete marker distinguishing a finished extraction from an interrupted one. Warm-cache invocations skip the daemon export entirely. Signed-off-by: Cong Wang --- Cargo.lock | 164 ++++++++++ crates/sandlock-cli/src/main.rs | 12 +- crates/sandlock-core/Cargo.toml | 5 +- crates/sandlock-core/src/image.rs | 498 ++++++++++++++++++++---------- 4 files changed, 509 insertions(+), 170 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 996638a..fb23e04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,6 +192,49 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bollard" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d0a013e3d3ee4edd61e779adf117944c08902d375f18630a0c5b8f95659734" +dependencies = [ + "base64", + "bollard-stubs", + "bytes", + "futures-core", + "futures-util", + "hex", + "http", + "http-body-util", + "hyper", + "hyper-named-pipe", + "hyper-util", + "hyperlocal", + "log", + "pin-project-lite", + "serde", + "serde_derive", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tower-service", + "url", + "winapi", +] + +[[package]] +name = "bollard-stubs" +version = "1.53.1-rc.29.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce412eb6f7096743011dc3cb5c674caeb24ced61d8c498fe07cf7998a4fea889" +dependencies = [ + "serde", + "serde_json", + "serde_repr", +] + [[package]] name = "brotli" version = "8.0.2" @@ -486,6 +529,16 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f" +[[package]] +name = "filetime" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" +dependencies = [ + "cfg-if", + "libc", +] + 
[[package]] name = "find-msvc-tools" version = "0.1.9" @@ -698,6 +751,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.4.0" @@ -792,6 +851,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-named-pipe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" +dependencies = [ + "hex", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", + "winapi", +] + [[package]] name = "hyper-rustls" version = "0.26.0" @@ -846,6 +920,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyperlocal" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" +dependencies = [ + "hex", + "http-body-util", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "icu_collections" version = "2.2.0" @@ -1549,6 +1638,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "same-file" version = "1.0.6" @@ -1578,7 +1673,9 @@ name = "sandlock-core" version = "0.8.2" dependencies = [ "bincode", + "bollard", "clap", + "futures-util", "goblin", "hudsucker", "jiff", @@ -1590,6 +1687,7 @@ dependencies = [ "serde", "serde_json", "syscalls", 
+ "tar", "tempfile", "thiserror 2.0.18", "tokio", @@ -1689,6 +1787,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -1698,6 +1807,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "sha1" version = "0.10.6" @@ -1814,6 +1935,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "tar" +version = "0.4.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6221d9a6003c78398e3b239969f352578258df48c8eb051caadae0015bc840" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.27.0" @@ -2350,6 +2482,22 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -2359,6 +2507,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" 
+version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-link" version = "0.2.1" @@ -2577,6 +2731,16 @@ dependencies = [ "time", ] +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "yasna" version = "0.5.2" diff --git a/crates/sandlock-cli/src/main.rs b/crates/sandlock-cli/src/main.rs index 40e64e8..74acce8 100644 --- a/crates/sandlock-cli/src/main.rs +++ b/crates/sandlock-cli/src/main.rs @@ -105,8 +105,12 @@ struct RunArgs { #[arg(short = 'i', long)] interactive: bool, - /// Use a local Docker image as chroot rootfs - #[arg(long)] + /// Use a local Docker image as chroot rootfs, given by reference + /// (e.g. `python:3.12-slim`, a digest, or an image id). The image + /// must already be present in local Docker storage; sandlock never + /// pulls from a registry. Requires a running Docker daemon and an + /// accessible socket; the run fails early if neither is reachable. + #[arg(long, value_name = "IMAGE")] image: Option, /// Dry-run: run the command, show filesystem changes, then discard @@ -429,13 +433,13 @@ async fn run_command(args: RunArgs) -> Result { // the shared image cache directly. 
let image_cmd: Option>; if let Some(ref img) = args.image { - let rootfs = sandlock_core::image::extract(img, None)?; + let rootfs = sandlock_core::image::extract(img, None).await?; builder = builder.chroot(&rootfs).fs_read("/"); if pb.workdir.is_none() { builder = builder.workdir(&rootfs); } if args.cmd.is_empty() { - image_cmd = Some(sandlock_core::image::inspect_cmd(img)?); + image_cmd = Some(sandlock_core::image::inspect_cmd(img).await?); } else { image_cmd = None; } diff --git a/crates/sandlock-core/Cargo.toml b/crates/sandlock-core/Cargo.toml index 4e83919..65e5824 100644 --- a/crates/sandlock-core/Cargo.toml +++ b/crates/sandlock-core/Cargo.toml @@ -12,7 +12,7 @@ description = "Lightweight process sandbox using Landlock, seccomp-bpf, and secc libc = "0.2" syscalls = { version = "0.8", default-features = false } nix = { version = "0.29", features = ["process", "signal", "fs", "ioctl", "poll"] } -tokio = { version = "1", features = ["rt", "net", "time", "sync", "macros", "io-util"] } +tokio = { version = "1", features = ["rt", "net", "time", "sync", "macros", "io-util", "fs"] } serde = { version = "1", features = ["derive"] } thiserror = "2" rand = "0.8" @@ -26,7 +26,10 @@ toml = "0.8" jiff = "0.2" pathdiff = "0.2" hudsucker = "0.22" +tar = "0.4" clap = { version = "4", features = ["derive"], optional = true } +bollard = "0.21" +futures-util = "0.3" [features] default = [] diff --git a/crates/sandlock-core/src/image.rs b/crates/sandlock-core/src/image.rs index 0fc6209..4656363 100644 --- a/crates/sandlock-core/src/image.rs +++ b/crates/sandlock-core/src/image.rs @@ -1,181 +1,284 @@ -//! Extract local Docker/OCI images into rootfs directories for sandboxing. +//! Materialize a local Docker image into a rootfs for sandboxing by +//! talking to the Docker daemon over its HTTP API (via bollard). //! -//! Uses `docker create` + `docker export` to extract the image. If the -//! image is present in local Docker storage it is used as-is; otherwise -//! 
`docker create` pulls it from the configured registry first. +//! `--image ` resolves a *local* image only; sandlock never pulls +//! from a registry. The daemon must be running and its socket +//! accessible: callers fail early (see [`extract`] / [`inspect_cmd`]) +//! when it is not reachable, before any sandbox is built. +//! +//! The image filesystem is obtained the same way `docker export` does: +//! a throwaway stopped container is created from the image, its +//! flattened rootfs is streamed out as a tar, and unpacked into a cache +//! keyed by the image's content id. //! //! ```ignore -//! let rootfs = image::extract("python:3.12-slim", None)?; -//! let cmd = image::inspect_cmd("python:3.12-slim")?; -//! // Use rootfs as chroot, cmd as default command +//! let rootfs = image::extract("python:3.12-slim", None).await?; +//! let cmd = image::inspect_cmd("python:3.12-slim").await?; //! ``` +//! +//! Extracted rootfs is cached at +//! `$HOME/.cache/sandlock/images//rootfs/` and reused on +//! subsequent invocations referencing the same image content. +use std::collections::HashSet; +use std::fs; use std::path::{Path, PathBuf}; -use std::process::Command; + +use bollard::models::{ContainerCreateBody, ImageInspect}; +use bollard::query_parameters::RemoveContainerOptionsBuilder; +use bollard::Docker; +use futures_util::StreamExt; +use tokio::io::AsyncWriteExt; use crate::error::{SandboxRuntimeError, SandlockError}; +// ============================================================ +// Public API +// ============================================================ + /// Default cache directory for extracted images. fn default_cache_dir() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".into()); PathBuf::from(home).join(".cache/sandlock/images") } -/// Compute a short cache key from the image name. 
-fn cache_key(image: &str) -> String { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - let mut h = DefaultHasher::new(); - image.hash(&mut h); - format!("{:016x}", h.finish()) -} - -/// Extract a local Docker image into a cached rootfs directory. +/// Resolve a local Docker image into a cached rootfs directory. /// -/// Creates a temporary container, exports its filesystem, and extracts -/// it. Returns the cached path on subsequent calls. +/// `image_ref` is a Docker image reference (`python:3.12-slim`, a +/// digest, an image id, ...) that must already be present in local +/// Docker storage. The extracted rootfs is keyed by the image's +/// content id so repeated calls hit the same cache. /// -/// If the image is not in local Docker storage, `docker create` pulls it -/// from the registry first. -pub fn extract(image: &str, cache_dir: Option<&Path>) -> Result { - let cache = cache_dir - .map(PathBuf::from) - .unwrap_or_else(default_cache_dir); - let key = cache_key(image); - let rootfs = cache.join(&key).join("rootfs"); - - // Return cached rootfs if it exists and has content - if rootfs.is_dir() { - if let Ok(mut entries) = std::fs::read_dir(&rootfs) { - if entries.next().is_some() { - return Ok(rootfs); - } - } +/// Fails early if the Docker daemon is not reachable, or if the image +/// is not in local storage. +pub async fn extract(image_ref: &str, cache_dir: Option<&Path>) -> Result { + let docker = connect().await?; + let info = inspect(&docker, image_ref).await?; + let id = info.id.ok_or_else(|| { + SandboxRuntimeError::Child(format!("Docker returned no id for image {image_ref}")) + })?; + + let cache = cache_dir.map(PathBuf::from).unwrap_or_else(default_cache_dir); + let dest = cache.join(sanitize_id(&id)); + let rootfs = dest.join("rootfs"); + + // Cache hit: the .complete marker means we fully unpacked this image + // before. A partial directory (interrupted run) lacks the marker. 
+ if rootfs.is_dir() && dest.join(".complete").is_file() { + return Ok(rootfs); } - // Create container (does not start it) - let output = Command::new("docker") - .args(["create", image, "/bin/true"]) - .output() - .map_err(|e| SandboxRuntimeError::Child(format!("docker not found: {}", e)))?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(SandboxRuntimeError::Child( - format!("docker create failed: {}", stderr.trim()), - ).into()); - } + // Stale or partial cache: start clean. + let _ = fs::remove_dir_all(&dest); + fs::create_dir_all(&rootfs).map_err(SandboxRuntimeError::Io)?; + + // `docker create`: a stopped container we use only as an export + // source. No command is started. + let body = ContainerCreateBody { + image: Some(image_ref.to_string()), + ..Default::default() + }; + let created = docker + .create_container(None, body) + .await + .map_err(|e| SandboxRuntimeError::Child(format!("docker create failed: {e}")))?; + let cid = created.id; + + // `docker export`: stream the flattened rootfs to a temp tar so we + // never hold a whole image in memory. + let tar_path = dest.join("export.tar"); + let export_res = stream_export(&docker, &cid, &tar_path).await; + + // Always remove the throwaway container, even if the export failed. + let _ = docker + .remove_container( + &cid, + Some(RemoveContainerOptionsBuilder::new().force(true).build()), + ) + .await; + export_res?; + + // Unpack the tar (blocking work) off the async reactor. 
+ let rootfs_out = rootfs.clone(); + let tar_in = tar_path.clone(); + tokio::task::spawn_blocking(move || unpack_rootfs(&tar_in, &rootfs_out)) + .await + .map_err(|e| SandboxRuntimeError::Child(format!("image unpack task failed: {e}")))??; + + let _ = fs::remove_file(&tar_path); + fs::write(dest.join(".complete"), b"").map_err(SandboxRuntimeError::Io)?; + Ok(rootfs) +} - let container_id = String::from_utf8_lossy(&output.stdout).trim().to_string(); +/// Get the default command (Entrypoint + Cmd) for a local Docker image. +/// +/// Returns the concatenation of Entrypoint and Cmd from the image +/// config, or `["/bin/sh"]` if neither is set. Fails early if the +/// daemon is unreachable or the image is not in local storage. +pub async fn inspect_cmd(image_ref: &str) -> Result, SandlockError> { + let docker = connect().await?; + let info = inspect(&docker, image_ref).await?; + Ok(default_cmd(&info)) +} - // Export and extract - let result = extract_container(&container_id, &rootfs); +// ============================================================ +// Docker daemon access +// ============================================================ - // Always remove the temporary container - let _ = Command::new("docker") - .args(["rm", &container_id]) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status(); +/// Connect to the local Docker daemon and verify it is actually +/// reachable, so `--image` fails up front rather than mid-setup. +async fn connect() -> Result { + let docker = Docker::connect_with_local_defaults().map_err(daemon_unreachable)?; + docker.ping().await.map_err(daemon_unreachable)?; + Ok(docker) +} - result?; - Ok(rootfs) +fn daemon_unreachable(e: bollard::errors::Error) -> SandlockError { + SandboxRuntimeError::Child(format!( + "cannot reach the Docker daemon, required for --image \ + (is dockerd running and the socket accessible?): {e}" + )) + .into() } -/// Export a container's filesystem and extract it to rootfs. 
-fn extract_container(container_id: &str, rootfs: &Path) -> Result<(), SandlockError> { - std::fs::create_dir_all(rootfs) - .map_err(|e| SandboxRuntimeError::Io(e))?; - - // docker export → tar stream → extract - let mut child = Command::new("docker") - .args(["export", container_id]) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .spawn() - .map_err(|e| SandboxRuntimeError::Child(format!("docker export: {}", e)))?; - - let stdout = child.stdout.take().unwrap(); - - // Use tar crate or shell tar to extract - let tar_status = Command::new("tar") - .args(["xf", "-", "-C"]) - .arg(rootfs) - .stdin(stdout) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::piped()) - .status() - .map_err(|e| SandboxRuntimeError::Child(format!("tar extract: {}", e)))?; - - let docker_status = child.wait() - .map_err(|e| SandboxRuntimeError::Child(format!("docker export wait: {}", e)))?; - - if !docker_status.success() { - // Clean up partial extraction - let _ = std::fs::remove_dir_all(rootfs); - return Err(SandboxRuntimeError::Child("docker export failed".into()).into()); - } +/// Inspect a local image, mapping a missing image to a clear error. +async fn inspect(docker: &Docker, image_ref: &str) -> Result { + docker.inspect_image(image_ref).await.map_err(|e| { + SandboxRuntimeError::Child(format!( + "image not found in local Docker storage: {image_ref} ({e})" + )) + .into() + }) +} - if !tar_status.success() { - let _ = std::fs::remove_dir_all(rootfs); - return Err(SandboxRuntimeError::Child("tar extraction failed".into()).into()); +/// Stream a container's exported filesystem into `tar_path`. 
+async fn stream_export(docker: &Docker, cid: &str, tar_path: &Path) -> Result<(), SandlockError> { + let mut file = tokio::fs::File::create(tar_path) + .await + .map_err(SandboxRuntimeError::Io)?; + let mut stream = docker.export_container(cid); + while let Some(chunk) = stream.next().await { + let chunk = chunk.map_err(|e| SandboxRuntimeError::Child(format!("docker export failed: {e}")))?; + file.write_all(&chunk).await.map_err(SandboxRuntimeError::Io)?; } - + file.flush().await.map_err(SandboxRuntimeError::Io)?; Ok(()) } -/// Get the default command (ENTRYPOINT + CMD) for a local Docker image. -/// -/// Returns the combined entrypoint and cmd, or `["/bin/sh"]` if none configured. -pub fn inspect_cmd(image: &str) -> Result, SandlockError> { - let output = Command::new("docker") - .args([ - "inspect", "--format", - "{{json .Config.Entrypoint}}|{{json .Config.Cmd}}", - image, - ]) - .output() - .map_err(|_| SandboxRuntimeError::Child("docker inspect failed".into()))?; - - if !output.status.success() { - return Ok(vec!["/bin/sh".into()]); +fn default_cmd(info: &ImageInspect) -> Vec { + let cfg = info.config.as_ref(); + let entrypoint = cfg.and_then(|c| c.entrypoint.clone()).unwrap_or_default(); + let cmd = cfg.and_then(|c| c.cmd.clone()).unwrap_or_default(); + let combined: Vec = entrypoint.into_iter().chain(cmd).collect(); + if combined.is_empty() { + vec!["/bin/sh".into()] + } else { + combined } +} - let raw = String::from_utf8_lossy(&output.stdout).trim().to_string(); - let parts: Vec<&str> = raw.splitn(2, '|').collect(); +/// Turn an image id (`sha256:abcd...`) into a filesystem-safe cache key. 
+fn sanitize_id(id: &str) -> String { + id.split_once(':').map(|(_, h)| h).unwrap_or(id).to_string() +} - let entrypoint = parts.first().and_then(|s| parse_json_string_array(s)); - let cmd = parts.get(1).and_then(|s| parse_json_string_array(s)); +// ============================================================ +// Rootfs extraction +// ============================================================ - match (entrypoint, cmd) { - (Some(ep), Some(c)) => Ok([ep, c].concat()), - (Some(ep), None) => Ok(ep), - (None, Some(c)) => Ok(c), - (None, None) => Ok(vec!["/bin/sh".into()]), +/// Unpack a flattened container-export tarball into `rootfs`. +/// +/// `docker export` produces a single already-merged filesystem, so +/// there are no AUFS/OCI whiteouts to apply. Hard links still need +/// care: an entry can reference a target that appears later in the same +/// stream, and the tar crate's own hard-link handling resolves link +/// targets against the process cwd rather than the destination root. +/// We unpack non-link entries first, then resolve links manually under +/// `rootfs`. +fn unpack_rootfs(tar_path: &Path, rootfs: &Path) -> Result<(), SandlockError> { + // Pass 1: extract everything except hard links. + let mut deferred_hardlinks: Vec = Vec::new(); + { + let file = fs::File::open(tar_path).map_err(SandboxRuntimeError::Io)?; + let mut archive = tar::Archive::new(file); + archive.set_preserve_permissions(true); + archive.set_preserve_mtime(true); + archive.set_overwrite(true); + for entry in archive.entries().map_err(SandboxRuntimeError::Io)? 
{ + let mut entry = entry.map_err(SandboxRuntimeError::Io)?; + let raw = entry.path().map_err(SandboxRuntimeError::Io)?.into_owned(); + let dest = rootfs.join(&raw); + if !dest.starts_with(rootfs) { + continue; + } + if entry.header().entry_type() == tar::EntryType::Link { + deferred_hardlinks.push(raw); + continue; + } + entry.unpack(&dest).map_err(SandboxRuntimeError::Io)?; + } } -} -/// Parse a JSON string array like `["a","b"]` or return None for `null`. -fn parse_json_string_array(s: &str) -> Option> { - let s = s.trim(); - if s == "null" || s.is_empty() { - return None; - } - if !s.starts_with('[') || !s.ends_with(']') { - return None; - } - let inner = &s[1..s.len() - 1]; - if inner.trim().is_empty() { - return Some(Vec::new()); - } - let mut result = Vec::new(); - for item in inner.split(',') { - let item = item.trim(); - if item.starts_with('"') && item.ends_with('"') && item.len() >= 2 { - result.push(item[1..item.len() - 1].replace("\\\"", "\"").replace("\\\\", "\\")); + // Pass 2: resolve hard links manually. The tar crate's + // entry.unpack(dest) passes the link_name straight to + // fs::hard_link, which resolves against the process cwd rather than + // the rootfs. We rewrite both endpoints to absolute paths under + // rootfs ourselves. A hard link's target can itself be another + // hard link, so loop until everything resolves or a full sweep + // makes no progress. + while !deferred_hardlinks.is_empty() { + let mut remaining: Vec = Vec::new(); + let mut applied_this_round = 0usize; + let target_set: HashSet = deferred_hardlinks.iter().cloned().collect(); + + let file = fs::File::open(tar_path).map_err(SandboxRuntimeError::Io)?; + let mut archive = tar::Archive::new(file); + for entry in archive.entries().map_err(SandboxRuntimeError::Io)? 
{ + let entry = entry.map_err(SandboxRuntimeError::Io)?; + if entry.header().entry_type() != tar::EntryType::Link { + continue; + } + let raw = entry.path().map_err(SandboxRuntimeError::Io)?.into_owned(); + if !target_set.contains(&raw) { + continue; + } + let link_target = match entry.link_name().map_err(SandboxRuntimeError::Io)? { + Some(t) => t.into_owned(), + None => { + remaining.push(raw); + continue; + } + }; + let src = rootfs.join(&link_target); + let dest = rootfs.join(&raw); + if !src.starts_with(rootfs) || !dest.starts_with(rootfs) { + continue; + } + if let Some(parent) = dest.parent() { + let _ = fs::create_dir_all(parent); + } + // Remove any leftover from a previous failed round; + // fs::hard_link refuses to overwrite. + if dest.exists() || dest.is_symlink() { + let _ = fs::remove_file(&dest); + } + match fs::hard_link(&src, &dest) { + Ok(_) => applied_this_round += 1, + Err(_) => remaining.push(raw), + } } + + if applied_this_round == 0 { + return Err(SandboxRuntimeError::Child(format!( + "image export has {} unresolved hard link(s); broken export", + remaining.len(), + )) + .into()); + } + deferred_hardlinks = remaining; } - if result.is_empty() { None } else { Some(result) } + + Ok(()) } // ============================================================ @@ -185,50 +288,115 @@ fn parse_json_string_array(s: &str) -> Option> { #[cfg(test)] mod tests { use super::*; + use bollard::models::ImageConfig; + + /// Write a tar with the given entries to a temp file and return it. 
+ fn write_tar(entries: impl FnOnce(&mut tar::Builder>)) -> (tempfile::TempDir, PathBuf) { + let mut builder = tar::Builder::new(Vec::new()); + entries(&mut builder); + let bytes = builder.into_inner().unwrap(); + let tmp = tempfile::tempdir().unwrap(); + let p = tmp.path().join("export.tar"); + fs::write(&p, bytes).unwrap(); + (tmp, p) + } - #[test] - fn test_cache_key_deterministic() { - let k1 = cache_key("python:3.12-slim"); - let k2 = cache_key("python:3.12-slim"); - assert_eq!(k1, k2); + fn append_file(b: &mut tar::Builder>, path: &str, data: &[u8]) { + let mut h = tar::Header::new_gnu(); + h.set_path(path).unwrap(); + h.set_size(data.len() as u64); + h.set_mode(0o644); + h.set_cksum(); + b.append(&h, data).unwrap(); } - #[test] - fn test_cache_key_different() { - let k1 = cache_key("python:3.12-slim"); - let k2 = cache_key("alpine:latest"); - assert_ne!(k1, k2); + fn append_dir(b: &mut tar::Builder>, path: &str) { + let mut h = tar::Header::new_gnu(); + h.set_path(path).unwrap(); + h.set_size(0); + h.set_mode(0o755); + h.set_entry_type(tar::EntryType::Directory); + h.set_cksum(); + b.append(&h, std::io::empty()).unwrap(); } #[test] - fn test_default_cache_dir() { - let dir = default_cache_dir(); - assert!(dir.to_str().unwrap().contains("sandlock/images")); + fn unpack_writes_regular_files() { + let (_tmp, tar_path) = write_tar(|b| { + append_file(b, "greeting.txt", b"hello sandlock"); + }); + let rootfs_tmp = tempfile::tempdir().unwrap(); + let rootfs = rootfs_tmp.path(); + + unpack_rootfs(&tar_path, rootfs).unwrap(); + let greeting = rootfs.join("greeting.txt"); + assert!(greeting.is_file()); + assert_eq!(fs::read_to_string(&greeting).unwrap(), "hello sandlock"); } + /// Real Docker images contain hard links whose source paths are + /// relative to the rootfs (e.g. `usr/bin/perl5.34.0` -> + /// `usr/bin/perl`), and the source can appear later in the same tar. + /// Regression test for the resolve-relative-to-rootfs + + /// defer-until-source-exists fix. 
#[test] - fn test_parse_json_array() { + fn unpack_resolves_hardlinks_with_forward_references() { + let (_tmp, tar_path) = write_tar(|b| { + append_dir(b, "usr/"); + append_dir(b, "usr/bin/"); + // Hard link entry referencing a file that appears LATER. + let mut h = tar::Header::new_gnu(); + h.set_path("usr/bin/perl5.34.0").unwrap(); + h.set_size(0); + h.set_mode(0o755); + h.set_entry_type(tar::EntryType::Link); + h.set_link_name("usr/bin/perl").unwrap(); + h.set_cksum(); + b.append(&h, std::io::empty()).unwrap(); + // The actual binary, defined after the hard link. + append_file(b, "usr/bin/perl", b"#!perl\nnop"); + }); + let rootfs_tmp = tempfile::tempdir().unwrap(); + let rootfs = rootfs_tmp.path(); + + unpack_rootfs(&tar_path, rootfs).unwrap(); + let perl = rootfs.join("usr/bin/perl"); + let perl_versioned = rootfs.join("usr/bin/perl5.34.0"); + assert!(perl.is_file(), "perl should exist as a regular file"); + assert!(perl_versioned.is_file(), "perl5.34.0 should exist as a hard link"); + use std::os::unix::fs::MetadataExt; assert_eq!( - parse_json_string_array(r#"["python3","-c","print(1)"]"#), - Some(vec!["python3".into(), "-c".into(), "print(1)".into()]) + fs::metadata(&perl).unwrap().ino(), + fs::metadata(&perl_versioned).unwrap().ino(), + "hard link should share inode with target", ); } #[test] - fn test_parse_json_null() { - assert_eq!(parse_json_string_array("null"), None); + fn default_cmd_combines_entrypoint_and_cmd() { + let info = ImageInspect { + config: Some(ImageConfig { + entrypoint: Some(vec!["/bin/sh".into(), "-c".into()]), + cmd: Some(vec!["echo hi".into()]), + ..Default::default() + }), + ..Default::default() + }; + assert_eq!(default_cmd(&info), vec!["/bin/sh", "-c", "echo hi"]); } #[test] - fn test_parse_json_empty_array() { - assert_eq!(parse_json_string_array("[]"), Some(vec![])); + fn default_cmd_falls_back_to_bin_sh() { + let info = ImageInspect { + config: Some(ImageConfig::default()), + ..Default::default() + }; + 
assert_eq!(default_cmd(&info), vec!["/bin/sh"]); } #[test] - fn test_parse_json_single() { - assert_eq!( - parse_json_string_array(r#"["/bin/sh"]"#), - Some(vec!["/bin/sh".into()]) - ); + fn sanitize_id_strips_algorithm_prefix() { + assert_eq!(sanitize_id("sha256:abcdef0123"), "abcdef0123"); + assert_eq!(sanitize_id("abcdef0123"), "abcdef0123"); } }