diff --git a/Cargo.lock b/Cargo.lock index 996638a..fb23e04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,6 +192,49 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bollard" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d0a013e3d3ee4edd61e779adf117944c08902d375f18630a0c5b8f95659734" +dependencies = [ + "base64", + "bollard-stubs", + "bytes", + "futures-core", + "futures-util", + "hex", + "http", + "http-body-util", + "hyper", + "hyper-named-pipe", + "hyper-util", + "hyperlocal", + "log", + "pin-project-lite", + "serde", + "serde_derive", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tower-service", + "url", + "winapi", +] + +[[package]] +name = "bollard-stubs" +version = "1.53.1-rc.29.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce412eb6f7096743011dc3cb5c674caeb24ced61d8c498fe07cf7998a4fea889" +dependencies = [ + "serde", + "serde_json", + "serde_repr", +] + [[package]] name = "brotli" version = "8.0.2" @@ -486,6 +529,16 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f" +[[package]] +name = "filetime" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" +dependencies = [ + "cfg-if", + "libc", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -698,6 +751,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.4.0" @@ -792,6 +851,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-named-pipe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" +dependencies = [ + "hex", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", + "winapi", +] + [[package]] name = "hyper-rustls" version = "0.26.0" @@ -846,6 +920,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyperlocal" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" +dependencies = [ + "hex", + "http-body-util", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "icu_collections" version = "2.2.0" @@ -1549,6 +1638,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "same-file" version = "1.0.6" @@ -1578,7 +1673,9 @@ name = "sandlock-core" version = "0.8.2" dependencies = [ "bincode", + "bollard", "clap", + "futures-util", "goblin", "hudsucker", "jiff", @@ -1590,6 +1687,7 @@ dependencies = [ "serde", "serde_json", "syscalls", + "tar", "tempfile", "thiserror 2.0.18", "tokio", @@ -1689,6 +1787,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -1698,6 +1807,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "sha1" version = "0.10.6" @@ -1814,6 +1935,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "tar" +version = "0.4.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6221d9a6003c78398e3b239969f352578258df48c8eb051caadae0015bc840" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.27.0" @@ -2350,6 +2482,22 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -2359,6 +2507,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-link" version = "0.2.1" @@ -2577,6 +2731,16 @@ dependencies = [ "time", ] +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "yasna" version = "0.5.2" diff --git a/crates/sandlock-cli/src/main.rs b/crates/sandlock-cli/src/main.rs index 40e64e8..74acce8 100644 --- a/crates/sandlock-cli/src/main.rs +++ b/crates/sandlock-cli/src/main.rs @@ -105,8 +105,12 @@ struct RunArgs { #[arg(short = 'i', long)] interactive: bool, - /// Use a local Docker image as chroot rootfs - #[arg(long)] + /// Use a local Docker image as chroot rootfs, given by reference + /// (e.g. `python:3.12-slim`, a digest, or an image id). The image + /// must already be present in local Docker storage; sandlock never + /// pulls from a registry. Requires a running Docker daemon and an + /// accessible socket; the run fails early if neither is reachable. + #[arg(long, value_name = "IMAGE")] image: Option, /// Dry-run: run the command, show filesystem changes, then discard @@ -429,13 +433,13 @@ async fn run_command(args: RunArgs) -> Result { // the shared image cache directly. let image_cmd: Option>; if let Some(ref img) = args.image { - let rootfs = sandlock_core::image::extract(img, None)?; + let rootfs = sandlock_core::image::extract(img, None).await?; builder = builder.chroot(&rootfs).fs_read("/"); if pb.workdir.is_none() { builder = builder.workdir(&rootfs); } if args.cmd.is_empty() { - image_cmd = Some(sandlock_core::image::inspect_cmd(img)?); + image_cmd = Some(sandlock_core::image::inspect_cmd(img).await?); } else { image_cmd = None; } diff --git a/crates/sandlock-core/Cargo.toml b/crates/sandlock-core/Cargo.toml index 4e83919..65e5824 100644 --- a/crates/sandlock-core/Cargo.toml +++ b/crates/sandlock-core/Cargo.toml @@ -12,7 +12,7 @@ description = "Lightweight process sandbox using Landlock, seccomp-bpf, and secc libc = "0.2" syscalls = { version = "0.8", default-features = false } nix = { version = "0.29", features = ["process", "signal", "fs", "ioctl", "poll"] } -tokio = { version = "1", features = ["rt", "net", "time", "sync", "macros", "io-util"] } +tokio = { version = "1", features = ["rt", "net", "time", "sync", "macros", "io-util", "fs"] } serde = { version = "1", features = ["derive"] } thiserror = "2" rand = "0.8" @@ -26,7 +26,10 @@ toml = "0.8" jiff = "0.2" pathdiff = "0.2" hudsucker = "0.22" +tar = "0.4" clap = { version = "4", features = ["derive"], optional = true } +bollard = "0.21" +futures-util = "0.3" [features] default = [] diff --git a/crates/sandlock-core/src/image.rs b/crates/sandlock-core/src/image.rs index 0fc6209..4656363 100644 --- a/crates/sandlock-core/src/image.rs +++ b/crates/sandlock-core/src/image.rs @@ -1,181 +1,284 @@ -//! Extract local Docker/OCI images into rootfs directories for sandboxing. +//! Materialize a local Docker image into a rootfs for sandboxing by +//! talking to the Docker daemon over its HTTP API (via bollard). //! -//! Uses `docker create` + `docker export` to extract the image. If the -//! image is present in local Docker storage it is used as-is; otherwise -//! `docker create` pulls it from the configured registry first. +//! `--image ` resolves a *local* image only; sandlock never pulls +//! from a registry. The daemon must be running and its socket +//! accessible: callers fail early (see [`extract`] / [`inspect_cmd`]) +//! when it is not reachable, before any sandbox is built. +//! +//! The image filesystem is obtained the same way `docker export` does: +//! a throwaway stopped container is created from the image, its +//! flattened rootfs is streamed out as a tar, and unpacked into a cache +//! keyed by the image's content id. //! //! ```ignore -//! let rootfs = image::extract("python:3.12-slim", None)?; -//! let cmd = image::inspect_cmd("python:3.12-slim")?; -//! // Use rootfs as chroot, cmd as default command +//! let rootfs = image::extract("python:3.12-slim", None).await?; +//! let cmd = image::inspect_cmd("python:3.12-slim").await?; //! ``` +//! +//! Extracted rootfs is cached at +//! `$HOME/.cache/sandlock/images//rootfs/` and reused on +//! subsequent invocations referencing the same image content. +use std::collections::HashSet; +use std::fs; use std::path::{Path, PathBuf}; -use std::process::Command; + +use bollard::models::{ContainerCreateBody, ImageInspect}; +use bollard::query_parameters::RemoveContainerOptionsBuilder; +use bollard::Docker; +use futures_util::StreamExt; +use tokio::io::AsyncWriteExt; use crate::error::{SandboxRuntimeError, SandlockError}; +// ============================================================ +// Public API +// ============================================================ + /// Default cache directory for extracted images. fn default_cache_dir() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".into()); PathBuf::from(home).join(".cache/sandlock/images") } -/// Compute a short cache key from the image name. -fn cache_key(image: &str) -> String { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - let mut h = DefaultHasher::new(); - image.hash(&mut h); - format!("{:016x}", h.finish()) -} - -/// Extract a local Docker image into a cached rootfs directory. +/// Resolve a local Docker image into a cached rootfs directory. /// -/// Creates a temporary container, exports its filesystem, and extracts -/// it. Returns the cached path on subsequent calls. +/// `image_ref` is a Docker image reference (`python:3.12-slim`, a +/// digest, an image id, ...) that must already be present in local +/// Docker storage. The extracted rootfs is keyed by the image's +/// content id so repeated calls hit the same cache. /// -/// If the image is not in local Docker storage, `docker create` pulls it -/// from the registry first. -pub fn extract(image: &str, cache_dir: Option<&Path>) -> Result { - let cache = cache_dir - .map(PathBuf::from) - .unwrap_or_else(default_cache_dir); - let key = cache_key(image); - let rootfs = cache.join(&key).join("rootfs"); - - // Return cached rootfs if it exists and has content - if rootfs.is_dir() { - if let Ok(mut entries) = std::fs::read_dir(&rootfs) { - if entries.next().is_some() { - return Ok(rootfs); - } - } +/// Fails early if the Docker daemon is not reachable, or if the image +/// is not in local storage. +pub async fn extract(image_ref: &str, cache_dir: Option<&Path>) -> Result { + let docker = connect().await?; + let info = inspect(&docker, image_ref).await?; + let id = info.id.ok_or_else(|| { + SandboxRuntimeError::Child(format!("Docker returned no id for image {image_ref}")) + })?; + + let cache = cache_dir.map(PathBuf::from).unwrap_or_else(default_cache_dir); + let dest = cache.join(sanitize_id(&id)); + let rootfs = dest.join("rootfs"); + + // Cache hit: the .complete marker means we fully unpacked this image + // before. A partial directory (interrupted run) lacks the marker. + if rootfs.is_dir() && dest.join(".complete").is_file() { + return Ok(rootfs); } - // Create container (does not start it) - let output = Command::new("docker") - .args(["create", image, "/bin/true"]) - .output() - .map_err(|e| SandboxRuntimeError::Child(format!("docker not found: {}", e)))?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(SandboxRuntimeError::Child( - format!("docker create failed: {}", stderr.trim()), - ).into()); - } + // Stale or partial cache: start clean. + let _ = fs::remove_dir_all(&dest); + fs::create_dir_all(&rootfs).map_err(SandboxRuntimeError::Io)?; + + // `docker create`: a stopped container we use only as an export + // source. No command is started. + let body = ContainerCreateBody { + image: Some(image_ref.to_string()), + ..Default::default() + }; + let created = docker + .create_container(None, body) + .await + .map_err(|e| SandboxRuntimeError::Child(format!("docker create failed: {e}")))?; + let cid = created.id; + + // `docker export`: stream the flattened rootfs to a temp tar so we + // never hold a whole image in memory. + let tar_path = dest.join("export.tar"); + let export_res = stream_export(&docker, &cid, &tar_path).await; + + // Always remove the throwaway container, even if the export failed. + let _ = docker + .remove_container( + &cid, + Some(RemoveContainerOptionsBuilder::new().force(true).build()), + ) + .await; + export_res?; + + // Unpack the tar (blocking work) off the async reactor. + let rootfs_out = rootfs.clone(); + let tar_in = tar_path.clone(); + tokio::task::spawn_blocking(move || unpack_rootfs(&tar_in, &rootfs_out)) + .await + .map_err(|e| SandboxRuntimeError::Child(format!("image unpack task failed: {e}")))??; + + let _ = fs::remove_file(&tar_path); + fs::write(dest.join(".complete"), b"").map_err(SandboxRuntimeError::Io)?; + Ok(rootfs) +} - let container_id = String::from_utf8_lossy(&output.stdout).trim().to_string(); +/// Get the default command (Entrypoint + Cmd) for a local Docker image. +/// +/// Returns the concatenation of Entrypoint and Cmd from the image +/// config, or `["/bin/sh"]` if neither is set. Fails early if the +/// daemon is unreachable or the image is not in local storage. +pub async fn inspect_cmd(image_ref: &str) -> Result, SandlockError> { + let docker = connect().await?; + let info = inspect(&docker, image_ref).await?; + Ok(default_cmd(&info)) +} - // Export and extract - let result = extract_container(&container_id, &rootfs); +// ============================================================ +// Docker daemon access +// ============================================================ - // Always remove the temporary container - let _ = Command::new("docker") - .args(["rm", &container_id]) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status(); +/// Connect to the local Docker daemon and verify it is actually +/// reachable, so `--image` fails up front rather than mid-setup. +async fn connect() -> Result { + let docker = Docker::connect_with_local_defaults().map_err(daemon_unreachable)?; + docker.ping().await.map_err(daemon_unreachable)?; + Ok(docker) +} - result?; - Ok(rootfs) +fn daemon_unreachable(e: bollard::errors::Error) -> SandlockError { + SandboxRuntimeError::Child(format!( + "cannot reach the Docker daemon, required for --image \ + (is dockerd running and the socket accessible?): {e}" + )) + .into() } -/// Export a container's filesystem and extract it to rootfs. -fn extract_container(container_id: &str, rootfs: &Path) -> Result<(), SandlockError> { - std::fs::create_dir_all(rootfs) - .map_err(|e| SandboxRuntimeError::Io(e))?; - - // docker export → tar stream → extract - let mut child = Command::new("docker") - .args(["export", container_id]) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .spawn() - .map_err(|e| SandboxRuntimeError::Child(format!("docker export: {}", e)))?; - - let stdout = child.stdout.take().unwrap(); - - // Use tar crate or shell tar to extract - let tar_status = Command::new("tar") - .args(["xf", "-", "-C"]) - .arg(rootfs) - .stdin(stdout) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::piped()) - .status() - .map_err(|e| SandboxRuntimeError::Child(format!("tar extract: {}", e)))?; - - let docker_status = child.wait() - .map_err(|e| SandboxRuntimeError::Child(format!("docker export wait: {}", e)))?; - - if !docker_status.success() { - // Clean up partial extraction - let _ = std::fs::remove_dir_all(rootfs); - return Err(SandboxRuntimeError::Child("docker export failed".into()).into()); - } +/// Inspect a local image, mapping a missing image to a clear error. +async fn inspect(docker: &Docker, image_ref: &str) -> Result { + docker.inspect_image(image_ref).await.map_err(|e| { + SandboxRuntimeError::Child(format!( + "image not found in local Docker storage: {image_ref} ({e})" + )) + .into() + }) +} - if !tar_status.success() { - let _ = std::fs::remove_dir_all(rootfs); - return Err(SandboxRuntimeError::Child("tar extraction failed".into()).into()); +/// Stream a container's exported filesystem into `tar_path`. +async fn stream_export(docker: &Docker, cid: &str, tar_path: &Path) -> Result<(), SandlockError> { + let mut file = tokio::fs::File::create(tar_path) + .await + .map_err(SandboxRuntimeError::Io)?; + let mut stream = docker.export_container(cid); + while let Some(chunk) = stream.next().await { + let chunk = chunk.map_err(|e| SandboxRuntimeError::Child(format!("docker export failed: {e}")))?; + file.write_all(&chunk).await.map_err(SandboxRuntimeError::Io)?; } - + file.flush().await.map_err(SandboxRuntimeError::Io)?; Ok(()) } -/// Get the default command (ENTRYPOINT + CMD) for a local Docker image. -/// -/// Returns the combined entrypoint and cmd, or `["/bin/sh"]` if none configured. -pub fn inspect_cmd(image: &str) -> Result, SandlockError> { - let output = Command::new("docker") - .args([ - "inspect", "--format", - "{{json .Config.Entrypoint}}|{{json .Config.Cmd}}", - image, - ]) - .output() - .map_err(|_| SandboxRuntimeError::Child("docker inspect failed".into()))?; - - if !output.status.success() { - return Ok(vec!["/bin/sh".into()]); +fn default_cmd(info: &ImageInspect) -> Vec { + let cfg = info.config.as_ref(); + let entrypoint = cfg.and_then(|c| c.entrypoint.clone()).unwrap_or_default(); + let cmd = cfg.and_then(|c| c.cmd.clone()).unwrap_or_default(); + let combined: Vec = entrypoint.into_iter().chain(cmd).collect(); + if combined.is_empty() { + vec!["/bin/sh".into()] + } else { + combined } +} - let raw = String::from_utf8_lossy(&output.stdout).trim().to_string(); - let parts: Vec<&str> = raw.splitn(2, '|').collect(); +/// Turn an image id (`sha256:abcd...`) into a filesystem-safe cache key. +fn sanitize_id(id: &str) -> String { + id.split_once(':').map(|(_, h)| h).unwrap_or(id).to_string() +} - let entrypoint = parts.first().and_then(|s| parse_json_string_array(s)); - let cmd = parts.get(1).and_then(|s| parse_json_string_array(s)); +// ============================================================ +// Rootfs extraction +// ============================================================ - match (entrypoint, cmd) { - (Some(ep), Some(c)) => Ok([ep, c].concat()), - (Some(ep), None) => Ok(ep), - (None, Some(c)) => Ok(c), - (None, None) => Ok(vec!["/bin/sh".into()]), +/// Unpack a flattened container-export tarball into `rootfs`. +/// +/// `docker export` produces a single already-merged filesystem, so +/// there are no AUFS/OCI whiteouts to apply. Hard links still need +/// care: an entry can reference a target that appears later in the same +/// stream, and the tar crate's own hard-link handling resolves link +/// targets against the process cwd rather than the destination root. +/// We unpack non-link entries first, then resolve links manually under +/// `rootfs`. +fn unpack_rootfs(tar_path: &Path, rootfs: &Path) -> Result<(), SandlockError> { + // Pass 1: extract everything except hard links. + let mut deferred_hardlinks: Vec = Vec::new(); + { + let file = fs::File::open(tar_path).map_err(SandboxRuntimeError::Io)?; + let mut archive = tar::Archive::new(file); + archive.set_preserve_permissions(true); + archive.set_preserve_mtime(true); + archive.set_overwrite(true); + for entry in archive.entries().map_err(SandboxRuntimeError::Io)? { + let mut entry = entry.map_err(SandboxRuntimeError::Io)?; + let raw = entry.path().map_err(SandboxRuntimeError::Io)?.into_owned(); + let dest = rootfs.join(&raw); + if !dest.starts_with(rootfs) { + continue; + } + if entry.header().entry_type() == tar::EntryType::Link { + deferred_hardlinks.push(raw); + continue; + } + entry.unpack(&dest).map_err(SandboxRuntimeError::Io)?; + } } -} -/// Parse a JSON string array like `["a","b"]` or return None for `null`. -fn parse_json_string_array(s: &str) -> Option> { - let s = s.trim(); - if s == "null" || s.is_empty() { - return None; - } - if !s.starts_with('[') || !s.ends_with(']') { - return None; - } - let inner = &s[1..s.len() - 1]; - if inner.trim().is_empty() { - return Some(Vec::new()); - } - let mut result = Vec::new(); - for item in inner.split(',') { - let item = item.trim(); - if item.starts_with('"') && item.ends_with('"') && item.len() >= 2 { - result.push(item[1..item.len() - 1].replace("\\\"", "\"").replace("\\\\", "\\")); + // Pass 2: resolve hard links manually. The tar crate's + // entry.unpack(dest) passes the link_name straight to + // fs::hard_link, which resolves against the process cwd rather than + // the rootfs. We rewrite both endpoints to absolute paths under + // rootfs ourselves. A hard link's target can itself be another + // hard link, so loop until everything resolves or a full sweep + // makes no progress. + while !deferred_hardlinks.is_empty() { + let mut remaining: Vec = Vec::new(); + let mut applied_this_round = 0usize; + let target_set: HashSet = deferred_hardlinks.iter().cloned().collect(); + + let file = fs::File::open(tar_path).map_err(SandboxRuntimeError::Io)?; + let mut archive = tar::Archive::new(file); + for entry in archive.entries().map_err(SandboxRuntimeError::Io)? { + let entry = entry.map_err(SandboxRuntimeError::Io)?; + if entry.header().entry_type() != tar::EntryType::Link { + continue; + } + let raw = entry.path().map_err(SandboxRuntimeError::Io)?.into_owned(); + if !target_set.contains(&raw) { + continue; + } + let link_target = match entry.link_name().map_err(SandboxRuntimeError::Io)? { + Some(t) => t.into_owned(), + None => { + remaining.push(raw); + continue; + } + }; + let src = rootfs.join(&link_target); + let dest = rootfs.join(&raw); + if !src.starts_with(rootfs) || !dest.starts_with(rootfs) { + continue; + } + if let Some(parent) = dest.parent() { + let _ = fs::create_dir_all(parent); + } + // Remove any leftover from a previous failed round; + // fs::hard_link refuses to overwrite. + if dest.exists() || dest.is_symlink() { + let _ = fs::remove_file(&dest); + } + match fs::hard_link(&src, &dest) { + Ok(_) => applied_this_round += 1, + Err(_) => remaining.push(raw), + } } + + if applied_this_round == 0 { + return Err(SandboxRuntimeError::Child(format!( + "image export has {} unresolved hard link(s); broken export", + remaining.len(), + )) + .into()); + } + deferred_hardlinks = remaining; } - if result.is_empty() { None } else { Some(result) } + + Ok(()) } // ============================================================ @@ -185,50 +288,115 @@ fn parse_json_string_array(s: &str) -> Option> { #[cfg(test)] mod tests { use super::*; + use bollard::models::ImageConfig; + + /// Write a tar with the given entries to a temp file and return it. + fn write_tar(entries: impl FnOnce(&mut tar::Builder>)) -> (tempfile::TempDir, PathBuf) { + let mut builder = tar::Builder::new(Vec::new()); + entries(&mut builder); + let bytes = builder.into_inner().unwrap(); + let tmp = tempfile::tempdir().unwrap(); + let p = tmp.path().join("export.tar"); + fs::write(&p, bytes).unwrap(); + (tmp, p) + } - #[test] - fn test_cache_key_deterministic() { - let k1 = cache_key("python:3.12-slim"); - let k2 = cache_key("python:3.12-slim"); - assert_eq!(k1, k2); + fn append_file(b: &mut tar::Builder>, path: &str, data: &[u8]) { + let mut h = tar::Header::new_gnu(); + h.set_path(path).unwrap(); + h.set_size(data.len() as u64); + h.set_mode(0o644); + h.set_cksum(); + b.append(&h, data).unwrap(); } - #[test] - fn test_cache_key_different() { - let k1 = cache_key("python:3.12-slim"); - let k2 = cache_key("alpine:latest"); - assert_ne!(k1, k2); + fn append_dir(b: &mut tar::Builder>, path: &str) { + let mut h = tar::Header::new_gnu(); + h.set_path(path).unwrap(); + h.set_size(0); + h.set_mode(0o755); + h.set_entry_type(tar::EntryType::Directory); + h.set_cksum(); + b.append(&h, std::io::empty()).unwrap(); } #[test] - fn test_default_cache_dir() { - let dir = default_cache_dir(); - assert!(dir.to_str().unwrap().contains("sandlock/images")); + fn unpack_writes_regular_files() { + let (_tmp, tar_path) = write_tar(|b| { + append_file(b, "greeting.txt", b"hello sandlock"); + }); + let rootfs_tmp = tempfile::tempdir().unwrap(); + let rootfs = rootfs_tmp.path(); + + unpack_rootfs(&tar_path, rootfs).unwrap(); + let greeting = rootfs.join("greeting.txt"); + assert!(greeting.is_file()); + assert_eq!(fs::read_to_string(&greeting).unwrap(), "hello sandlock"); } + /// Real Docker images contain hard links whose source paths are + /// relative to the rootfs (e.g. `usr/bin/perl5.34.0` -> + /// `usr/bin/perl`), and the source can appear later in the same tar. + /// Regression test for the resolve-relative-to-rootfs + + /// defer-until-source-exists fix. #[test] - fn test_parse_json_array() { + fn unpack_resolves_hardlinks_with_forward_references() { + let (_tmp, tar_path) = write_tar(|b| { + append_dir(b, "usr/"); + append_dir(b, "usr/bin/"); + // Hard link entry referencing a file that appears LATER. + let mut h = tar::Header::new_gnu(); + h.set_path("usr/bin/perl5.34.0").unwrap(); + h.set_size(0); + h.set_mode(0o755); + h.set_entry_type(tar::EntryType::Link); + h.set_link_name("usr/bin/perl").unwrap(); + h.set_cksum(); + b.append(&h, std::io::empty()).unwrap(); + // The actual binary, defined after the hard link. + append_file(b, "usr/bin/perl", b"#!perl\nnop"); + }); + let rootfs_tmp = tempfile::tempdir().unwrap(); + let rootfs = rootfs_tmp.path(); + + unpack_rootfs(&tar_path, rootfs).unwrap(); + let perl = rootfs.join("usr/bin/perl"); + let perl_versioned = rootfs.join("usr/bin/perl5.34.0"); + assert!(perl.is_file(), "perl should exist as a regular file"); + assert!(perl_versioned.is_file(), "perl5.34.0 should exist as a hard link"); + use std::os::unix::fs::MetadataExt; assert_eq!( - parse_json_string_array(r#"["python3","-c","print(1)"]"#), - Some(vec!["python3".into(), "-c".into(), "print(1)".into()]) + fs::metadata(&perl).unwrap().ino(), + fs::metadata(&perl_versioned).unwrap().ino(), + "hard link should share inode with target", ); } #[test] - fn test_parse_json_null() { - assert_eq!(parse_json_string_array("null"), None); + fn default_cmd_combines_entrypoint_and_cmd() { + let info = ImageInspect { + config: Some(ImageConfig { + entrypoint: Some(vec!["/bin/sh".into(), "-c".into()]), + cmd: Some(vec!["echo hi".into()]), + ..Default::default() + }), + ..Default::default() + }; + assert_eq!(default_cmd(&info), vec!["/bin/sh", "-c", "echo hi"]); } #[test] - fn test_parse_json_empty_array() { - assert_eq!(parse_json_string_array("[]"), Some(vec![])); + fn default_cmd_falls_back_to_bin_sh() { + let info = ImageInspect { + config: Some(ImageConfig::default()), + ..Default::default() + }; + assert_eq!(default_cmd(&info), vec!["/bin/sh"]); } #[test] - fn test_parse_json_single() { - assert_eq!( - parse_json_string_array(r#"["/bin/sh"]"#), - Some(vec!["/bin/sh".into()]) - ); + fn sanitize_id_strips_algorithm_prefix() { + assert_eq!(sanitize_id("sha256:abcdef0123"), "abcdef0123"); + assert_eq!(sanitize_id("abcdef0123"), "abcdef0123"); } }