diff --git a/CHANGELOG.md b/CHANGELOG.md index 17d48c872..c7d9a31bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Categories Used: - Add `--here` flag to unpack into current directory (https://github.com/ouch-org/ouch/pull/962). - List: show symlink targets (for tar and zip) (https://github.com/ouch-org/ouch/pull/934) - Add aliases for ebooks (`.epub`) (https://github.com/ouch-org/ouch/pull/981) +- Add a Landlock sandbox on Linux for `compress`/`decompress`/`list` that restricts filesystem access and, where the kernel supports it, blocks new TCP connections, abstract-socket use, and signals to processes outside the sandbox (disable with `--no-sandbox` or the `OUCH_NO_SANDBOX` environment variable) (https://github.com/ouch-org/ouch/pull/995) ### Improvements diff --git a/Cargo.lock b/Cargo.lock index ff7e38e54..2fe3d00ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,6 +605,26 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" +[[package]] +name = "enumflags2" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1027f7680c853e056ebcec683615fb6fbbc07dbaa13b4d5d9442b146ded4ecef" +dependencies = [ + "enumflags2_derive", +] + +[[package]] +name = "enumflags2_derive" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -995,6 +1015,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "landlock" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fefd6652c57d68aaa32544a4c0e642929725bdc1fd929367cdeb673ab81088" +dependencies = [ + "enumflags2", + "libc", + "thiserror", +] + [[package]] name = "leb128fmt" version = "0.1.0" @@ -1178,6 +1209,7 @@ dependencies = [ "insta", "is_executable", "itertools 0.14.0", + "landlock", "libc", "lz4_flex", "lzma-rust2", diff --git a/Cargo.toml b/Cargo.toml index e89ead6c1..45402ddb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,9 @@ zip = { version = "8.6.0", default-features = false, features = [ ] } zstd = { version = "0.13.3", default-features = false, features = ["zstdmt"] } +[target.'cfg(target_os = "linux")'.dependencies] +landlock = "0.4.4" + [dev-dependencies] anyhow = "1.0.102" assert_cmd = "2.2.1" diff --git a/src/archive/tar.rs b/src/archive/tar.rs index 843cd3cf7..a14b4ba67 100644 --- a/src/archive/tar.rs +++ b/src/archive/tar.rs @@ -70,11 +70,11 @@ pub fn unpack_archive(reader: impl Read, output_folder: &Path) -> Result { if cfg!(unix) && is_writeable.not() { // We unpacked a read-only directory, make it writeable so that we can // create the files inside of it, by the end, restore the original mode - let original_path = entry.path()?.to_path_buf(); - let unpacked = output_folder.join(&original_path); + let unpacked = output_folder.join(entry.path()?); set_permission_mode(&unpacked, original_mode | 0o200)?; - read_only_dirs_and_modes.push((original_path, original_mode)); + // Store the absolute path because the restore loop runs without changing directory. + read_only_dirs_and_modes.push((unpacked, original_mode)); } } _ => continue, diff --git a/src/cli/args.rs b/src/cli/args.rs index 0da80e4f8..7162786ad 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -50,6 +50,10 @@ pub struct CliArgs { #[arg(short = 'c', long, global = true)] pub threads: Option, + /// Disable the process sandbox. + #[arg(long, global = true)] + pub no_sandbox: bool, + // Ouch and claps subcommands #[command(subcommand)] pub cmd: Subcommand, @@ -156,6 +160,7 @@ mod tests { // This is usually replaced in assertion tests password: None, threads: None, + no_sandbox: false, cmd: Subcommand::Decompress { // Put a crazy value here so no test can assert it unintentionally files: vec!["\x00\x11\x22".into()], diff --git a/src/commands/decompress.rs b/src/commands/decompress.rs index a2ba2eba2..b57e707ad 100644 --- a/src/commands/decompress.rs +++ b/src/commands/decompress.rs @@ -1,4 +1,6 @@ use std::{ + collections::HashSet, + ffi::OsString, io::{self, BufReader, Read}, ops::ControlFlow, path::{Path, PathBuf}, @@ -16,7 +18,7 @@ use crate::{ info, info_accessible, non_archive::lz4::MultiFrameLz4Decoder, utils::{ - self, BytesFmt, PathFmt, file_size, + BytesFmt, PathFmt, file_size, io::{ReadSeek, lock_and_flush_output_stdio}, is_path_stdin, resolve_path_conflict, user_wants_to_continue, }, @@ -28,8 +30,6 @@ pub struct DecompressOptions<'a> { /// Example: [Gz, Tar] (notice it's ordered in decompression order) pub formats: Vec, pub output_dir: &'a Path, - /// Used when extracting single file formats and not archive formats - pub output_file_path: PathBuf, /// Whether the user passed `--dir` explicitly. When false (and `here` is false), /// archives are extracted into a basename-derived subdirectory of the CWD. pub output_dir_was_explicit: bool, @@ -39,6 +39,61 @@ pub struct DecompressOptions<'a> { pub question_policy: QuestionPolicy, pub password: Option<&'a [u8]>, pub remove: bool, + /// Resolved target prepared upfront (before sandbox is applied). + pub prepared: PreparedTarget, +} + +/// Per-input decision made before the sandbox is applied. +/// file_name is the single output file for non-archives and None for archives. +pub enum PreparedTarget { + Target { dir: PathBuf, file_name: Option }, + Cancelled, +} + +/// Resolve conflicts and create the output directory before the sandbox starts. +/// claimed_targets holds directories already taken by earlier inputs of this run. +pub fn prepare_decompress_target( + formats: &[Extension], + output_dir: &Path, + output_file_path: &Path, + output_dir_was_explicit: bool, + here: bool, + question_policy: QuestionPolicy, + claimed_targets: &mut HashSet, +) -> Result { + let (first_extension, _) = split_first_compression_format(formats); + let is_archive = matches!(first_extension, Tar | Zip | Rar | SevenZip); + + // --dir and --here unpack into output_dir as is + // default mode makes a new directory named after the archive so report.txt.gz becomes report/report.txt + let target = if output_dir_was_explicit || here { + output_dir.to_path_buf() + } else if is_archive { + output_file_path.to_path_buf() + } else { + output_file_path.with_extension("") + }; + let is_cwd = target == *INITIAL_CURRENT_DIR; + // merging several inputs into the CWD is intentional so the claimed check skips it + let claimed_by_earlier_input = !is_cwd && claimed_targets.contains(&target); + let is_valid = !claimed_by_earlier_input + && (is_cwd || !target.fs_err_try_exists()? || (target.is_dir() && target.read_dir()?.next().is_none())); + + let resolved = if is_valid { + target + } else if let Some(p) = resolve_path_conflict(&target, question_policy, QuestionAction::Decompression)? { + p + } else { + return Ok(PreparedTarget::Cancelled); + }; + + if !resolved.fs_err_try_exists()? { + fs::create_dir(&resolved)?; + } + claimed_targets.insert(resolved.clone()); + + let file_name = (!is_archive).then(|| output_file_path.file_name().unwrap_or_default().to_os_string()); + Ok(PreparedTarget::Target { dir: resolved, file_name }) } enum DecompressionSummary { @@ -93,42 +148,35 @@ pub fn decompress_file(options: DecompressOptions) -> Result<()> { Ok(reader) }; - // Decide where archives extract: - // --dir -> extract into , no wrapper, no flatten - // --here -> extract into CWD (output_dir), no wrapper - // default -> extract into a basename-derived subdirectory; flatten the - // duplicate when the wrapper would contain exactly one entry - // whose name equals the basename - let archive_output_dir: &Path = if options.output_dir_was_explicit || options.here { - options.output_dir - } else { - &options.output_file_path - }; - let control_flow = match first_extension { Gzip | Bzip | Bzip3 | Lz4 | Lzma | Xz | Lzip | Snappy | Zstd | Brotli => { let reader = create_decoder_up_to_first_extension()?; let mut reader = chain_reader_decoder(&first_extension, reader)?; - let (mut writer, final_output_path) = match utils::create_file_or_prompt_on_conflict( - &options.output_file_path, - options.question_policy, - QuestionAction::Decompression, - )? { - Some(file) => file, - None => return Ok(()), + let PreparedTarget::Target { + dir, + file_name: Some(file_name), + } = options.prepared + else { + return Ok(()); }; + let final_output_path = dir.join(file_name); + let mut writer = fs::File::create(&final_output_path)?; io::copy(&mut reader, &mut writer)?; ControlFlow::Continue(DecompressionSummary::NonArchive { output_path: final_output_path, }) } - Tar => unpack_archive( - |output_dir| crate::archive::tar::unpack_archive(create_decoder_up_to_first_extension()?, output_dir), - archive_output_dir, - options.question_policy, - )?, + Tar => { + let PreparedTarget::Target { dir, .. } = options.prepared else { + return Ok(()); + }; + unpack_archive( + |output_dir| crate::archive::tar::unpack_archive(create_decoder_up_to_first_extension()?, output_dir), + dir, + )? + } Zip | SevenZip => { let unpack_fn = match first_extension { Zip => crate::archive::zip::unpack_archive, @@ -169,16 +217,19 @@ pub fn decompress_file(options: DecompressOptions) -> Result<()> { )) }; - unpack_archive( - |output_dir| unpack_fn(reader, output_dir, options.password), - archive_output_dir, - options.question_policy, - )? + let PreparedTarget::Target { dir, .. } = options.prepared else { + return Ok(()); + }; + unpack_archive(|output_dir| unpack_fn(reader, output_dir, options.password), dir)? } #[cfg(feature = "unrar")] Rar => { + let PreparedTarget::Target { dir, .. } = options.prepared else { + return Ok(()); + }; let unpack_fn: Box Result> = if options.formats.len() > 1 || input_is_stdin { - let mut temp_file = tempfile::NamedTempFile::new()?; + // unrar's C API needs a path; spill the decoded stream into the wrapper dir. + let mut temp_file = tempfile::Builder::new().prefix(".ouch-rar-").tempfile_in(&dir)?; io::copy(&mut create_decoder_up_to_first_extension()?, &mut temp_file)?; Box::new(move |output_dir| { crate::archive::rar::unpack_archive(temp_file.path(), output_dir, options.password) @@ -189,7 +240,7 @@ pub fn decompress_file(options: DecompressOptions) -> Result<()> { }) }; - unpack_archive(unpack_fn, archive_output_dir, options.question_policy)? + unpack_archive(unpack_fn, dir)? } #[cfg(not(feature = "unrar"))] Rar => { @@ -210,8 +261,9 @@ pub fn decompress_file(options: DecompressOptions) -> Result<()> { // ended up containing exactly one entry whose name matches the wrapper itself // (e.g. `archive.zip` contained a single `archive/` root), flatten that // duplicate so the user sees `./archive/...` not `./archive/archive/...`. + // Leave the nested layout if the rename fails. if !options.output_dir_was_explicit && !options.here { - deduplicate_basename_wrapper(&output_path)?; + let _ = deduplicate_basename_wrapper(&output_path); } info_accessible!("Successfully decompressed archive to {}", PathFmt(&output_path)); info_accessible!("Files unpacked: {files_unpacked}"); @@ -239,47 +291,20 @@ pub fn decompress_file(options: DecompressOptions) -> Result<()> { Ok(()) } -/// Unpacks an archive creating the output directory, this function will create the output_dir -/// directory or replace it if it already exists. The `output_dir` needs to be empty -/// - If `output_dir` does not exist OR is a empty directory, it will unpack there -/// - If `output_dir` exist OR is a directory not empty, the user will be asked what to do -/// - If `output_dir` is the current working directory, files are extracted directly without prompting +/// Run the format-specific unpack closure against an already-prepared target dir. fn unpack_archive( unpack_fn: impl FnOnce(&Path) -> Result, - output_dir: &Path, - question_policy: QuestionPolicy, + output_dir: PathBuf, ) -> Result> { - // Extracting into the CWD is a merge into the user's workspace and should not prompt, - // matching the behaviour of `tar xf` and `unzip` when no destination is given. - let is_cwd = output_dir == *INITIAL_CURRENT_DIR; - let is_valid_output_dir = - is_cwd || !output_dir.fs_err_try_exists()? || (output_dir.is_dir() && output_dir.read_dir()?.next().is_none()); - - let output_dir_cleaned = if is_valid_output_dir { - output_dir.to_owned() - } else if let Some(path) = resolve_path_conflict(output_dir, question_policy, QuestionAction::Decompression)? { - path - } else { - return Ok(ControlFlow::Break(())); - }; - - if !output_dir_cleaned.fs_err_try_exists()? { - fs::create_dir(&output_dir_cleaned)?; - } - - let files_unpacked = unpack_fn(&output_dir_cleaned)?; - + let files_unpacked = unpack_fn(&output_dir)?; Ok(ControlFlow::Continue(DecompressionSummary::Archive { files_unpacked, - output_path: output_dir_cleaned, + output_path: output_dir, })) } -/// Expects `wrapper` to be a just-decompressed archive output directory, if -/// `wrapper` contains exactly one entry with the same name (e.g. extracting -/// `archive.zip` produced `archive/archive/...`), then flatten it to `archive/...`. -/// -/// Returns the resulting path the user should see. If reads fail, +/// Flattens a wrapper that holds a single entry with the wrapper name +/// For example a zip that gave archive/archive fn deduplicate_basename_wrapper(wrapper: &Path) -> Result<()> { let Some(wrapper_name) = wrapper.file_name() else { return Ok(()); @@ -291,45 +316,46 @@ fn deduplicate_basename_wrapper(wrapper: &Path) -> Result<()> { let Some(first_file) = entries.next().transpose()? else { return Ok(()); }; - // More than one entry, don't deduplicate + // More than one entry so do not deduplicate. if entries.next().transpose()?.is_some() { return Ok(()); } first_file }; - // name doesn't match, nothing to deduplicate + // Name does not match so nothing to deduplicate. if only_file_in_dir.file_name() != wrapper_name { return Ok(()); } - // The wrapper duplicates the inner entry's name. Promote the inner entry by: - // 1. moving it into a sibling temporary directory - // 2. removing the now-empty wrapper - // 3. moving it back to the wrapper's path - // Each step leaves the filesystem in a consistent state, and a failure midway - // leaves the user with valid extracted content (just nested one level). - let inner_path = only_file_in_dir.path(); - let Some(parent) = wrapper.parent() else { + if !only_file_in_dir.file_type()?.is_dir() { return Ok(()); - }; - - // Create the sibling - let sibling_path = parent.join("ouch-temporary"); - fs::create_dir(&sibling_path)?; - - // Move child inside the sibling - let path_inside_sibling = sibling_path.join(wrapper_name); - fs::rename(&inner_path, &path_inside_sibling)?; - - // Delete old parent - fs::remove_dir(wrapper)?; + } - // Move child to its dead parent place - fs::rename(&path_inside_sibling, wrapper)?; + let inner_dir = only_file_in_dir.path(); + + let mut staged: Option = None; + for entry in fs::read_dir(&inner_dir)? { + let entry = entry?; + let name = entry.file_name(); + if name == wrapper_name { + let tmp = tempfile::Builder::new() + .prefix(".ouch-") + .tempfile_in(wrapper)? + .into_temp_path(); + fs::remove_file(&tmp)?; + fs::rename(entry.path(), &tmp)?; + staged = Some(tmp); + } else { + fs::rename(entry.path(), wrapper.join(name))?; + } + } - // Delete the temporary sibling - fs::remove_dir(sibling_path)?; + fs::remove_dir(&inner_dir)?; + if let Some(tmp) = staged { + fs::rename(&tmp, wrapper.join(wrapper_name))?; + let _ = tmp.keep(); + } Ok(()) } @@ -342,7 +368,7 @@ mod tests { use super::*; - /// Helper: collect the relative paths of every entry under `root`, sorted. + /// Collect sorted relative paths of every entry under root. fn list_tree(root: &Path) -> Vec { fn walk(p: &Path, base: &Path, out: &mut Vec) { if let Ok(entries) = std_fs::read_dir(p) { @@ -364,8 +390,7 @@ mod tests { out } - /// The main case: wrapper contains exactly one entry whose name equals the wrapper's - /// name. The inner entry should be promoted up one level. + /// Inner directory matching the wrapper name is promoted up. #[test] fn deduplicate_flattens_when_inner_dir_matches_wrapper_name() { let dir = tempdir().unwrap(); @@ -377,10 +402,15 @@ mod tests { deduplicate_basename_wrapper(&wrapper).unwrap(); assert_eq!(list_tree(&wrapper), vec!["a.txt", "b.txt"]); + // Parent must be untouched with only the wrapper visible. + let parent_entries: Vec<_> = std_fs::read_dir(dir.path()) + .unwrap() + .map(|e| e.unwrap().file_name()) + .collect(); + assert_eq!(parent_entries, vec![std::ffi::OsString::from("archive")]); } - /// Wrapper contains a single entry, but its name differs from the wrapper's name. - /// No flatten should happen — the wrapper survives and the inner entry stays nested. + /// A single inner entry with a different name stays nested. #[test] fn deduplicate_keeps_wrapper_when_inner_name_differs() { let dir = tempdir().unwrap(); @@ -393,7 +423,7 @@ mod tests { assert_eq!(list_tree(&wrapper), vec!["mytool/", "mytool/file.txt"]); } - /// Wrapper contains two or more entries — no flatten regardless of names. + /// More than one entry means no flatten. #[test] fn deduplicate_keeps_wrapper_when_multiple_entries() { let dir = tempdir().unwrap(); @@ -406,7 +436,7 @@ mod tests { assert_eq!(list_tree(&wrapper), vec!["a.txt", "b.txt"]); } - /// Empty wrapper — nothing to flatten, no-op. + /// An empty wrapper is left alone. #[test] fn deduplicate_is_noop_on_empty_wrapper() { let dir = tempdir().unwrap(); @@ -418,24 +448,38 @@ mod tests { assert_eq!(list_tree(&wrapper), Vec::::new()); } - /// Edge case: the single inner entry is a *file* (not a directory) whose name - /// equals the wrapper's name. The wrapper directory should be replaced with that file. + /// A single inner entry that is a file stays nested. #[test] - fn deduplicate_promotes_single_inner_file_with_matching_name() { + fn deduplicate_keeps_wrapper_when_inner_is_file() { let dir = tempdir().unwrap(); let wrapper = dir.path().join("archive"); std_fs::create_dir(&wrapper).unwrap(); std_fs::write(wrapper.join("archive"), "data").unwrap(); deduplicate_basename_wrapper(&wrapper).unwrap(); - assert!(wrapper.is_file(), "wrapper should now be a file, not a directory"); - assert_eq!(std_fs::read(&wrapper).unwrap(), b"data"); + assert!(wrapper.is_dir(), "wrapper must remain a directory"); + assert_eq!(list_tree(&wrapper), vec!["archive".to_string()]); + assert_eq!(std_fs::read(wrapper.join("archive")).unwrap(), b"data"); + } + + /// Inner child named like the wrapper is moved aside under a temp name. + #[test] + fn deduplicate_handles_inner_entry_named_like_wrapper() { + let dir = tempdir().unwrap(); + let wrapper = dir.path().join("archive"); + let inner = wrapper.join("archive"); + let collision = inner.join("archive"); + std_fs::create_dir_all(&collision).unwrap(); + std_fs::write(collision.join("inside"), "x").unwrap(); + std_fs::write(inner.join("other.txt"), "o").unwrap(); + + deduplicate_basename_wrapper(&wrapper).unwrap(); + assert_eq!(list_tree(&wrapper), vec!["archive/", "archive/inside", "other.txt"]); + assert_eq!(std_fs::read(wrapper.join("archive").join("inside")).unwrap(), b"x"); + assert_eq!(std_fs::read(wrapper.join("other.txt")).unwrap(), b"o"); } - /// The flatten only collapses *one* level: nested same-name directories produced - /// by the archive itself stay intact. For example, an archive whose layout is - /// `testing/testing/file` extracted into `./testing/` should leave the user with - /// `./testing/testing/file`, not `./testing/file`. + /// Only the outer wrapper collapses and nested same-name folders stay. #[test] fn deduplicate_only_flattens_outer_wrapper_not_inner_duplicates() { let dir = tempdir().unwrap(); @@ -446,8 +490,7 @@ mod tests { std_fs::write(nested.join("file"), "deep").unwrap(); deduplicate_basename_wrapper(&wrapper).unwrap(); - // After one flatten, `testing/testing/file` should remain — the algorithm only - // collapses the outer wrapper exactly once. + // After one flatten testing/testing/file remains since only the outer wrapper collapses. assert_eq!(list_tree(&wrapper), vec!["testing/", "testing/file"]); } } diff --git a/src/commands/list.rs b/src/commands/list.rs index 78c9a79fe..e14451c31 100644 --- a/src/commands/list.rs +++ b/src/commands/list.rs @@ -22,6 +22,8 @@ pub fn list_archive_contents( list_options: ListOptions, question_policy: QuestionPolicy, password: Option<&[u8]>, + // Pre-opened tempfile FD for multi-format RAR list under the sandbox. + rar_spill_tempfile: Option, ) -> Result<()> { let reader = fs::File::open(archive_path)?; @@ -100,7 +102,15 @@ pub fn list_archive_contents( #[cfg(feature = "unrar")] Rar => { if formats.len() > 1 { - let mut temp_file = tempfile::NamedTempFile::new()?; + // The caller pre-opens this tempfile before the sandbox starts. + let mut temp_file = match rar_spill_tempfile { + Some(temp_file) => temp_file, + None => { + return Err(crate::error::FinalError::with_title("Failed to list RAR archive") + .detail("the sandbox spill tempfile was not prepared before lockdown") + .into()); + } + }; io::copy(&mut reader, &mut temp_file)?; Box::new(crate::archive::rar::list_archive(temp_file.path(), password)?) } else { diff --git a/src/commands/mod.rs b/src/commands/mod.rs index f712adea1..c81c6d416 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -4,8 +4,10 @@ mod compress; mod decompress; mod list; +use std::path::PathBuf; + use bstr::ByteSlice; -use decompress::DecompressOptions; +use decompress::{DecompressOptions, PreparedTarget, prepare_decompress_target}; use rayon::prelude::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; use utils::colors; @@ -16,12 +18,14 @@ use crate::{ commands::{compress::compress_files, decompress::decompress_file, list::list_archive_contents}, error::{Error, FinalError}, extension::{self, parse_format_flag}, - info_accessible, + info, info_accessible, list::ListOptions, + sandbox::{self, SandboxPolicy}, utils::{ self, BytesFmt, FileVisibilityPolicy, NoQuotePathFmt, PathFmt, QuestionAction, canonicalize, colors::*, file_size, is_path_stdin, }, + warning, }; /// Warn the user that (de)compressing this .zip archive might freeze their system. @@ -49,13 +53,8 @@ fn warn_user_about_loading_sevenz_in_memory() { /// /// There are a lot of custom errors to give enough error description and explanation. pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_policy: FileVisibilityPolicy) -> Result<()> { - // Skip --threads 0 ("auto") to match cli::mod.rs, and propagate pool-init errors - if let Some(threads) = args.threads.filter(|&t| t != 0) { - rayon::ThreadPoolBuilder::new() - .num_threads(threads) - .build_global() - .map_err(|e| FinalError::with_title("Failed to initialize thread pool").detail(e.to_string()))?; - } + // No global pool here because Landlock only confines threads created after it is applied + // Decompression builds its pool after the sandbox so the workers are confined match args.cmd { Subcommand::Compress { @@ -71,6 +70,18 @@ pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_polic return Err(FinalError::with_title("No files to compress").into()); } + // gitignore and follow_symlinks both read paths outside the declared input set so the + // sandbox cannot confine them; run unsandboxed and say why + let sandbox_disabled = sandbox::disabled_by_request(args.no_sandbox) || args.gitignore || follow_symlinks; + if cfg!(target_os = "linux") && !sandbox::disabled_by_request(args.no_sandbox) { + if args.gitignore { + info!("Sandbox: disabled because --gitignore reads git configuration outside the input files"); + } + if follow_symlinks { + info!("Sandbox: disabled because --follow-symlinks may read files outside the input set"); + } + } + // Formats from path extension, like "file.tar.gz.xz" -> vec![Tar, Gzip, Lzma] let (formats_from_flag, formats) = match args.format { Some(formats) => { @@ -97,6 +108,17 @@ pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_polic None => return Ok(()), }; + // Read on inputs. The output FD is held already so no write-path grant is needed, and + // the sandbox is deliberately not widened to let compression delete files in the + // output directory. + let sandbox_active = { + let mut policy = SandboxPolicy::new(); + for f in &files { + policy.allow_read(sandbox::canonicalize_for_sandbox(f)); + } + policy.set_disabled(sandbox_disabled).apply() + }; + let level = if fast { Some(1) // Lowest level of compression } else if slow { @@ -119,18 +141,28 @@ pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_polic if let Ok(true) = compress_result { info_accessible!("Output file size: {}", BytesFmt(file_size(&output_path)?)); info_accessible!("Successfully compressed to {}", PathFmt(&output_path)); - } else { - // If Ok(false) or Err() occurred, delete incomplete file at `output_path` - // - // if deleting fails, print an extra alert message pointing - // out that we left a possibly CORRUPTED file at `output_path` - if utils::remove_file_or_dir(&output_path).is_err() { - eprintln!("{red}FATAL ERROR:\n", red = *colors::RED); - eprintln!(" Ouch failed to delete the file {}", PathFmt(&output_path)); - eprintln!(" Please delete it manually."); - eprintln!(" This file is corrupted if compression didn't finished."); - - if compress_result.is_err() { + } else if let Ok(false) = compress_result { + // user cancelled; remove the partial output where the sandbox permits it + if !sandbox_active { + let _ = utils::remove_file_or_dir(&output_path); + } + } else if compress_result.is_err() { + let deleted = !sandbox_active && utils::remove_file_or_dir(&output_path).is_ok(); + if !deleted { + if sandbox_active { + // Not a cleanup failure: the sandbox intentionally does not grant removal + // in the output directory, so the partial file is left in place. + warning!( + "Compression did not finish; the partial file at {} was left because the sandbox does not permit removing it. Delete it manually.", + PathFmt(&output_path) + ); + } else { + eprintln!("{red}FATAL ERROR:\n", red = *colors::RED); + eprintln!( + " Compression did not finish; the partial file at {} may be corrupted.", + PathFmt(&output_path) + ); + eprintln!(" Please delete it manually."); eprintln!(" Compression failed for reasons below."); } } @@ -192,42 +224,136 @@ pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_polic INITIAL_CURRENT_DIR.clone() }; - files - .par_iter() - .zip(files_extensions) - .zip(files_output_paths) - .try_for_each(|((input_path, formats), file_name)| { - // Path used by single file format archives - let output_file_path = if is_path_stdin(&file_name) { + // Per-input output paths for single-file outputs and archive wrapper dirs. + let output_file_paths: Vec = files_output_paths + .iter() + .map(|file_name| { + if is_path_stdin(file_name) { output_dir.join("ouch-output") } else { output_dir.join(file_name) - }; - - decompress_file(DecompressOptions { - input_file_path: input_path, - formats, - output_dir: &output_dir, - output_file_path, - output_dir_was_explicit, - here, - question_policy, - password: args.password.as_deref().map(|str| { - <[u8] as ByteSlice>::from_os_str(str).expect("convert password to bytes failed") - }), - remove, - }) - .map_err(|err| match err { - Error::IoError { reason } => Error::Custom { - reason: FinalError::with_title(format!( - "Failed to decompress {}", - NoQuotePathFmt(input_path) - )) - .detail(reason), - }, - other => other, - }) + } }) + .collect(); + + // Resolve conflicts and create output dirs before the sandbox applies. + // claimed targets make duplicate basenames conflict instead of silently merging + let mut claimed_targets = std::collections::HashSet::new(); + let mut prepared: Vec = Vec::with_capacity(files.len()); + for (formats, output_file_path) in files_extensions.iter().zip(&output_file_paths) { + prepared.push(prepare_decompress_target( + formats, + &output_dir, + output_file_path, + output_dir_was_explicit, + here, + question_policy, + &mut claimed_targets, + )?); + } + + // Read on inputs and read-write on each output directory. + { + let mut policy = SandboxPolicy::new(); + for f in &files { + if !is_path_stdin(f) { + policy.allow_read(sandbox::canonicalize_for_sandbox(f)); + } + } + + // Collect the directories the warnings refer to while building the policy. + // The warnings are printed only when the sandbox is actually enforced. + let mut home_targets: Vec = Vec::new(); + for p in &prepared { + if let PreparedTarget::Target { dir, .. } = p { + let canon = sandbox::canonicalize_for_sandbox(dir); + // Only --dir or --here can point the grant at $HOME + // default mode always makes a fresh subdirectory + if (output_dir_was_explicit || here) && sandbox::is_home_or_ancestor(&canon) { + home_targets.push(canon.clone()); + } + policy.allow_read_write(canon); + } + } + + let mut remove_parents: Vec = Vec::new(); + if remove { + // Collect unique parents so each directory grants and warns once. + for f in &files { + if is_path_stdin(f) { + continue; + } + // Grant in the directory that actually holds the input. For a symlinked + // input that is the symlink's own directory, not the target's, because + // --remove unlinks the symlink itself. + let input_dir = match f.parent() { + Some(p) if !p.as_os_str().is_empty() => sandbox::canonicalize_for_sandbox(p), + _ => sandbox::canonicalize_for_sandbox(std::path::Path::new(".")), + }; + if !remove_parents.contains(&input_dir) { + remove_parents.push(input_dir); + } + } + // The decompressor can delete the input archive but not write nearby. + for parent in &remove_parents { + policy.allow_remove_in(parent.clone()); + } + } + + // Apply once and warn only about what a real sandbox cannot confine. + let enforced = policy.set_disabled(args.no_sandbox).apply(); + if enforced { + for target in home_targets { + warning!( + "Sandbox: extraction target {} is $HOME or an ancestor of it; the sandbox cannot meaningfully confine writes", + PathFmt(&target) + ); + } + for parent in remove_parents { + warning!( + "Sandbox: the extractor can delete any file in {}, not just the archive", + PathFmt(&parent) + ); + } + } + } + + // Build the pool after the sandbox so its worker threads are confined by it + // --threads 0 means auto and lets rayon pick one thread per core + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(args.threads.filter(|&t| t != 0).unwrap_or(0)) + .build() + .map_err(|e| FinalError::with_title("Failed to initialize thread pool").detail(e.to_string()))?; + + pool.install(move || { + files.par_iter().zip(files_extensions).zip(prepared).try_for_each( + |((input_path, formats), prepared)| { + decompress_file(DecompressOptions { + input_file_path: input_path, + formats, + output_dir: &output_dir, + output_dir_was_explicit, + here, + question_policy, + password: args.password.as_deref().map(|str| { + <[u8] as ByteSlice>::from_os_str(str).expect("convert password to bytes failed") + }), + remove, + prepared, + }) + .map_err(|err| match err { + Error::IoError { reason } => Error::Custom { + reason: FinalError::with_title(format!( + "Failed to decompress {}", + NoQuotePathFmt(input_path) + )) + .detail(reason), + }, + other => other, + }) + }, + ) + }) } Subcommand::List { archives: files, tree } => { let mut formats = vec![]; @@ -256,16 +382,68 @@ pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_polic // Ensure we were not told to list the content of a non-archive compressed file check::check_for_non_archive_formats(&files, &formats)?; + // Flatten the per-file formats up front so the next step can inspect them. + let flat_formats: Vec> = formats + .iter() + .map(|f| extension::flatten_compression_formats(f)) + .collect(); + // Open RAR spill tempfiles up front so unrar can read from them under the sandbox. + // The archive format sits at the front so match there not at the end. + let needs_spill = |formats: &[extension::CompressionFormat]| { + formats.len() > 1 && matches!(formats.first(), Some(extension::CompressionFormat::Rar)) + }; + + // all spills share one private temp dir so the sandbox can allow their cleanup + let rar_spill_dir = if flat_formats.iter().any(|fs| needs_spill(fs)) { + Some(tempfile::tempdir()?) + } else { + None + }; + let mut rar_spill_tempfiles: Vec> = Vec::with_capacity(flat_formats.len()); + for fs in &flat_formats { + let spill = if needs_spill(fs) { + let dir = rar_spill_dir + .as_ref() + .expect("spill dir is created when any input needs a spill"); + Some(tempfile::Builder::new().prefix(".ouch-rar-").tempfile_in(dir.path())?) + } else { + None + }; + rar_spill_tempfiles.push(spill); + } + + { + let mut policy = SandboxPolicy::new(); + for f in &files { + policy.allow_read(sandbox::canonicalize_for_sandbox(f)); + } + if let Some(spill_dir) = &rar_spill_dir { + let canon = sandbox::canonicalize_for_sandbox(spill_dir.path()); + // unrar reopens the spill files by path + policy.allow_read(canon.clone()); + // each NamedTempFile unlinks its spill file on drop + policy.allow_remove_in(canon.clone()); + // No RemoveDir grant on the parent: granting it would cover the whole temp + // directory. The empty spill directory may be left behind if a failure stops + // the TempDir from removing itself under the sandbox, which is the safer trade. + } + policy.set_disabled(args.no_sandbox).apply(); + } + let list_options = ListOptions { tree, quiet: args.quiet, }; - for (i, (archive_path, formats)) in files.iter().zip(formats).enumerate() { + for (i, ((archive_path, formats), spill)) in files + .iter() + .zip(flat_formats) + .zip(rar_spill_tempfiles.iter_mut()) + .enumerate() + { if i > 0 && !args.quiet { println!(); } - let formats = extension::flatten_compression_formats(&formats); list_archive_contents( archive_path, formats, @@ -274,6 +452,7 @@ pub fn run(args: CliArgs, question_policy: QuestionPolicy, file_visibility_polic args.password .as_deref() .map(|str| <[u8] as ByteSlice>::from_os_str(str).expect("convert password to bytes failed")), + spill.take(), )?; } diff --git a/src/main.rs b/src/main.rs index a82e077ab..cb0b72124 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ pub mod error; pub mod extension; pub mod list; pub mod non_archive; +pub mod sandbox; pub mod utils; use std::{env, path::PathBuf, sync::LazyLock}; diff --git a/src/sandbox/landlock.rs b/src/sandbox/landlock.rs new file mode 100644 index 000000000..cf09c93d5 --- /dev/null +++ b/src/sandbox/landlock.rs @@ -0,0 +1,261 @@ +//! Landlock filesystem sandbox backend. + +use std::path::Path; + +use landlock::{ + ABI, Access, AccessFs, AccessNet, BitFlags, CompatLevel, Compatible, PathBeneath, PathFd, Ruleset, RulesetAttr, + RulesetCreated, RulesetCreatedAttr, RulesetError, Scope, +}; + +use super::SandboxPolicy; +use crate::warning; + +// Landlock ABI v6 is required (Linux 6.12). +// It covers filesystem and network rights and IPC scoping. +const REQUIRED_ABI: ABI = ABI::V6; + +pub fn apply(policy: &SandboxPolicy) -> bool { + match install(policy) { + Ok(()) => true, + Err(_) => { + warning!( + "Sandbox: could not enable Landlock; running without a sandbox. \ + This requires the Landlock LSM to be enabled and a kernel with ABI v6 (Linux 6.12 or newer)" + ); + false + } + } +} + +// Handle filesystem and network rights and deny every IPC scope. +// Require ABI v6 from Linux 6.12 for landlock sandbox. +fn base_ruleset() -> Result { + Ruleset::default() + .set_compatibility(CompatLevel::HardRequirement) + .handle_access(AccessFs::from_all(REQUIRED_ABI))? + .handle_access(AccessNet::from_all(REQUIRED_ABI))? + .scope(Scope::from_all(REQUIRED_ABI))? + .create() +} + +// whether landlock can actually be enforced on this kernel +pub fn is_available() -> bool { + base_ruleset().is_ok() +} + +// Rights an extractor needs inside an output directory. +// It never grants execution or device nodes or device ioctls. +fn write_grant() -> BitFlags { + let denied = AccessFs::Execute | AccessFs::IoctlDev | AccessFs::MakeChar | AccessFs::MakeBlock; + AccessFs::from_all(REQUIRED_ABI) & !denied +} + +// Read only rights for input archives. Execution is never granted. +fn read_grant() -> BitFlags { + AccessFs::from_read(REQUIRED_ABI) & !AccessFs::Execute +} + +fn install(policy: &SandboxPolicy) -> Result<(), RulesetError> { + let ruleset = base_ruleset()?; + let ruleset = add_rules(ruleset, policy.read_paths(), read_grant())?; + let ruleset = add_rules(ruleset, policy.read_write_paths(), write_grant())?; + let ruleset = add_rules(ruleset, policy.remove_in_paths(), AccessFs::RemoveFile.into())?; + let ruleset = add_rules(ruleset, policy.remove_dir_in_paths(), AccessFs::RemoveDir.into())?; + // No network rules and no scope exceptions are added. + // So nothing can open TCP or reach abstract sockets or signal outside processes. + let _ = ruleset.restrict_self()?; + Ok(()) +} + +// Drop directory only rights on plain files so the file rule is fully enforced. +fn rights_for(access: BitFlags, is_dir: bool) -> BitFlags { + if is_dir { + access + } else { + access & AccessFs::from_file(REQUIRED_ABI) + } +} + +fn add_rules(ruleset: RulesetCreated, paths: I, access: BitFlags) -> Result +where + I: IntoIterator, + I::Item: AsRef, +{ + let mut current = ruleset; + for path in paths { + let path = path.as_ref(); + match PathFd::new(path) { + Ok(fd) => { + // fstat the opened descriptor so the directory check matches the exact file the + // rule is built from, instead of a separate stat that could be swapped underneath. + let is_dir = { + use std::os::fd::{AsFd, AsRawFd}; + let mut st: libc::stat = unsafe { std::mem::zeroed() }; + // SAFETY: fd is open for this call and st is a valid out-parameter. + let ok = unsafe { libc::fstat(fd.as_fd().as_raw_fd(), &mut st) } == 0; + ok && (st.st_mode & libc::S_IFMT as libc::mode_t) == libc::S_IFDIR as libc::mode_t + }; + let granted = rights_for(access, is_dir); + if granted.is_empty() { + continue; + } + current = current.add_rule(PathBeneath::new(fd, granted))?; + } + Err(err) => { + warning!("Sandbox: cannot open {}: {err}", path.display()); + } + } + } + Ok(current) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn grants_never_allow_execution() { + assert!(!read_grant().contains(AccessFs::Execute)); + assert!(!write_grant().contains(AccessFs::Execute)); + } + + #[test] + fn write_grant_denies_devices_and_ioctl() { + let g = write_grant(); + assert!(!g.contains(AccessFs::IoctlDev)); + assert!(!g.contains(AccessFs::MakeChar)); + assert!(!g.contains(AccessFs::MakeBlock)); + } + + #[test] + fn write_grant_allows_normal_extraction() { + let g = write_grant(); + for right in [ + AccessFs::WriteFile, + AccessFs::MakeReg, + AccessFs::MakeDir, + AccessFs::MakeSym, + AccessFs::Refer, + AccessFs::Truncate, + ] { + assert!(g.contains(right)); + } + } + + #[test] + fn read_grant_is_read_only() { + let g = read_grant(); + assert!(g.contains(AccessFs::ReadFile)); + assert!(g.contains(AccessFs::ReadDir)); + assert!(!g.contains(AccessFs::WriteFile)); + assert!(!g.contains(AccessFs::RemoveFile)); + } + + #[test] + fn network_access_is_handled() { + let net = AccessNet::from_all(REQUIRED_ABI); + assert!(net.contains(AccessNet::BindTcp)); + assert!(net.contains(AccessNet::ConnectTcp)); + } + + #[test] + fn ipc_scopes_are_handled() { + let scopes = Scope::from_all(REQUIRED_ABI); + assert!(scopes.contains(Scope::AbstractUnixSocket)); + assert!(scopes.contains(Scope::Signal)); + } + + // Real enforcement check, run only where the kernel provides Landlock. The ruleset is built + // with raw syscalls and the forked child runs only async-signal-safe syscalls and never + // allocates, so forking from the multithreaded test harness cannot deadlock on a held lock. + #[test] + fn landlock_denies_writes_outside_granted_dir() { + use std::{ffi::CString, os::unix::ffi::OsStrExt}; + + // linux/landlock.h + const FS_WRITE_FILE: u64 = 1 << 1; + const FS_MAKE_REG: u64 = 1 << 8; + const RULE_PATH_BENEATH: libc::c_uint = 1; + #[repr(C)] + struct RulesetAttr { + handled_access_fs: u64, + handled_access_net: u64, + scoped: u64, + } + #[repr(C, packed)] + struct PathBeneathAttr { + allowed_access: u64, + parent_fd: i32, + } + + let granted = tempfile::tempdir().unwrap(); + let outside = tempfile::tempdir().unwrap(); + let inside_c = CString::new(granted.path().join("inside.txt").as_os_str().as_bytes()).unwrap(); + let outside_c = CString::new(outside.path().join("escape.txt").as_os_str().as_bytes()).unwrap(); + let dir_c = CString::new(granted.path().as_os_str().as_bytes()).unwrap(); + + let access = FS_WRITE_FILE | FS_MAKE_REG; + let attr = RulesetAttr { + handled_access_fs: access, + handled_access_net: 0, + scoped: 0, + }; + // SAFETY: attr is a valid, fully-initialized ruleset descriptor. + let ruleset_fd = unsafe { + libc::syscall( + libc::SYS_landlock_create_ruleset, + &attr as *const RulesetAttr, + std::mem::size_of::(), + 0u32, + ) + }; + if ruleset_fd < 0 { + // Landlock is unavailable on this kernel; there is nothing to enforce. + return; + } + let ruleset_fd = ruleset_fd as libc::c_int; + + // SAFETY: dir_c is a valid path; O_PATH yields a descriptor usable as a rule parent. + let dir_fd = unsafe { libc::open(dir_c.as_ptr(), libc::O_PATH | libc::O_CLOEXEC) }; + assert!(dir_fd >= 0, "failed to open the granted directory"); + let beneath = PathBeneathAttr { + allowed_access: access, + parent_fd: dir_fd, + }; + // SAFETY: beneath references the open dir_fd and a valid access mask. + let added = unsafe { + libc::syscall( + libc::SYS_landlock_add_rule, + ruleset_fd, + RULE_PATH_BENEATH, + &beneath as *const PathBeneathAttr, + 0u32, + ) + }; + assert_eq!(added, 0, "failed to add the path rule"); + + // SAFETY: the child runs only async-signal-safe syscalls and never allocates before _exit, + // so forking from a possibly-multithreaded harness is safe here. + let pid = unsafe { libc::fork() }; + if pid == 0 { + unsafe { + libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + let restricted = libc::syscall(libc::SYS_landlock_restrict_self, ruleset_fd, 0u32); + let inside = libc::open(inside_c.as_ptr(), libc::O_CREAT | libc::O_WRONLY, 0o600); + let outside = libc::open(outside_c.as_ptr(), libc::O_CREAT | libc::O_WRONLY, 0o600); + let ok = restricted == 0 && inside >= 0 && outside < 0; + libc::_exit(if ok { 0 } else { 1 }); + } + } + assert!(pid > 0, "fork failed"); + let mut status = 0; + // SAFETY: status is a valid out-parameter for the child just forked. + unsafe { libc::waitpid(pid, &mut status, 0) }; + assert!(libc::WIFEXITED(status), "the sandboxed child did not exit normally"); + assert_eq!( + libc::WEXITSTATUS(status), + 0, + "a write outside the granted directory was not denied by Landlock" + ); + } +} diff --git a/src/sandbox/mod.rs b/src/sandbox/mod.rs new file mode 100644 index 000000000..84606d09e --- /dev/null +++ b/src/sandbox/mod.rs @@ -0,0 +1,184 @@ +//! Per-process filesystem sandbox. + +use std::path::{Path, PathBuf}; + +#[cfg(target_os = "linux")] +mod landlock; + +#[derive(Debug, Default, Clone)] +pub struct SandboxPolicy { + read: Vec, + read_write: Vec, + /// Directories where the decompressor can delete the input archive but not write nearby. + remove_in: Vec, + /// Directories where only subdirectories may be removed. + remove_dir_in: Vec, + disabled: bool, +} + +impl SandboxPolicy { + pub fn new() -> Self { + Self::default() + } + + pub fn allow_read>(&mut self, path: P) -> &mut Self { + self.read.push(path.into()); + self + } + + pub fn allow_read_write>(&mut self, path: P) -> &mut Self { + self.read_write.push(path.into()); + self + } + + /// Allow deleting files inside `path` without granting any other write rights. + pub fn allow_remove_in>(&mut self, path: P) -> &mut Self { + self.remove_in.push(path.into()); + self + } + + /// Allow removing subdirectories of `path` without granting any other write rights. + pub fn allow_remove_dir_in>(&mut self, path: P) -> &mut Self { + self.remove_dir_in.push(path.into()); + self + } + + pub fn set_disabled(&mut self, disabled: bool) -> &mut Self { + self.disabled = disabled; + self + } + + pub fn read_paths(&self) -> &[PathBuf] { + &self.read + } + + pub fn read_write_paths(&self) -> &[PathBuf] { + &self.read_write + } + + pub fn remove_in_paths(&self) -> &[PathBuf] { + &self.remove_in + } + + pub fn remove_dir_in_paths(&self) -> &[PathBuf] { + &self.remove_dir_in + } + + /// Apply the policy and return whether the sandbox is now enforced. + /// Failures emit a warning and let the process run unrestricted. + pub fn apply(&self) -> bool { + if disabled_by_request(self.disabled) { + return false; + } + // On Linux the backend warns instead of failing when Landlock is missing and keeps going + #[cfg(target_os = "linux")] + { + landlock::apply(self) + } + // The sandbox is Linux only so elsewhere there is nothing to enforce + #[cfg(not(target_os = "linux"))] + { + false + } + } +} + +/// Resolve `path` to an absolute path, falling back to its parent if the +/// target itself doesn't yet exist. +pub fn canonicalize_for_sandbox(path: &Path) -> PathBuf { + if let Ok(c) = std::fs::canonicalize(path) { + return c; + } + if let Some(parent) = path.parent() + && let Ok(c) = std::fs::canonicalize(parent) + { + return c.join(path.file_name().unwrap_or_default()); + } + path.to_path_buf() +} + +/// Returns true if `target` is $HOME itself or an ancestor of it. +pub fn is_home_or_ancestor(target: &Path) -> bool { + let Some(home) = std::env::var_os("HOME").map(PathBuf::from) else { + return false; + }; + let Ok(home) = std::fs::canonicalize(&home) else { + return false; + }; + let target = canonicalize_for_sandbox(target); + home == target || home.starts_with(&target) +} + +/// True when the user turned the sandbox off by flag or by OUCH_NO_SANDBOX. +pub fn disabled_by_request(no_sandbox: bool) -> bool { + no_sandbox || std::env::var_os("OUCH_NO_SANDBOX").is_some() +} + +// whether a sandbox can actually be enforced on this platform +pub fn available() -> bool { + #[cfg(target_os = "linux")] + { + landlock::is_available() + } + #[cfg(not(target_os = "linux"))] + { + false + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn policy_collects_paths() { + let mut policy = SandboxPolicy::new(); + policy + .allow_read("/tmp/in") + .allow_read_write("/tmp/out") + .allow_remove_in("/tmp/spill") + .allow_remove_dir_in("/tmp"); + + assert_eq!(policy.read_paths(), &[PathBuf::from("/tmp/in")]); + assert_eq!(policy.read_write_paths(), &[PathBuf::from("/tmp/out")]); + assert_eq!(policy.remove_in_paths(), &[PathBuf::from("/tmp/spill")]); + assert_eq!(policy.remove_dir_in_paths(), &[PathBuf::from("/tmp")]); + } + + #[test] + fn policy_disabled_default_false() { + let policy = SandboxPolicy::new(); + assert!(!policy.disabled); + } + + #[test] + fn policy_set_disabled() { + let mut policy = SandboxPolicy::new(); + policy.set_disabled(true); + assert!(policy.disabled); + } + + #[test] + fn apply_returns_false_when_disabled() { + let mut policy = SandboxPolicy::new(); + policy.set_disabled(true); + assert!(!policy.apply()); + } + + #[test] + fn canonicalize_existing_path_resolves() { + let dir = tempfile::tempdir().unwrap(); + let canon = canonicalize_for_sandbox(dir.path()); + assert!(canon.is_absolute()); + assert!(canon.exists()); + } + + #[test] + fn canonicalize_missing_path_falls_back_to_parent() { + let dir = tempfile::tempdir().unwrap(); + let missing = dir.path().join("does-not-exist-yet.txt"); + let canon = canonicalize_for_sandbox(&missing); + assert!(canon.is_absolute()); + assert_eq!(canon.file_name().unwrap(), "does-not-exist-yet.txt"); + } +} diff --git a/tests/integration.rs b/tests/integration.rs index 4c30533b7..108e6e751 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -495,7 +495,9 @@ fn symlink_pack_and_unpack() -> Result<()> { fs::remove_file(archive)?; fs::remove_dir_all(&dest_files_path)?; + // follow-symlinks is rejected under the sandbox so keep it off here. crate::utils::cargo_bin() + .env("OUCH_NO_SANDBOX", "1") .arg("compress") .arg("--follow-symlinks") .args(files_path) @@ -587,7 +589,9 @@ fn broken_symlink_error_when_compressing_with_follow_symlinks() { let archive = dir.join(format!("archive.{ext}")); + // follow-symlinks is rejected under the sandbox so keep it off here. crate::utils::cargo_bin() + .env("OUCH_NO_SANDBOX", "1") .arg("compress") .arg("--follow-symlinks") .arg(&input) @@ -628,6 +632,8 @@ fn symlink_treatment_inside_nested_dirs_with_follow_symlinks_flag() { let archive = dir.join(format!("archive.{ext}")); let mut cmd = crate::utils::cargo_bin(); + // follow-symlinks is rejected under the sandbox so keep it off here. + cmd.env("OUCH_NO_SANDBOX", "1"); cmd.arg("compress"); if follow_symlinks_flag { cmd.arg("--follow-symlinks"); @@ -1569,3 +1575,192 @@ fn decompress_concatenated_lz4_frames() { encoder.finish().unwrap() }); } + +// Default mode decompression without --dir or --here is not tested elsewhere +// These lock in the layout where output goes into a new directory named after the archive + +#[test] +fn decompress_single_file_default_mode_creates_named_directory() { + let (_tempdir, dir) = testdir().unwrap(); + + fs::write(dir.join("report.txt"), b"hello").unwrap(); + crate::utils::cargo_bin() + .args(["compress", "report.txt", "report.txt.gz", "--yes"]) + .current_dir(dir) + .assert() + .success(); + fs::remove_file(dir.join("report.txt")).unwrap(); + + crate::utils::cargo_bin() + .args(["decompress", "report.txt.gz", "--yes"]) + .current_dir(dir) + .assert() + .success(); + + // report.txt.gz becomes report/report.txt so the directory has no extension and the file keeps it + assert!(dir.join("report").is_dir()); + assert_eq!("hello", fs::read_to_string(dir.join("report").join("report.txt")).unwrap()); + assert!(!dir.join("report.txt").is_dir()); +} + +#[test] +fn decompress_archive_default_mode_creates_named_directory() { + let (_tempdir, dir) = testdir().unwrap(); + + fs::create_dir(dir.join("payload")).unwrap(); + fs::write(dir.join("payload").join("a.txt"), b"a").unwrap(); + crate::utils::cargo_bin() + .args(["compress", "payload", "bundle.tar", "--yes"]) + .current_dir(dir) + .assert() + .success(); + + crate::utils::cargo_bin() + .args(["decompress", "bundle.tar", "--yes"]) + .current_dir(dir) + .assert() + .success(); + + assert!(dir.join("bundle").is_dir()); + assert_eq!("a", fs::read_to_string(dir.join("bundle").join("payload").join("a.txt")).unwrap()); +} + +// An archive whose only entry matches the wrapper name must not nest +// test.zip with a test/ folder must unpack to test/ and not test/test/ +#[test] +fn decompress_archive_default_mode_does_not_duplicate_directory() { + let (_tempdir, dir) = testdir().unwrap(); + + fs::create_dir(dir.join("test")).unwrap(); + fs::write(dir.join("test").join("inner.txt"), b"y").unwrap(); + crate::utils::cargo_bin() + .args(["compress", "test", "test.zip", "--yes"]) + .current_dir(dir) + .assert() + .success(); + fs::remove_dir_all(dir.join("test")).unwrap(); + + crate::utils::cargo_bin() + .args(["decompress", "test.zip", "--yes"]) + .current_dir(dir) + .assert() + .success(); + + assert!(dir.join("test").join("inner.txt").is_file()); + assert!(!dir.join("test").join("test").exists()); +} + +// One run with the sandbox disabled. Checks basic functions and that the sandbox is really off. +#[cfg(target_os = "linux")] +#[test] +fn without_sandbox_runs_and_confirms_sandbox_is_off() { + let (_tempdir, dir) = testdir().unwrap(); + let input = dir.join("data.txt"); + fs::write(&input, "no sandbox").unwrap(); + + // Basic functions still work with the sandbox disabled. + let archive = dir.join("out.tar.gz"); + crate::utils::cargo_bin() + .arg("--no-sandbox") + .arg("compress") + .arg(&input) + .arg(&archive) + .arg("--yes") + .assert() + .success(); + let output = dir.join("out"); + crate::utils::cargo_bin() + .arg("--no-sandbox") + .arg("decompress") + .arg(&archive) + .arg("--dir") + .arg(&output) + .arg("--yes") + .assert() + .success(); + assert_eq!("no sandbox", fs::read_to_string(output.join("data.txt")).unwrap()); + + // Sandbox is really off. With it active --follow-symlinks aborts so success proves it is disabled. + crate::utils::cargo_bin() + .arg("--no-sandbox") + .arg("compress") + .arg("--follow-symlinks") + .arg(&input) + .arg(dir.join("follow.tar")) + .arg("--yes") + .assert() + .success(); +} + +// two inputs with the same basename must not silently merge into one target + +#[test] +fn decompress_duplicate_basenames_skips_second_with_no() { + let (_tempdir, dir) = testdir().unwrap(); + + for (sub, content) in [("a", "AAA"), ("b", "BBB")] { + fs::create_dir(dir.join(sub)).unwrap(); + fs::write(dir.join(sub).join("x.txt"), content).unwrap(); + crate::utils::cargo_bin() + .args(["compress", "x.txt", "x.tar.gz", "--yes"]) + .current_dir(dir.join(sub)) + .assert() + .success(); + } + + // the no flag answers the second conflict with skip + crate::utils::cargo_bin() + .args(["decompress", "a/x.tar.gz", "b/x.tar.gz", "--no"]) + .current_dir(dir) + .assert() + .success(); + + // only the first input extracted and its content is intact + assert_eq!("AAA", fs::read_to_string(dir.join("x").join("x.txt")).unwrap()); + assert!(!dir.join("x_1").exists()); +} + +#[test] +fn decompress_duplicate_basenames_prompts_and_renames() { + let (_tempdir, dir) = testdir().unwrap(); + + for (sub, content) in [("a", "AAA"), ("b", "BBB")] { + fs::create_dir(dir.join(sub)).unwrap(); + fs::write(dir.join(sub).join("x.txt"), content).unwrap(); + crate::utils::cargo_bin() + .args(["compress", "x.txt", "x.tar.gz", "--yes"]) + .current_dir(dir.join(sub)) + .assert() + .success(); + } + + // the second input must hit the conflict question and get renamed + crate::utils::cargo_bin() + .args(["decompress", "a/x.tar.gz", "b/x.tar.gz"]) + .current_dir(dir) + .write_stdin("r") + .assert() + .success(); + + assert_eq!("AAA", fs::read_to_string(dir.join("x").join("x.txt")).unwrap()); + assert_eq!("BBB", fs::read_to_string(dir.join("x_1").join("x.txt")).unwrap()); +} + +// follow symlinks only passes when the sandbox is off so success proves gitignore disabled it +#[cfg(target_os = "linux")] +#[test] +fn gitignore_disables_sandbox_and_unlocks_follow_symlinks() { + let (_tempdir, dir) = testdir().unwrap(); + let input = dir.join("data.txt"); + fs::write(&input, "tracked").unwrap(); + + crate::utils::cargo_bin() + .arg("compress") + .arg("--gitignore") + .arg("--follow-symlinks") + .arg(&input) + .arg(dir.join("out.tar")) + .arg("--yes") + .assert() + .success(); +} diff --git a/tests/snapshots/ui__ui_test_ok_decompress.snap b/tests/snapshots/ui__ui_test_ok_decompress.snap index cb36ed973..2afbe2390 100644 --- a/tests/snapshots/ui__ui_test_ok_decompress.snap +++ b/tests/snapshots/ui__ui_test_ok_decompress.snap @@ -2,6 +2,6 @@ source: tests/ui.rs expression: "run_ouch(\"ouch decompress output.zst\", dir)" --- -[INFO] File "output.zst" decompressed to "output" +[INFO] File "output.zst" decompressed to "output/output" [INFO] Input file size: [SIZE] [INFO] Output file size: [SIZE] diff --git a/tests/snapshots/ui__ui_test_usage_help_flag-2.snap b/tests/snapshots/ui__ui_test_usage_help_flag-2.snap index af5c50c0b..8ddf9a4e0 100644 --- a/tests/snapshots/ui__ui_test_usage_help_flag-2.snap +++ b/tests/snapshots/ui__ui_test_usage_help_flag-2.snap @@ -1,5 +1,6 @@ --- source: tests/ui.rs +assertion_line: 194 expression: "output_to_string(ouch!(\"-h\"))" --- A command-line utility for easily compressing and decompressing files and directories. @@ -22,5 +23,6 @@ Options: -f, --format Specify the format of the archive -p, --password Decompress or list with password -c, --threads Concurrent working threads + --no-sandbox Disable the process sandbox -h, --help Print help (see more with '--help') -V, --version Print version diff --git a/tests/snapshots/ui__ui_test_usage_help_flag.snap b/tests/snapshots/ui__ui_test_usage_help_flag.snap index 7b0572a13..185866fd6 100644 --- a/tests/snapshots/ui__ui_test_usage_help_flag.snap +++ b/tests/snapshots/ui__ui_test_usage_help_flag.snap @@ -1,5 +1,6 @@ --- source: tests/ui.rs +assertion_line: 193 expression: "output_to_string(ouch!(\"--help\"))" --- A command-line utility for easily compressing and decompressing files and directories. @@ -46,6 +47,9 @@ Options: -c, --threads Concurrent working threads + --no-sandbox + Disable the process sandbox + -h, --help Print help (see a summary with '-h') diff --git a/tests/utils.rs b/tests/utils.rs index 443dd38db..03f9d20a4 100644 --- a/tests/utils.rs +++ b/tests/utils.rs @@ -23,7 +23,7 @@ macro_rules! ouch { } pub fn cargo_bin() -> Command { - env::vars() + let mut cmd = env::vars() .find_map(|(k, v)| { (k.starts_with("CARGO_TARGET_") && k.ends_with("_RUNNER")).then(|| { let mut runner = v.split_whitespace(); @@ -32,7 +32,11 @@ pub fn cargo_bin() -> Command { cmd }) }) - .unwrap_or_else(|| Command::cargo_bin("ouch").expect("Failed to find ouch executable")) + .unwrap_or_else(|| Command::cargo_bin("ouch").expect("Failed to find ouch executable")); + // Run the whole suite with the sandbox enabled. + // Clear any inherited OUCH_NO_SANDBOX so a stray value cannot disable it. + cmd.env_remove("OUCH_NO_SANDBOX"); + cmd } pub fn testdir() -> io::Result<(tempfile::TempDir, &'static Path)> {