From f5a53ddc9d591a2ea2ae53e30debc95148aa0497 Mon Sep 17 00:00:00 2001 From: Raphael Coeffic Date: Thu, 7 May 2026 05:47:48 +0200 Subject: [PATCH] fix: guard fork() sites against multi-threaded regressions The three fork() callers run Rust code in the child before exec/exit, which is only safe while the process is single-threaded. Add a small runtime check (panics in debug, warns in release) before each fork and SAFETY comments documenting the invariant. Also annotate the truncated clone3_args struct so its short prefix isn't mistaken for a bug. --- src/bin/dive.rs | 4 ++++ src/fork.rs | 31 +++++++++++++++++++++++++++++++ src/image_builder.rs | 4 ++++ src/lib.rs | 1 + src/shared_mount.rs | 3 +++ src/shell.rs | 4 ++++ 6 files changed, 47 insertions(+) create mode 100644 src/fork.rs diff --git a/src/bin/dive.rs b/src/bin/dive.rs index cdfdc0a..9f1ff70 100644 --- a/src/bin/dive.rs +++ b/src/bin/dive.rs @@ -162,6 +162,10 @@ fn main() -> Result<()> { .make_mount(lead_pid) .context("could not init shared mount")?; + // SAFETY: fork() requires the process to be single-threaded — the + // child only inherits the calling thread, and the child branch runs + // Rust code before exec(). + dive::fork::assert_single_threaded(); match unsafe { fork()? } { Fork::Child(_) => { if let Err(err) = prepare_shell_environment(&shared_mount, lead_pid) diff --git a/src/fork.rs b/src/fork.rs new file mode 100644 index 0000000..5654d28 --- /dev/null +++ b/src/fork.rs @@ -0,0 +1,31 @@ +use std::fs; + +/// Assert that the process is single-threaded. +/// +/// Forking a multi-threaded process is unsafe: only the calling thread +/// survives in the child, but locks and other state held by other threads +/// remain "locked forever", and only async-signal-safe functions are legal +/// in the child until `exec`. dive's fork sites do non-trivial Rust work +/// in the child, so they require the process to be single-threaded. +/// +/// Counts entries under `/proc/self/task`. In debug builds a violation +/// panics; in release builds it logs a warning and proceeds. +pub fn assert_single_threaded() { + let count = match fs::read_dir("/proc/self/task") { + Ok(entries) => entries.count(), + Err(err) => { + log::warn!("could not read /proc/self/task: {err}"); + return; + } + }; + if count > 1 { + log::warn!( + "fork() called with {count} threads alive; \ + child state may be inconsistent" + ); + debug_assert!( + count == 1, + "fork() requires a single-threaded process, found {count} threads" + ); + } +} diff --git a/src/image_builder.rs b/src/image_builder.rs index a3f9ea5..978aae0 100644 --- a/src/image_builder.rs +++ b/src/image_builder.rs @@ -105,6 +105,10 @@ impl BaseImageBuilder { log::info!("building base image"); let tmp = tempdir()?; + // SAFETY: fork() requires the process to be single-threaded — the + // child only inherits the calling thread, and build_base_child + // runs Rust code before exit(). + crate::fork::assert_single_threaded(); match unsafe { fork()? } { Fork::Child(_) => exit(self.build_base_child(tmp.path())), Fork::Parent(child_pid) => self.build_base_parent(child_pid), diff --git a/src/lib.rs b/src/lib.rs index e34bfb3..a1fb833 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod base_image; +pub mod fork; pub mod image_builder; pub mod namespaces; pub mod nixos; diff --git a/src/shared_mount.rs b/src/shared_mount.rs index 0e4516d..6c332a3 100644 --- a/src/shared_mount.rs +++ b/src/shared_mount.rs @@ -297,6 +297,9 @@ fn new_userns_fd(id_mapping: &IdMaps) -> Result { } fn clone_new_userns() -> Result { + // Intentionally a short prefix of the kernel's `struct clone_args`: + // `set_tid`, `set_tid_size`, and `cgroup` are omitted. The kernel uses + // `args_size` to detect this and zero-fills the missing fields. #[repr(C)] #[allow(non_camel_case_types)] struct clone3_args { diff --git a/src/shell.rs b/src/shell.rs index d4adeb5..9817c07 100644 --- a/src/shell.rs +++ b/src/shell.rs @@ -45,6 +45,10 @@ impl Shell { } pub fn spawn(self) -> Result { + // SAFETY: fork() requires the process to be single-threaded — the + // child only inherits the calling thread, and dive runs Rust code + // before exec(). + crate::fork::assert_single_threaded(); match unsafe { fork()? } { Fork::Child(_) => { let err = self.exec();