From e85915d60f57a77891c9717be1e7858ddad83c37 Mon Sep 17 00:00:00 2001 From: Ethan Pailes Date: Mon, 11 May 2026 22:41:41 +0000 Subject: [PATCH] chore!: drop daemonize crate This patch drops teh daemonize crate in favor of our own daemonization implementation. The daemonize crate is apparently unmaintained, so we need to migrate off of it for security. In writing this patch, I realized that libshpool::run was technically unsound (though in practice, no reasonable caller would trigger unsoundness). Unfortunately, to fix it properly I needed to mark the entrypoint `unsafe` and document the required preconditions (can't call from a multi-threaded program in a situation where it might be self-execed to auto-daemonize). Unfortunately this requires a breaking version upgrade. --- Cargo.lock | 10 --- libshpool/Cargo.toml | 1 - libshpool/src/daemon/mod.rs | 22 +++-- libshpool/src/daemon/signals.rs | 10 +-- libshpool/src/daemonize.rs | 153 +++++++++++++++++++++++++++++++- libshpool/src/lib.rs | 15 +++- shpool/src/main.rs | 3 +- shpool/tests/attach.rs | 50 ----------- shpool/tests/daemon.rs | 127 +++++++++++++++++++++++++- shpool/tests/support/daemon.rs | 4 +- 10 files changed, 315 insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e586778e..09e1b822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -224,15 +224,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "daemonize" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e" -dependencies = [ - "libc", -] - [[package]] name = "either" version = "1.15.0" @@ -522,7 +513,6 @@ dependencies = [ "byteorder", "clap", "crossbeam-channel", - "daemonize", "lazy_static", "libc", "libproc", diff --git a/libshpool/Cargo.toml b/libshpool/Cargo.toml index a9dc10ff..2aa63e55 100644 --- a/libshpool/Cargo.toml +++ b/libshpool/Cargo.toml @@ -43,7 +43,6 @@ tempfile = "3" # RAII tmp files strip-ansi-escapes = "0.2.0" # cleaning up strings for pager display notify = { version = "7", features = ["crossbeam-channel"] } # watch config file for updates libproc = "0.14.8" # sniffing shells by examining the subprocess -daemonize = "0.5" # autodaemonization parking_lot = { version = "0.12", features = ["arc_lock"] } # faster more featureful sync primitives shpool-protocol = { version = "0.4.0", path = "../shpool-protocol" } # client-server protocol diff --git a/libshpool/src/daemon/mod.rs b/libshpool/src/daemon/mod.rs index 7b52ec71..fe43588b 100644 --- a/libshpool/src/daemon/mod.rs +++ b/libshpool/src/daemon/mod.rs @@ -17,7 +17,7 @@ use std::{env, os::unix::net::UnixListener, path::PathBuf}; use anyhow::Context; use tracing::{info, instrument}; -use crate::{config, consts, hooks}; +use crate::{config, consts, daemonize, hooks}; mod etc_environment; mod exit_notify; @@ -43,7 +43,7 @@ pub fn run( >, socket: PathBuf, ) -> anyhow::Result<()> { - if let Ok(daemonize) = env::var(consts::AUTODAEMONIZE_VAR) { + let pid_guard = if let Ok(daemonize) = env::var(consts::AUTODAEMONIZE_VAR) { if daemonize == "true" { // Safety: this is executing before we have forked any threads, // so we are still in single-threaded mode, therefore it is safe @@ -55,9 +55,15 @@ pub fn run( let pid_file = socket.with_file_name("daemonized-shpool.pid"); info!("daemonizing with pid_file={:?}", pid_file); - daemonize::Daemonize::new().pid_file(pid_file).start().context("daemonizing")?; + // Safety: we are calling this before having forked any threads, + // which is sufficient to meet the safety requirements for fork(). + Some(unsafe { daemonize::daemonize(pid_file) }.context("daemonizing")?) + } else { + None } - } + } else { + None + }; info!("\n\n======================== STARTING DAEMON ============================\n\n"); @@ -82,7 +88,13 @@ pub fn run( } }; // spawn the signal handler thread in the background - signals::Handler::new(cleanup_socket.clone()).spawn()?; + signals::Handler::new( + vec![cleanup_socket.clone(), pid_guard.as_ref().map(|g| g.path().clone())] + .into_iter() + .flatten() + .collect(), + ) + .spawn()?; server::Server::serve(server, listener)?; diff --git a/libshpool/src/daemon/signals.rs b/libshpool/src/daemon/signals.rs index 3359883d..3bd21633 100644 --- a/libshpool/src/daemon/signals.rs +++ b/libshpool/src/daemon/signals.rs @@ -23,11 +23,11 @@ use signal_hook::{consts::TERM_SIGNALS, flag, iterator::Signals}; use tracing::{error, info}; pub struct Handler { - sock: Option, + cleanup_paths: Vec, } impl Handler { - pub fn new(sock: Option) -> Self { - Handler { sock } + pub fn new(cleanup_paths: Vec) -> Self { + Handler { cleanup_paths } } pub fn spawn(self) -> anyhow::Result<()> { @@ -58,8 +58,8 @@ impl Handler { assert!(TERM_SIGNALS.contains(&signal)); info!("term sig handler: cleaning up socket"); - if let Some(sock) = self.sock { - if let Err(e) = std::fs::remove_file(sock).context("cleaning up socket") { + for path in self.cleanup_paths.into_iter() { + if let Err(e) = std::fs::remove_file(path).context("cleaning up socket") { error!("error cleaning up socket file: {}", e); } } diff --git a/libshpool/src/daemonize.rs b/libshpool/src/daemonize.rs index 867aa5a6..a3c666c4 100644 --- a/libshpool/src/daemonize.rs +++ b/libshpool/src/daemonize.rs @@ -12,16 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::{ffi::OsStr, os::unix::net::UnixStream, path::Path, process, thread, time::Duration}; +use std::{ + ffi::OsStr, + fs, + io::{Seek, SeekFrom, Write}, + os::unix::net::UnixStream, + path::{Path, PathBuf}, + process, thread, + time::Duration, +}; use crate::{config, consts, Args}; use anyhow::{anyhow, Context}; -use tracing::info; +use nix::{ + fcntl, + fcntl::FlockArg, + sys::{wait, wait::WaitStatus}, + unistd, + unistd::ForkResult, +}; +use tracing::{error, info}; /// Check if we can connect to the control socket, and if we -/// can't, fork the daemon in the background. -pub fn maybe_fork_daemon( +/// can't, launch the daemon in the background. This should be +/// called by client subcommands like `shpool attach` or `shpool list` +pub fn maybe_launch_daemon( config_manager: &config::Manager, args: &Args, shpool_bin: B, @@ -106,3 +122,132 @@ where Err(anyhow!("daemonizing: launched daemon, but control socket never came up")) } + +pub struct PidFileGuard { + p: PathBuf, +} + +impl PidFileGuard { + pub fn path(&self) -> &PathBuf { + &self.p + } +} + +impl std::ops::Drop for PidFileGuard { + fn drop(&mut self) { + if let Err(e) = std::fs::remove_file(&self.p) { + error!("cleaning up pid file: {:?}", e); + } + } +} + +/// Perform the traditional daemonization double-fork setsid dance. +/// This should be called from within `shpool daemon` to detach it +/// from the launching shell. +/// +/// Safety: see nix::unistd::fork for preconditions. +pub unsafe fn daemonize(pid_path: PathBuf) -> anyhow::Result { + let old_mask = nix::sys::stat::umask(nix::sys::stat::Mode::empty()); + info!("set empty umask (old mask: {:?}", old_mask); + + // `cd /` in order to avoid holding open the directory the user + // happened to launch `shpool attach` in so that we don't block + // deletes of that directory the whole time that the daemon + // sticks around. + std::env::set_current_dir("/").context("cding to root")?; + + // Fork and become the child in order to stop being the process group + // leader. We need to avoid being the process group leader in order + // to call setsid(). + // + // Safety: the caller has ensured fork preconditions are met, which + // meet the become_child preconditions. + unsafe { become_child(true) }.context("first fork")?; + + let sid = unistd::setsid().context("creating new session")?; + info!("while daemonizing setsid() = {}", sid); + + let pid_file = fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&pid_path) + .context("opening pid file")?; + let mut pid_file = match fcntl::Flock::lock(pid_file, FlockArg::LockExclusiveNonblock) { + Ok(l) => l, + Err((_, errno)) if errno == nix::errno::Errno::EWOULDBLOCK => { + return Err(anyhow!("another daemon is already running")); + } + Err((_, errno)) => { + return Err(anyhow!("locking pid file: {:?}", errno)); + } + }; + + // Safety: the caller has ensured fork preconditions are met, which + // meet the become_child preconditions. + unsafe { become_child(false) }.context("second fork")?; + + // Actually write the pid file contents only now that we are in + // the grandchild and have our final pid. + pid_file.set_len(0).context("truncating pid file")?; + pid_file.seek(SeekFrom::Start(0)).context("seeking to start of pid file")?; + writeln!(*pid_file, "{}", std::process::id()).context("writing pid")?; + + redirect_std_fds_to_null()?; + + Ok(PidFileGuard { p: pid_path }) +} + +fn redirect_std_fds_to_null() -> anyhow::Result<()> { + // Safety: path is a valid null terminated string, O_RDWR is a valid flag. + let nullfd = unsafe { libc::open(b"/dev/null\0" as *const [u8; 10] as _, libc::O_RDWR) }; + if nullfd == -1 { + return Err(anyhow!("opening /dev/null: {}", nix::errno::Errno::last())); + } + // Safety: nullfd is valid and newfd need not be open for saftey + let fd = unsafe { libc::dup2(nullfd, libc::STDIN_FILENO) }; + if fd == -1 { + return Err(anyhow!("redirecting stdin to /dev/null: {}", nix::errno::Errno::last())); + } + // Safety: nullfd is valid and newfd need not be open for saftey + let fd = unsafe { libc::dup2(nullfd, libc::STDOUT_FILENO) }; + if fd == -1 { + return Err(anyhow!("redirecting stdout to /dev/null: {}", nix::errno::Errno::last())); + } + // Safety: nullfd is valid and newfd need not be open for saftey + let fd = unsafe { libc::dup2(nullfd, libc::STDERR_FILENO) }; + if fd == -1 { + return Err(anyhow!("redirecting stderr to /dev/null: {}", nix::errno::Errno::last())); + } + + Ok(()) +} + +/// fork() and exit from the parent, so the only running process +/// is the child process. If `wait_child` is true, rather than +/// exiting immediately, the parent will wait for the child proc +/// to exit and exit with its exit code. In the double-fork dance, +/// we want to do this the first time so we propagate the error +/// in case of a crash in the intermediate proc. +/// +/// Safety: see nix::unistd::fork for preconditions. +unsafe fn become_child(wait_child: bool) -> anyhow::Result<()> { + // Safety: since the caller has me the fork preconditions, this is + // safe. + match unsafe { unistd::fork() }.context("forking for daemonization")? { + ForkResult::Parent { child } => { + if wait_child { + match wait::waitpid(child, None).context("waiting for child")? { + WaitStatus::Exited(_, status) => std::process::exit(status), + WaitStatus::Signaled(_, _, _) => std::process::exit(1), + _ => {} + } + } else { + std::process::exit(0); + } + } + ForkResult::Child => {} + } + Ok(()) +} diff --git a/libshpool/src/lib.rs b/libshpool/src/lib.rs index aad548d6..72b06817 100644 --- a/libshpool/src/lib.rs +++ b/libshpool/src/lib.rs @@ -327,7 +327,18 @@ impl<'writer> tracing_subscriber::fmt::MakeWriter<'writer> for LogWriterBuilder /// Run the shpool tool with the given arguments. If hooks is provided, /// inject the callbacks into the daemon. -pub fn run(args: Args, hooks: Option>) -> anyhow::Result<()> { +/// +/// # Safety +/// +/// Callers MUST NOT call this routine after forking a thread. +/// internally shpool will double-fork to daemonize, and doing so with +/// multiple threads running is unsafe. Really this only applies when +/// consts::AUTODAEMONIZE_VAR is set to true in the environment, but +/// we still need to mark this entrypoint unsafe to be sound. +pub unsafe fn run( + args: Args, + hooks: Option>, +) -> anyhow::Result<()> { match (&args.command, env::var(consts::SENTINEL_FLAG_VAR).as_deref()) { (Commands::Daemon, Ok("prompt")) => { println!("{}", consts::PROMPT_SENTINEL); @@ -401,7 +412,7 @@ pub fn run(args: Args, hooks: Option>) -> an if !config_manager.get().nodaemonize.unwrap_or(false) || args.daemonize { let arg0 = env::args().next().ok_or(anyhow!("arg0 missing"))?; if !args.no_daemonize && !matches!(args.command, Commands::Daemon) { - daemonize::maybe_fork_daemon(&config_manager, &args, arg0, &socket)?; + daemonize::maybe_launch_daemon(&config_manager, &args, arg0, &socket)?; } } diff --git a/shpool/src/main.rs b/shpool/src/main.rs index e7f221d2..f3e0f03c 100644 --- a/shpool/src/main.rs +++ b/shpool/src/main.rs @@ -27,5 +27,6 @@ fn main() -> anyhow::Result<()> { return Ok(()); } - libshpool::run(args, None) + // Safety: there is only a single thread running. + unsafe { libshpool::run(args, None) } } diff --git a/shpool/tests/attach.rs b/shpool/tests/attach.rs index 302d6c78..131f6c9b 100644 --- a/shpool/tests/attach.rs +++ b/shpool/tests/attach.rs @@ -5,7 +5,6 @@ use std::{ fs, io::BufRead, io::{Read, Write}, - path::PathBuf, process::{Command, Stdio}, thread, time, }; @@ -1549,55 +1548,6 @@ fn fresh_shell_does_not_have_prompt_setup_code() -> anyhow::Result<()> { Ok(()) } -#[test] -#[timeout(30000)] -fn autodaemonize() -> anyhow::Result<()> { - let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test")?; - eprintln!("testing autodaemonization in {:?}", tmp_dir.path()); - - let mut socket_path = PathBuf::from(tmp_dir.path()); - socket_path.push("control.sock"); - - let mut log_file = PathBuf::from(tmp_dir.path()); - log_file.push("attach.log"); - - // we have to manually spawn the child because the whole point is that there - // isn't a daemon yet so we can't use the attach method. - let mut child = Command::new(support::shpool_bin()?) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .arg("--daemonize") - .arg("--socket") - .arg(socket_path) - .arg("--log-file") - .arg(log_file) - .arg("--config-file") - .arg(support::testdata_file("norc.toml")) - .arg("attach") - .arg("sh1") - .spawn() - .context("spawning attach process")?; - - // After half a second, the daemon should have spanwed - std::thread::sleep(time::Duration::from_millis(500)); - child.kill().context("killing child")?; - - let mut stdout = child.stdout.take().context("missing stdout")?; - let mut stdout_str = String::from(""); - stdout.read_to_string(&mut stdout_str).context("slurping stdout")?; - let stdout_re = Regex::new(".*prompt>.*")?; - assert!(stdout_re.is_match(&stdout_str)); - - // best effort attempt to clean up after ourselves - Command::new("pkill") - .arg("-f") - .arg("shpool-test-autodaemonize") - .output() - .context("running cleanup process")?; - - Ok(()) -} - #[test] #[timeout(30000)] fn config_tmp_default_dir() -> anyhow::Result<()> { diff --git a/shpool/tests/daemon.rs b/shpool/tests/daemon.rs index 79ee2f1d..01ba1c3b 100644 --- a/shpool/tests/daemon.rs +++ b/shpool/tests/daemon.rs @@ -5,7 +5,7 @@ use std::{ net::{UnixListener, UnixStream}, process::CommandExt as _, }, - path, + path::{self, PathBuf}, process::{Command, Stdio}, time, }; @@ -344,3 +344,128 @@ fn allows_dynamic_log_adjustments() -> anyhow::Result<()> { Ok(()) } + +#[test] +#[timeout(30000)] +fn autodaemonize() -> anyhow::Result<()> { + let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test")?; + eprintln!("testing autodaemonization in {:?}", tmp_dir.path()); + + let mut socket_path = PathBuf::from(tmp_dir.path()); + socket_path.push("control.sock"); + + let mut log_file = PathBuf::from(tmp_dir.path()); + log_file.push("attach.log"); + + // we have to manually spawn the child because the whole point is that there + // isn't a daemon yet so we can't use the attach method. + let mut child = Command::new(support::shpool_bin()?) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .arg("--daemonize") + .arg("--socket") + .arg(socket_path) + .arg("--log-file") + .arg(log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("attach") + .arg("sh1") + .spawn() + .context("spawning attach process")?; + + // After half a second, the daemon should have spanwed + std::thread::sleep(time::Duration::from_millis(500)); + child.kill().context("killing child")?; + + let mut stdout = child.stdout.take().context("missing stdout")?; + let mut stdout_str = String::from(""); + stdout.read_to_string(&mut stdout_str).context("slurping stdout")?; + let stdout_re = Regex::new(".*prompt>.*")?; + assert!(stdout_re.is_match(&stdout_str)); + + // best effort attempt to clean up after ourselves + Command::new("pkill") + .arg("-f") + .arg("shpool-test-autodaemonize") + .output() + .context("running cleanup process")?; + + Ok(()) +} + +#[test] +#[timeout(30000)] +fn daemonize_cwd_cleanup() -> anyhow::Result<()> { + let launch_dir = tmpdir::Dir::new("/tmp/shpool-test-launch")?; + let daemon_dir = tmpdir::Dir::new("/tmp/shpool-test-daemon")?; + + let socket_path = daemon_dir.path().join("shpool.socket"); + let log_file = daemon_dir.path().join("shpool.log"); + + // Launch from launch_dir + let out = Command::new(support::shpool_bin()?) + .current_dir(launch_dir.path()) + .arg("--daemonize") + .arg("--socket") + .arg(&socket_path) + .arg("--log-file") + .arg(&log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("list") + .output() + .context("running shpool list --daemonize")?; + + assert!(out.status.success(), "shpool list should succeed"); + + // Give it a moment just in case + std::thread::sleep(time::Duration::from_millis(100)); + + // Explicitly try to remove the launch directory. + // If the daemon didn't cd /, this might fail because the directory is busy. + let rm_res = std::fs::remove_dir_all(launch_dir.path()); + + // Best-effort cleanup of the background daemon spawned by --daemonize. + Command::new("pkill").arg("-f").arg(socket_path.to_string_lossy().as_ref()).output().ok(); + + rm_res.context("removing launch directory")?; + + Ok(()) +} + +#[test] +#[timeout(30000)] +fn daemonize_pid_file_exists() -> anyhow::Result<()> { + let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test-pid")?; + let socket_path = tmp_dir.path().join("shpool.socket"); + let log_file = tmp_dir.path().join("shpool.log"); + let pid_file = tmp_dir.path().join("daemonized-shpool.pid"); + + // Launch daemon via autodaemonize + let out = Command::new(support::shpool_bin()?) + .arg("--daemonize") + .arg("--socket") + .arg(&socket_path) + .arg("--log-file") + .arg(&log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("list") + .output() + .context("running shpool list --daemonize")?; + + assert!(out.status.success(), "shpool list should succeed"); + + // Give it a moment to spawn + std::thread::sleep(time::Duration::from_millis(500)); + + let pid_exists = pid_file.exists(); + + // Cleanup + Command::new("pkill").arg("-f").arg(socket_path.to_string_lossy().as_ref()).output().ok(); + + assert!(pid_exists, "pid file should exist while daemon is running"); + + Ok(()) +} diff --git a/shpool/tests/support/daemon.rs b/shpool/tests/support/daemon.rs index 102761b2..3f932d35 100644 --- a/shpool/tests/support/daemon.rs +++ b/shpool/tests/support/daemon.rs @@ -239,7 +239,9 @@ impl Proc { // spawn the daemon in a thread thread::spawn(move || { - if let Err(err) = libshpool::run(args, Some(hooks_recorder)) { + // Safety: we do not have consts::AUTODAEMONIZE_VAR set in the + // environment, so libshpool::run will not fork. + if let Err(err) = unsafe { libshpool::run(args, Some(hooks_recorder)) } { eprintln!("shpool proc exited with err: {err:?}"); } });