diff --git a/Cargo.lock b/Cargo.lock index e586778e..09e1b822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -224,15 +224,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "daemonize" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e" -dependencies = [ - "libc", -] - [[package]] name = "either" version = "1.15.0" @@ -522,7 +513,6 @@ dependencies = [ "byteorder", "clap", "crossbeam-channel", - "daemonize", "lazy_static", "libc", "libproc", diff --git a/libshpool/Cargo.toml b/libshpool/Cargo.toml index a9dc10ff..2aa63e55 100644 --- a/libshpool/Cargo.toml +++ b/libshpool/Cargo.toml @@ -43,7 +43,6 @@ tempfile = "3" # RAII tmp files strip-ansi-escapes = "0.2.0" # cleaning up strings for pager display notify = { version = "7", features = ["crossbeam-channel"] } # watch config file for updates libproc = "0.14.8" # sniffing shells by examining the subprocess -daemonize = "0.5" # autodaemonization parking_lot = { version = "0.12", features = ["arc_lock"] } # faster more featureful sync primitives shpool-protocol = { version = "0.4.0", path = "../shpool-protocol" } # client-server protocol diff --git a/libshpool/src/daemon/mod.rs b/libshpool/src/daemon/mod.rs index 7b52ec71..fe43588b 100644 --- a/libshpool/src/daemon/mod.rs +++ b/libshpool/src/daemon/mod.rs @@ -17,7 +17,7 @@ use std::{env, os::unix::net::UnixListener, path::PathBuf}; use anyhow::Context; use tracing::{info, instrument}; -use crate::{config, consts, hooks}; +use crate::{config, consts, daemonize, hooks}; mod etc_environment; mod exit_notify; @@ -43,7 +43,7 @@ pub fn run( >, socket: PathBuf, ) -> anyhow::Result<()> { - if let Ok(daemonize) = env::var(consts::AUTODAEMONIZE_VAR) { + let pid_guard = if let Ok(daemonize) = env::var(consts::AUTODAEMONIZE_VAR) { if daemonize == "true" { // Safety: this is executing before we have forked any threads, // so we are still in single-threaded mode, therefore it is safe @@ -55,9 +55,15 @@ pub fn run( let pid_file = socket.with_file_name("daemonized-shpool.pid"); info!("daemonizing with pid_file={:?}", pid_file); - daemonize::Daemonize::new().pid_file(pid_file).start().context("daemonizing")?; + // Safety: we are calling this before having forked any threads, + // which is sufficient to meet the safety requirements for fork(). + Some(unsafe { daemonize::daemonize(pid_file) }.context("daemonizing")?) + } else { + None } - } + } else { + None + }; info!("\n\n======================== STARTING DAEMON ============================\n\n"); @@ -82,7 +88,13 @@ pub fn run( } }; // spawn the signal handler thread in the background - signals::Handler::new(cleanup_socket.clone()).spawn()?; + signals::Handler::new( + vec![cleanup_socket.clone(), pid_guard.as_ref().map(|g| g.path().clone())] + .into_iter() + .flatten() + .collect(), + ) + .spawn()?; server::Server::serve(server, listener)?; diff --git a/libshpool/src/daemon/signals.rs b/libshpool/src/daemon/signals.rs index 3359883d..3bd21633 100644 --- a/libshpool/src/daemon/signals.rs +++ b/libshpool/src/daemon/signals.rs @@ -23,11 +23,11 @@ use signal_hook::{consts::TERM_SIGNALS, flag, iterator::Signals}; use tracing::{error, info}; pub struct Handler { - sock: Option, + cleanup_paths: Vec, } impl Handler { - pub fn new(sock: Option) -> Self { - Handler { sock } + pub fn new(cleanup_paths: Vec) -> Self { + Handler { cleanup_paths } } pub fn spawn(self) -> anyhow::Result<()> { @@ -58,8 +58,8 @@ impl Handler { assert!(TERM_SIGNALS.contains(&signal)); info!("term sig handler: cleaning up socket"); - if let Some(sock) = self.sock { - if let Err(e) = std::fs::remove_file(sock).context("cleaning up socket") { + for path in self.cleanup_paths.into_iter() { + if let Err(e) = std::fs::remove_file(path).context("cleaning up socket") { error!("error cleaning up socket file: {}", e); } } diff --git a/libshpool/src/daemonize.rs b/libshpool/src/daemonize.rs index 867aa5a6..a3c666c4 100644 --- a/libshpool/src/daemonize.rs +++ b/libshpool/src/daemonize.rs @@ -12,16 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::{ffi::OsStr, os::unix::net::UnixStream, path::Path, process, thread, time::Duration}; +use std::{ + ffi::OsStr, + fs, + io::{Seek, SeekFrom, Write}, + os::unix::net::UnixStream, + path::{Path, PathBuf}, + process, thread, + time::Duration, +}; use crate::{config, consts, Args}; use anyhow::{anyhow, Context}; -use tracing::info; +use nix::{ + fcntl, + fcntl::FlockArg, + sys::{wait, wait::WaitStatus}, + unistd, + unistd::ForkResult, +}; +use tracing::{error, info}; /// Check if we can connect to the control socket, and if we -/// can't, fork the daemon in the background. -pub fn maybe_fork_daemon( +/// can't, launch the daemon in the background. This should be +/// called by client subcommands like `shpool attach` or `shpool list` +pub fn maybe_launch_daemon( config_manager: &config::Manager, args: &Args, shpool_bin: B, @@ -106,3 +122,132 @@ where Err(anyhow!("daemonizing: launched daemon, but control socket never came up")) } + +pub struct PidFileGuard { + p: PathBuf, +} + +impl PidFileGuard { + pub fn path(&self) -> &PathBuf { + &self.p + } +} + +impl std::ops::Drop for PidFileGuard { + fn drop(&mut self) { + if let Err(e) = std::fs::remove_file(&self.p) { + error!("cleaning up pid file: {:?}", e); + } + } +} + +/// Perform the traditional daemonization double-fork setsid dance. +/// This should be called from within `shpool daemon` to detach it +/// from the launching shell. +/// +/// Safety: see nix::unistd::fork for preconditions. +pub unsafe fn daemonize(pid_path: PathBuf) -> anyhow::Result { + let old_mask = nix::sys::stat::umask(nix::sys::stat::Mode::empty()); + info!("set empty umask (old mask: {:?}", old_mask); + + // `cd /` in order to avoid holding open the directory the user + // happened to launch `shpool attach` in so that we don't block + // deletes of that directory the whole time that the daemon + // sticks around. + std::env::set_current_dir("/").context("cding to root")?; + + // Fork and become the child in order to stop being the process group + // leader. We need to avoid being the process group leader in order + // to call setsid(). + // + // Safety: the caller has ensured fork preconditions are met, which + // meet the become_child preconditions. + unsafe { become_child(true) }.context("first fork")?; + + let sid = unistd::setsid().context("creating new session")?; + info!("while daemonizing setsid() = {}", sid); + + let pid_file = fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&pid_path) + .context("opening pid file")?; + let mut pid_file = match fcntl::Flock::lock(pid_file, FlockArg::LockExclusiveNonblock) { + Ok(l) => l, + Err((_, errno)) if errno == nix::errno::Errno::EWOULDBLOCK => { + return Err(anyhow!("another daemon is already running")); + } + Err((_, errno)) => { + return Err(anyhow!("locking pid file: {:?}", errno)); + } + }; + + // Safety: the caller has ensured fork preconditions are met, which + // meet the become_child preconditions. + unsafe { become_child(false) }.context("second fork")?; + + // Actually write the pid file contents only now that we are in + // the grandchild and have our final pid. + pid_file.set_len(0).context("truncating pid file")?; + pid_file.seek(SeekFrom::Start(0)).context("seeking to start of pid file")?; + writeln!(*pid_file, "{}", std::process::id()).context("writing pid")?; + + redirect_std_fds_to_null()?; + + Ok(PidFileGuard { p: pid_path }) +} + +fn redirect_std_fds_to_null() -> anyhow::Result<()> { + // Safety: path is a valid null terminated string, O_RDWR is a valid flag. + let nullfd = unsafe { libc::open(b"/dev/null\0" as *const [u8; 10] as _, libc::O_RDWR) }; + if nullfd == -1 { + return Err(anyhow!("opening /dev/null: {}", nix::errno::Errno::last())); + } + // Safety: nullfd is valid and newfd need not be open for saftey + let fd = unsafe { libc::dup2(nullfd, libc::STDIN_FILENO) }; + if fd == -1 { + return Err(anyhow!("redirecting stdin to /dev/null: {}", nix::errno::Errno::last())); + } + // Safety: nullfd is valid and newfd need not be open for saftey + let fd = unsafe { libc::dup2(nullfd, libc::STDOUT_FILENO) }; + if fd == -1 { + return Err(anyhow!("redirecting stdout to /dev/null: {}", nix::errno::Errno::last())); + } + // Safety: nullfd is valid and newfd need not be open for saftey + let fd = unsafe { libc::dup2(nullfd, libc::STDERR_FILENO) }; + if fd == -1 { + return Err(anyhow!("redirecting stderr to /dev/null: {}", nix::errno::Errno::last())); + } + + Ok(()) +} + +/// fork() and exit from the parent, so the only running process +/// is the child process. If `wait_child` is true, rather than +/// exiting immediately, the parent will wait for the child proc +/// to exit and exit with its exit code. In the double-fork dance, +/// we want to do this the first time so we propagate the error +/// in case of a crash in the intermediate proc. +/// +/// Safety: see nix::unistd::fork for preconditions. +unsafe fn become_child(wait_child: bool) -> anyhow::Result<()> { + // Safety: since the caller has me the fork preconditions, this is + // safe. + match unsafe { unistd::fork() }.context("forking for daemonization")? { + ForkResult::Parent { child } => { + if wait_child { + match wait::waitpid(child, None).context("waiting for child")? { + WaitStatus::Exited(_, status) => std::process::exit(status), + WaitStatus::Signaled(_, _, _) => std::process::exit(1), + _ => {} + } + } else { + std::process::exit(0); + } + } + ForkResult::Child => {} + } + Ok(()) +} diff --git a/libshpool/src/lib.rs b/libshpool/src/lib.rs index aad548d6..72b06817 100644 --- a/libshpool/src/lib.rs +++ b/libshpool/src/lib.rs @@ -327,7 +327,18 @@ impl<'writer> tracing_subscriber::fmt::MakeWriter<'writer> for LogWriterBuilder /// Run the shpool tool with the given arguments. If hooks is provided, /// inject the callbacks into the daemon. -pub fn run(args: Args, hooks: Option>) -> anyhow::Result<()> { +/// +/// # Safety +/// +/// Callers MUST NOT call this routine after forking a thread. +/// internally shpool will double-fork to daemonize, and doing so with +/// multiple threads running is unsafe. Really this only applies when +/// consts::AUTODAEMONIZE_VAR is set to true in the environment, but +/// we still need to mark this entrypoint unsafe to be sound. +pub unsafe fn run( + args: Args, + hooks: Option>, +) -> anyhow::Result<()> { match (&args.command, env::var(consts::SENTINEL_FLAG_VAR).as_deref()) { (Commands::Daemon, Ok("prompt")) => { println!("{}", consts::PROMPT_SENTINEL); @@ -401,7 +412,7 @@ pub fn run(args: Args, hooks: Option>) -> an if !config_manager.get().nodaemonize.unwrap_or(false) || args.daemonize { let arg0 = env::args().next().ok_or(anyhow!("arg0 missing"))?; if !args.no_daemonize && !matches!(args.command, Commands::Daemon) { - daemonize::maybe_fork_daemon(&config_manager, &args, arg0, &socket)?; + daemonize::maybe_launch_daemon(&config_manager, &args, arg0, &socket)?; } } diff --git a/shpool/src/main.rs b/shpool/src/main.rs index e7f221d2..f3e0f03c 100644 --- a/shpool/src/main.rs +++ b/shpool/src/main.rs @@ -27,5 +27,6 @@ fn main() -> anyhow::Result<()> { return Ok(()); } - libshpool::run(args, None) + // Safety: there is only a single thread running. + unsafe { libshpool::run(args, None) } } diff --git a/shpool/tests/attach.rs b/shpool/tests/attach.rs index 302d6c78..131f6c9b 100644 --- a/shpool/tests/attach.rs +++ b/shpool/tests/attach.rs @@ -5,7 +5,6 @@ use std::{ fs, io::BufRead, io::{Read, Write}, - path::PathBuf, process::{Command, Stdio}, thread, time, }; @@ -1549,55 +1548,6 @@ fn fresh_shell_does_not_have_prompt_setup_code() -> anyhow::Result<()> { Ok(()) } -#[test] -#[timeout(30000)] -fn autodaemonize() -> anyhow::Result<()> { - let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test")?; - eprintln!("testing autodaemonization in {:?}", tmp_dir.path()); - - let mut socket_path = PathBuf::from(tmp_dir.path()); - socket_path.push("control.sock"); - - let mut log_file = PathBuf::from(tmp_dir.path()); - log_file.push("attach.log"); - - // we have to manually spawn the child because the whole point is that there - // isn't a daemon yet so we can't use the attach method. - let mut child = Command::new(support::shpool_bin()?) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .arg("--daemonize") - .arg("--socket") - .arg(socket_path) - .arg("--log-file") - .arg(log_file) - .arg("--config-file") - .arg(support::testdata_file("norc.toml")) - .arg("attach") - .arg("sh1") - .spawn() - .context("spawning attach process")?; - - // After half a second, the daemon should have spanwed - std::thread::sleep(time::Duration::from_millis(500)); - child.kill().context("killing child")?; - - let mut stdout = child.stdout.take().context("missing stdout")?; - let mut stdout_str = String::from(""); - stdout.read_to_string(&mut stdout_str).context("slurping stdout")?; - let stdout_re = Regex::new(".*prompt>.*")?; - assert!(stdout_re.is_match(&stdout_str)); - - // best effort attempt to clean up after ourselves - Command::new("pkill") - .arg("-f") - .arg("shpool-test-autodaemonize") - .output() - .context("running cleanup process")?; - - Ok(()) -} - #[test] #[timeout(30000)] fn config_tmp_default_dir() -> anyhow::Result<()> { diff --git a/shpool/tests/daemon.rs b/shpool/tests/daemon.rs index 79ee2f1d..01ba1c3b 100644 --- a/shpool/tests/daemon.rs +++ b/shpool/tests/daemon.rs @@ -5,7 +5,7 @@ use std::{ net::{UnixListener, UnixStream}, process::CommandExt as _, }, - path, + path::{self, PathBuf}, process::{Command, Stdio}, time, }; @@ -344,3 +344,128 @@ fn allows_dynamic_log_adjustments() -> anyhow::Result<()> { Ok(()) } + +#[test] +#[timeout(30000)] +fn autodaemonize() -> anyhow::Result<()> { + let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test")?; + eprintln!("testing autodaemonization in {:?}", tmp_dir.path()); + + let mut socket_path = PathBuf::from(tmp_dir.path()); + socket_path.push("control.sock"); + + let mut log_file = PathBuf::from(tmp_dir.path()); + log_file.push("attach.log"); + + // we have to manually spawn the child because the whole point is that there + // isn't a daemon yet so we can't use the attach method. + let mut child = Command::new(support::shpool_bin()?) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .arg("--daemonize") + .arg("--socket") + .arg(socket_path) + .arg("--log-file") + .arg(log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("attach") + .arg("sh1") + .spawn() + .context("spawning attach process")?; + + // After half a second, the daemon should have spanwed + std::thread::sleep(time::Duration::from_millis(500)); + child.kill().context("killing child")?; + + let mut stdout = child.stdout.take().context("missing stdout")?; + let mut stdout_str = String::from(""); + stdout.read_to_string(&mut stdout_str).context("slurping stdout")?; + let stdout_re = Regex::new(".*prompt>.*")?; + assert!(stdout_re.is_match(&stdout_str)); + + // best effort attempt to clean up after ourselves + Command::new("pkill") + .arg("-f") + .arg("shpool-test-autodaemonize") + .output() + .context("running cleanup process")?; + + Ok(()) +} + +#[test] +#[timeout(30000)] +fn daemonize_cwd_cleanup() -> anyhow::Result<()> { + let launch_dir = tmpdir::Dir::new("/tmp/shpool-test-launch")?; + let daemon_dir = tmpdir::Dir::new("/tmp/shpool-test-daemon")?; + + let socket_path = daemon_dir.path().join("shpool.socket"); + let log_file = daemon_dir.path().join("shpool.log"); + + // Launch from launch_dir + let out = Command::new(support::shpool_bin()?) + .current_dir(launch_dir.path()) + .arg("--daemonize") + .arg("--socket") + .arg(&socket_path) + .arg("--log-file") + .arg(&log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("list") + .output() + .context("running shpool list --daemonize")?; + + assert!(out.status.success(), "shpool list should succeed"); + + // Give it a moment just in case + std::thread::sleep(time::Duration::from_millis(100)); + + // Explicitly try to remove the launch directory. + // If the daemon didn't cd /, this might fail because the directory is busy. + let rm_res = std::fs::remove_dir_all(launch_dir.path()); + + // Best-effort cleanup of the background daemon spawned by --daemonize. + Command::new("pkill").arg("-f").arg(socket_path.to_string_lossy().as_ref()).output().ok(); + + rm_res.context("removing launch directory")?; + + Ok(()) +} + +#[test] +#[timeout(30000)] +fn daemonize_pid_file_exists() -> anyhow::Result<()> { + let tmp_dir = tmpdir::Dir::new("/tmp/shpool-test-pid")?; + let socket_path = tmp_dir.path().join("shpool.socket"); + let log_file = tmp_dir.path().join("shpool.log"); + let pid_file = tmp_dir.path().join("daemonized-shpool.pid"); + + // Launch daemon via autodaemonize + let out = Command::new(support::shpool_bin()?) + .arg("--daemonize") + .arg("--socket") + .arg(&socket_path) + .arg("--log-file") + .arg(&log_file) + .arg("--config-file") + .arg(support::testdata_file("norc.toml")) + .arg("list") + .output() + .context("running shpool list --daemonize")?; + + assert!(out.status.success(), "shpool list should succeed"); + + // Give it a moment to spawn + std::thread::sleep(time::Duration::from_millis(500)); + + let pid_exists = pid_file.exists(); + + // Cleanup + Command::new("pkill").arg("-f").arg(socket_path.to_string_lossy().as_ref()).output().ok(); + + assert!(pid_exists, "pid file should exist while daemon is running"); + + Ok(()) +} diff --git a/shpool/tests/support/daemon.rs b/shpool/tests/support/daemon.rs index 102761b2..3f932d35 100644 --- a/shpool/tests/support/daemon.rs +++ b/shpool/tests/support/daemon.rs @@ -239,7 +239,9 @@ impl Proc { // spawn the daemon in a thread thread::spawn(move || { - if let Err(err) = libshpool::run(args, Some(hooks_recorder)) { + // Safety: we do not have consts::AUTODAEMONIZE_VAR set in the + // environment, so libshpool::run will not fork. + if let Err(err) = unsafe { libshpool::run(args, Some(hooks_recorder)) } { eprintln!("shpool proc exited with err: {err:?}"); } });