diff --git a/src/boxlite/src/litebox/init/tasks/vmm_spawn.rs b/src/boxlite/src/litebox/init/tasks/vmm_spawn.rs index b9dae3525..fcfc6cf07 100644 --- a/src/boxlite/src/litebox/init/tasks/vmm_spawn.rs +++ b/src/boxlite/src/litebox/init/tasks/vmm_spawn.rs @@ -90,9 +90,15 @@ impl PipelineTask for VmmSpawnTask { .inspect_err(|e| log_task_error(&box_id, task_name, e))?; // Spawn VM - let handler = spawn_vm(&box_id, &instance_spec, &options, &layout) - .await - .inspect_err(|e| log_task_error(&box_id, task_name, e))?; + let handler = spawn_vm( + &box_id, + &instance_spec, + &options, + &layout, + &runtime.shim_reaper, + ) + .await + .inspect_err(|e| log_task_error(&box_id, task_name, e))?; let mut ctx = ctx.lock().await; ctx.guard.set_handler(handler); @@ -375,6 +381,7 @@ async fn spawn_vm( config: &InstanceSpec, options: &BoxOptions, layout: &BoxFilesystemLayout, + reaper: &std::sync::Arc, ) -> BoxliteResult> { let mut controller = ShimController::new( find_binary("boxlite-shim")?, @@ -382,6 +389,7 @@ async fn spawn_vm( box_id.clone(), options.clone(), layout.clone(), + reaper.clone(), )?; controller.start(config).await diff --git a/src/boxlite/src/metrics/runtime_metrics.rs b/src/boxlite/src/metrics/runtime_metrics.rs index b56c5b7d4..578f613de 100644 --- a/src/boxlite/src/metrics/runtime_metrics.rs +++ b/src/boxlite/src/metrics/runtime_metrics.rs @@ -19,6 +19,21 @@ pub struct RuntimeMetricsStorage { pub(crate) total_commands: Arc, /// Total command execution errors across all boxes pub(crate) total_exec_errors: Arc, + /// Total shim PIDs the scoped reaper has collected (Issue #523). + /// Monotonic; tracks shim children whose `Child` handle was dropped + /// without `wait()` — the load-bearing zombie-prevention signal. + pub(crate) shim_reaped: Arc, + /// Total shim PIDs that vanished out from under the reaper before + /// it could collect them (ECHILD on waitpid). Monotonic. High vs + /// `shim_reaped` ratio means most cleanups are handled by the + /// owner's own `Child::wait()` — that's a healthy signal. + pub(crate) shim_reaper_vanished: Arc, + /// Current size of the reaper's PID registry. Gauge, not counter — + /// this rises when a shim is registered, falls when reaper sweeps + /// it. Sustained growth means either many running boxes or a + /// reaper-side bug; flag in dashboards if it climbs without + /// `shim_reaped` keeping pace. + pub(crate) shim_reaper_registered: Arc, } impl RuntimeMetricsStorage { @@ -92,6 +107,24 @@ impl RuntimeMetrics { pub fn total_exec_errors(&self) -> u64 { self.storage.total_exec_errors.load(Ordering::Relaxed) } + + /// Total shim PIDs the scoped reaper collected (Issue #523). + /// Never decreases. + pub fn shim_reaped_total(&self) -> u64 { + self.storage.shim_reaped.load(Ordering::Relaxed) + } + + /// Total shim PIDs that vanished (ECHILD) before the reaper saw + /// them. Never decreases. See `shim_reaped` field doc on + /// `RuntimeMetricsStorage` for what the ratio means operationally. + pub fn shim_reaper_vanished_total(&self) -> u64 { + self.storage.shim_reaper_vanished.load(Ordering::Relaxed) + } + + /// Current size of the reaper's PID registry. Gauge. + pub fn shim_reaper_registered(&self) -> u64 { + self.storage.shim_reaper_registered.load(Ordering::Relaxed) + } } #[cfg(test)] diff --git a/src/boxlite/src/rest/runtime.rs b/src/boxlite/src/rest/runtime.rs index 157b0a7d5..8b7667abe 100644 --- a/src/boxlite/src/rest/runtime.rs +++ b/src/boxlite/src/rest/runtime.rs @@ -171,6 +171,15 @@ fn runtime_metrics_from_response(resp: &RuntimeMetricsResponse) -> RuntimeMetric storage .total_exec_errors .store(resp.total_exec_errors, Ordering::Relaxed); + storage + .shim_reaped + .store(resp.shim_reaped_total, Ordering::Relaxed); + storage + .shim_reaper_vanished + .store(resp.shim_reaper_vanished_total, Ordering::Relaxed); + storage + .shim_reaper_registered + .store(resp.shim_reaper_registered, Ordering::Relaxed); RuntimeMetrics::new(storage) } diff --git a/src/boxlite/src/rest/types.rs b/src/boxlite/src/rest/types.rs index fcc3d9b1b..d88530c8a 100644 --- a/src/boxlite/src/rest/types.rs +++ b/src/boxlite/src/rest/types.rs @@ -406,6 +406,14 @@ pub(crate) struct RuntimeMetricsResponse { pub total_commands_executed: u64, #[serde(default)] pub total_exec_errors: u64, + /// Scoped shim reaper counters (Issue #523). Optional for + /// forward/backward compat with servers that pre-date the reaper. + #[serde(default)] + pub shim_reaped_total: u64, + #[serde(default)] + pub shim_reaper_vanished_total: u64, + #[serde(default)] + pub shim_reaper_registered: u64, } #[derive(Debug, Deserialize)] @@ -690,6 +698,50 @@ mod tests { assert_eq!(resp.total_commands_executed, 100); } + #[test] + fn test_runtime_metrics_back_compat_default_zero_for_missing_shim_fields() { + // Older servers that pre-date the shim reaper (Issue #523) emit + // JSON without the three `shim_*` fields. `serde(default)` on the + // client mirror must surface them as 0 instead of failing the + // deserialize — losing metrics is preferable to losing the whole + // /v1/metrics response. + let json = r#"{ + "boxes_created_total": 0, + "boxes_failed_total": 0, + "boxes_stopped_total": 0, + "num_running_boxes": 0, + "total_commands_executed": 0, + "total_exec_errors": 0 + }"#; + let resp: RuntimeMetricsResponse = serde_json::from_str(json).unwrap(); + assert_eq!(resp.shim_reaped_total, 0); + assert_eq!(resp.shim_reaper_vanished_total, 0); + assert_eq!(resp.shim_reaper_registered, 0); + } + + #[test] + fn test_runtime_metrics_shim_fields_round_trip() { + // Server-shaped JSON with the new fields populated round-trips + // through the deserializer with values intact. Locks the wire + // contract: the server's `shim_reaped_total` etc. must reach the + // client unchanged. + let json = r#"{ + "boxes_created_total": 5, + "boxes_failed_total": 1, + "boxes_stopped_total": 2, + "num_running_boxes": 2, + "total_commands_executed": 0, + "total_exec_errors": 0, + "shim_reaped_total": 17, + "shim_reaper_vanished_total": 4, + "shim_reaper_registered": 3 + }"#; + let resp: RuntimeMetricsResponse = serde_json::from_str(json).unwrap(); + assert_eq!(resp.shim_reaped_total, 17); + assert_eq!(resp.shim_reaper_vanished_total, 4); + assert_eq!(resp.shim_reaper_registered, 3); + } + #[test] fn test_box_status_transient_mapping() { let mut resp = BoxResponse { diff --git a/src/boxlite/src/runtime/rt_impl.rs b/src/boxlite/src/runtime/rt_impl.rs index 233873407..8a4da85d2 100644 --- a/src/boxlite/src/runtime/rt_impl.rs +++ b/src/boxlite/src/runtime/rt_impl.rs @@ -109,6 +109,11 @@ pub struct RuntimeImpl { /// Use `.is_cancelled()` for sync checks, `.cancelled()` for async select!. /// Child tokens are passed to each box via `.child_token()`. pub(crate) shutdown_token: CancellationToken, + + /// Scoped reaper for shim PIDs (Issue #523). Cleans up zombies left + /// by abandoned mid-init shims. Per-runtime so test binaries can have + /// many runtimes serially without leaking worker tasks. + pub(crate) shim_reaper: Arc, } /// Synchronized state protected by RwLock. @@ -223,6 +228,17 @@ impl RuntimeImpl { ImageDiskManager::new(layout.image_layout().disk_images_dir(), layout.temp_dir()); let guest_rootfs_mgr = GuestRootfsManager::new(base_disk_mgr.clone(), layout.temp_dir()); + // Construct runtime_metrics before the reaper so we can hand it + // a sink that mirrors reap activity into the metrics storage + // (visible via `RuntimeMetrics` and the REST /v1/metrics endpoint). + let runtime_metrics = RuntimeMetricsStorage::new(); + let shim_reaper = + crate::util::ShimReaper::spawn_with_sink(Some(crate::util::ShimReaperMetricsSink { + reaped_total: Arc::clone(&runtime_metrics.shim_reaped), + vanished_total: Arc::clone(&runtime_metrics.shim_reaper_vanished), + registered_now: Arc::clone(&runtime_metrics.shim_reaper_registered), + })); + let inner = Arc::new(Self { sync_state: RwLock::new(SynchronizedState { active_boxes_by_id: HashMap::new(), @@ -234,12 +250,13 @@ impl RuntimeImpl { image_disk_mgr, guest_rootfs_mgr, guest_rootfs: Arc::new(OnceCell::new()), - runtime_metrics: RuntimeMetricsStorage::new(), + runtime_metrics, base_disk_mgr, snapshot_mgr, lock_manager, _runtime_lock: runtime_lock, shutdown_token: CancellationToken::new(), + shim_reaper, }); tracing::debug!("initialized runtime"); @@ -615,6 +632,7 @@ impl RuntimeImpl { if active_boxes.is_empty() { tracing::info!("No active boxes to shutdown"); + self.shim_reaper.shutdown(); return Ok(()); } @@ -657,6 +675,13 @@ impl RuntimeImpl { } } + // Stop the shim reaper worker. Sync call but very fast — the + // Condvar wakeup short-circuits the REAPER_TICK sleep. Doing this + // after the per-box stop loop means any final shim exits triggered + // by the stop loop get a chance to be reaped in the reaper's + // shutdown-drain pass. + self.shim_reaper.shutdown(); + if errors.is_empty() { tracing::info!("Runtime shutdown complete"); Ok(()) @@ -736,6 +761,11 @@ impl RuntimeImpl { .pid_file_path(); let _ = std::fs::remove_file(&pid_file); } + + // Stop the reaper worker thread. Mirrors the async shutdown() path — + // important for the atexit case so a long-lived process doesn't + // leak the reaper thread on graceful exit. + self.shim_reaper.shutdown(); } // ======================================================================== diff --git a/src/boxlite/src/util/mod.rs b/src/boxlite/src/util/mod.rs index e19f9aa9e..592db2f08 100644 --- a/src/boxlite/src/util/mod.rs +++ b/src/boxlite/src/util/mod.rs @@ -1,6 +1,7 @@ mod binary_finder; mod pid_file; pub mod process; +pub mod reaper; pub use binary_finder::{RuntimeBinaryFinder, find_binary}; pub use pid_file::{PidFileReader, PidFileWriter, PidRecord, ProcessIdentity}; @@ -17,6 +18,7 @@ use tracing_subscriber::{EnvFilter, fmt}; pub use process::{ ProcessExit, ProcessMonitor, is_process_alive, kill_process, process_start_time, }; +pub use reaper::{MetricsSink as ShimReaperMetricsSink, ReaperStats, ShimReaper}; #[cfg(any(target_os = "linux", target_os = "macos"))] unsafe extern "C" { diff --git a/src/boxlite/src/util/reaper.rs b/src/boxlite/src/util/reaper.rs new file mode 100644 index 000000000..538e33324 --- /dev/null +++ b/src/boxlite/src/util/reaper.rs @@ -0,0 +1,747 @@ +//! Scoped shim-PID reaper (Issue #523). +//! +//! ## Why this exists +//! +//! `boxlite-shim` children whose Rust-side `Child` handle gets dropped +//! without anyone calling `wait()` on it become zombies when the shim +//! later exits. `std::process::Child`'s Drop impl is a no-op — it neither +//! kills nor waits — so any code path that holds a `Child`, then drops it +//! while the underlying process is still alive (or recently exited and +//! unreaped), leaks a zombie. Over time these accumulate; the +//! `CL84LvGx7RBE` incident showed 7+ `` shims under the daemon. +//! +//! The load-bearing leak source in production is `ShimHandler` being +//! dropped without `ShimHandler::stop()` running to completion. The two +//! ways this happens routinely: +//! +//! - Box removal via `rt_impl::remove_box`, which SIGKILLs the shim by +//! PID and never invokes `handler.stop()` — so the `Child` inside the +//! `ShimHandler` is dropped while/after the shim is dying, no `wait()` +//! is ever called, zombie. +//! - Runtime drop without `shutdown()`: the active `BoxImpl`s drop, their +//! `ShimHandler`s drop with `Child` still inside, zombie. +//! +//! Init-failure paths are NOT a zombie source: `CleanupGuard::drop` calls +//! `handler.stop()` which reaps via `Child::wait()`. Likewise the normal +//! user-driven `boxlite stop` path. +//! +//! ## Why this is scoped, not daemon-wide +//! +//! An earlier attempt (PR #520, reverted commit on the same PR) installed +//! a `waitpid(-1, WNOHANG)` reaper. That races every other call site that +//! owns a `Child` handle: if the reaper wins, the owner's `wait()` returns +//! `ECHILD` and the exit code is lost. To dodge the race without auditing +//! every `Child::wait()` in the workspace, this reaper only touches PIDs +//! that were explicitly registered via [`ShimReaper::register`]. The only +//! registrar today is `ShimHandler::from_spawned` +//! (`src/boxlite/src/vmm/controller/shim.rs`), so the reaper's blast +//! radius is exactly the shim PID set. +//! +//! For shim PIDs, the three `let _ = process.wait();` sites in shim.rs +//! discard their results, so `ECHILD` from a reaper-win is safe. +//! +//! ## Why registration is a one-way door (no auto-unregister on Drop) +//! +//! `register(pid)` returns nothing. There is intentionally no RAII handle +//! that would unregister on Drop. Earlier draft had one; it produced a +//! load-bearing miss: when `ShimHandler` field-order-drops, the +//! `keepalive` field drops *before* the handle, which closes the watchdog +//! pipe → shim begins graceful exit → handle drops → registry is purged +//! → shim actually exits ~100 ms later → no one waits → zombie. The +//! reaper had been told "stop watching" microseconds before the very +//! event it existed to catch. +//! +//! With no auto-unregister, the registry stays populated until the sweep +//! observes `waitpid(pid, WNOHANG)` returning either `Reaped` (we just +//! collected it) or `Vanished` (someone else collected it, or the PID is +//! gone). Both outcomes drop the PID from the registry. Worst-case +//! membership is one [`REAPER_TICK`] (250 ms) after exit. The registry +//! never grows unbounded under normal traffic. +//! +//! ## Observability and self-healing +//! +//! [`ShimReaper::stats`] returns a snapshot of registry size, reaped +//! count, vanished count, sweep count, and last-sweep time. These power +//! both the in-process `RuntimeMetrics` and the REST `/v1/metrics` +//! endpoint so operators can spot "reaper isn't running" by watching the +//! `sweeps_completed` counter stop incrementing. +//! +//! If the worker thread dies (panic on a poisoned mutex, etc.), the next +//! `register()` call detects this via [`ShimReaper::worker_alive`] and +//! transparently respawns the worker. A loud `tracing::error!` is +//! emitted whenever this happens; the alert is the load-bearing signal +//! that something pathological is going on inside the reaper. +//! +//! ## Why polling, not SIGCHLD +//! +//! SIGCHLD plumbing in async Rust (signal-hook / tokio::signal::unix) is +//! process-global and adds a race surface against the runtime's other +//! signal handlers. A 250 ms HashSet scan is cheaper than that complexity +//! buys back. Worst-case zombie lifetime is 250 ms; tests verify drain in +//! < 2 s by polling, never sleeping. +//! +//! ## Why a std thread, not a tokio task +//! +//! `RuntimeImpl::new` is sync and can be called outside of any tokio +//! runtime context. A `std::thread` worker has no such precondition. The +//! work is sync anyway (`waitpid` + sleep), so there's no benefit to a +//! tokio task here. + +use std::collections::HashSet; +use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; +use std::sync::{Arc, Condvar, Mutex, MutexGuard, PoisonError}; +use std::time::Duration; + +/// External sink the reaper writes its counters to on every sweep. +/// +/// Used to expose reaper health through `RuntimeMetrics` and the REST +/// `/v1/metrics` endpoint without making the `util` module depend on the +/// `metrics` module — `RuntimeImpl::new` constructs one of these by +/// cloning the relevant `Arc`s out of its +/// `RuntimeMetricsStorage` and hands it to `ShimReaper::spawn_with_sink`. +#[derive(Clone, Default)] +pub struct MetricsSink { + /// Monotonic total Reaped outcomes. + pub reaped_total: Arc, + /// Monotonic total Vanished outcomes. + pub vanished_total: Arc, + /// Current registry size (gauge). + pub registered_now: Arc, +} + +/// Recover from a poisoned mutex by extracting the inner data anyway. +/// +/// Standard Rust pattern: poison indicates a panic happened while holding +/// the lock. For our `HashSet` registry, no operation can leave the +/// set in a torn state (insert/remove are atomic at the std-lib level), +/// so it's safe to recover and proceed. Without this, a poisoned mutex +/// would propagate panics through every code path that touches the +/// registry — defeating the self-healing respawn in `register()`. +fn lock_or_recover(m: &Mutex) -> MutexGuard<'_, T> { + m.lock().unwrap_or_else(PoisonError::into_inner) +} + +/// How often the worker sweeps the registry for exited PIDs. +/// +/// Trade-off: shorter = quicker reaping + slightly more CPU on a HashSet +/// scan. 250 ms keeps the worst-case zombie lifetime well below 1 s and +/// lets the unit test confirm reap in < 2 s without flakiness. +const REAPER_TICK: Duration = Duration::from_millis(250); + +/// Outcome of a single `waitpid(pid, WNOHANG)` probe. Used by the worker +/// to decide whether to drop the PID from the registry. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ReapOutcome { + /// `waitpid` returned > 0 — we just reaped this PID. Drop from registry. + Reaped, + /// `waitpid` returned -1 with ECHILD — another reaper got it, or it's + /// no longer our child. Either way nothing more to do. Drop from registry. + Vanished, + /// `waitpid` returned 0 — still alive, leave in registry. + StillAlive, +} + +fn probe_pid(pid: u32) -> ReapOutcome { + let mut status: i32 = 0; + // SAFETY: waitpid is async-signal-safe and has no Rust-visible + // preconditions beyond a valid status pointer. + let result = unsafe { libc::waitpid(pid as i32, &mut status, libc::WNOHANG) }; + if result > 0 { + ReapOutcome::Reaped + } else if result < 0 { + // ECHILD is the only error path that's interesting here — it means + // someone else (the owner's Child::wait, or an unrelated reaper) + // already collected this PID, or this process is no longer our + // child. EINTR can't happen with WNOHANG. Treat both as "done". + ReapOutcome::Vanished + } else { + ReapOutcome::StillAlive + } +} + +fn unix_ms_now() -> i64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as i64) + .unwrap_or(0) +} + +/// Internal state shared with the worker thread. +struct Inner { + /// PIDs currently registered for reaping. Mutated under the Condvar lock. + registry: Mutex>, + /// Signals worker to exit at the next wake-up. + shutdown: AtomicBool, + /// Lets the worker wake immediately on shutdown without burning a full + /// REAPER_TICK. Pairs with `shutdown` boolean. + wake: Condvar, + /// Companion mutex for `wake`. Always locked before the condvar wait; + /// holds no data — separated from `registry` so the worker doesn't + /// hold the registry lock during the timed wait. + wake_lock: Mutex<()>, + /// Monotonic count of `Reaped` outcomes since reaper start. + reaped_total: AtomicU64, + /// Monotonic count of `Vanished` outcomes since reaper start. + vanished_total: AtomicU64, + /// Monotonic count of completed sweep passes. Stops incrementing if + /// the worker dies — operators alert on this going flat. + sweeps_completed: AtomicU64, + /// Unix-ms timestamp of the last completed sweep. -1 means "never". + /// Same purpose as sweeps_completed; lets dashboards show "X seconds + /// since last sweep" without subscribing to a counter delta. + last_sweep_at_ms: AtomicI64, + /// Test-only panic injection. When set, the worker panics on its + /// next loop iteration. Used by `worker_respawns_after_death` to + /// deterministically simulate worker death since the production + /// poison-recovery path (`lock_or_recover`) makes the natural + /// "panic via mutex poison" scenario impossible to provoke. + #[cfg(test)] + test_panic_next_iteration: AtomicBool, + /// Optional external counters mirrored on each sweep so + /// `RuntimeMetrics` and `/v1/metrics` can show reaper activity. + sink: Option, +} + +/// Snapshot of reaper health. Returned by [`ShimReaper::stats`]. Cheap +/// to construct (one mutex acquire + four atomic loads). Designed to be +/// pulled into [`crate::metrics::RuntimeMetrics`] at request time rather +/// than maintained by a separate observer. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ReaperStats { + /// Current number of PIDs tracked. + pub registered: usize, + /// Total `Reaped` outcomes since reaper start. + pub reaped_total: u64, + /// Total `Vanished` outcomes since reaper start. + pub vanished_total: u64, + /// Total completed sweeps. If this is increasing over time, the + /// worker is alive and pumping. If it goes flat while PIDs are + /// registered, the worker has died. + pub sweeps_completed: u64, + /// Unix milliseconds at the end of the most recent sweep. `None` + /// if no sweep has finished yet (worker just started). + pub last_sweep_at_ms: Option, + /// Whether the worker thread is currently alive. False means the + /// next `register()` will respawn it. + pub worker_alive: bool, +} + +/// Scoped reaper for `boxlite-shim` PIDs. +/// +/// Owns a worker thread that periodically calls `waitpid(pid, WNOHANG)` on +/// every registered PID. Registrations are added by +/// `ShimHandler::from_spawned` and never explicitly removed — the worker +/// removes a PID once it observes the process is reaped or gone. +pub struct ShimReaper { + inner: Arc, + worker: Mutex>>, +} + +impl ShimReaper { + /// Construct a reaper without external metrics export. + pub fn spawn() -> Arc { + Self::spawn_with_sink(None) + } + + /// Construct a reaper and start its worker thread. If `sink` is + /// `Some`, the worker mirrors its counters into the sink on every + /// sweep — used by `RuntimeImpl::new` to feed `RuntimeMetricsStorage`. + pub fn spawn_with_sink(sink: Option) -> Arc { + let inner = Arc::new(Inner { + registry: Mutex::new(HashSet::new()), + shutdown: AtomicBool::new(false), + wake: Condvar::new(), + wake_lock: Mutex::new(()), + reaped_total: AtomicU64::new(0), + vanished_total: AtomicU64::new(0), + sweeps_completed: AtomicU64::new(0), + last_sweep_at_ms: AtomicI64::new(-1), + #[cfg(test)] + test_panic_next_iteration: AtomicBool::new(false), + sink, + }); + let reaper = Arc::new(Self { + inner: Arc::clone(&inner), + worker: Mutex::new(None), + }); + reaper.spawn_worker(); + reaper + } + + /// (Re)spawn the worker thread. Called by `spawn` and by `register` + /// when it detects the prior worker has died. + fn spawn_worker(self: &Arc) { + let inner_for_worker = Arc::clone(&self.inner); + let worker = std::thread::Builder::new() + .name("boxlite-shim-reaper".into()) + .spawn(move || worker_loop(inner_for_worker)) + .expect("spawn reaper worker thread"); + *lock_or_recover(&self.worker) = Some(worker); + } + + /// Register a shim PID for reaping. + /// + /// If the worker has died since the last call (panic on poisoned + /// mutex, unexpected kernel state, etc.), this transparently respawns + /// it. A `tracing::error!` is emitted whenever a respawn happens so + /// operators get a load-bearing alert that something pathological is + /// going on. + /// + /// No handle is returned. The reaper's sweep is the authoritative + /// cleanup: when `waitpid(pid, WNOHANG)` reports the PID as `Reaped` + /// or `Vanished`, it leaves the registry. There is no caller-side + /// "unregister" because the load-bearing zombie source is exactly + /// the case where the caller has no orderly chance to unregister + /// before the shim dies (see module doc, "no auto-unregister on Drop"). + pub fn register(self: &Arc, pid: u32) { + if !self.worker_alive() && !self.inner.shutdown.load(Ordering::SeqCst) { + tracing::error!( + pid, + "Shim reaper worker thread died unexpectedly; respawning. \ + Investigate sweep panic or mutex poisoning." + ); + self.spawn_worker(); + } + let new_size = { + let mut reg = lock_or_recover(&self.inner.registry); + reg.insert(pid); + reg.len() as u64 + }; + if let Some(sink) = &self.inner.sink { + // Update the gauge eagerly on register so dashboards see + // the bump before the next sweep — avoids "we registered + // 1000 PIDs but the metric stays at 0 for 250 ms" surprise. + sink.registered_now.store(new_size, Ordering::Relaxed); + } + } + + /// Snapshot reaper health. See [`ReaperStats`]. + pub fn stats(&self) -> ReaperStats { + let registered = lock_or_recover(&self.inner.registry).len(); + let last_ms = self.inner.last_sweep_at_ms.load(Ordering::Relaxed); + ReaperStats { + registered, + reaped_total: self.inner.reaped_total.load(Ordering::Relaxed), + vanished_total: self.inner.vanished_total.load(Ordering::Relaxed), + sweeps_completed: self.inner.sweeps_completed.load(Ordering::Relaxed), + last_sweep_at_ms: if last_ms < 0 { None } else { Some(last_ms) }, + worker_alive: self.worker_alive(), + } + } + + /// Is the worker thread still running? + /// + /// Returns `false` if `shutdown()` has been called (the explicit + /// termination case) or if the worker has died unexpectedly (panic, + /// etc.). Distinguishing the two requires also checking + /// [`stats`]`.sweeps_completed` against the shutdown flag. + pub fn worker_alive(&self) -> bool { + match lock_or_recover(&self.worker).as_ref() { + Some(h) => !h.is_finished(), + None => false, + } + } + + /// Snapshot of currently registered PIDs. Test/debug aid; production + /// callers should not need this. + #[cfg(any(test, debug_assertions))] + pub fn registered(&self) -> Vec { + let mut v: Vec = lock_or_recover(&self.inner.registry) + .iter() + .copied() + .collect(); + v.sort_unstable(); + v + } + + /// Stop the worker and wait for it to exit. Idempotent — repeated calls + /// after the first return immediately. Called from + /// `RuntimeImpl::shutdown` so the worker doesn't outlive the runtime + /// (especially important in test binaries that construct many runtimes + /// serially). + /// + /// Sync (not async) because the worker is a `std::thread` and its + /// `JoinHandle::join` is blocking. Worst-case wait is one Condvar + /// wake-up + one sweep — typically sub-millisecond. + pub fn shutdown(&self) { + self.inner.shutdown.store(true, Ordering::SeqCst); + // Wake the worker immediately so it doesn't sleep through REAPER_TICK. + let _g = lock_or_recover(&self.inner.wake_lock); + self.inner.wake.notify_all(); + drop(_g); + let handle = lock_or_recover(&self.worker).take(); + if let Some(h) = handle { + // best-effort: if the thread panicked, don't propagate. + let _ = h.join(); + } + } +} + +impl Drop for ShimReaper { + fn drop(&mut self) { + // If shutdown() wasn't called explicitly, do it now so the worker + // thread doesn't outlive its Inner. Safe to call from a sync Drop. + self.shutdown(); + } +} + +fn worker_loop(inner: Arc) { + loop { + #[cfg(test)] + if inner + .test_panic_next_iteration + .swap(false, Ordering::SeqCst) + { + panic!("worker: test_panic_next_iteration triggered"); + } + if inner.shutdown.load(Ordering::SeqCst) { + // One final drain pass on shutdown so we don't leave the + // kernel holding zombies for any registered PIDs that already + // exited. + sweep(&inner); + return; + } + sweep(&inner); + // Wait up to REAPER_TICK or until shutdown wakes us. Holds the + // empty `wake_lock` for the duration of the wait — not the + // registry lock — so register stays responsive. + let guard = lock_or_recover(&inner.wake_lock); + let _ = inner.wake.wait_timeout(guard, REAPER_TICK); + } +} + +/// One pass over the registry. Drops any PID we successfully reaped or +/// that has vanished out from under us; leaves still-alive PIDs in place. +fn sweep(inner: &Inner) { + // Snapshot the PID set so we don't hold the lock across waitpid(). + // waitpid(WNOHANG) is fast, but register also holds this mutex + // briefly — short critical section is the right tradeoff. + let snapshot: Vec = lock_or_recover(&inner.registry).iter().copied().collect(); + let mut to_remove: Vec = Vec::new(); + let mut reaped = 0u64; + let mut vanished = 0u64; + for pid in snapshot { + match probe_pid(pid) { + ReapOutcome::Reaped => { + tracing::debug!(pid, "Scoped reaper collected shim exit"); + to_remove.push(pid); + reaped += 1; + } + ReapOutcome::Vanished => { + tracing::trace!(pid, "Scoped reaper: PID no longer reapable (ECHILD)"); + to_remove.push(pid); + vanished += 1; + } + ReapOutcome::StillAlive => {} + } + } + if !to_remove.is_empty() { + let mut reg = lock_or_recover(&inner.registry); + for pid in to_remove { + reg.remove(&pid); + } + } + if reaped > 0 { + inner.reaped_total.fetch_add(reaped, Ordering::Relaxed); + } + if vanished > 0 { + inner.vanished_total.fetch_add(vanished, Ordering::Relaxed); + } + inner.sweeps_completed.fetch_add(1, Ordering::Relaxed); + inner + .last_sweep_at_ms + .store(unix_ms_now(), Ordering::Relaxed); + + // Mirror to the external sink for RuntimeMetrics / REST consumers. + // The sink's reaped/vanished are monotonic counters; bump by the + // deltas observed in this sweep. registered_now is a gauge; set + // to the post-sweep registry size. + if let Some(sink) = &inner.sink { + if reaped > 0 { + sink.reaped_total.fetch_add(reaped, Ordering::Relaxed); + } + if vanished > 0 { + sink.vanished_total.fetch_add(vanished, Ordering::Relaxed); + } + let size = lock_or_recover(&inner.registry).len() as u64; + sink.registered_now.store(size, Ordering::Relaxed); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Instant; + + /// Spawn a real subprocess that exits immediately and assert the + /// reaper drops its PID from the registry within the worst-case + /// REAPER_TICK budget. Uses `/bin/sh -c true` rather than a Rust + /// in-process fork so the test exercises the actual waitpid path, + /// not a stubbed liveness probe. + #[test] + fn reaps_exited_pid_within_one_second() { + let reaper = ShimReaper::spawn(); + + // Spawn `true` — exits with code 0 essentially instantly. + let child = std::process::Command::new("/bin/sh") + .args(["-c", "true"]) + .spawn() + .expect("spawn /bin/sh -c true"); + let pid = child.id(); + // Deliberately leak the Child so its Drop doesn't wait() and + // race the reaper — this mirrors the production "Child dropped + // without wait" path the reaper exists to catch. + std::mem::forget(child); + + reaper.register(pid); + + // Poll the registry. Must succeed within 1 s; if it doesn't, + // the worker isn't reaping at the expected cadence. + let deadline = Instant::now() + Duration::from_secs(1); + loop { + let still_registered = reaper.registered().contains(&pid); + if !still_registered { + break; // success + } + assert!( + Instant::now() < deadline, + "registry still contains {pid} after 1s — reaper didn't run" + ); + std::thread::sleep(Duration::from_millis(50)); + } + + // The reaped counter must reflect this collection. (The integration + // test asserts the full set-cardinality invariant; this one pins + // the per-counter increment for a single PID.) + assert!(reaper.stats().reaped_total >= 1); + reaper.shutdown(); + } + + /// Pins the load-bearing post-fix invariant: registration is a one-way + /// door, only the reaper's `waitpid` sweep removes a PID. This is the + /// property the earlier RAII-handle design got wrong (registry was + /// purged on owner Drop before the shim had finished exiting, missing + /// the very zombie the reaper existed to catch). + /// + /// Concretely: register a long-lived child PID, give the worker + /// plenty of time to sweep, and assert the PID is still registered. + /// Then kill the child and assert the next sweep drains it. + #[test] + fn live_pid_stays_registered_until_actual_exit() { + let reaper = ShimReaper::spawn(); + + let mut child = std::process::Command::new("/bin/sh") + .args(["-c", "sleep 30"]) + .spawn() + .expect("spawn sleep child"); + let pid = child.id(); + reaper.register(pid); + + // Three ticks worth — the worker has had plenty of chances to + // run waitpid and observe "still alive". + std::thread::sleep(REAPER_TICK * 3); + assert!( + reaper.registered().contains(&pid), + "reaper must not unregister a still-running PID" + ); + + // Kill the child and wait via the Child handle ourselves so the + // reaper's next sweep sees Vanished (ECHILD) rather than Reaped. + child.kill().expect("kill child"); + child.wait().expect("wait child"); + + let deadline = Instant::now() + Duration::from_secs(1); + loop { + if !reaper.registered().contains(&pid) { + break; + } + assert!( + Instant::now() < deadline, + "reaper didn't notice the PID vanished within 1 s" + ); + std::thread::sleep(Duration::from_millis(50)); + } + + assert!(reaper.stats().vanished_total >= 1); + reaper.shutdown(); + } + + /// Shutdown must terminate the worker thread quickly. The Condvar + /// notify in `shutdown()` wakes the worker without it sleeping through + /// a full REAPER_TICK — so `shutdown()` itself returns in well under + /// the tick duration. + #[test] + fn shutdown_returns_promptly() { + let reaper = ShimReaper::spawn(); + + // Let one tick elapse so we know the worker is in steady-state + // wait (not racing initial registration). + std::thread::sleep(Duration::from_millis(50)); + + let t0 = Instant::now(); + reaper.shutdown(); + let elapsed = t0.elapsed(); + assert!( + elapsed < REAPER_TICK, + "shutdown took {elapsed:?}, expected well under {REAPER_TICK:?}" + ); + + // Second shutdown is a no-op. + let t0 = Instant::now(); + reaper.shutdown(); + assert!( + t0.elapsed() < Duration::from_millis(10), + "second shutdown should be near-instant" + ); + + // After shutdown, worker_alive() must report false. + assert!(!reaper.worker_alive()); + } + + /// Burst-register many PIDs that all return ECHILD on waitpid (we're + /// not the parent of any of them — they're a high-range u32 that the + /// kernel hasn't issued). Asserts the sweep drains the whole batch + /// without deadlocking on the registry mutex and within a reasonable + /// time bound. The PID values are bounded to the i32 positive range + /// so the libc::waitpid signature doesn't choke. + #[test] + fn registry_under_high_register_pressure() { + const N: usize = 1024; + const PRESSURE_BUDGET: Duration = Duration::from_secs(3); + + let reaper = ShimReaper::spawn(); + + // Use a high range close to (but below) i32::MAX so we get + // valid waitpid args that the kernel will reject with ECHILD. + // Start at i32::MAX - 100_000 to avoid colliding with any + // real-but-recycled PIDs. + let base: u32 = (i32::MAX as u32) - 100_000; + for i in 0..N { + reaper.register(base + i as u32); + } + // Note: we don't assert `registered == N` immediately — by the + // time the last few inserts complete, the worker may have + // already swept the first few. The load-bearing assertion is + // the drain below + the vanished_total counter. + + // All N should drain to Vanished within the budget. + let deadline = Instant::now() + PRESSURE_BUDGET; + loop { + let remaining = reaper.stats().registered; + if remaining == 0 { + break; + } + assert!( + Instant::now() < deadline, + "registry still has {remaining} entries after {PRESSURE_BUDGET:?}" + ); + std::thread::sleep(Duration::from_millis(50)); + } + + let stats = reaper.stats(); + assert!( + stats.vanished_total >= N as u64, + "expected at least {N} vanished, got {}", + stats.vanished_total + ); + assert!(stats.sweeps_completed >= 1); + + reaper.shutdown(); + } + + /// Use the test-only `test_panic_next_iteration` flag to deterministic- + /// ally crash the worker thread, then call register() and verify the + /// reaper transparently respawns it. Pins the "self-healing under + /// worker death" contract — a single reaper panic must not silently + /// degrade the daemon into the leak-everything state. + /// + /// We can't reproduce this with mutex poisoning anymore: the + /// production `lock_or_recover` path absorbs poison silently (which + /// is the correct behavior for our registry — no torn-state risk + /// with HashSet inserts). So the test uses an explicit injection + /// flag instead. + #[test] + fn worker_respawns_after_death() { + let reaper = ShimReaper::spawn(); + assert!(reaper.worker_alive()); + + // Arm the panic flag and wake the worker so it hits the panic + // on its very next iteration (not after a 250 ms wait). + reaper + .inner + .test_panic_next_iteration + .store(true, Ordering::SeqCst); + let _g = reaper.inner.wake_lock.lock().unwrap(); + reaper.inner.wake.notify_all(); + drop(_g); + + // Wait until the worker thread is observably finished. + let deadline = Instant::now() + Duration::from_secs(2); + loop { + if !reaper.worker_alive() { + break; + } + assert!( + Instant::now() < deadline, + "worker didn't die within 2 s of injected panic" + ); + std::thread::sleep(Duration::from_millis(50)); + } + + // register() should detect the dead worker and respawn it. + reaper.register(42); + assert!( + reaper.worker_alive(), + "register() must respawn the worker after observed death" + ); + + // The respawned worker must continue to function — its + // sweeps_completed counter increments past the snapshot we + // take here. + let pre = reaper.stats().sweeps_completed; + let deadline = Instant::now() + Duration::from_secs(2); + loop { + if reaper.stats().sweeps_completed > pre { + break; + } + assert!( + Instant::now() < deadline, + "respawned worker isn't pumping sweeps" + ); + std::thread::sleep(Duration::from_millis(50)); + } + + reaper.shutdown(); + } + + /// Stats counters move under real activity. Independent assertion from + /// the individual reap/vanish tests — verifies the aggregate snapshot + /// API works (used by `RuntimeMetrics` and the REST `/v1/metrics` + /// handler). + #[test] + fn stats_track_sweep_progress() { + let reaper = ShimReaper::spawn(); + + // Register a PID that will be Vanished (ECHILD) immediately. + reaper.register((i32::MAX as u32) - 1); + + // Give worker at least two ticks. + std::thread::sleep(REAPER_TICK * 2 + Duration::from_millis(50)); + + let stats = reaper.stats(); + assert!(stats.worker_alive); + assert!( + stats.sweeps_completed >= 1, + "sweeps_completed = {}", + stats.sweeps_completed + ); + assert!(stats.last_sweep_at_ms.is_some()); + assert!( + stats.vanished_total >= 1, + "vanished_total = {}", + stats.vanished_total + ); + + reaper.shutdown(); + } +} diff --git a/src/boxlite/src/vmm/controller/shim.rs b/src/boxlite/src/vmm/controller/shim.rs index 7f5334c37..090f202e8 100644 --- a/src/boxlite/src/vmm/controller/shim.rs +++ b/src/boxlite/src/vmm/controller/shim.rs @@ -1,10 +1,11 @@ //! ShimController and ShimHandler - Universal process management for all Box engines. -use std::{path::PathBuf, process::Child, sync::Mutex, time::Instant}; +use std::{path::PathBuf, process::Child, sync::Arc, sync::Mutex, time::Instant}; use crate::{ BoxID, runtime::layout::BoxFilesystemLayout, + util::ShimReaper, vmm::{InstanceSpec, VmmKind}, }; use boxlite_shared::errors::{BoxliteError, BoxliteResult}; @@ -48,8 +49,20 @@ impl ShimHandler { /// Takes ownership of the `SpawnedShim` (child process + keepalive) for /// proper lifecycle management. The keepalive keeps the watchdog pipe /// alive; dropping it triggers shim shutdown. - pub fn from_spawned(spawned: SpawnedShim, box_id: BoxID) -> Self { + /// + /// Registers the shim PID with `reaper` so the scoped zombie reaper + /// (Issue #523) collects the child even when this handler is dropped + /// without `stop()` ever calling `Child::wait()` — the load-bearing + /// case being `rt_impl::remove_box` (SIGKILL by PID, no `wait`) and + /// any runtime drop that skips `shutdown()`. + /// + /// The registration is *not* tied to this handler's lifetime: there + /// is no caller-visible handle, and dropping `ShimHandler` does not + /// unregister. The reaper's `waitpid` sweep is the authoritative + /// cleanup; see `util::reaper` module docs for the rationale. + pub fn from_spawned(spawned: SpawnedShim, box_id: BoxID, reaper: &Arc) -> Self { let pid = spawned.child.id(); + reaper.register(pid); Self { pid, box_id, @@ -64,6 +77,11 @@ impl ShimHandler { /// Used when reconnecting to a running box. We don't have a Child handle /// or keepalive, so we manage the process by PID only. /// + /// Does NOT register with the reaper: an attached shim was spawned by + /// a prior runtime process, so `waitpid` would return ECHILD here + /// regardless. The shim's eventual zombie (if its real parent is gone) + /// becomes init's problem, not ours. + /// /// # Arguments /// * `pid` - Process ID of the running VM /// * `box_id` - Box identifier (for logging) @@ -238,6 +256,8 @@ pub struct ShimController { options: crate::runtime::options::BoxOptions, /// Box filesystem layout (provides paths for stderr, sockets, etc.) layout: BoxFilesystemLayout, + /// Scoped reaper for the shim PID (Issue #523). + reaper: Arc, } impl ShimController { @@ -259,6 +279,7 @@ impl ShimController { box_id: BoxID, options: crate::runtime::options::BoxOptions, layout: BoxFilesystemLayout, + reaper: Arc, ) -> BoxliteResult { // Verify that the shim binary exists if !binary_path.exists() { @@ -274,6 +295,7 @@ impl ShimController { box_id, options, layout, + reaper, }) } } @@ -369,7 +391,7 @@ impl VmmController for ShimController { // which allows reusing that task across spawn/restart/reconnect. // Create handler from spawned shim (takes ownership of child + keepalive) - let handler = ShimHandler::from_spawned(spawned, self.box_id.clone()); + let handler = ShimHandler::from_spawned(spawned, self.box_id.clone(), &self.reaper); tracing::info!( box_id = %self.box_id, diff --git a/src/boxlite/tests/zombie_reaper.rs b/src/boxlite/tests/zombie_reaper.rs new file mode 100644 index 000000000..f2f117b0e --- /dev/null +++ b/src/boxlite/tests/zombie_reaper.rs @@ -0,0 +1,174 @@ +//! Integration test for the scoped shim-PID reaper (Issue #523). +//! +//! Verifies the acceptance criterion: "N init failures leave 0 zombies". +//! Simulates that scenario by spawning N short-lived subprocesses that all +//! exit immediately, registering each with the reaper, and asserting the +//! reaper drains its registry — and that no `` entries remain +//! visible in `/proc` — within the < 2 s test budget. +//! +//! Uses `/bin/sh -c true` rather than `boxlite-shim` because the reaper is +//! agnostic to the binary; it only cares about the PID lifecycle. This +//! keeps the test fast and VM-free. + +use boxlite::util::ShimReaper; +use std::process::Command; +use std::time::{Duration, Instant}; + +/// Simulates N abandoned mid-init shims. Each "failure" is a real fork + +/// exec that exits with code 0 essentially instantly; the test +/// `std::mem::forget`s the Child handle so its Drop doesn't `wait()` and +/// race the reaper — matching the production path where the abandoned +/// shim never gets a Rust-side `Child::wait()`. +#[test] +fn n_init_failures_leave_zero_zombies() { + const N: usize = 8; // matches the CL84LvGx7RBE incident count (7+ zombies) + const REAP_BUDGET: Duration = Duration::from_secs(2); + + let reaper = ShimReaper::spawn(); + + // Spawn N "failed init" stand-ins and register each PID. No handle + // is returned (and none is needed) — the reaper's sweep is the + // authoritative cleanup, register-then-forget is the production + // pattern. + let mut pids = Vec::with_capacity(N); + for _ in 0..N { + let child = Command::new("/bin/sh") + .args(["-c", "true"]) + .spawn() + .expect("spawn /bin/sh -c true"); + let pid = child.id(); + // Mirror the abandoned-mid-init path: leak the Child so its Drop + // doesn't reap the process out from under the reaper. + std::mem::forget(child); + reaper.register(pid); + pids.push(pid); + } + + // All N must be reaped within the budget. Poll, not sleep — finishing + // earlier than REAP_BUDGET keeps the test fast on healthy machines. + let deadline = Instant::now() + REAP_BUDGET; + loop { + let remaining = reaper.registered(); + if remaining.is_empty() { + break; + } + assert!( + Instant::now() < deadline, + "reaper still tracking {} of {N} PIDs after {REAP_BUDGET:?}: {remaining:?}", + remaining.len() + ); + std::thread::sleep(Duration::from_millis(50)); + } + + // Belt-and-suspenders: no zombie should remain visible in /proc. + // On Linux, a reaped or vanished process has no /proc/ entry at + // all; an unreaped zombie shows State: Z. Either way, the assertion + // below catches the regression we care about. + for pid in &pids { + let status_path = format!("/proc/{pid}/status"); + match std::fs::read_to_string(&status_path) { + Ok(content) => { + assert!( + !content.contains("State:\tZ"), + "PID {pid} is still a zombie in /proc:\n{content}" + ); + } + Err(_) => { + // No /proc entry: process is gone. Expected outcome. + } + } + } + + reaper.shutdown(); +} + +/// Pins the post-fix invariant that `register` is idempotent: calling it +/// twice for the same PID does not double-track, and a re-registration +/// after the reaper has cleaned up a prior PID works exactly like a +/// first-time registration. Matches the production "retry" path where an +/// init attempt fails and a fresh spawn re-enters the same code paths. +#[test] +fn register_is_idempotent_and_survives_prior_cleanup() { + let reaper = ShimReaper::spawn(); + + // First registration of a same-PID stand-in: the OS will assign pid1 + // to the first child; that PID may even get re-used by the second + // spawn (rare but valid). The contract this test pins is that double- + // registration is a no-op and re-registration after sweep-cleanup is + // a clean state. + let child1 = Command::new("/bin/sh") + .args(["-c", "true"]) + .spawn() + .expect("spawn first sh"); + let pid1 = child1.id(); + std::mem::forget(child1); + reaper.register(pid1); + reaper.register(pid1); // idempotent — HashSet insert returns false but does not error + + assert_eq!( + reaper.registered().iter().filter(|p| **p == pid1).count(), + 1, + "double register must result in a single registry entry" + ); + + // Wait for reaper to drain pid1. + let deadline = Instant::now() + Duration::from_secs(2); + while reaper.registered().contains(&pid1) { + assert!(Instant::now() < deadline, "pid1 never reaped"); + std::thread::sleep(Duration::from_millis(50)); + } + + // Register a second PID. Must work without state pollution from + // the first. + let child2 = Command::new("/bin/sh") + .args(["-c", "true"]) + .spawn() + .expect("spawn second sh"); + let pid2 = child2.id(); + std::mem::forget(child2); + reaper.register(pid2); + + assert!(reaper.registered().contains(&pid2)); + let deadline = Instant::now() + Duration::from_secs(2); + while reaper.registered().contains(&pid2) { + assert!(Instant::now() < deadline, "pid2 never reaped"); + std::thread::sleep(Duration::from_millis(50)); + } + + reaper.shutdown(); +} + +/// Scoping guarantee: a child whose PID was never registered must be left +/// untouched, so its owner's `wait()` still returns the real exit code. This +/// is the property whose absence got PR #520's global `waitpid(-1)` reaper +/// reverted (Issue #523) — it consumed children it did not own, and their +/// owners then got `ECHILD` with the exit code lost. Guards against +/// re-widening the reaper's blast radius past the registered shim-PID set. +#[test] +fn unregistered_child_exit_code_is_not_stolen() { + let reaper = ShimReaper::spawn(); + + // A real child that exits with a DISTINCT code, deliberately NOT + // registered with the reaper. + let mut child = Command::new("/bin/sh") + .args(["-c", "exit 42"]) + .spawn() + .expect("spawn /bin/sh -c 'exit 42'"); + + // ~3x the reaper's 250 ms tick: ample cycles for a scoped reaper to sweep + // its (empty) registry without touching this child. A global waitpid(-1) + // reaper would consume it here, and the owner's wait() below would get + // ECHILD instead of exit code 42. + std::thread::sleep(Duration::from_millis(800)); + + let status = child + .wait() + .expect("owner must still be able to wait its own child"); + assert_eq!( + status.code(), + Some(42), + "unregistered child's exit code must survive — reaper must stay scoped, not waitpid(-1)" + ); + + reaper.shutdown(); +} diff --git a/src/cli/src/commands/serve/handlers/metrics.rs b/src/cli/src/commands/serve/handlers/metrics.rs index abfe4f86d..f8a7c0eb6 100644 --- a/src/cli/src/commands/serve/handlers/metrics.rs +++ b/src/cli/src/commands/serve/handlers/metrics.rs @@ -21,6 +21,9 @@ pub(in crate::commands::serve) async fn runtime_metrics( num_running_boxes: metrics.num_running_boxes(), total_commands_executed: metrics.total_commands_executed(), total_exec_errors: metrics.total_exec_errors(), + shim_reaped_total: metrics.shim_reaped_total(), + shim_reaper_vanished_total: metrics.shim_reaper_vanished_total(), + shim_reaper_registered: metrics.shim_reaper_registered(), }) .into_response(), Err(e) => error_from_boxlite(&e), diff --git a/src/cli/src/commands/serve/types.rs b/src/cli/src/commands/serve/types.rs index 278d39e66..110b1a99d 100644 --- a/src/cli/src/commands/serve/types.rs +++ b/src/cli/src/commands/serve/types.rs @@ -177,6 +177,16 @@ pub(super) struct RuntimeMetricsResponse { pub num_running_boxes: u64, pub total_commands_executed: u64, pub total_exec_errors: u64, + /// Scoped shim reaper counters (Issue #523). Missing on older + /// servers that pre-date the reaper — clients tolerate absence via + /// `serde(default)` on the mirror type. + #[serde(default)] + pub shim_reaped_total: u64, + #[serde(default)] + pub shim_reaper_vanished_total: u64, + /// Gauge: current count of PIDs tracked by the reaper's registry. + #[serde(default)] + pub shim_reaper_registered: u64, } #[derive(Serialize)]