Skip to content

Commit 9c15099

Browse files
committed
fix(runner): use child process instead of pid/perf fifo for FIFO health check
1 parent 1833366 commit 9c15099

3 files changed

Lines changed: 29 additions & 42 deletions

File tree

src/executor/helpers/run_command_with_log_pipe.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use std::thread;
1111
///
1212
/// # Arguments
1313
/// - `cmd`: The command to run.
14-
/// - `cb`: A callback function that takes the process ID and returns a result.
14+
/// - `cb`: A callback function that takes the process and returns the exit status.
1515
///
1616
/// # Returns
1717
///
@@ -22,8 +22,8 @@ pub async fn run_command_with_log_pipe_and_callback<F, Fut>(
2222
cb: F,
2323
) -> Result<ExitStatus>
2424
where
25-
F: FnOnce(u32) -> Fut,
26-
Fut: Future<Output = anyhow::Result<()>>,
25+
F: FnOnce(std::process::Child) -> Fut,
26+
Fut: Future<Output = anyhow::Result<ExitStatus>>,
2727
{
2828
fn log_tee(
2929
mut reader: impl Read,
@@ -93,11 +93,9 @@ where
9393
log_tee(stderr, std::io::stderr(), Some("[stderr]")).unwrap();
9494
});
9595

96-
cb(process.id()).await?;
97-
98-
process.wait().context("failed to wait for the process")
96+
cb(process).await
9997
}
10098

10199
pub async fn run_command_with_log_pipe(cmd: Command) -> Result<ExitStatus> {
102-
run_command_with_log_pipe_and_callback(cmd, async |_| Ok(())).await
100+
run_command_with_log_pipe_and_callback(cmd, |mut child| async move { Ok(child.wait()?) }).await
103101
}

src/executor/memory/executor.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,15 @@ impl Executor for MemoryExecutor {
9898
debug!("cmd: {cmd:?}");
9999

100100
let runner_fifo = RunnerFifo::new()?;
101-
let on_process_started = async |pid| -> anyhow::Result<()> {
102-
let marker_result = Self::handle_fifo(runner_fifo, pid, ipc).await?;
101+
let on_process_started = |mut child: std::process::Child| async move {
102+
let marker_result = Self::handle_fifo(runner_fifo, ipc, &mut child).await?;
103103

104104
// Directly write to the profile folder, to avoid having to define another field
105105
marker_result
106106
.save_to(execution_context.profile_folder.join("results"))
107107
.unwrap();
108108

109-
Ok(())
109+
Ok(child.wait()?)
110110
};
111111

112112
let status = run_command_with_log_pipe_and_callback(cmd, on_process_started).await?;
@@ -152,11 +152,9 @@ impl Executor for MemoryExecutor {
152152
impl MemoryExecutor {
153153
async fn handle_fifo(
154154
mut runner_fifo: RunnerFifo,
155-
pid: u32,
156155
ipc: MemtrackIpcServer,
156+
child: &mut std::process::Child,
157157
) -> anyhow::Result<ExecutionTimestamps> {
158-
debug!("handle_fifo called with PID {pid}");
159-
160158
// Accept the IPC connection from memtrack and get the sender it sends us
161159
// Use a timeout to prevent hanging if the process doesn't start properly
162160
// https://github.com/servo/ipc-channel/issues/261
@@ -170,12 +168,12 @@ impl MemoryExecutor {
170168
.context("Timeout waiting for IPC connection from memtrack process")??;
171169
let ipc_client = Rc::new(MemtrackIpcClient::from_accepted(memtrack_sender));
172170

173-
let ipc_client_health = Rc::clone(&ipc_client);
174-
let health_check = async move || {
175-
// Ping memtrack via IPC to check if it's still responding
176-
match ipc_client_health.ping() {
177-
Ok(()) => Ok(true),
178-
Err(_) => Ok(false),
171+
// Check if the process has exited using try_wait (non-blocking)
172+
let health_check = async || {
173+
match child.try_wait() {
174+
Ok(None) => Ok(true), // Still running
175+
Ok(Some(_)) => Ok(false), // Exited
176+
Err(e) => Err(anyhow::Error::from(e)),
179177
}
180178
};
181179

src/executor/wall_time/perf/mod.rs

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@ use runner_shared::metadata::PerfMetadata;
3131
use runner_shared::unwind_data::UnwindData;
3232
use std::path::Path;
3333
use std::path::PathBuf;
34-
use std::sync::Arc;
35-
use std::time::Duration;
3634
use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
37-
use tokio::sync::Mutex;
3835

3936
mod jit_dump;
4037
mod memory_mappings;
@@ -205,14 +202,15 @@ impl PerfRunner {
205202
let cmd = wrap_with_sudo(wrapped_builder)?.build();
206203
debug!("cmd: {cmd:?}");
207204

208-
let on_process_started = async |_| -> anyhow::Result<()> {
205+
let on_process_started = |mut child: std::process::Child| async move {
209206
// If we output pipedata, we do not parse the perf map during teardown yet, so we need to parse memory
210207
// maps as we receive the `CurrentBenchmark` fifo commands.
211-
let data = Self::handle_fifo(runner_fifo, perf_fifo, self.output_pipedata).await?;
208+
let data =
209+
Self::handle_fifo(runner_fifo, perf_fifo, self.output_pipedata, &mut child).await?;
212210
self.benchmark_data.set(data).unwrap_or_else(|_| {
213211
error!("Failed to set benchmark data in PerfRunner");
214212
});
215-
Ok(())
213+
Ok(child.wait()?)
216214
};
217215
run_command_with_log_pipe_and_callback(cmd, on_process_started).await
218216
}
@@ -248,37 +246,30 @@ impl PerfRunner {
248246

249247
async fn handle_fifo(
250248
mut runner_fifo: RunnerFifo,
251-
perf_fifo: PerfFifo,
249+
mut perf_fifo: PerfFifo,
252250
parse_memory_maps: bool,
251+
child: &mut std::process::Child,
253252
) -> anyhow::Result<BenchmarkData> {
254253
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
255254
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
256255

257-
let perf_fifo = Arc::new(Mutex::new(perf_fifo));
258-
let mut perf_ping_timeout = 5;
256+
// Check if the process has exited using try_wait (non-blocking)
259257
let health_check = async || {
260-
let perf_ping = tokio::time::timeout(Duration::from_secs(perf_ping_timeout), async {
261-
perf_fifo.lock().await.ping().await
262-
})
263-
.await;
264-
if let Ok(Err(_)) | Err(_) = perf_ping {
265-
debug!("Failed to ping perf FIFO, ending perf fifo loop");
266-
return Ok(false);
258+
match child.try_wait() {
259+
Ok(None) => Ok(true), // Still running
260+
Ok(Some(_)) => Ok(false), // Exited
261+
Err(e) => Err(anyhow::Error::from(e)),
267262
}
268-
// Perf has started successfully, we can decrease the timeout for future pings
269-
perf_ping_timeout = 1;
270-
271-
Ok(true)
272263
};
273264

274265
let on_cmd = async |cmd: &FifoCommand| {
275266
#[allow(deprecated)]
276267
match cmd {
277268
FifoCommand::StartBenchmark => {
278-
perf_fifo.lock().await.start_events().await?;
269+
perf_fifo.start_events().await?;
279270
}
280271
FifoCommand::StopBenchmark => {
281-
perf_fifo.lock().await.stop_events().await?;
272+
perf_fifo.stop_events().await?;
282273
}
283274
FifoCommand::CurrentBenchmark { pid, .. } => {
284275
#[cfg(target_os = "linux")]
@@ -294,7 +285,7 @@ impl PerfRunner {
294285
}
295286
}
296287
FifoCommand::PingPerf => {
297-
if perf_fifo.lock().await.ping().await.is_err() {
288+
if perf_fifo.ping().await.is_err() {
298289
return Ok(Some(FifoCommand::Err));
299290
}
300291
return Ok(Some(FifoCommand::Ack));

0 commit comments

Comments
 (0)