Skip to content

Commit 9035594

Browse files
committed
fix(runner): replace perf fifo check with pid check
The FIFO check is very error prone and also slows down the teardown, since we're waiting for the last timeout.
1 parent e2d87f9 commit 9035594

2 files changed

Lines changed: 29 additions & 22 deletions

File tree

src/executor/shared/fifo.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ impl RunnerFifo {
241241

242242
let is_alive = health_check().await?;
243243
if !is_alive {
244+
info!("Process terminated, stopping the command handler");
244245
break;
245246
}
246247
}

src/executor/wall_time/perf/mod.rs

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@ use runner_shared::metadata::PerfMetadata;
3131
use runner_shared::unwind_data::UnwindData;
3232
use std::path::Path;
3333
use std::path::PathBuf;
34-
use std::sync::Arc;
35-
use std::time::Duration;
3634
use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
37-
use tokio::sync::Mutex;
3835

3936
mod jit_dump;
4037
mod memory_mappings;
@@ -205,10 +202,12 @@ impl PerfRunner {
205202
let cmd = wrap_with_sudo(wrapped_builder)?.build();
206203
debug!("cmd: {cmd:?}");
207204

208-
let on_process_started = async |_| -> anyhow::Result<()> {
205+
let on_process_started = async |pid| -> anyhow::Result<()> {
209206
// If we output pipedata, we do not parse the perf map during teardown yet, so we need to parse memory
210207
// maps as we receive the `CurrentBenchmark` fifo commands.
211-
let data = Self::handle_fifo(runner_fifo, perf_fifo, self.output_pipedata).await?;
208+
let data =
209+
Self::handle_fifo(runner_fifo, perf_fifo, self.output_pipedata, pid as pid_t)
210+
.await?;
212211
self.benchmark_data.set(data).unwrap_or_else(|_| {
213212
error!("Failed to set benchmark data in PerfRunner");
214213
});
@@ -248,37 +247,44 @@ impl PerfRunner {
248247

249248
async fn handle_fifo(
250249
mut runner_fifo: RunnerFifo,
251-
perf_fifo: PerfFifo,
250+
mut perf_fifo: PerfFifo,
252251
parse_memory_maps: bool,
252+
pid: pid_t,
253253
) -> anyhow::Result<BenchmarkData> {
254254
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
255255
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
256256

257-
let perf_fifo = Arc::new(Mutex::new(perf_fifo));
258-
let mut perf_ping_timeout = 5;
257+
// The runner spawns a `bash` process, which will execute perf and the benchmark. To check if it
258+
// terminated, we have to check if all the sub-processes terminated.
259+
// We can't check for the `bash` process, because it will terminate after the FIFO handler has
260+
// finished (because we're using a single-threaded runtime).
261+
let mut sys = sysinfo::System::new();
259262
let health_check = async || {
260-
let perf_ping = tokio::time::timeout(Duration::from_secs(perf_ping_timeout), async {
261-
perf_fifo.lock().await.ping().await
262-
})
263-
.await;
264-
if let Ok(Err(_)) | Err(_) = perf_ping {
265-
debug!("Failed to ping perf FIFO, ending perf fifo loop");
266-
return Ok(false);
267-
}
268-
// Perf has started successfully, we can decrease the timeout for future pings
269-
perf_ping_timeout = 1;
263+
sys.refresh_processes(sysinfo::ProcessesToUpdate::All, true);
264+
let process = sys.process(sysinfo::Pid::from_u32(pid as u32));
265+
266+
match process {
267+
None => Ok(false),
268+
Some(_proc) => {
269+
let has_children = sys.processes().values().any(|p| {
270+
p.parent()
271+
.map(|parent_pid| parent_pid.as_u32() == pid as u32)
272+
.unwrap_or(false)
273+
});
270274

271-
Ok(true)
275+
Ok(has_children)
276+
}
277+
}
272278
};
273279

274280
let on_cmd = async |cmd: &FifoCommand| {
275281
#[allow(deprecated)]
276282
match cmd {
277283
FifoCommand::StartBenchmark => {
278-
perf_fifo.lock().await.start_events().await?;
284+
perf_fifo.start_events().await?;
279285
}
280286
FifoCommand::StopBenchmark => {
281-
perf_fifo.lock().await.stop_events().await?;
287+
perf_fifo.stop_events().await?;
282288
}
283289
FifoCommand::CurrentBenchmark { pid, .. } => {
284290
#[cfg(target_os = "linux")]
@@ -294,7 +300,7 @@ impl PerfRunner {
294300
}
295301
}
296302
FifoCommand::PingPerf => {
297-
if perf_fifo.lock().await.ping().await.is_err() {
303+
if perf_fifo.ping().await.is_err() {
298304
return Ok(Some(FifoCommand::Err));
299305
}
300306
return Ok(Some(FifoCommand::Ack));

0 commit comments

Comments
 (0)