Skip to content

Commit 95c91a1

Browse files
committed
feat(runner): add perf integration for python
1 parent ad2ab52 commit 95c91a1

File tree

3 files changed

+59
-3
lines changed

3 files changed

+59
-3
lines changed

src/run/runner/wall_time/perf/metadata.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ pub struct PerfMetadata {
1515

1616
/// The URIs of the benchmarks in the order they were executed.
1717
pub bench_order_by_pid: HashMap<u32, Vec<String>>,
18+
19+
/// Modules that should be ignored and removed from the folded trace and callgraph (e.g. the Python interpreter), stored as `(module path, lowest base address, highest end address)`.
20+
pub ignored_modules: Vec<(String, u64, u64)>,
1821
}
1922

2023
impl PerfMetadata {

src/run/runner/wall_time/perf/mod.rs

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
use crate::prelude::*;
22
use crate::run::runner::helpers::run_command_with_log_pipe::run_command_with_log_pipe_and_callback;
33
use crate::run::runner::helpers::setup::run_with_sudo;
4+
use crate::run::runner::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
5+
use crate::run::runner::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
46
use anyhow::Context;
57
use fifo::{PerfFifo, RunnerFifo};
68
use futures::stream::FuturesUnordered;
79
use metadata::PerfMetadata;
810
use perf_map::ProcessSymbols;
911
use procfs::process::MMPermissions;
1012
use shared::Command as FifoCommand;
13+
use std::collections::HashSet;
1114
use std::path::PathBuf;
1215
use std::process::Command;
1316
use std::time::Duration;
@@ -76,10 +79,21 @@ impl PerfRunner {
7679
.prefix(PERF_DATA_PREFIX)
7780
.tempfile_in(&self.perf_dir)?;
7881

82+
// Detect the perf call graph mode (`dwarf` or `fp`) based on the command to be executed; falls back to `dwarf` when the command is not recognized
83+
let cg_mode = if bench_cmd.contains("cargo") {
84+
"dwarf"
85+
} else if bench_cmd.contains("pytest") {
86+
"fp"
87+
} else {
88+
warn!("Couldn't detect call graph mode for command: {}", bench_cmd);
89+
"dwarf"
90+
};
91+
debug!("Using call graph mode: {}", cg_mode);
92+
7993
cmd.args([
8094
"-c",
8195
&format!(
82-
"perf record --quiet --user-callchains --freq=999 --switch-output --control=fifo:{},{} --delay=-1 -g --call-graph=dwarf --output={} -- {bench_cmd}",
96+
"perf record --quiet --user-callchains --freq=999 --switch-output --control=fifo:{},{} --delay=-1 -g --call-graph={cg_mode} --output={} -- {bench_cmd}",
8397
perf_fifo.ctl_fifo_path.to_string_lossy(),
8498
perf_fifo.ack_fifo_path.to_string_lossy(),
8599
perf_file.path().to_string_lossy()
@@ -137,7 +151,7 @@ impl PerfRunner {
137151
let dst_path = profile_folder.join(dst_file_name);
138152
tokio::fs::copy(src_path, dst_path).await?;
139153

140-
Ok::<_, anyhow::Error>(())
154+
Ok::<_, anyhow::Error>(pid)
141155
})
142156
})
143157
.collect::<FuturesUnordered<_>>();
@@ -151,7 +165,16 @@ impl PerfRunner {
151165
bench_data.bench_count(),
152166
"Benchmark count mismatch"
153167
);
154-
let _ = futures::future::try_join_all(copy_tasks).await?;
168+
169+
// Harvest the perf maps generated by Python. This will copy the perf
170+
// maps from /tmp to the profile folder. We have to write our own perf
171+
// maps to these files AFTERWARDS, otherwise they'll be overwritten!
172+
let perf_map_pids = futures::future::try_join_all(copy_tasks)
173+
.await?
174+
.into_iter()
175+
.filter_map(Result::ok)
176+
.collect::<HashSet<_>>();
177+
harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?;
155178

156179
// Append perf maps, unwind info and other metadata
157180
bench_data.save_to(profile_folder).unwrap();
@@ -284,6 +307,27 @@ impl BenchmarkData {
284307
let metadata = PerfMetadata {
285308
integration: self.integration.clone(),
286309
bench_order_by_pid: self.bench_order_by_pid.clone(),
310+
ignored_modules: {
311+
let mut to_ignore = vec![];
312+
313+
// Check whether any of the ignored modules has been loaded in any of the tracked processes
314+
for ignore_path in get_objects_path_to_ignore() {
315+
for proc in self.symbols_by_pid.values() {
316+
if let Some(mapping) = proc.module_mapping(&ignore_path) {
317+
let (Some((base_addr, _)), Some((_, end_addr))) = (
318+
mapping.iter().min_by_key(|(base_addr, _)| base_addr),
319+
mapping.iter().max_by_key(|(_, end_addr)| end_addr),
320+
) else {
321+
continue;
322+
};
323+
324+
to_ignore.push((ignore_path.clone(), *base_addr, *end_addr));
325+
}
326+
}
327+
}
328+
329+
to_ignore
330+
},
287331
};
288332
metadata.save_to(&path).unwrap();
289333

src/run/runner/wall_time/perf/perf_map.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,15 @@ impl ProcessSymbols {
125125
.push((start_addr, end_addr));
126126
}
127127

128+
/// Looks up the address ranges stored in `module_mappings` for `module_path`.
///
/// Returns `None` when no entry exists for that path; otherwise returns a
/// borrowed slice of the stored `(u64, u64)` pairs.
// NOTE(review): the pairs look like `(start_addr, end_addr)` ranges, judging
// by the `.push((start_addr, end_addr))` just above — confirm.
pub fn module_mapping<P: AsRef<std::path::Path>>(
129+
&self,
130+
module_path: P,
131+
) -> Option<&[(u64, u64)]> {
132+
self.module_mappings
133+
.get(module_path.as_ref())
134+
.map(|bounds| bounds.as_slice())
135+
}
136+
128137
pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P) -> anyhow::Result<()> {
129138
if self.modules.is_empty() {
130139
return Ok(());

0 commit comments

Comments
 (0)