feat(runner): add perf integration for python

not-matthias · not-matthias · commit 1b67cd8f5a44 · 2025-04-25T14:59:29.000+02:00
diff --git a/src/run/runner/wall_time/perf/metadata.rs b/src/run/runner/wall_time/perf/metadata.rs
@@ -15,6 +15,9 @@ pub struct PerfMetadata {
 
     /// The URIs of the benchmarks in the order they were executed.
     pub bench_order_by_pid: HashMap<u32, Vec<String>>,
+
+    /// Modules that should be ignored and removed from the folded trace and callgraph (e.g. python interpreter)
+    pub ignored_modules: Vec<(String, u64, u64)>,
 }
 
 impl PerfMetadata {
diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs
@@ -1,13 +1,16 @@
 use crate::prelude::*;
 use crate::run::runner::helpers::run_command_with_log_pipe::run_command_with_log_pipe_and_callback;
 use crate::run::runner::helpers::setup::run_with_sudo;
+use crate::run::runner::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
+use crate::run::runner::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
 use anyhow::Context;
 use fifo::{PerfFifo, RunnerFifo};
 use futures::stream::FuturesUnordered;
 use metadata::PerfMetadata;
 use perf_map::ProcessSymbols;
 use procfs::process::MMPermissions;
 use shared::Command as FifoCommand;
+use std::collections::HashSet;
 use std::path::PathBuf;
 use std::process::Command;
 use std::time::Duration;
@@ -76,10 +79,23 @@ impl PerfRunner {
             .prefix(PERF_DATA_PREFIX)
             .tempfile_in(&self.perf_dir)?;
 
+        // Detect the mode based on the command to be executed
+        let cg_mode = if bench_cmd.contains("cargo") {
+            "dwarf"
+        } else if bench_cmd.contains("pytest") {
+            "fp"
+        } else {
+            panic!(
+                "Perf not supported. Failed to detect call graph mode for command: {}",
+                bench_cmd
+            )
+        };
+        debug!("Using call graph mode: {}", cg_mode);
+
         cmd.args([
             "-c",
             &format!(
-                "perf record --quiet --user-callchains --freq=999 --switch-output --control=fifo:{},{} --delay=-1 -g --call-graph=dwarf --output={} -- {bench_cmd}",
+                "perf record --quiet --user-callchains --freq=999 --switch-output --control=fifo:{},{} --delay=-1 -g --call-graph={cg_mode} --output={} -- {bench_cmd}",
                 perf_fifo.ctl_fifo_path.to_string_lossy(),
                 perf_fifo.ack_fifo_path.to_string_lossy(),
                 perf_file.path().to_string_lossy()
@@ -125,7 +141,7 @@ impl PerfRunner {
                     let dst_path = profile_folder.join(dst_file_name);
                     tokio::fs::copy(src_path, dst_path).await?;
 
-                    Ok::<_, anyhow::Error>(())
+                    Ok::<_, anyhow::Error>(pid)
                 })
             })
             .collect::<FuturesUnordered<_>>();
@@ -139,7 +155,16 @@ impl PerfRunner {
             bench_data.bench_count(),
             "Benchmark count mismatch"
         );
-        let _ = futures::future::try_join_all(copy_tasks).await?;
+
+        // Harvest the perf maps generated by Python. This will copy the perf
+        // maps from /tmp to the profile folder. We have to write our own perf
+        // maps to these files AFTERWARDS, otherwise they'll be overwritten!
+        let perf_map_pids = futures::future::try_join_all(copy_tasks)
+            .await?
+            .into_iter()
+            .filter_map(Result::ok)
+            .collect::<HashSet<_>>();
+        harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?;
 
         // Append perf maps, unwind info and other metadata
         bench_data.save_to(profile_folder).unwrap();
@@ -272,6 +297,27 @@ impl BenchmarkData {
         let metadata = PerfMetadata {
             integration: self.integration.clone(),
             bench_order_by_pid: self.bench_order_by_pid.clone(),
+            ignored_modules: {
+                let mut to_ignore = vec![];
+
+                // Check if any of the ignored modules has been loaded in the process
+                for ignore_path in get_objects_path_to_ignore() {
+                    for proc in self.symbols_by_pid.values() {
+                        if let Some(mapping) = proc.module_mapping(&ignore_path) {
+                            let (Some((base_addr, _)), Some((_, end_addr))) = (
+                                mapping.iter().min_by_key(|(base_addr, _)| base_addr),
+                                mapping.iter().max_by_key(|(_, end_addr)| end_addr),
+                            ) else {
+                                continue;
+                            };
+
+                            to_ignore.push((ignore_path.clone(), *base_addr, *end_addr));
+                        }
+                    }
+                }
+
+                to_ignore
+            },
         };
         metadata.save_to(&path).unwrap();
 
diff --git a/src/run/runner/wall_time/perf/perf_map.rs b/src/run/runner/wall_time/perf/perf_map.rs
@@ -125,6 +125,15 @@ impl ProcessSymbols {
             .push((start_addr, end_addr));
     }
 
+    /// Returns the `(start, end)` address pairs stored for `module_path`, or `None` if it has no entry.
+    pub fn module_mapping<P: AsRef<std::path::Path>>(
+        &self,
+        module_path: P,
+    ) -> Option<&[(u64, u64)]> {
+        let ranges = self.module_mappings.get(module_path.as_ref())?;
+        Some(ranges.as_slice())
+    }
+
     pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P) -> anyhow::Result<()> {
         if self.modules.is_empty() {
             return Ok(());

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,9 @@ pub struct PerfMetadata {`
`15`	`15`
`16`	`16`	`/// The URIs of the benchmarks in the order they were executed.`
`17`	`17`	`pub bench_order_by_pid: HashMap<u32, Vec<String>>,`
	`18`	`+`
	`19`	`+ /// Modules that should be ignored and removed from the folded trace and callgraph (e.g. python interpreter)`
	`20`	`+ pub ignored_modules: Vec<(String, u64, u64)>,`
`18`	`21`	`}`
`19`	`22`
`20`	`23`	`impl PerfMetadata {`