From 9454c456e20db492d788d62ec6e5fc981d1360db Mon Sep 17 00:00:00 2001 From: Joe Birr-Pixton Date: Thu, 6 Nov 2025 13:12:56 +0000 Subject: [PATCH 1/4] Plumb in detailed results of memory tests --- ci-bench-runner/src/db.rs | 11 +++++++++++ ci-bench-runner/src/job/bench_pr.rs | 6 ++++++ ci-bench-runner/src/job/mod.rs | 8 ++++++++ ci-bench-runner/src/test/mod.rs | 4 ++++ 4 files changed, 29 insertions(+) diff --git a/ci-bench-runner/src/db.rs b/ci-bench-runner/src/db.rs index 45e27f2..62748ed 100644 --- a/ci-bench-runner/src/db.rs +++ b/ci-bench-runner/src/db.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::ops::DerefMut; use std::sync::Arc; @@ -10,6 +11,8 @@ use time::OffsetDateTime; use tokio::sync::Mutex; use uuid::Uuid; +use crate::job::MemoryDetails; + /// An enqueued GitHub event #[derive(Debug)] pub struct QueuedEvent { @@ -92,6 +95,8 @@ pub struct ComparisonSubResult { pub diffs: Vec, /// Benchmark scenarios present in the candidate but missing in the baseline pub scenarios_missing_in_baseline: Vec, + /// Detailed results for memory jobs (name -> (baseline, candidate)) + pub(crate) memory_details: Option>, } /// A diff for a particular scenario, obtained by comparing benchmark results between two versions @@ -487,10 +492,12 @@ impl Db { icount: ComparisonSubResult { scenarios_missing_in_baseline: icount_scenarios_missing_in_baseline, diffs: icount_diffs, + memory_details: None, }, walltime: ComparisonSubResult { scenarios_missing_in_baseline: walltime_scenarios_missing_in_baseline, diffs: walltime_diffs, + memory_details: None, }, })) } @@ -703,10 +710,12 @@ mod test { icount: ComparisonSubResult { scenarios_missing_in_baseline: Vec::new(), diffs: icount_diffs.clone(), + memory_details: None, }, walltime: ComparisonSubResult { scenarios_missing_in_baseline: Vec::new(), diffs: walltime_diffs.clone(), + memory_details: None, }, }, ) @@ -771,10 +780,12 @@ mod test { icount: ComparisonSubResult { diffs: diffs.clone(), scenarios_missing_in_baseline: vec!["bar".to_string()], + memory_details: None, }, walltime: ComparisonSubResult { diffs: Vec::new(), scenarios_missing_in_baseline: vec!["baz".to_string()], + memory_details: None, }, }, ) diff --git a/ci-bench-runner/src/job/bench_pr.rs b/ci-bench-runner/src/job/bench_pr.rs index f42fcc6..aade378 100644 --- a/ci-bench-runner/src/job/bench_pr.rs +++ b/ci-bench-runner/src/job/bench_pr.rs @@ -22,6 +22,7 @@ use crate::db::{BenchResult, ComparisonResult, ComparisonSubResult, ScenarioDiff use crate::event_queue::JobContext; use crate::github::api::{CommentEvent, PullRequestReviewEvent}; use crate::github::{self, update_commit_status}; +use crate::job::MemoryDetails; use crate::runner::{write_logs_for_run, BenchRunner, Log}; use crate::CommitIdentifier; @@ -424,10 +425,12 @@ fn compare_refs( icount: ComparisonSubResult { diffs: icount_diffs, scenarios_missing_in_baseline: icount_missing, + memory_details: None, }, walltime: ComparisonSubResult { diffs: walltime_diffs, scenarios_missing_in_baseline: walltime_missing, + memory_details: None, }, }) } @@ -679,6 +682,8 @@ pub struct Diffs { negligible_diffs: Vec, /// Benchmark scenarios present in the candidate but missing in the baseline scenarios_missing_in_baseline: Vec, + /// Detailed results for memory jobs + memory_details: Option>, } impl Diffs { @@ -688,6 +693,7 @@ impl Diffs { significant_diffs, negligible_diffs, scenarios_missing_in_baseline: sub_result.scenarios_missing_in_baseline, + memory_details: sub_result.memory_details, } } } diff --git a/ci-bench-runner/src/job/mod.rs b/ci-bench-runner/src/job/mod.rs index 600d553..f7b831b 100644 --- a/ci-bench-runner/src/job/mod.rs +++ b/ci-bench-runner/src/job/mod.rs @@ -87,3 +87,11 @@ fn icounts_path(base: &Path) -> PathBuf { pub fn walltimes_path(base: &Path) -> PathBuf { base.join("results/walltimes.csv") } + +#[derive(Copy, Clone, Debug, Default)] +pub(crate) struct MemoryDetails { + pub(crate) heap_total_bytes: u64, + pub(crate) heap_total_blocks: u64, + pub(crate) heap_peak_bytes: u64, + pub(crate) heap_peak_blocks: u64, +} diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs index 81b9250..75756fd 100644 --- a/ci-bench-runner/src/test/mod.rs +++ b/ci-bench-runner/src/test/mod.rs @@ -476,10 +476,12 @@ async fn test_pr_synchronize_cached() { significance_threshold: 0.35, cachegrind_diff: Some("dummy cachegrind diff".to_string()), }], + memory_details: None, }, walltime: ComparisonSubResult { scenarios_missing_in_baseline: vec!["bar".to_string()], diffs: Vec::new(), + memory_details: None, }, }, ) @@ -657,10 +659,12 @@ async fn test_get_cachegrind_diff() { significance_threshold: 0.35, cachegrind_diff: Some("dummy cachegrind diff".to_string()), }], + memory_details: None, }, walltime: ComparisonSubResult { scenarios_missing_in_baseline: vec!["bar".to_string()], diffs: Vec::new(), + memory_details: None, }, }, ) From 88627e37befe5fe5ba843acbd1516b38817d8abb Mon Sep 17 00:00:00 2001 From: Joe Birr-Pixton Date: Mon, 3 Nov 2025 11:01:02 +0000 Subject: [PATCH 2/4] Add memory results to PR report --- ci-bench-runner/migrations/1_add_memory.sql | 2 + ci-bench-runner/src/db.rs | 76 +++++++++++++- ci-bench-runner/src/job/bench_pr.rs | 65 +++++++++++- ci-bench-runner/src/job/mod.rs | 98 +++++++++++++++++++ ci-bench-runner/src/test/mod.rs | 16 +++ .../templates/comparison_success_comment.md | 30 ++++++ ci-bench-runner/templates/macros.md | 19 ++++ 7 files changed, 299 insertions(+), 7 deletions(-) create mode 100644 ci-bench-runner/migrations/1_add_memory.sql diff --git a/ci-bench-runner/migrations/1_add_memory.sql b/ci-bench-runner/migrations/1_add_memory.sql new file mode 100644 index 0000000..ef0e091 --- /dev/null +++ b/ci-bench-runner/migrations/1_add_memory.sql @@ -0,0 +1,2 @@ +ALTER TABLE comparison_runs + ADD COLUMN memory_scenarios_missing_in_baseline TEXT; diff --git a/ci-bench-runner/src/db.rs b/ci-bench-runner/src/db.rs index 62748ed..b9b9da5 100644 --- a/ci-bench-runner/src/db.rs +++ b/ci-bench-runner/src/db.rs @@ -87,6 +87,8 @@ pub struct ComparisonResult { pub icount: ComparisonSubResult, /// Result for the walltime benchmarks pub walltime: ComparisonSubResult, + /// Result for the memory benchmarks + pub memory: ComparisonSubResult, } #[derive(Debug, Clone)] @@ -128,12 +130,46 @@ impl ScenarioDiff { pub fn diff_ratio(&self) -> f64 { self.diff() / self.baseline_result } + + pub(crate) fn baseline_memory( + &self, + details: &Option>, + ) -> String { + let Some(detail) = details.as_ref().and_then(|m| m.get(&self.scenario_name)) else { + return String::new(); + }; + + detail.0.to_string() + } + + pub(crate) fn candidate_memory( + &self, + details: &Option>, + ) -> String { + let Some(detail) = details.as_ref().and_then(|m| m.get(&self.scenario_name)) else { + return String::new(); + }; + + detail.1.to_string() + } + + pub(crate) fn memory_diff( + &self, + details: &Option>, + ) -> String { + let Some(detail) = details.as_ref().and_then(|m| m.get(&self.scenario_name)) else { + return String::new(); + }; + + detail.0.diff_string(detail.1) + } } #[derive(Copy, Clone, Debug, PartialEq)] pub enum ScenarioKind { Icount = 0, Walltime = 1, + Memory = 2, } impl TryFrom for ScenarioKind { @@ -143,6 +179,7 @@ impl TryFrom for ScenarioKind { match value { 0 => Ok(Self::Icount), 1 => Ok(Self::Walltime), + 2 => Ok(Self::Memory), kind => bail!("invalid scenario kind: {kind}"), } } @@ -387,6 +424,7 @@ impl Db { let icount_scenarios_missing = to_json_array(&result.icount.scenarios_missing_in_baseline); let walltime_scenarios_missing = to_json_array(&result.walltime.scenarios_missing_in_baseline); + let memory_scenarios_missing = to_json_array(&result.memory.scenarios_missing_in_baseline); let mut conn = self.sqlite.lock().await; let id = conn.transaction(|t| { @@ -395,7 +433,7 @@ impl Db { let id = Uuid::new_v4(); let now = OffsetDateTime::now_utc(); sqlx::query( - "INSERT INTO comparison_runs (id, created_utc, baseline_commit, candidate_commit, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline) VALUES (?, ?, ?, ?, ?, ?)", + "INSERT INTO comparison_runs (id, created_utc, baseline_commit, candidate_commit, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline, memory_scenarios_missing_in_baseline) VALUES (?, ?, ?, ?, ?, ?, ?)", ) .bind(id.as_bytes().as_slice()) .bind(now) @@ -403,11 +441,12 @@ impl Db { .bind(candidate_commit) .bind(icount_scenarios_missing) .bind(walltime_scenarios_missing) + .bind(memory_scenarios_missing) .execute(t.deref_mut()) .await?; // Insert the associated diffs - for diff in result.icount.diffs.into_iter().chain(result.walltime.diffs) { + for diff in result.icount.diffs.into_iter().chain(result.walltime.diffs).chain(result.memory.diffs) { sqlx::query( "INSERT INTO scenario_diffs (comparison_run_id, scenario_name, scenario_kind, baseline_result, candidate_result, significance_threshold, cachegrind_diff) VALUES (?, ?, ?, ?, ?, ?, ?)", ) @@ -447,7 +486,7 @@ impl Db { let mut conn = self.sqlite.lock().await; let row = sqlx::query( r" - SELECT id, created_utc, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline + SELECT id, created_utc, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline, memory_scenarios_missing_in_baseline FROM comparison_runs WHERE baseline_commit = ? AND candidate_commit = ?", ) @@ -465,6 +504,8 @@ impl Db { from_json_array(row.try_get("icount_scenarios_missing_in_baseline")?)?; let walltime_scenarios_missing_in_baseline = from_json_array(row.try_get("walltime_scenarios_missing_in_baseline")?)?; + let memory_scenarios_missing_in_baseline = + from_json_array(row.try_get("memory_scenarios_missing_in_baseline")?)?; let icount_diffs = sqlx::query_as( r" @@ -483,11 +524,22 @@ impl Db { FROM scenario_diffs WHERE comparison_run_id = ? AND scenario_kind = ?", ) - .bind(id) + .bind(&id) .bind(ScenarioKind::Walltime as i64) .fetch_all(conn.deref_mut()) .await?; + let memory_diffs = sqlx::query_as( + r" + SELECT * + FROM scenario_diffs + WHERE comparison_run_id = ? AND scenario_kind = ?", + ) + .bind(id) + .bind(ScenarioKind::Memory as i64) + .fetch_all(conn.deref_mut()) + .await?; + Ok(Some(ComparisonResult { icount: ComparisonSubResult { scenarios_missing_in_baseline: icount_scenarios_missing_in_baseline, @@ -499,6 +551,11 @@ impl Db { diffs: walltime_diffs, memory_details: None, }, + memory: ComparisonSubResult { + scenarios_missing_in_baseline: memory_scenarios_missing_in_baseline, + diffs: memory_diffs, + memory_details: None, + }, })) } @@ -702,6 +759,7 @@ mod test { let candidate_commit = "7faf240afbdbb4e76c47ff5f3f049c7a78c9c843"; let icount_diffs = make_diffs(ScenarioKind::Icount); let walltime_diffs = make_diffs(ScenarioKind::Walltime); + let memory_diffs = make_diffs(ScenarioKind::Memory); db.store_comparison_result( baseline_commit.to_string(), @@ -717,6 +775,11 @@ mod test { diffs: walltime_diffs.clone(), memory_details: None, }, + memory: ComparisonSubResult { + scenarios_missing_in_baseline: Vec::new(), + diffs: memory_diffs.clone(), + memory_details: None, + }, }, ) .await?; @@ -787,6 +850,11 @@ mod test { scenarios_missing_in_baseline: vec!["baz".to_string()], memory_details: None, }, + memory: ComparisonSubResult { + diffs: Vec::new(), + scenarios_missing_in_baseline: vec!["quux".to_string()], + memory_details: None, + }, }, ) .await?; diff --git a/ci-bench-runner/src/job/bench_pr.rs b/ci-bench-runner/src/job/bench_pr.rs index aade378..1941091 100644 --- a/ci-bench-runner/src/job/bench_pr.rs +++ b/ci-bench-runner/src/job/bench_pr.rs @@ -17,7 +17,10 @@ use tempfile::TempDir; use time::{Duration, OffsetDateTime}; use tracing::{error, trace}; -use super::{icounts_path, read_icount_results, read_walltime_results, walltimes_path}; +use super::{ + icounts_path, maybe_read_memory_results, memory_path, read_icount_results, + read_walltime_results, walltimes_path, +}; use crate::db::{BenchResult, ComparisonResult, ComparisonSubResult, ScenarioDiff, ScenarioKind}; use crate::event_queue::JobContext; use crate::github::api::{CommentEvent, PullRequestReviewEvent}; @@ -298,13 +301,20 @@ async fn bench_pr_and_cache_results( let icount_significance_thresholds = calculate_significance_thresholds(icount_results); let walltime_results = historical_results - .into_iter() - .filter(|r| r.scenario_kind == ScenarioKind::Walltime); + .iter() + .filter(|r| r.scenario_kind == ScenarioKind::Walltime) + .cloned(); let walltime_significance_thresholds = calculate_significance_thresholds(walltime_results); + let memory_results = historical_results + .into_iter() + .filter(|r| r.scenario_kind == ScenarioKind::Memory); + let memory_significance_thresholds = calculate_significance_thresholds(memory_results); + let significance_thresholds = SignificanceThresholds { icount: icount_significance_thresholds, walltime: walltime_significance_thresholds, + memory: memory_significance_thresholds, }; let job_output_dir = ctx.job_output_dir.clone(); @@ -421,6 +431,35 @@ fn compare_refs( MINIMUM_WALLTIME_NOISE_THRESHOLD, )?; + let memory_baseline = maybe_read_memory_results(&memory_path(&job_output_path.join("base")))?; + let memory_candidate = + maybe_read_memory_results(&memory_path(&job_output_path.join("candidate")))?; + + let (memory_diffs, memory_missing) = compare_results( + job_output_path, + &distill_memory(&memory_baseline), + &distill_memory(&memory_candidate), + &significance_thresholds.memory, + ScenarioKind::Memory, + DEFAULT_MEMORY_NOISE_THRESHOLD, + MINIMUM_MEMORY_NOISE_THRESHOLD, + )?; + + let memory_details = Some( + memory_diffs + .iter() + .map(|diff| { + ( + diff.scenario_name.clone(), + ( + *memory_baseline.get(&diff.scenario_name).unwrap(), + *memory_candidate.get(&diff.scenario_name).unwrap(), + ), + ) + }) + .collect(), + ); + Ok(ComparisonResult { icount: ComparisonSubResult { diffs: icount_diffs, @@ -432,9 +471,23 @@ fn compare_refs( scenarios_missing_in_baseline: walltime_missing, memory_details: None, }, + memory: ComparisonSubResult { + diffs: memory_diffs, + scenarios_missing_in_baseline: memory_missing, + memory_details, + }, }) } +/// Controls how the comparison is done between MemoryDetails, by distilling a +/// single f64 from it. +fn distill_memory(input: &HashMap) -> HashMap { + input + .iter() + .map(|(scenario, detail)| (scenario.clone(), detail.comparison_basis() as f64)) + .collect() +} + /// Returns the calculated significance threshold for each scenario /// /// Scenarios with less than 10 results will be skipped. It is the responsibility of the caller to @@ -481,6 +534,7 @@ pub fn calculate_significance_thresholds( struct SignificanceThresholds { icount: HashMap, walltime: HashMap, + memory: HashMap, } #[derive(Debug, Clone)] @@ -513,6 +567,7 @@ fn markdown_comment( cachegrind_diff_url: diff_url, icount: Diffs::from_sub_result(bench_results.icount), walltime: Diffs::from_sub_result(bench_results.walltime), + memory: Diffs::from_sub_result(bench_results.memory), branches, bencher_project_id, common_time_unit: |x, y| common_time_unit(*x, *y), @@ -665,6 +720,8 @@ pub struct ComparisonSuccessComment<'a> { icount: Diffs, /// Diffs for the walltime benchmarks walltime: Diffs, + /// Diffs for the memory benchmarks + memory: Diffs, /// The base url to obtain cachegrind diffs cachegrind_diff_url: &'a str, /// Information about the branches that were compared @@ -737,6 +794,8 @@ static DEFAULT_ICOUNT_NOISE_THRESHOLD: f64 = 0.002; // 0.2% static MINIMUM_ICOUNT_NOISE_THRESHOLD: f64 = 0.002; // 0.2% static DEFAULT_WALLTIME_NOISE_THRESHOLD: f64 = 0.05; // 5% static MINIMUM_WALLTIME_NOISE_THRESHOLD: f64 = 0.01; // 1% +static DEFAULT_MEMORY_NOISE_THRESHOLD: f64 = 0.001; // 0.1% +static MINIMUM_MEMORY_NOISE_THRESHOLD: f64 = 0.001; // 0.1% static APP_NAME: &str = "rustls-benchmarking"; /// Functions inside this module will be available as askama filters diff --git a/ci-bench-runner/src/job/mod.rs b/ci-bench-runner/src/job/mod.rs index f7b831b..caebdf4 100644 --- a/ci-bench-runner/src/job/mod.rs +++ b/ci-bench-runner/src/job/mod.rs @@ -80,6 +80,46 @@ pub fn read_walltime_results(path: &Path) -> anyhow::Result Ok(results) } +/// Reads the (benchmark, result) pairs from previous CSV output +pub fn maybe_read_memory_results(path: &Path) -> anyhow::Result> { + trace!( + path = path.display().to_string(), + "reading memory results from CSV file" + ); + + let mut results = HashMap::new(); + let results_file = match File::open(path) { + Ok(f) => f, + Err(e) => { + trace!("failed to open file {e:?} (ignoring)"); + return Ok(results); + } + }; + for line in BufReader::new(results_file).lines() { + let line = line.context("failed to read line from CSV file")?; + let line = line.trim(); + let mut parts = line.split(','); + + let scenario = parts.next().ok_or(anyhow!("empty line"))?.to_string(); + let counts: Result, _> = parts.map(|s| s.parse::()).collect(); + let details = match counts.context("invalid u64 in row")?.as_slice() { + [heap_total_bytes, heap_total_blocks, heap_peak_bytes, heap_peak_blocks] => { + MemoryDetails { + heap_total_bytes: *heap_total_bytes, + heap_total_blocks: *heap_total_blocks, + heap_peak_bytes: *heap_peak_bytes, + heap_peak_blocks: *heap_peak_blocks, + } + } + _ => bail!("incorrect number of measurements for memory results row"), + }; + + results.insert(scenario, details); + } + + Ok(results) +} + fn icounts_path(base: &Path) -> PathBuf { base.join("results/icounts.csv") } @@ -88,6 +128,10 @@ pub fn walltimes_path(base: &Path) -> PathBuf { base.join("results/walltimes.csv") } +pub fn memory_path(base: &Path) -> PathBuf { + base.join("results/memory.csv") +} + #[derive(Copy, Clone, Debug, Default)] pub(crate) struct MemoryDetails { pub(crate) heap_total_bytes: u64, @@ -95,3 +139,57 @@ pub(crate) struct MemoryDetails { pub(crate) heap_peak_bytes: u64, pub(crate) heap_peak_blocks: u64, } + +impl MemoryDetails { + /// Which field we use to compare two `MemoryDetails` + fn comparison_basis(&self) -> u64 { + self.heap_total_bytes + } + + /// Returns a string, like the `Display` impl, that illustrates + /// the difference between the baseline `self` and `candidate`. + pub(crate) fn diff_string(&self, candidate: MemoryDetails) -> String { + let heap_total_bytes = candidate.heap_total_bytes as f64 - self.heap_total_bytes as f64; + let heap_total_blocks = candidate.heap_total_blocks as f64 - self.heap_total_blocks as f64; + let heap_peak_bytes = candidate.heap_peak_bytes as f64 - self.heap_peak_bytes as f64; + let heap_peak_blocks = candidate.heap_peak_blocks as f64 - self.heap_peak_blocks as f64; + + format!( + "∑ {heap_total_bytes:+}B {heap_total_blocks:+}a
🔝 {heap_peak_bytes:+}B {heap_peak_blocks:+}a" + ) + } +} + +impl std::fmt::Display for MemoryDetails { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "∑ {}B {}a
🔝 {}B {}a", + self.heap_total_bytes, + self.heap_total_blocks, + self.heap_peak_bytes, + self.heap_peak_blocks + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_diff() { + let start = MemoryDetails { + heap_total_bytes: 10, + heap_total_blocks: 0, + heap_peak_bytes: 20, + heap_peak_blocks: 30, + }; + let end = MemoryDetails { + heap_total_bytes: 10, + heap_total_blocks: 20, + heap_peak_bytes: 15, + heap_peak_blocks: 10, + }; + assert_eq!(start.diff_string(end), "∑ +0B +20a
🔝 -5B -20a"); + } +} diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs index 75756fd..414982c 100644 --- a/ci-bench-runner/src/test/mod.rs +++ b/ci-bench-runner/src/test/mod.rs @@ -146,6 +146,12 @@ impl BenchRunner for MockBenchRunner { "fake_walltime_bench,12345,12432,12211", )?; + // Fake memory + fs::write( + results_dir.join("memory.csv"), + "fake_memory_bench,11197667,2653,599466,199", + )?; + // Notify any watchers of this call self.runs_tx .send(MockBenchRun { @@ -483,6 +489,11 @@ async fn test_pr_synchronize_cached() { diffs: Vec::new(), memory_details: None, }, + memory: ComparisonSubResult { + scenarios_missing_in_baseline: vec![], + diffs: Vec::new(), + memory_details: None, + }, }, ) .await @@ -666,6 +677,11 @@ async fn test_get_cachegrind_diff() { diffs: Vec::new(), memory_details: None, }, + memory: ComparisonSubResult { + diffs: vec![], + scenarios_missing_in_baseline: vec![], + memory_details: None, + }, }, ) .await diff --git a/ci-bench-runner/templates/comparison_success_comment.md b/ci-bench-runner/templates/comparison_success_comment.md index e6d7fa1..7b1bd0a 100644 --- a/ci-bench-runner/templates/comparison_success_comment.md +++ b/ci-bench-runner/templates/comparison_success_comment.md @@ -82,6 +82,36 @@ _There are no other wall-time count differences_ {% endif %} +## Memory usage + +{% call macros::missing_scenarios(memory.scenarios_missing_in_baseline) %} + +Key: + +- ∑: sum usage for entire benchmark run +- 🔝: peak usage +- B: bytes +- a: allocations + +#### Significant differences + +{% if memory.significant_diffs.is_empty() %} + +_There are no significant memory usage differences_ + +{% else %} + +⚠️ There are significant memory usage differences + +
+Click to expand + +{% call macros::memory_table(memory.significant_diffs, memory.memory_details, true) %} + +
+ +{% endif %} + ## Additional information {% if let Some(project_id) = bencher_project_id %} diff --git a/ci-bench-runner/templates/macros.md b/ci-bench-runner/templates/macros.md index 0e57c4d..05105f9 100644 --- a/ci-bench-runner/templates/macros.md +++ b/ci-bench-runner/templates/macros.md @@ -54,6 +54,25 @@ The following benchmark scenarios are present in the candidate but not in the ba {%- endmacro -%} +{%- macro memory_table(diffs, details, use_emoji) -%} + +| Scenario | Baseline | Candidate | Diff | Threshold | +| --- | ---: | ---: | ---: | ---: | +{% for diff in diffs %} +{%- let emoji -%} +{%- if use_emoji && diff.diff() > 0.0 -%} +{%- let emoji = "⚠️ " -%} +{%- else if use_emoji && diff.diff() < 0.0 -%} +{%- let emoji = "✅ " -%} +{%- else -%} +{%- let emoji = "" -%} +{%- endif -%} +| {{ diff.scenario_name }} | {{ diff.baseline_memory(details) }} | {{ diff.candidate_memory(details) }} | {{emoji}}{{ diff.memory_diff(details) }} ({{ "{:.2}%"|format(diff.diff_ratio() * 100.0) }}) | {{ "{:.2}%"|format(diff.significance_threshold * 100.0) }} | +{% endfor %} + +{%- endmacro -%} + + {%- macro checkout_details(branches) -%} Checkout details: From f72f89326e5276af98db69935bd0a9a84f511ccc Mon Sep 17 00:00:00 2001 From: Joe Birr-Pixton Date: Thu, 6 Nov 2025 16:31:42 +0000 Subject: [PATCH 3/4] Add memory results to bencher.dev --- ci-bench-runner/src/bencher_dev.rs | 68 ++++++++++++++++++++++++++- ci-bench-runner/src/job/bench_main.rs | 15 +++++- ci-bench-runner/src/test/mod.rs | 2 +- 3 files changed, 82 insertions(+), 3 deletions(-) diff --git a/ci-bench-runner/src/bencher_dev.rs b/ci-bench-runner/src/bencher_dev.rs index de11fcc..9665a57 100644 --- a/ci-bench-runner/src/bencher_dev.rs +++ b/ci-bench-runner/src/bencher_dev.rs @@ -10,6 +10,7 @@ use bencher_client::{ }; use tracing::error; +use crate::job::MemoryDetails; use crate::BencherConfig; /// The Bencher.dev client along with its configuration @@ -33,6 +34,7 @@ impl BencherDev { } /// Sends the icount and walltime results to bencher.dev for visualization + #[expect(clippy::too_many_arguments)] pub async fn track_results( &self, branch: &str, @@ -41,9 +43,14 @@ impl BencherDev { end_time: DateTime, icounts: HashMap, walltimes: HashMap, + memory: HashMap, ) -> anyhow::Result<()> { let mut bmf_map = results_to_bmf(icounts, INSTRUCTIONS_SLUG_STR.parse().unwrap()); - bmf_map.extend(results_to_bmf(walltimes, LATENCY_SLUG_STR.parse().unwrap())); + merge( + &mut bmf_map, + results_to_bmf(walltimes, LATENCY_SLUG_STR.parse().unwrap()), + ); + memory_results_to_bmf(&mut bmf_map, memory); let testbed = self.config.testbed_id.clone(); let report = JsonNewReport { @@ -78,6 +85,49 @@ impl BencherDev { } } +fn memory_results_to_bmf(out: &mut JsonResultsMap, results: HashMap) { + merge( + out, + results_to_bmf( + results + .iter() + .map(|(k, v)| (k.clone(), v.heap_peak_blocks as f64)) + .collect(), + "heap-peak-blocks".parse().unwrap(), + ), + ); + merge( + out, + results_to_bmf( + results + .iter() + .map(|(k, v)| (k.clone(), v.heap_peak_bytes as f64)) + .collect(), + "heap-peak-bytes".parse().unwrap(), + ), + ); + merge( + out, + results_to_bmf( + results + .iter() + .map(|(k, v)| (k.clone(), v.heap_total_blocks as f64)) + .collect(), + "heap-total-blocks".parse().unwrap(), + ), + ); + merge( + out, + results_to_bmf( + results + .iter() + .map(|(k, v)| (k.clone(), v.heap_total_bytes as f64)) + .collect(), + "heap-total-bytes".parse().unwrap(), + ), + ); +} + /// Converts the instruction counts map into Bencher Metric Format (BMF) /// /// See for details @@ -106,3 +156,19 @@ fn results_to_bmf(results: HashMap, measure: Measure) -> JsonResult }) .collect() } + +/// Merges `src` into `target. +/// +/// With both `src` and `target` in the form: +/// +/// ```json +/// { "benchmark-name": { "measure": { ... }}} +/// ``` +/// +/// Ensures multiple measures appear in the end result. Does +/// not require that "benchmark-name" exists in `target`. +fn merge(target: &mut JsonResultsMap, src: JsonResultsMap) { + for (bench, contents) in src.into_iter() { + target.entry(bench).or_default().extend(contents); + } +} diff --git a/ci-bench-runner/src/job/bench_main.rs b/ci-bench-runner/src/job/bench_main.rs index 97f2610..f1b09fd 100644 --- a/ci-bench-runner/src/job/bench_main.rs +++ b/ci-bench-runner/src/job/bench_main.rs @@ -5,7 +5,10 @@ use bencher_client::json::DateTime; use tempfile::TempDir; use tracing::{trace, warn}; -use super::{icounts_path, read_icount_results, read_walltime_results, walltimes_path}; +use super::{ + icounts_path, maybe_read_memory_results, memory_path, read_icount_results, + read_walltime_results, walltimes_path, +}; use crate::db::ScenarioKind; use crate::event_queue::JobContext; use crate::github::api::PushEvent; @@ -79,6 +82,8 @@ pub async fn bench_main(ctx: JobContext<'_>) -> anyhow::Result<()> { .context("failed to read instruction counts from file")?; let walltimes = read_walltime_results(&walltimes_path(&ctx.job_output_dir)) .context("failed to read walltimes from file")?; + let memory = maybe_read_memory_results(&memory_path(&ctx.job_output_dir)) + .context("failed to read memory results from file")?; // Persist results in the DB and in bencher.dev let results = icounts @@ -89,6 +94,13 @@ pub async fn bench_main(ctx: JobContext<'_>) -> anyhow::Result<()> { .iter() .map(|(scenario, result)| (scenario.clone(), ScenarioKind::Walltime, *result)), ) + .chain(memory.iter().map(|(scenario, result)| { + ( + scenario.clone(), + ScenarioKind::Memory, + result.comparison_basis() as f64, + ) + })) .collect(); ctx.db .store_run_results(results) @@ -104,6 +116,7 @@ pub async fn bench_main(ctx: JobContext<'_>) -> anyhow::Result<()> { benchmark_run_end, icounts, walltimes, + memory, ) .await .context("failed to send results to bencher.dev"); diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs index 414982c..f191015 100644 --- a/ci-bench-runner/src/test/mod.rs +++ b/ci-bench-runner/src/test/mod.rs @@ -645,7 +645,7 @@ async fn test_push_happy_path() { .await .unwrap() .len(); - assert_eq!(result_count, 4); + assert_eq!(result_count, 6); } #[tokio::test] From 294f85644d69c5a4c63be65c231b321bb700ac9a Mon Sep 17 00:00:00 2001 From: Joe Birr-Pixton Date: Wed, 12 Nov 2025 09:11:59 +0000 Subject: [PATCH 4/4] Disable PR processing from cache This didn't work for memory results, because the "headline" figure only is persisted in the database (to match the assumption that any measure is a f64, whereas memory measurements are more structured than that.) This means force-pushes to PRs will re-run and re-measure the benchmarks, naturally that is slower than _not_ doing that, but also takes about two minutes currently. --- ci-bench-runner/src/job/bench_pr.rs | 20 ++------- ci-bench-runner/src/test/mod.rs | 67 ----------------------------- 2 files changed, 4 insertions(+), 83 deletions(-) diff --git a/ci-bench-runner/src/job/bench_pr.rs b/ci-bench-runner/src/job/bench_pr.rs index 1941091..7c6bf58 100644 --- a/ci-bench-runner/src/job/bench_pr.rs +++ b/ci-bench-runner/src/job/bench_pr.rs @@ -211,22 +211,10 @@ pub async fn bench_pr( ) .await; - let cached_result = ctx - .db - .comparison_result( - &branches.baseline.commit_sha, - &branches.candidate.commit_sha, - ) - .await?; - let result = match cached_result { - Some(result) => Ok(result), - None => { - let mut logs = BenchPrLogs::default(); - bench_pr_and_cache_results(&ctx, branches.clone(), &mut logs) - .await - .map_err(|error| BenchPrError { error, logs }) - } - }; + let mut logs = BenchPrLogs::default(); + let result = bench_pr_and_cache_results(&ctx, branches.clone(), &mut logs) + .await + .map_err(|error| BenchPrError { error, logs }); let cachegrind_diff_url = format!( "{}/comparisons/{}:{}/cachegrind-diff", diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs index f191015..b27ed3f 100644 --- a/ci-bench-runner/src/test/mod.rs +++ b/ci-bench-runner/src/test/mod.rs @@ -455,73 +455,6 @@ async fn test_pr_synchronize_happy_path() { mock_github.server.verify().await; } -#[tokio::test] -async fn test_pr_synchronize_cached() { - // Mock HTTP responses from GitHub - let mock_github = MockGitHub::start().await; - let _post_comment = mock_github.mock_post_comment().await; - let post_status = mock_github.mock_post_status().await; - - // Run the job server - let server = TestServer::start(&mock_github).await; - - // Ensure the DB already has a stored comparison result for the provided commit pair - server - .db - .store_comparison_result( - "7edbfb999b352aa09fe669e9103d8155d7e7d890".to_string(), - "b0b69e925b2c9c6187cb16f361dd36e156f8e097".to_string(), - ComparisonResult { - icount: ComparisonSubResult { - scenarios_missing_in_baseline: Vec::new(), - diffs: vec![ScenarioDiff { - scenario_name: "foo".to_string(), - scenario_kind: ScenarioKind::Icount, - baseline_result: 1000.0, - candidate_result: 1001.0, - significance_threshold: 0.35, - cachegrind_diff: Some("dummy cachegrind diff".to_string()), - }], - memory_details: None, - }, - walltime: ComparisonSubResult { - scenarios_missing_in_baseline: vec!["bar".to_string()], - diffs: Vec::new(), - memory_details: None, - }, - memory: ComparisonSubResult { - scenarios_missing_in_baseline: vec![], - diffs: Vec::new(), - memory_details: None, - }, - }, - ) - .await - .unwrap(); - - // The bench runner will crash if it runs, making the test fail - server.mock_bench_runner.config.lock().unwrap().crash = true; - - // Post the webhook event - let client = reqwest::Client::default(); - post_webhook( - &client, - &server.base_url, - &server.config.webhook_secret, - webhook::pull_request_synchronized(), - "pull_request", - ) - .await; - - // Wait for our mock endpoints to have been called - tokio::time::timeout(Duration::from_secs(5), post_status.wait_until_satisfied()) - .await - .ok(); - - // Assert that the mocks were used and report any errors - mock_github.server.verify().await; -} - #[tokio::test] async fn test_pr_review_happy_path() { // Mock HTTP responses from GitHub