From 9454c456e20db492d788d62ec6e5fc981d1360db Mon Sep 17 00:00:00 2001
From: Joe Birr-Pixton <jpixton@gmail.com>
Date: Thu, 6 Nov 2025 13:12:56 +0000
Subject: [PATCH 1/4] Plumb in detailed results of memory tests

---
 ci-bench-runner/src/db.rs           | 11 +++++++++++
 ci-bench-runner/src/job/bench_pr.rs |  6 ++++++
 ci-bench-runner/src/job/mod.rs      |  8 ++++++++
 ci-bench-runner/src/test/mod.rs     |  4 ++++
 4 files changed, 29 insertions(+)
diff --git a/ci-bench-runner/src/db.rs b/ci-bench-runner/src/db.rs
index 45e27f2..62748ed 100644
--- a/ci-bench-runner/src/db.rs
+++ b/ci-bench-runner/src/db.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::ops::DerefMut;
 use std::sync::Arc;
 
@@ -10,6 +11,8 @@ use time::OffsetDateTime;
 use tokio::sync::Mutex;
 use uuid::Uuid;
 
+use crate::job::MemoryDetails;
+
 /// An enqueued GitHub event
 #[derive(Debug)]
 pub struct QueuedEvent {
@@ -92,6 +95,8 @@ pub struct ComparisonSubResult {
     pub diffs: Vec<ScenarioDiff>,
     /// Benchmark scenarios present in the candidate but missing in the baseline
     pub scenarios_missing_in_baseline: Vec<String>,
+    /// Detailed results for memory jobs (name -> (baseline, candidate))
+    pub(crate) memory_details: Option<HashMap<String, (MemoryDetails, MemoryDetails)>>,
 }
 
 /// A diff for a particular scenario, obtained by comparing benchmark results between two versions
@@ -487,10 +492,12 @@ impl Db {
             icount: ComparisonSubResult {
                 scenarios_missing_in_baseline: icount_scenarios_missing_in_baseline,
                 diffs: icount_diffs,
+                memory_details: None,
             },
             walltime: ComparisonSubResult {
                 scenarios_missing_in_baseline: walltime_scenarios_missing_in_baseline,
                 diffs: walltime_diffs,
+                memory_details: None,
             },
         }))
     }
@@ -703,10 +710,12 @@ mod test {
                 icount: ComparisonSubResult {
                     scenarios_missing_in_baseline: Vec::new(),
                     diffs: icount_diffs.clone(),
+                    memory_details: None,
                 },
                 walltime: ComparisonSubResult {
                     scenarios_missing_in_baseline: Vec::new(),
                     diffs: walltime_diffs.clone(),
+                    memory_details: None,
                 },
             },
         )
@@ -771,10 +780,12 @@ mod test {
                 icount: ComparisonSubResult {
                     diffs: diffs.clone(),
                     scenarios_missing_in_baseline: vec!["bar".to_string()],
+                    memory_details: None,
                 },
                 walltime: ComparisonSubResult {
                     diffs: Vec::new(),
                     scenarios_missing_in_baseline: vec!["baz".to_string()],
+                    memory_details: None,
                 },
             },
         )
diff --git a/ci-bench-runner/src/job/bench_pr.rs b/ci-bench-runner/src/job/bench_pr.rs
index f42fcc6..aade378 100644
--- a/ci-bench-runner/src/job/bench_pr.rs
+++ b/ci-bench-runner/src/job/bench_pr.rs
@@ -22,6 +22,7 @@ use crate::db::{BenchResult, ComparisonResult, ComparisonSubResult, ScenarioDiff
 use crate::event_queue::JobContext;
 use crate::github::api::{CommentEvent, PullRequestReviewEvent};
 use crate::github::{self, update_commit_status};
+use crate::job::MemoryDetails;
 use crate::runner::{write_logs_for_run, BenchRunner, Log};
 use crate::CommitIdentifier;
 
@@ -424,10 +425,12 @@ fn compare_refs(
         icount: ComparisonSubResult {
             diffs: icount_diffs,
             scenarios_missing_in_baseline: icount_missing,
+            memory_details: None,
         },
         walltime: ComparisonSubResult {
             diffs: walltime_diffs,
             scenarios_missing_in_baseline: walltime_missing,
+            memory_details: None,
         },
     })
 }
@@ -679,6 +682,8 @@ pub struct Diffs {
     negligible_diffs: Vec<ScenarioDiff>,
     /// Benchmark scenarios present in the candidate but missing in the baseline
     scenarios_missing_in_baseline: Vec<String>,
+    /// Detailed results for memory jobs
+    memory_details: Option<HashMap<String, (MemoryDetails, MemoryDetails)>>,
 }
 
 impl Diffs {
@@ -688,6 +693,7 @@ impl Diffs {
             significant_diffs,
             negligible_diffs,
             scenarios_missing_in_baseline: sub_result.scenarios_missing_in_baseline,
+            memory_details: sub_result.memory_details,
         }
     }
 }
diff --git a/ci-bench-runner/src/job/mod.rs b/ci-bench-runner/src/job/mod.rs
index 600d553..f7b831b 100644
--- a/ci-bench-runner/src/job/mod.rs
+++ b/ci-bench-runner/src/job/mod.rs
@@ -87,3 +87,11 @@ fn icounts_path(base: &Path) -> PathBuf {
 pub fn walltimes_path(base: &Path) -> PathBuf {
     base.join("results/walltimes.csv")
 }
+
+#[derive(Copy, Clone, Debug, Default)]
+pub(crate) struct MemoryDetails {
+    pub(crate) heap_total_bytes: u64,
+    pub(crate) heap_total_blocks: u64,
+    pub(crate) heap_peak_bytes: u64,
+    pub(crate) heap_peak_blocks: u64,
+}
diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs
index 81b9250..75756fd 100644
--- a/ci-bench-runner/src/test/mod.rs
+++ b/ci-bench-runner/src/test/mod.rs
@@ -476,10 +476,12 @@ async fn test_pr_synchronize_cached() {
                         significance_threshold: 0.35,
                         cachegrind_diff: Some("dummy cachegrind diff".to_string()),
                     }],
+                    memory_details: None,
                 },
                 walltime: ComparisonSubResult {
                     scenarios_missing_in_baseline: vec!["bar".to_string()],
                     diffs: Vec::new(),
+                    memory_details: None,
                 },
             },
         )
@@ -657,10 +659,12 @@ async fn test_get_cachegrind_diff() {
                         significance_threshold: 0.35,
                         cachegrind_diff: Some("dummy cachegrind diff".to_string()),
                     }],
+                    memory_details: None,
                 },
                 walltime: ComparisonSubResult {
                     scenarios_missing_in_baseline: vec!["bar".to_string()],
                     diffs: Vec::new(),
+                    memory_details: None,
                 },
             },
         )

From 88627e37befe5fe5ba843acbd1516b38817d8abb Mon Sep 17 00:00:00 2001
From: Joe Birr-Pixton <jpixton@gmail.com>
Date: Mon, 3 Nov 2025 11:01:02 +0000
Subject: [PATCH 2/4] Add memory results to PR report

---
 ci-bench-runner/migrations/1_add_memory.sql   |  2 +
 ci-bench-runner/src/db.rs                     | 76 +++++++++++++-
 ci-bench-runner/src/job/bench_pr.rs           | 65 +++++++++++-
 ci-bench-runner/src/job/mod.rs                | 98 +++++++++++++++++++
 ci-bench-runner/src/test/mod.rs               | 16 +++
 .../templates/comparison_success_comment.md   | 30 ++++++
 ci-bench-runner/templates/macros.md           | 19 ++++
 7 files changed, 299 insertions(+), 7 deletions(-)
 create mode 100644 ci-bench-runner/migrations/1_add_memory.sql

diff --git a/ci-bench-runner/migrations/1_add_memory.sql b/ci-bench-runner/migrations/1_add_memory.sql
new file mode 100644
index 0000000..ef0e091
--- /dev/null
+++ b/ci-bench-runner/migrations/1_add_memory.sql
@@ -0,0 +1,2 @@
+ALTER TABLE comparison_runs
+    ADD COLUMN memory_scenarios_missing_in_baseline TEXT;
diff --git a/ci-bench-runner/src/db.rs b/ci-bench-runner/src/db.rs
index 62748ed..b9b9da5 100644
--- a/ci-bench-runner/src/db.rs
+++ b/ci-bench-runner/src/db.rs
@@ -87,6 +87,8 @@ pub struct ComparisonResult {
     pub icount: ComparisonSubResult,
     /// Result for the walltime benchmarks
     pub walltime: ComparisonSubResult,
+    /// Result for the memory benchmarks
+    pub memory: ComparisonSubResult,
 }
 
 #[derive(Debug, Clone)]
@@ -128,12 +130,46 @@ impl ScenarioDiff {
     pub fn diff_ratio(&self) -> f64 {
         self.diff() / self.baseline_result
     }
+
+    pub(crate) fn baseline_memory(
+        &self,
+        details: &Option<HashMap<String, (MemoryDetails, MemoryDetails)>>,
+    ) -> String {
+        let Some(detail) = details.as_ref().and_then(|m| m.get(&self.scenario_name)) else {
+            return String::new();
+        };
+
+        detail.0.to_string()
+    }
+
+    pub(crate) fn candidate_memory(
+        &self,
+        details: &Option<HashMap<String, (MemoryDetails, MemoryDetails)>>,
+    ) -> String {
+        let Some(detail) = details.as_ref().and_then(|m| m.get(&self.scenario_name)) else {
+            return String::new();
+        };
+
+        detail.1.to_string()
+    }
+
+    pub(crate) fn memory_diff(
+        &self,
+        details: &Option<HashMap<String, (MemoryDetails, MemoryDetails)>>,
+    ) -> String {
+        let Some(detail) = details.as_ref().and_then(|m| m.get(&self.scenario_name)) else {
+            return String::new();
+        };
+
+        detail.0.diff_string(detail.1)
+    }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum ScenarioKind {
     Icount = 0,
     Walltime = 1,
+    Memory = 2,
 }
 
 impl TryFrom<i64> for ScenarioKind {
@@ -143,6 +179,7 @@ impl TryFrom<i64> for ScenarioKind {
         match value {
             0 => Ok(Self::Icount),
             1 => Ok(Self::Walltime),
+            2 => Ok(Self::Memory),
             kind => bail!("invalid scenario kind: {kind}"),
         }
     }
@@ -387,6 +424,7 @@ impl Db {
         let icount_scenarios_missing = to_json_array(&result.icount.scenarios_missing_in_baseline);
         let walltime_scenarios_missing =
             to_json_array(&result.walltime.scenarios_missing_in_baseline);
+        let memory_scenarios_missing = to_json_array(&result.memory.scenarios_missing_in_baseline);
 
         let mut conn = self.sqlite.lock().await;
         let id = conn.transaction(|t| {
@@ -395,7 +433,7 @@ impl Db {
                 let id = Uuid::new_v4();
                 let now = OffsetDateTime::now_utc();
                 sqlx::query(
-                    "INSERT INTO comparison_runs (id, created_utc, baseline_commit, candidate_commit, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline) VALUES (?, ?, ?, ?, ?, ?)",
+                    "INSERT INTO comparison_runs (id, created_utc, baseline_commit, candidate_commit, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline, memory_scenarios_missing_in_baseline) VALUES (?, ?, ?, ?, ?, ?, ?)",
                 )
                     .bind(id.as_bytes().as_slice())
                     .bind(now)
@@ -403,11 +441,12 @@ impl Db {
                     .bind(candidate_commit)
                     .bind(icount_scenarios_missing)
                     .bind(walltime_scenarios_missing)
+                    .bind(memory_scenarios_missing)
                     .execute(t.deref_mut())
                     .await?;
 
                 // Insert the associated diffs
-                for diff in result.icount.diffs.into_iter().chain(result.walltime.diffs) {
+                for diff in result.icount.diffs.into_iter().chain(result.walltime.diffs).chain(result.memory.diffs) {
                     sqlx::query(
                         "INSERT INTO scenario_diffs (comparison_run_id, scenario_name, scenario_kind, baseline_result, candidate_result, significance_threshold, cachegrind_diff) VALUES (?, ?, ?, ?, ?, ?, ?)",
                     )
@@ -447,7 +486,7 @@ impl Db {
         let mut conn = self.sqlite.lock().await;
         let row = sqlx::query(
             r"
-            SELECT id, created_utc, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline
+            SELECT id, created_utc, icount_scenarios_missing_in_baseline, walltime_scenarios_missing_in_baseline, memory_scenarios_missing_in_baseline
             FROM comparison_runs
             WHERE baseline_commit = ? AND candidate_commit = ?",
         )
@@ -465,6 +504,8 @@ impl Db {
             from_json_array(row.try_get("icount_scenarios_missing_in_baseline")?)?;
         let walltime_scenarios_missing_in_baseline =
             from_json_array(row.try_get("walltime_scenarios_missing_in_baseline")?)?;
+        let memory_scenarios_missing_in_baseline =
+            from_json_array(row.try_get("memory_scenarios_missing_in_baseline")?)?;
 
         let icount_diffs = sqlx::query_as(
             r"
@@ -483,11 +524,22 @@ impl Db {
             FROM scenario_diffs
             WHERE comparison_run_id = ? AND scenario_kind = ?",
         )
-        .bind(id)
+        .bind(&id)
         .bind(ScenarioKind::Walltime as i64)
         .fetch_all(conn.deref_mut())
         .await?;
 
+        let memory_diffs = sqlx::query_as(
+            r"
+            SELECT *
+            FROM scenario_diffs
+            WHERE comparison_run_id = ? AND scenario_kind = ?",
+        )
+        .bind(id)
+        .bind(ScenarioKind::Memory as i64)
+        .fetch_all(conn.deref_mut())
+        .await?;
+
         Ok(Some(ComparisonResult {
             icount: ComparisonSubResult {
                 scenarios_missing_in_baseline: icount_scenarios_missing_in_baseline,
@@ -499,6 +551,11 @@ impl Db {
                 diffs: walltime_diffs,
                 memory_details: None,
             },
+            memory: ComparisonSubResult {
+                scenarios_missing_in_baseline: memory_scenarios_missing_in_baseline,
+                diffs: memory_diffs,
+                memory_details: None,
+            },
         }))
     }
 
@@ -702,6 +759,7 @@ mod test {
         let candidate_commit = "7faf240afbdbb4e76c47ff5f3f049c7a78c9c843";
         let icount_diffs = make_diffs(ScenarioKind::Icount);
         let walltime_diffs = make_diffs(ScenarioKind::Walltime);
+        let memory_diffs = make_diffs(ScenarioKind::Memory);
 
         db.store_comparison_result(
             baseline_commit.to_string(),
@@ -717,6 +775,11 @@ mod test {
                     diffs: walltime_diffs.clone(),
                     memory_details: None,
                 },
+                memory: ComparisonSubResult {
+                    scenarios_missing_in_baseline: Vec::new(),
+                    diffs: memory_diffs.clone(),
+                    memory_details: None,
+                },
             },
         )
         .await?;
@@ -787,6 +850,11 @@ mod test {
                     scenarios_missing_in_baseline: vec!["baz".to_string()],
                     memory_details: None,
                 },
+                memory: ComparisonSubResult {
+                    diffs: Vec::new(),
+                    scenarios_missing_in_baseline: vec!["quux".to_string()],
+                    memory_details: None,
+                },
             },
         )
         .await?;
diff --git a/ci-bench-runner/src/job/bench_pr.rs b/ci-bench-runner/src/job/bench_pr.rs
index aade378..1941091 100644
--- a/ci-bench-runner/src/job/bench_pr.rs
+++ b/ci-bench-runner/src/job/bench_pr.rs
@@ -17,7 +17,10 @@ use tempfile::TempDir;
 use time::{Duration, OffsetDateTime};
 use tracing::{error, trace};
 
-use super::{icounts_path, read_icount_results, read_walltime_results, walltimes_path};
+use super::{
+    icounts_path, maybe_read_memory_results, memory_path, read_icount_results,
+    read_walltime_results, walltimes_path,
+};
 use crate::db::{BenchResult, ComparisonResult, ComparisonSubResult, ScenarioDiff, ScenarioKind};
 use crate::event_queue::JobContext;
 use crate::github::api::{CommentEvent, PullRequestReviewEvent};
@@ -298,13 +301,20 @@ async fn bench_pr_and_cache_results(
     let icount_significance_thresholds = calculate_significance_thresholds(icount_results);
 
     let walltime_results = historical_results
-        .into_iter()
-        .filter(|r| r.scenario_kind == ScenarioKind::Walltime);
+        .iter()
+        .filter(|r| r.scenario_kind == ScenarioKind::Walltime)
+        .cloned();
     let walltime_significance_thresholds = calculate_significance_thresholds(walltime_results);
 
+    let memory_results = historical_results
+        .into_iter()
+        .filter(|r| r.scenario_kind == ScenarioKind::Memory);
+    let memory_significance_thresholds = calculate_significance_thresholds(memory_results);
+
     let significance_thresholds = SignificanceThresholds {
         icount: icount_significance_thresholds,
         walltime: walltime_significance_thresholds,
+        memory: memory_significance_thresholds,
     };
 
     let job_output_dir = ctx.job_output_dir.clone();
@@ -421,6 +431,35 @@ fn compare_refs(
         MINIMUM_WALLTIME_NOISE_THRESHOLD,
     )?;
 
+    let memory_baseline = maybe_read_memory_results(&memory_path(&job_output_path.join("base")))?;
+    let memory_candidate =
+        maybe_read_memory_results(&memory_path(&job_output_path.join("candidate")))?;
+
+    let (memory_diffs, memory_missing) = compare_results(
+        job_output_path,
+        &distill_memory(&memory_baseline),
+        &distill_memory(&memory_candidate),
+        &significance_thresholds.memory,
+        ScenarioKind::Memory,
+        DEFAULT_MEMORY_NOISE_THRESHOLD,
+        MINIMUM_MEMORY_NOISE_THRESHOLD,
+    )?;
+
+    let memory_details = Some(
+        memory_diffs
+            .iter()
+            .map(|diff| {
+                (
+                    diff.scenario_name.clone(),
+                    (
+                        *memory_baseline.get(&diff.scenario_name).unwrap(),
+                        *memory_candidate.get(&diff.scenario_name).unwrap(),
+                    ),
+                )
+            })
+            .collect(),
+    );
+
     Ok(ComparisonResult {
         icount: ComparisonSubResult {
             diffs: icount_diffs,
@@ -432,9 +471,23 @@ fn compare_refs(
             scenarios_missing_in_baseline: walltime_missing,
             memory_details: None,
         },
+        memory: ComparisonSubResult {
+            diffs: memory_diffs,
+            scenarios_missing_in_baseline: memory_missing,
+            memory_details,
+        },
     })
 }
 
+/// Controls how the comparison is done between MemoryDetails, by distilling a
+/// single f64 from it.
+fn distill_memory(input: &HashMap<String, MemoryDetails>) -> HashMap<String, f64> {
+    input
+        .iter()
+        .map(|(scenario, detail)| (scenario.clone(), detail.comparison_basis() as f64))
+        .collect()
+}
+
 /// Returns the calculated significance threshold for each scenario
 ///
 /// Scenarios with less than 10 results will be skipped. It is the responsibility of the caller to
@@ -481,6 +534,7 @@ pub fn calculate_significance_thresholds(
 struct SignificanceThresholds {
     icount: HashMap<String, f64>,
     walltime: HashMap<String, f64>,
+    memory: HashMap<String, f64>,
 }
 
 #[derive(Debug, Clone)]
@@ -513,6 +567,7 @@ fn markdown_comment(
             cachegrind_diff_url: diff_url,
             icount: Diffs::from_sub_result(bench_results.icount),
             walltime: Diffs::from_sub_result(bench_results.walltime),
+            memory: Diffs::from_sub_result(bench_results.memory),
             branches,
             bencher_project_id,
             common_time_unit: |x, y| common_time_unit(*x, *y),
@@ -665,6 +720,8 @@ pub struct ComparisonSuccessComment<'a> {
     icount: Diffs,
     /// Diffs for the walltime benchmarks
     walltime: Diffs,
+    /// Diffs for the memory benchmarks
+    memory: Diffs,
     /// The base url to obtain cachegrind diffs
     cachegrind_diff_url: &'a str,
     /// Information about the branches that were compared
@@ -737,6 +794,8 @@ static DEFAULT_ICOUNT_NOISE_THRESHOLD: f64 = 0.002; // 0.2%
 static MINIMUM_ICOUNT_NOISE_THRESHOLD: f64 = 0.002; // 0.2%
 static DEFAULT_WALLTIME_NOISE_THRESHOLD: f64 = 0.05; // 5%
 static MINIMUM_WALLTIME_NOISE_THRESHOLD: f64 = 0.01; // 1%
+static DEFAULT_MEMORY_NOISE_THRESHOLD: f64 = 0.001; // 0.1%
+static MINIMUM_MEMORY_NOISE_THRESHOLD: f64 = 0.001; // 0.1%
 static APP_NAME: &str = "rustls-benchmarking";
 
 /// Functions inside this module will be available as askama filters
diff --git a/ci-bench-runner/src/job/mod.rs b/ci-bench-runner/src/job/mod.rs
index f7b831b..caebdf4 100644
--- a/ci-bench-runner/src/job/mod.rs
+++ b/ci-bench-runner/src/job/mod.rs
@@ -80,6 +80,46 @@ pub fn read_walltime_results(path: &Path) -> anyhow::Result<HashMap<String, f64>
     Ok(results)
 }
 
+/// Reads the (benchmark, result) pairs from previous CSV output
+pub fn maybe_read_memory_results(path: &Path) -> anyhow::Result<HashMap<String, MemoryDetails>> {
+    trace!(
+        path = path.display().to_string(),
+        "reading memory results from CSV file"
+    );
+
+    let mut results = HashMap::new();
+    let results_file = match File::open(path) {
+        Ok(f) => f,
+        Err(e) => {
+            trace!("failed to open file {e:?} (ignoring)");
+            return Ok(results);
+        }
+    };
+    for line in BufReader::new(results_file).lines() {
+        let line = line.context("failed to read line from CSV file")?;
+        let line = line.trim();
+        let mut parts = line.split(',');
+
+        let scenario = parts.next().ok_or(anyhow!("empty line"))?.to_string();
+        let counts: Result<Vec<_>, _> = parts.map(|s| s.parse::<u64>()).collect();
+        let details = match counts.context("invalid u64 in row")?.as_slice() {
+            [heap_total_bytes, heap_total_blocks, heap_peak_bytes, heap_peak_blocks] => {
+                MemoryDetails {
+                    heap_total_bytes: *heap_total_bytes,
+                    heap_total_blocks: *heap_total_blocks,
+                    heap_peak_bytes: *heap_peak_bytes,
+                    heap_peak_blocks: *heap_peak_blocks,
+                }
+            }
+            _ => bail!("incorrect number of measurements for memory results row"),
+        };
+
+        results.insert(scenario, details);
+    }
+
+    Ok(results)
+}
+
 fn icounts_path(base: &Path) -> PathBuf {
     base.join("results/icounts.csv")
 }
@@ -88,6 +128,10 @@ pub fn walltimes_path(base: &Path) -> PathBuf {
     base.join("results/walltimes.csv")
 }
 
+pub fn memory_path(base: &Path) -> PathBuf {
+    base.join("results/memory.csv")
+}
+
 #[derive(Copy, Clone, Debug, Default)]
 pub(crate) struct MemoryDetails {
     pub(crate) heap_total_bytes: u64,
@@ -95,3 +139,57 @@ pub(crate) struct MemoryDetails {
     pub(crate) heap_peak_bytes: u64,
     pub(crate) heap_peak_blocks: u64,
 }
+
+impl MemoryDetails {
+    /// Which field we use to compare two `MemoryDetails`
+    fn comparison_basis(&self) -> u64 {
+        self.heap_total_bytes
+    }
+
+    /// Returns a string, like the `Display` impl, that illustrates
+    /// the difference between the baseline `self` and `candidate`.
+    pub(crate) fn diff_string(&self, candidate: MemoryDetails) -> String {
+        let heap_total_bytes = candidate.heap_total_bytes as f64 - self.heap_total_bytes as f64;
+        let heap_total_blocks = candidate.heap_total_blocks as f64 - self.heap_total_blocks as f64;
+        let heap_peak_bytes = candidate.heap_peak_bytes as f64 - self.heap_peak_bytes as f64;
+        let heap_peak_blocks = candidate.heap_peak_blocks as f64 - self.heap_peak_blocks as f64;
+
+        format!(
+            "∑ {heap_total_bytes:+}B  {heap_total_blocks:+}a<br/>🔝 {heap_peak_bytes:+}B  {heap_peak_blocks:+}a"
+        )
+    }
+}
+
+impl std::fmt::Display for MemoryDetails {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "∑ {}B  {}a<br/>🔝 {}B  {}a",
+            self.heap_total_bytes,
+            self.heap_total_blocks,
+            self.heap_peak_bytes,
+            self.heap_peak_blocks
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn test_diff() {
+        let start = MemoryDetails {
+            heap_total_bytes: 10,
+            heap_total_blocks: 0,
+            heap_peak_bytes: 20,
+            heap_peak_blocks: 30,
+        };
+        let end = MemoryDetails {
+            heap_total_bytes: 10,
+            heap_total_blocks: 20,
+            heap_peak_bytes: 15,
+            heap_peak_blocks: 10,
+        };
+        assert_eq!(start.diff_string(end), "∑ +0B  +20a<br/>🔝 -5B  -20a");
+    }
+}
diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs
index 75756fd..414982c 100644
--- a/ci-bench-runner/src/test/mod.rs
+++ b/ci-bench-runner/src/test/mod.rs
@@ -146,6 +146,12 @@ impl BenchRunner for MockBenchRunner {
             "fake_walltime_bench,12345,12432,12211",
         )?;
 
+        // Fake memory
+        fs::write(
+            results_dir.join("memory.csv"),
+            "fake_memory_bench,11197667,2653,599466,199",
+        )?;
+
         // Notify any watchers of this call
         self.runs_tx
             .send(MockBenchRun {
@@ -483,6 +489,11 @@ async fn test_pr_synchronize_cached() {
                     diffs: Vec::new(),
                     memory_details: None,
                 },
+                memory: ComparisonSubResult {
+                    scenarios_missing_in_baseline: vec![],
+                    diffs: Vec::new(),
+                    memory_details: None,
+                },
             },
         )
         .await
@@ -666,6 +677,11 @@ async fn test_get_cachegrind_diff() {
                     diffs: Vec::new(),
                     memory_details: None,
                 },
+                memory: ComparisonSubResult {
+                    diffs: vec![],
+                    scenarios_missing_in_baseline: vec![],
+                    memory_details: None,
+                },
             },
         )
         .await
diff --git a/ci-bench-runner/templates/comparison_success_comment.md b/ci-bench-runner/templates/comparison_success_comment.md
index e6d7fa1..7b1bd0a 100644
--- a/ci-bench-runner/templates/comparison_success_comment.md
+++ b/ci-bench-runner/templates/comparison_success_comment.md
@@ -82,6 +82,36 @@ _There are no other wall-time count differences_
 
 {% endif %}
 
+## Memory usage
+
+{% call macros::missing_scenarios(memory.scenarios_missing_in_baseline) %}
+
+Key:
+
+- ∑: sum usage for entire benchmark run
+- 🔝: peak usage
+- B: bytes
+- a: allocations
+
+#### Significant differences
+
+{% if memory.significant_diffs.is_empty() %}
+
+_There are no significant memory usage differences_
+
+{% else %}
+
+⚠️ There are significant memory usage differences
+
+<details>
+<summary>Click to expand</summary>
+
+{% call macros::memory_table(memory.significant_diffs, memory.memory_details, true) %}
+
+</details>
+
+{% endif %}
+
 ## Additional information
 
 {% if let Some(project_id) = bencher_project_id %}
diff --git a/ci-bench-runner/templates/macros.md b/ci-bench-runner/templates/macros.md
index 0e57c4d..05105f9 100644
--- a/ci-bench-runner/templates/macros.md
+++ b/ci-bench-runner/templates/macros.md
@@ -54,6 +54,25 @@ The following benchmark scenarios are present in the candidate but not in the ba
 {%- endmacro -%}
 
 
+{%- macro memory_table(diffs, details, use_emoji) -%}
+
+| Scenario | Baseline | Candidate | Diff | Threshold |
+| --- | ---: | ---: | ---: | ---: |
+{% for diff in diffs %}
+{%- let emoji -%}
+{%- if use_emoji && diff.diff() > 0.0 -%}
+{%- let emoji = "⚠️ " -%}
+{%- else if use_emoji && diff.diff() < 0.0 -%}
+{%- let emoji = "✅ " -%}
+{%- else -%}
+{%- let emoji = "" -%}
+{%- endif -%}
+| {{ diff.scenario_name }} | {{ diff.baseline_memory(details) }} | {{ diff.candidate_memory(details) }} | {{emoji}}{{ diff.memory_diff(details) }} ({{ "{:.2}%"|format(diff.diff_ratio() * 100.0) }}) | {{ "{:.2}%"|format(diff.significance_threshold * 100.0) }} |
+{% endfor %}
+
+{%- endmacro -%}
+
+
 {%- macro checkout_details(branches) -%}
 
 Checkout details:

From f72f89326e5276af98db69935bd0a9a84f511ccc Mon Sep 17 00:00:00 2001
From: Joe Birr-Pixton <jpixton@gmail.com>
Date: Thu, 6 Nov 2025 16:31:42 +0000
Subject: [PATCH 3/4] Add memory results to bencher.dev

---
 ci-bench-runner/src/bencher_dev.rs    | 68 ++++++++++++++++++++++++++-
 ci-bench-runner/src/job/bench_main.rs | 15 +++++-
 ci-bench-runner/src/test/mod.rs       |  2 +-
 3 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/ci-bench-runner/src/bencher_dev.rs b/ci-bench-runner/src/bencher_dev.rs
index de11fcc..9665a57 100644
--- a/ci-bench-runner/src/bencher_dev.rs
+++ b/ci-bench-runner/src/bencher_dev.rs
@@ -10,6 +10,7 @@ use bencher_client::{
 };
 use tracing::error;
 
+use crate::job::MemoryDetails;
 use crate::BencherConfig;
 
 /// The Bencher.dev client along with its configuration
@@ -33,6 +34,7 @@ impl BencherDev {
     }
 
     /// Sends the icount and walltime results to bencher.dev for visualization
+    #[expect(clippy::too_many_arguments)]
     pub async fn track_results(
         &self,
         branch: &str,
@@ -41,9 +43,14 @@ impl BencherDev {
         end_time: DateTime,
         icounts: HashMap<String, f64>,
         walltimes: HashMap<String, f64>,
+        memory: HashMap<String, MemoryDetails>,
     ) -> anyhow::Result<()> {
         let mut bmf_map = results_to_bmf(icounts, INSTRUCTIONS_SLUG_STR.parse().unwrap());
-        bmf_map.extend(results_to_bmf(walltimes, LATENCY_SLUG_STR.parse().unwrap()));
+        merge(
+            &mut bmf_map,
+            results_to_bmf(walltimes, LATENCY_SLUG_STR.parse().unwrap()),
+        );
+        memory_results_to_bmf(&mut bmf_map, memory);
 
         let testbed = self.config.testbed_id.clone();
         let report = JsonNewReport {
@@ -78,6 +85,49 @@ impl BencherDev {
     }
 }
 
+fn memory_results_to_bmf(out: &mut JsonResultsMap, results: HashMap<String, MemoryDetails>) {
+    merge(
+        out,
+        results_to_bmf(
+            results
+                .iter()
+                .map(|(k, v)| (k.clone(), v.heap_peak_blocks as f64))
+                .collect(),
+            "heap-peak-blocks".parse().unwrap(),
+        ),
+    );
+    merge(
+        out,
+        results_to_bmf(
+            results
+                .iter()
+                .map(|(k, v)| (k.clone(), v.heap_peak_bytes as f64))
+                .collect(),
+            "heap-peak-bytes".parse().unwrap(),
+        ),
+    );
+    merge(
+        out,
+        results_to_bmf(
+            results
+                .iter()
+                .map(|(k, v)| (k.clone(), v.heap_total_blocks as f64))
+                .collect(),
+            "heap-total-blocks".parse().unwrap(),
+        ),
+    );
+    merge(
+        out,
+        results_to_bmf(
+            results
+                .iter()
+                .map(|(k, v)| (k.clone(), v.heap_total_bytes as f64))
+                .collect(),
+            "heap-total-bytes".parse().unwrap(),
+        ),
+    );
+}
+
 /// Converts the instruction counts map into Bencher Metric Format (BMF)
 ///
 /// See <https://bencher.dev/docs/explanation/adapters#-json> for details
@@ -106,3 +156,19 @@ fn results_to_bmf(results: HashMap<String, f64>, measure: Measure) -> JsonResult
         })
         .collect()
 }
+
+/// Merges `src` into `target.
+///
+/// With both `src` and `target` in the form:
+///
+/// ```json
+/// { "benchmark-name": { "measure": { ... }}}
+/// ```
+///
+/// Ensures multiple measures appear in the end result.  Does
+/// not require that "benchmark-name" exists in `target`.
+fn merge(target: &mut JsonResultsMap, src: JsonResultsMap) {
+    for (bench, contents) in src.into_iter() {
+        target.entry(bench).or_default().extend(contents);
+    }
+}
diff --git a/ci-bench-runner/src/job/bench_main.rs b/ci-bench-runner/src/job/bench_main.rs
index 97f2610..f1b09fd 100644
--- a/ci-bench-runner/src/job/bench_main.rs
+++ b/ci-bench-runner/src/job/bench_main.rs
@@ -5,7 +5,10 @@ use bencher_client::json::DateTime;
 use tempfile::TempDir;
 use tracing::{trace, warn};
 
-use super::{icounts_path, read_icount_results, read_walltime_results, walltimes_path};
+use super::{
+    icounts_path, maybe_read_memory_results, memory_path, read_icount_results,
+    read_walltime_results, walltimes_path,
+};
 use crate::db::ScenarioKind;
 use crate::event_queue::JobContext;
 use crate::github::api::PushEvent;
@@ -79,6 +82,8 @@ pub async fn bench_main(ctx: JobContext<'_>) -> anyhow::Result<()> {
         .context("failed to read instruction counts from file")?;
     let walltimes = read_walltime_results(&walltimes_path(&ctx.job_output_dir))
         .context("failed to read walltimes from file")?;
+    let memory = maybe_read_memory_results(&memory_path(&ctx.job_output_dir))
+        .context("failed to read memory results from file")?;
 
     // Persist results in the DB and in bencher.dev
     let results = icounts
@@ -89,6 +94,13 @@ pub async fn bench_main(ctx: JobContext<'_>) -> anyhow::Result<()> {
                 .iter()
                 .map(|(scenario, result)| (scenario.clone(), ScenarioKind::Walltime, *result)),
         )
+        .chain(memory.iter().map(|(scenario, result)| {
+            (
+                scenario.clone(),
+                ScenarioKind::Memory,
+                result.comparison_basis() as f64,
+            )
+        }))
         .collect();
     ctx.db
         .store_run_results(results)
@@ -104,6 +116,7 @@ pub async fn bench_main(ctx: JobContext<'_>) -> anyhow::Result<()> {
                 benchmark_run_end,
                 icounts,
                 walltimes,
+                memory,
             )
             .await
             .context("failed to send results to bencher.dev");
diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs
index 414982c..f191015 100644
--- a/ci-bench-runner/src/test/mod.rs
+++ b/ci-bench-runner/src/test/mod.rs
@@ -645,7 +645,7 @@ async fn test_push_happy_path() {
         .await
         .unwrap()
         .len();
-    assert_eq!(result_count, 4);
+    assert_eq!(result_count, 6);
 }
 
 #[tokio::test]

From 294f85644d69c5a4c63be65c231b321bb700ac9a Mon Sep 17 00:00:00 2001
From: Joe Birr-Pixton <jpixton@gmail.com>
Date: Wed, 12 Nov 2025 09:11:59 +0000
Subject: [PATCH 4/4] Disable PR processing from cache

This didn't work for memory results, because the "headline" figure
only is persisted in the database (to match the assumption that
any measure is a f64, whereas memory measurements are more structured
than that.)

This means force-pushes to PRs will re-run and re-measure the benchmarks,
naturally that is slower than _not_ doing that, but also takes about two
minutes currently.
---
 ci-bench-runner/src/job/bench_pr.rs | 20 ++-------
 ci-bench-runner/src/test/mod.rs     | 67 -----------------------------
 2 files changed, 4 insertions(+), 83 deletions(-)

diff --git a/ci-bench-runner/src/job/bench_pr.rs b/ci-bench-runner/src/job/bench_pr.rs
index 1941091..7c6bf58 100644
--- a/ci-bench-runner/src/job/bench_pr.rs
+++ b/ci-bench-runner/src/job/bench_pr.rs
@@ -211,22 +211,10 @@ pub async fn bench_pr(
     )
     .await;
 
-    let cached_result = ctx
-        .db
-        .comparison_result(
-            &branches.baseline.commit_sha,
-            &branches.candidate.commit_sha,
-        )
-        .await?;
-    let result = match cached_result {
-        Some(result) => Ok(result),
-        None => {
-            let mut logs = BenchPrLogs::default();
-            bench_pr_and_cache_results(&ctx, branches.clone(), &mut logs)
-                .await
-                .map_err(|error| BenchPrError { error, logs })
-        }
-    };
+    let mut logs = BenchPrLogs::default();
+    let result = bench_pr_and_cache_results(&ctx, branches.clone(), &mut logs)
+        .await
+        .map_err(|error| BenchPrError { error, logs });
 
     let cachegrind_diff_url = format!(
         "{}/comparisons/{}:{}/cachegrind-diff",
diff --git a/ci-bench-runner/src/test/mod.rs b/ci-bench-runner/src/test/mod.rs
index f191015..b27ed3f 100644
--- a/ci-bench-runner/src/test/mod.rs
+++ b/ci-bench-runner/src/test/mod.rs
@@ -455,73 +455,6 @@ async fn test_pr_synchronize_happy_path() {
     mock_github.server.verify().await;
 }
 
-#[tokio::test]
-async fn test_pr_synchronize_cached() {
-    // Mock HTTP responses from GitHub
-    let mock_github = MockGitHub::start().await;
-    let _post_comment = mock_github.mock_post_comment().await;
-    let post_status = mock_github.mock_post_status().await;
-
-    // Run the job server
-    let server = TestServer::start(&mock_github).await;
-
-    // Ensure the DB already has a stored comparison result for the provided commit pair
-    server
-        .db
-        .store_comparison_result(
-            "7edbfb999b352aa09fe669e9103d8155d7e7d890".to_string(),
-            "b0b69e925b2c9c6187cb16f361dd36e156f8e097".to_string(),
-            ComparisonResult {
-                icount: ComparisonSubResult {
-                    scenarios_missing_in_baseline: Vec::new(),
-                    diffs: vec![ScenarioDiff {
-                        scenario_name: "foo".to_string(),
-                        scenario_kind: ScenarioKind::Icount,
-                        baseline_result: 1000.0,
-                        candidate_result: 1001.0,
-                        significance_threshold: 0.35,
-                        cachegrind_diff: Some("dummy cachegrind diff".to_string()),
-                    }],
-                    memory_details: None,
-                },
-                walltime: ComparisonSubResult {
-                    scenarios_missing_in_baseline: vec!["bar".to_string()],
-                    diffs: Vec::new(),
-                    memory_details: None,
-                },
-                memory: ComparisonSubResult {
-                    scenarios_missing_in_baseline: vec![],
-                    diffs: Vec::new(),
-                    memory_details: None,
-                },
-            },
-        )
-        .await
-        .unwrap();
-
-    // The bench runner will crash if it runs, making the test fail
-    server.mock_bench_runner.config.lock().unwrap().crash = true;
-
-    // Post the webhook event
-    let client = reqwest::Client::default();
-    post_webhook(
-        &client,
-        &server.base_url,
-        &server.config.webhook_secret,
-        webhook::pull_request_synchronized(),
-        "pull_request",
-    )
-    .await;
-
-    // Wait for our mock endpoints to have been called
-    tokio::time::timeout(Duration::from_secs(5), post_status.wait_until_satisfied())
-        .await
-        .ok();
-
-    // Assert that the mocks were used and report any errors
-    mock_github.server.verify().await;
-}
-
 #[tokio::test]
 async fn test_pr_review_happy_path() {
     // Mock HTTP responses from GitHub