From 715cee4b93cae1fa2f0f86ff0f1ccf4e2fb8b2da Mon Sep 17 00:00:00 2001
From: Peng Ding <pding.dp@foxmail.com>
Date: Mon, 18 May 2026 07:02:36 -0500
Subject: [PATCH] feat(bench): expose min/max/stddev/P95 in benchmark reports
 (closes #81)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the single "Spread" tooltip column with four explicit tail-latency
columns (Min, Max, StdDev, P95) in all HTML benchmark tables — both the
zerodep vs. reference comparison tables and the standalone tables.

Key changes:
- Rename _spread_cell() → _tail_cells() which now renders four <td> elements
  instead of one, so min/max/stddev/P95 are always visible without hovering
- Use a two-row <thead> with colspan grouping headers ("zerodep tail latency"
  / "Ref tail latency") to keep the wider table readable
- Wrap all tables in <div class="table-wrap"> (overflow-x: auto) so the page
  stays usable on narrow viewports
- P95 is computed from stats.data (nearest-rank method) when the array is
  present and non-empty; otherwise the cell shows "—" (backward compatible
  with older JSON that omits the data array)
- Both _generate_html() and _generate_module_page() updated consistently
---
 _scripts/generate_bench_report.py | 220 ++++++++++++++++++++++++++----
 1 file changed, 197 insertions(+), 23 deletions(-)
diff --git a/_scripts/generate_bench_report.py b/_scripts/generate_bench_report.py
index 43c33b8..c82eb7c 100644
--- a/_scripts/generate_bench_report.py
+++ b/_scripts/generate_bench_report.py
@@ -10,6 +10,7 @@
 from __future__ import annotations
 
 import json
+import math
 import re
 import sys
 from collections import defaultdict
@@ -128,16 +129,31 @@ def _parse_benchmarks(data: dict) -> dict:
 
         is_zd = _is_zerodep(test_method)
 
+        # Calculate P95 from raw data if available
+        raw_data = b["stats"].get("data")
+        if raw_data and len(raw_data) > 0:
+            sorted_data = sorted(raw_data)
+            p95_idx = math.ceil(0.95 * len(sorted_data)) - 1
+            p95 = sorted_data[p95_idx]
+        else:
+            p95 = None
+
+        mean = b["stats"]["mean"]
+        stddev = b["stats"].get("stddev", 0)
+        cv = (stddev / mean * 100) if mean > 0 else 0.0
+
         entry = {
             "method": test_method,
             "is_zerodep": is_zd,
             "label": "zerodep" if is_zd else _ref_display_name(test_method),
-            "mean": b["stats"]["mean"],
+            "mean": mean,
             "ops": b["stats"]["ops"],
-            "stddev": b["stats"].get("stddev", 0),
-            "min": b["stats"].get("min", b["stats"]["mean"]),
-            "max": b["stats"].get("max", b["stats"]["mean"]),
+            "stddev": stddev,
+            "min": b["stats"].get("min", mean),
+            "max": b["stats"].get("max", mean),
             "rounds": b["stats"].get("rounds", 0),
+            "p95": p95,
+            "cv": cv,
         }
 
         modules[module][operation].append(entry)
@@ -224,6 +240,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
                             "variant": e["method"].removeprefix("test_"),
                             "mean": e["mean"],
                             "ops": e["ops"],
+                            "stddev": e["stddev"],
+                            "min": e["min"],
+                            "max": e["max"],
+                            "rounds": e["rounds"],
+                            "p95": e["p95"],
+                            "cv": e["cv"],
                         }
                     )
                 continue
@@ -237,6 +259,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
                             "variant": e["label"],
                             "mean": e["mean"],
                             "ops": e["ops"],
+                            "stddev": e["stddev"],
+                            "min": e["min"],
+                            "max": e["max"],
+                            "rounds": e["rounds"],
+                            "p95": e["p95"],
+                            "cv": e["cv"],
                         }
                     )
                 continue
@@ -276,9 +304,21 @@ def _build_comparisons(modules: dict) -> list[dict]:
                                     "zd_variant": zd_variant,
                                     "zd_mean": zd["mean"],
                                     "zd_ops": zd["ops"],
+                                    "zd_stddev": zd["stddev"],
+                                    "zd_min": zd["min"],
+                                    "zd_max": zd["max"],
+                                    "zd_rounds": zd["rounds"],
+                                    "zd_p95": zd["p95"],
+                                    "zd_cv": zd["cv"],
                                     "ref_label": ref["label"],
                                     "ref_mean": ref["mean"],
                                     "ref_ops": ref["ops"],
+                                    "ref_stddev": ref["stddev"],
+                                    "ref_min": ref["min"],
+                                    "ref_max": ref["max"],
+                                    "ref_rounds": ref["rounds"],
+                                    "ref_p95": ref["p95"],
+                                    "ref_cv": ref["cv"],
                                     "ratio": ratio,
                                 }
                             )
@@ -292,6 +332,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
                                 "variant": e["method"].removeprefix("test_"),
                                 "mean": e["mean"],
                                 "ops": e["ops"],
+                                "stddev": e["stddev"],
+                                "min": e["min"],
+                                "max": e["max"],
+                                "rounds": e["rounds"],
+                                "p95": e["p95"],
+                                "cv": e["cv"],
                             }
                         )
 
@@ -304,6 +350,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
                                 "variant": e["label"],
                                 "mean": e["mean"],
                                 "ops": e["ops"],
+                                "stddev": e["stddev"],
+                                "min": e["min"],
+                                "max": e["max"],
+                                "rounds": e["rounds"],
+                                "p95": e["p95"],
+                                "cv": e["cv"],
                             }
                         )
             else:
@@ -324,9 +376,21 @@ def _build_comparisons(modules: dict) -> list[dict]:
                                 "zd_variant": zd_variant,
                                 "zd_mean": zd["mean"],
                                 "zd_ops": zd["ops"],
+                                "zd_stddev": zd["stddev"],
+                                "zd_min": zd["min"],
+                                "zd_max": zd["max"],
+                                "zd_rounds": zd["rounds"],
+                                "zd_p95": zd["p95"],
+                                "zd_cv": zd["cv"],
                                 "ref_label": ref["label"],
                                 "ref_mean": ref["mean"],
                                 "ref_ops": ref["ops"],
+                                "ref_stddev": ref["stddev"],
+                                "ref_min": ref["min"],
+                                "ref_max": ref["max"],
+                                "ref_rounds": ref["rounds"],
+                                "ref_p95": ref["p95"],
+                                "ref_cv": ref["cv"],
                                 "ratio": ratio,
                             }
                         )
@@ -397,8 +461,9 @@ def _build_comparisons(modules: dict) -> list[dict]:
 }
 .summary-card .num { font-size: 2rem; font-weight: bold; }
 .summary-card .label { font-size: .85rem; color: var(--meta); }
+.table-wrap { overflow-x: auto; margin-bottom: 1rem; }
 table {
-  width: 100%; border-collapse: collapse; margin-bottom: 1rem;
+  width: 100%; border-collapse: collapse;
   font-size: .9rem;
 }
 th, td {
@@ -442,6 +507,43 @@ def _ratio_text(ratio: float) -> str:
     return "~equal"
 
 
+def _tail_cells(
+    cv: float,
+    p95: float | None,
+    stddev: float,
+    min_t: float,
+    max_t: float,
+    rounds: int,
+) -> str:
+    """Build four ``<td>`` cells exposing tail-latency statistics.
+
+    Renders min, max, stddev, and P95 as separate table cells so they are
+    visible without hovering.  Every cell carries the same ``title`` tooltip
+    showing the coefficient of variation and the number of rounds.
+
+    Args:
+        cv: Coefficient of variation (stddev/mean * 100); tooltip only.
+        p95: 95th-percentile latency in seconds, or None (rendered as an em dash).
+        stddev: Standard deviation in seconds.
+        min_t: Minimum time in seconds.
+        max_t: Maximum time in seconds.
+        rounds: Number of benchmark rounds; tooltip only.
+
+    Returns:
+        Four concatenated ``<td>`` elements, in order: min, max, stddev, P95.
+    """
+    p95_text = _human_time(p95) if p95 is not None else "\u2014"
+    rounds_tip = f"rounds={rounds}"
+    cv_tip = f"CV={cv:.1f}%"
+    shared_tip = f"{cv_tip} | {rounds_tip}"
+    return (
+        f'<td title="{shared_tip}">{_human_time(min_t)}</td>'
+        f'<td title="{shared_tip}">{_human_time(max_t)}</td>'
+        f'<td title="{shared_tip}">{_human_time(stddev)}</td>'
+        f'<td title="{shared_tip}">{p95_text}</td>'
+    )
+
+
 def _build_sparkline_init_js(module_names: list[str], data_js_path: str) -> str:
     """Generate JS that loads history and draws a sparkline per module."""
     modules_json = json.dumps(module_names)
@@ -524,11 +626,22 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:
 
         if pairs:
             # --- Comparison table ---
-            s += "<table>\n<thead><tr>"
-            s += "<th>Operation</th><th>zerodep</th><th>Reference</th>"
-            s += "<th>zerodep time</th><th>Ref time</th>"
-            s += "<th>zerodep ops/s</th><th>Ref ops/s</th>"
-            s += "<th>Ratio</th></tr></thead>\n<tbody>\n"
+            s += '<div class="table-wrap"><table>\n<thead>'
+            s += "<tr>"
+            s += '<th rowspan="2">Operation</th>'
+            s += '<th rowspan="2">zerodep</th>'
+            s += '<th rowspan="2">Reference</th>'
+            s += '<th rowspan="2">zd mean</th>'
+            s += '<th rowspan="2">Ref mean</th>'
+            s += '<th rowspan="2">zd ops/s</th>'
+            s += '<th rowspan="2">Ref ops/s</th>'
+            s += '<th colspan="4" style="text-align:center">zerodep tail latency</th>'
+            s += '<th colspan="4" style="text-align:center">Ref tail latency</th>'
+            s += '<th rowspan="2">Ratio</th>'
+            s += "</tr>\n<tr>"
+            for _ in range(2):
+                s += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
+            s += "</tr>\n</thead>\n<tbody>\n"
 
             for p in pairs:
                 rc = _ratio_class(p["ratio"])
@@ -540,10 +653,26 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:
                 s += f"<td>{_human_time(p['ref_mean'])}</td>"
                 s += f"<td>{_human_ops(p['zd_ops'])}</td>"
                 s += f"<td>{_human_ops(p['ref_ops'])}</td>"
+                s += _tail_cells(
+                    p["zd_cv"],
+                    p["zd_p95"],
+                    p["zd_stddev"],
+                    p["zd_min"],
+                    p["zd_max"],
+                    p["zd_rounds"],
+                )
+                s += _tail_cells(
+                    p["ref_cv"],
+                    p["ref_p95"],
+                    p["ref_stddev"],
+                    p["ref_min"],
+                    p["ref_max"],
+                    p["ref_rounds"],
+                )
                 s += f'<td class="ratio-cell {rc}">{_ratio_text(p["ratio"])}</td>'
                 s += "</tr>\n"
 
-            s += "</tbody></table>\n"
+            s += "</tbody></table></div>\n"
 
             # --- Chart: group by operation, show zerodep vs best-reference ops/s ---
             # Deduplicate operations, pick best reference per operation
@@ -610,8 +739,10 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:
 
         if standalone:
             s += "<h3>Standalone benchmarks</h3>\n"
-            s += '<table class="standalone-table">\n<thead><tr>'
-            s += "<th>Operation</th><th>Variant</th><th>Mean</th><th>ops/s</th>"
+            s += '<div class="table-wrap"><table class="standalone-table">\n<thead><tr>'
+            s += "<th>Operation</th><th>Variant</th>"
+            s += "<th>Mean</th><th>ops/s</th>"
+            s += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
             s += "</tr></thead>\n<tbody>\n"
             for st in standalone:
                 s += "<tr>"
@@ -619,8 +750,16 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:
                 s += f"<td>{st['variant']}</td>"
                 s += f"<td>{_human_time(st['mean'])}</td>"
                 s += f"<td>{_human_ops(st['ops'])}</td>"
+                s += _tail_cells(
+                    st["cv"],
+                    st["p95"],
+                    st["stddev"],
+                    st["min"],
+                    st["max"],
+                    st["rounds"],
+                )
                 s += "</tr>\n"
-            s += "</tbody></table>\n"
+            s += "</tbody></table></div>\n"
 
         s += "</div>\n"
         sections.append(s)
@@ -962,12 +1101,22 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:
     body = f"<h2>{module}</h2>\n"
 
     if pairs:
-        body += "<table>\n<thead><tr>"
-        body += "<th>Operation</th><th>zerodep</th>"
-        body += "<th>Reference</th>"
-        body += "<th>zerodep time</th><th>Ref time</th>"
-        body += "<th>zerodep ops/s</th><th>Ref ops/s</th>"
-        body += "<th>Ratio</th></tr></thead>\n<tbody>\n"
+        body += '<div class="table-wrap"><table>\n<thead>'
+        body += "<tr>"
+        body += '<th rowspan="2">Operation</th>'
+        body += '<th rowspan="2">zerodep</th>'
+        body += '<th rowspan="2">Reference</th>'
+        body += '<th rowspan="2">zd mean</th>'
+        body += '<th rowspan="2">Ref mean</th>'
+        body += '<th rowspan="2">zd ops/s</th>'
+        body += '<th rowspan="2">Ref ops/s</th>'
+        body += '<th colspan="4" style="text-align:center">zerodep tail latency</th>'
+        body += '<th colspan="4" style="text-align:center">Ref tail latency</th>'
+        body += '<th rowspan="2">Ratio</th>'
+        body += "</tr>\n<tr>"
+        for _ in range(2):
+            body += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
+        body += "</tr>\n</thead>\n<tbody>\n"
 
         for p in pairs:
             rc = _ratio_class(p["ratio"])
@@ -979,9 +1128,25 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:
             body += f"<td>{_human_time(p['ref_mean'])}</td>"
             body += f"<td>{_human_ops(p['zd_ops'])}</td>"
             body += f"<td>{_human_ops(p['ref_ops'])}</td>"
+            body += _tail_cells(
+                p["zd_cv"],
+                p["zd_p95"],
+                p["zd_stddev"],
+                p["zd_min"],
+                p["zd_max"],
+                p["zd_rounds"],
+            )
+            body += _tail_cells(
+                p["ref_cv"],
+                p["ref_p95"],
+                p["ref_stddev"],
+                p["ref_min"],
+                p["ref_max"],
+                p["ref_rounds"],
+            )
             body += f'<td class="ratio-cell {rc}">{_ratio_text(p["ratio"])}</td>'
             body += "</tr>\n"
-        body += "</tbody></table>\n"
+        body += "</tbody></table></div>\n"
 
         op_best: dict[str, dict] = {}
         for p in pairs:
@@ -1040,9 +1205,10 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:
 
     if standalone:
         body += "<h3>Standalone benchmarks</h3>\n"
-        body += '<table class="standalone-table">\n<thead><tr>'
+        body += '<div class="table-wrap"><table class="standalone-table">\n<thead><tr>'
         body += "<th>Operation</th><th>Variant</th>"
         body += "<th>Mean</th><th>ops/s</th>"
+        body += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
         body += "</tr></thead>\n<tbody>\n"
         for st in standalone:
             body += "<tr>"
@@ -1050,8 +1216,16 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:
             body += f"<td>{st['variant']}</td>"
             body += f"<td>{_human_time(st['mean'])}</td>"
             body += f"<td>{_human_ops(st['ops'])}</td>"
+            body += _tail_cells(
+                st["cv"],
+                st["p95"],
+                st["stddev"],
+                st["min"],
+                st["max"],
+                st["rounds"],
+            )
             body += "</tr>\n"
-        body += "</tbody></table>\n"
+        body += "</tbody></table></div>\n"
 
     meta_line = (
         f"Version: {version} | Commit: {commit_short} | {timestamp} "

Operation	zerodep	Reference	zd mean	Ref mean	zd ops/s	Ref ops/s	zerodep tail latency				Ref tail latency				Ratio
Operation	zerodep	Reference	zd mean	Ref mean	zd ops/s	Ref ops/s	Min	Max	StdDev	P95					Ratio
{_human_time(p['ref_mean'])}	{_human_ops(p['zd_ops'])}	{_human_ops(p['ref_ops'])}	{_ratio_text(p["ratio"])}