From 715cee4b93cae1fa2f0f86ff0f1ccf4e2fb8b2da Mon Sep 17 00:00:00 2001 From: Peng Ding Date: Mon, 18 May 2026 07:02:36 -0500 Subject: [PATCH] feat(bench): expose min/max/stddev/P95 in benchmark reports (closes #81) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the single "Spread" tooltip column with four explicit tail-latency columns (Min, Max, StdDev, P95) in all HTML benchmark tables — both the zerodep vs. reference comparison tables and the standalone tables. Key changes: - Rename _spread_cell() → _tail_cells() which now renders four table cells (td elements) instead of one, so min/max/stddev/P95 are always visible without hovering - Use a two-row table header (thead) with colspan grouping headers ("zerodep tail latency" / "Ref tail latency") to keep the wider table readable - Wrap all tables in a div with class "table-wrap"
(overflow-x: auto) so the page stays usable on narrow viewports - P95 is computed from stats.data when present; falls back to "—" when absent (backward compatible with older JSON that omits the data array) - Both _generate_html() and _generate_module_page() updated consistently --- _scripts/generate_bench_report.py | 220 ++++++++++++++++++++++++++---- 1 file changed, 197 insertions(+), 23 deletions(-) diff --git a/_scripts/generate_bench_report.py b/_scripts/generate_bench_report.py index 43c33b8..c82eb7c 100644 --- a/_scripts/generate_bench_report.py +++ b/_scripts/generate_bench_report.py @@ -10,6 +10,7 @@ from __future__ import annotations import json +import math import re import sys from collections import defaultdict @@ -128,16 +129,31 @@ def _parse_benchmarks(data: dict) -> dict: is_zd = _is_zerodep(test_method) + # Calculate P95 from raw data if available + raw_data = b["stats"].get("data") + if raw_data and len(raw_data) > 0: + sorted_data = sorted(raw_data) + p95_idx = math.ceil(0.95 * len(sorted_data)) - 1 + p95 = sorted_data[p95_idx] + else: + p95 = None + + mean = b["stats"]["mean"] + stddev = b["stats"].get("stddev", 0) + cv = (stddev / mean * 100) if mean > 0 else 0.0 + entry = { "method": test_method, "is_zerodep": is_zd, "label": "zerodep" if is_zd else _ref_display_name(test_method), - "mean": b["stats"]["mean"], + "mean": mean, "ops": b["stats"]["ops"], - "stddev": b["stats"].get("stddev", 0), - "min": b["stats"].get("min", b["stats"]["mean"]), - "max": b["stats"].get("max", b["stats"]["mean"]), + "stddev": stddev, + "min": b["stats"].get("min", mean), + "max": b["stats"].get("max", mean), "rounds": b["stats"].get("rounds", 0), + "p95": p95, + "cv": cv, } modules[module][operation].append(entry) @@ -224,6 +240,12 @@ def _build_comparisons(modules: dict) -> list[dict]: "variant": e["method"].removeprefix("test_"), "mean": e["mean"], "ops": e["ops"], + "stddev": e["stddev"], + "min": e["min"], + "max": e["max"], + "rounds": e["rounds"], + "p95": 
e["p95"], + "cv": e["cv"], } ) continue @@ -237,6 +259,12 @@ def _build_comparisons(modules: dict) -> list[dict]: "variant": e["label"], "mean": e["mean"], "ops": e["ops"], + "stddev": e["stddev"], + "min": e["min"], + "max": e["max"], + "rounds": e["rounds"], + "p95": e["p95"], + "cv": e["cv"], } ) continue @@ -276,9 +304,21 @@ def _build_comparisons(modules: dict) -> list[dict]: "zd_variant": zd_variant, "zd_mean": zd["mean"], "zd_ops": zd["ops"], + "zd_stddev": zd["stddev"], + "zd_min": zd["min"], + "zd_max": zd["max"], + "zd_rounds": zd["rounds"], + "zd_p95": zd["p95"], + "zd_cv": zd["cv"], "ref_label": ref["label"], "ref_mean": ref["mean"], "ref_ops": ref["ops"], + "ref_stddev": ref["stddev"], + "ref_min": ref["min"], + "ref_max": ref["max"], + "ref_rounds": ref["rounds"], + "ref_p95": ref["p95"], + "ref_cv": ref["cv"], "ratio": ratio, } ) @@ -292,6 +332,12 @@ def _build_comparisons(modules: dict) -> list[dict]: "variant": e["method"].removeprefix("test_"), "mean": e["mean"], "ops": e["ops"], + "stddev": e["stddev"], + "min": e["min"], + "max": e["max"], + "rounds": e["rounds"], + "p95": e["p95"], + "cv": e["cv"], } ) @@ -304,6 +350,12 @@ def _build_comparisons(modules: dict) -> list[dict]: "variant": e["label"], "mean": e["mean"], "ops": e["ops"], + "stddev": e["stddev"], + "min": e["min"], + "max": e["max"], + "rounds": e["rounds"], + "p95": e["p95"], + "cv": e["cv"], } ) else: @@ -324,9 +376,21 @@ def _build_comparisons(modules: dict) -> list[dict]: "zd_variant": zd_variant, "zd_mean": zd["mean"], "zd_ops": zd["ops"], + "zd_stddev": zd["stddev"], + "zd_min": zd["min"], + "zd_max": zd["max"], + "zd_rounds": zd["rounds"], + "zd_p95": zd["p95"], + "zd_cv": zd["cv"], "ref_label": ref["label"], "ref_mean": ref["mean"], "ref_ops": ref["ops"], + "ref_stddev": ref["stddev"], + "ref_min": ref["min"], + "ref_max": ref["max"], + "ref_rounds": ref["rounds"], + "ref_p95": ref["p95"], + "ref_cv": ref["cv"], "ratio": ratio, } ) @@ -397,8 +461,9 @@ def 
_build_comparisons(modules: dict) -> list[dict]: } .summary-card .num { font-size: 2rem; font-weight: bold; } .summary-card .label { font-size: .85rem; color: var(--meta); } +.table-wrap { overflow-x: auto; margin-bottom: 1rem; } table { - width: 100%; border-collapse: collapse; margin-bottom: 1rem; + width: 100%; border-collapse: collapse; font-size: .9rem; } th, td { @@ -442,6 +507,43 @@ def _ratio_text(ratio: float) -> str: return "~equal" +def _tail_cells( + cv: float, + p95: float | None, + stddev: float, + min_t: float, + max_t: float, + rounds: int, +) -> str: + """Build four ```` cells exposing tail-latency statistics. + + Renders min, max, stddev, and P95 as separate table cells so they are + visible without hovering. A tooltip on each cell also shows the number + of rounds for context. + + Args: + cv: Coefficient of variation (stddev/mean * 100) as a percentage. + p95: 95th-percentile latency in seconds, or None if unavailable. + stddev: Standard deviation in seconds. + min_t: Minimum time in seconds. + max_t: Maximum time in seconds. + rounds: Number of benchmark rounds. + + Returns: + Four HTML ```` elements: min, max, stddev, P95. + """ + p95_text = _human_time(p95) if p95 is not None else "\u2014" + rounds_tip = f"rounds={rounds}" + cv_tip = f"CV={cv:.1f}%" + shared_tip = f"{cv_tip} | {rounds_tip}" + return ( + f'{_human_time(min_t)}' + f'{_human_time(max_t)}' + f'{_human_time(stddev)}' + f'{p95_text}' + ) + + def _build_sparkline_init_js(module_names: list[str], data_js_path: str) -> str: """Generate JS that loads history and draws a sparkline per module.""" modules_json = json.dumps(module_names) @@ -524,11 +626,22 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str: if pairs: # --- Comparison table --- - s += "\n" - s += "" - s += "" - s += "" - s += "\n\n" + s += '
OperationzerodepReferencezerodep timeRef timezerodep ops/sRef ops/sRatio
\n' + s += "" + s += '' + s += '' + s += '' + s += '' + s += '' + s += '' + s += '' + s += '' + s += '' + s += '' + s += "\n" + for _ in range(2): + s += "" + s += "\n\n\n" for p in pairs: rc = _ratio_class(p["ratio"]) @@ -540,10 +653,26 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str: s += f"" s += f"" s += f"" + s += _tail_cells( + p["zd_cv"], + p["zd_p95"], + p["zd_stddev"], + p["zd_min"], + p["zd_max"], + p["zd_rounds"], + ) + s += _tail_cells( + p["ref_cv"], + p["ref_p95"], + p["ref_stddev"], + p["ref_min"], + p["ref_max"], + p["ref_rounds"], + ) s += f'' s += "\n" - s += "
OperationzerodepReferencezd meanRef meanzd ops/sRef ops/szerodep tail latencyRef tail latencyRatio
MinMaxStdDevP95
{_human_time(p['ref_mean'])}{_human_ops(p['zd_ops'])}{_human_ops(p['ref_ops'])}{_ratio_text(p["ratio"])}
\n" + s += "
\n" # --- Chart: group by operation, show zerodep vs best-reference ops/s --- # Deduplicate operations, pick best reference per operation @@ -610,8 +739,10 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str: if standalone: s += "

Standalone benchmarks

\n" - s += '\n' - s += "" + s += '
OperationVariantMeanops/s
\n' + s += "" + s += "" + s += "" s += "\n\n" for st in standalone: s += "" @@ -619,8 +750,16 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str: s += f"" s += f"" s += f"" + s += _tail_cells( + st["cv"], + st["p95"], + st["stddev"], + st["min"], + st["max"], + st["rounds"], + ) s += "\n" - s += "
OperationVariantMeanops/sMinMaxStdDevP95
{st['variant']}{_human_time(st['mean'])}{_human_ops(st['ops'])}
\n" + s += "\n" s += "\n" sections.append(s) @@ -962,12 +1101,22 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None: body = f"

{module}

\n" if pairs: - body += "\n" - body += "" - body += "" - body += "" - body += "" - body += "\n\n" + body += '
OperationzerodepReferencezerodep timeRef timezerodep ops/sRef ops/sRatio
\n' + body += "" + body += '' + body += '' + body += '' + body += '' + body += '' + body += '' + body += '' + body += '' + body += '' + body += '' + body += "\n" + for _ in range(2): + body += "" + body += "\n\n\n" for p in pairs: rc = _ratio_class(p["ratio"]) @@ -979,9 +1128,25 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None: body += f"" body += f"" body += f"" + body += _tail_cells( + p["zd_cv"], + p["zd_p95"], + p["zd_stddev"], + p["zd_min"], + p["zd_max"], + p["zd_rounds"], + ) + body += _tail_cells( + p["ref_cv"], + p["ref_p95"], + p["ref_stddev"], + p["ref_min"], + p["ref_max"], + p["ref_rounds"], + ) body += f'' body += "\n" - body += "
OperationzerodepReferencezd meanRef meanzd ops/sRef ops/szerodep tail latencyRef tail latencyRatio
MinMaxStdDevP95
{_human_time(p['ref_mean'])}{_human_ops(p['zd_ops'])}{_human_ops(p['ref_ops'])}{_ratio_text(p["ratio"])}
\n" + body += "\n" op_best: dict[str, dict] = {} for p in pairs: @@ -1040,9 +1205,10 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None: if standalone: body += "

Standalone benchmarks

\n" - body += '\n' + body += '
\n' body += "" body += "" + body += "" body += "\n\n" for st in standalone: body += "" @@ -1050,8 +1216,16 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None: body += f"" body += f"" body += f"" + body += _tail_cells( + st["cv"], + st["p95"], + st["stddev"], + st["min"], + st["max"], + st["rounds"], + ) body += "\n" - body += "
OperationVariantMeanops/sMinMaxStdDevP95
{st['variant']}{_human_time(st['mean'])}{_human_ops(st['ops'])}
\n" + body += "\n" meta_line = ( f"Version: {version} | Commit: {commit_short} | {timestamp} "