Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 197 additions & 23 deletions _scripts/generate_bench_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from __future__ import annotations

import json
import math
import re
import sys
from collections import defaultdict
Expand Down Expand Up @@ -128,16 +129,31 @@ def _parse_benchmarks(data: dict) -> dict:

is_zd = _is_zerodep(test_method)

# Calculate P95 from raw data if available
raw_data = b["stats"].get("data")
if raw_data and len(raw_data) > 0:
sorted_data = sorted(raw_data)
p95_idx = math.ceil(0.95 * len(sorted_data)) - 1
p95 = sorted_data[p95_idx]
else:
p95 = None

mean = b["stats"]["mean"]
stddev = b["stats"].get("stddev", 0)
cv = (stddev / mean * 100) if mean > 0 else 0.0

entry = {
"method": test_method,
"is_zerodep": is_zd,
"label": "zerodep" if is_zd else _ref_display_name(test_method),
"mean": b["stats"]["mean"],
"mean": mean,
"ops": b["stats"]["ops"],
"stddev": b["stats"].get("stddev", 0),
"min": b["stats"].get("min", b["stats"]["mean"]),
"max": b["stats"].get("max", b["stats"]["mean"]),
"stddev": stddev,
"min": b["stats"].get("min", mean),
"max": b["stats"].get("max", mean),
"rounds": b["stats"].get("rounds", 0),
"p95": p95,
"cv": cv,
}

modules[module][operation].append(entry)
Expand Down Expand Up @@ -224,6 +240,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
"variant": e["method"].removeprefix("test_"),
"mean": e["mean"],
"ops": e["ops"],
"stddev": e["stddev"],
"min": e["min"],
"max": e["max"],
"rounds": e["rounds"],
"p95": e["p95"],
"cv": e["cv"],
}
)
continue
Expand All @@ -237,6 +259,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
"variant": e["label"],
"mean": e["mean"],
"ops": e["ops"],
"stddev": e["stddev"],
"min": e["min"],
"max": e["max"],
"rounds": e["rounds"],
"p95": e["p95"],
"cv": e["cv"],
}
)
continue
Expand Down Expand Up @@ -276,9 +304,21 @@ def _build_comparisons(modules: dict) -> list[dict]:
"zd_variant": zd_variant,
"zd_mean": zd["mean"],
"zd_ops": zd["ops"],
"zd_stddev": zd["stddev"],
"zd_min": zd["min"],
"zd_max": zd["max"],
"zd_rounds": zd["rounds"],
"zd_p95": zd["p95"],
"zd_cv": zd["cv"],
"ref_label": ref["label"],
"ref_mean": ref["mean"],
"ref_ops": ref["ops"],
"ref_stddev": ref["stddev"],
"ref_min": ref["min"],
"ref_max": ref["max"],
"ref_rounds": ref["rounds"],
"ref_p95": ref["p95"],
"ref_cv": ref["cv"],
"ratio": ratio,
}
)
Expand All @@ -292,6 +332,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
"variant": e["method"].removeprefix("test_"),
"mean": e["mean"],
"ops": e["ops"],
"stddev": e["stddev"],
"min": e["min"],
"max": e["max"],
"rounds": e["rounds"],
"p95": e["p95"],
"cv": e["cv"],
}
)

Expand All @@ -304,6 +350,12 @@ def _build_comparisons(modules: dict) -> list[dict]:
"variant": e["label"],
"mean": e["mean"],
"ops": e["ops"],
"stddev": e["stddev"],
"min": e["min"],
"max": e["max"],
"rounds": e["rounds"],
"p95": e["p95"],
"cv": e["cv"],
}
)
else:
Expand All @@ -324,9 +376,21 @@ def _build_comparisons(modules: dict) -> list[dict]:
"zd_variant": zd_variant,
"zd_mean": zd["mean"],
"zd_ops": zd["ops"],
"zd_stddev": zd["stddev"],
"zd_min": zd["min"],
"zd_max": zd["max"],
"zd_rounds": zd["rounds"],
"zd_p95": zd["p95"],
"zd_cv": zd["cv"],
"ref_label": ref["label"],
"ref_mean": ref["mean"],
"ref_ops": ref["ops"],
"ref_stddev": ref["stddev"],
"ref_min": ref["min"],
"ref_max": ref["max"],
"ref_rounds": ref["rounds"],
"ref_p95": ref["p95"],
"ref_cv": ref["cv"],
"ratio": ratio,
}
)
Expand Down Expand Up @@ -397,8 +461,9 @@ def _build_comparisons(modules: dict) -> list[dict]:
}
.summary-card .num { font-size: 2rem; font-weight: bold; }
.summary-card .label { font-size: .85rem; color: var(--meta); }
.table-wrap { overflow-x: auto; margin-bottom: 1rem; }
table {
width: 100%; border-collapse: collapse; margin-bottom: 1rem;
width: 100%; border-collapse: collapse;
font-size: .9rem;
}
th, td {
Expand Down Expand Up @@ -442,6 +507,43 @@ def _ratio_text(ratio: float) -> str:
return "~equal"


def _tail_cells(
    cv: float,
    p95: float | None,
    stddev: float,
    min_t: float,
    max_t: float,
    rounds: int,
) -> str:
    """Render the tail-latency statistics of one benchmark as table cells.

    Produces four adjacent ``<td>`` elements, in the order min, max,
    stddev, P95, so the values can be read directly from the table. All
    four cells carry the same ``title`` tooltip, which reports the
    coefficient of variation (one decimal place) and the round count.

    Args:
        cv: Coefficient of variation (stddev/mean * 100) as a percentage.
        p95: 95th-percentile latency in seconds, or None if unavailable;
            None is rendered as an em dash.
        stddev: Standard deviation in seconds.
        min_t: Minimum time in seconds.
        max_t: Maximum time in seconds.
        rounds: Number of benchmark rounds.

    Returns:
        A single string containing the four concatenated ``<td>`` cells.
    """
    # One tooltip shared by every cell: variability plus sample size.
    tooltip = f"CV={cv:.1f}% | rounds={rounds}"

    # P95 is only known when raw samples were present; otherwise show a dash.
    if p95 is None:
        p95_display = "\u2014"
    else:
        p95_display = _human_time(p95)

    # Cell contents in column order: Min, Max, StdDev, P95.
    cell_texts = (
        _human_time(min_t),
        _human_time(max_t),
        _human_time(stddev),
        p95_display,
    )
    return "".join(f'<td title="{tooltip}">{text}</td>' for text in cell_texts)


def _build_sparkline_init_js(module_names: list[str], data_js_path: str) -> str:
"""Generate JS that loads history and draws a sparkline per module."""
modules_json = json.dumps(module_names)
Expand Down Expand Up @@ -524,11 +626,22 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:

if pairs:
# --- Comparison table ---
s += "<table>\n<thead><tr>"
s += "<th>Operation</th><th>zerodep</th><th>Reference</th>"
s += "<th>zerodep time</th><th>Ref time</th>"
s += "<th>zerodep ops/s</th><th>Ref ops/s</th>"
s += "<th>Ratio</th></tr></thead>\n<tbody>\n"
s += '<div class="table-wrap"><table>\n<thead>'
s += "<tr>"
s += '<th rowspan="2">Operation</th>'
s += '<th rowspan="2">zerodep</th>'
s += '<th rowspan="2">Reference</th>'
s += '<th rowspan="2">zd mean</th>'
s += '<th rowspan="2">Ref mean</th>'
s += '<th rowspan="2">zd ops/s</th>'
s += '<th rowspan="2">Ref ops/s</th>'
s += '<th colspan="4" style="text-align:center">zerodep tail latency</th>'
s += '<th colspan="4" style="text-align:center">Ref tail latency</th>'
s += '<th rowspan="2">Ratio</th>'
s += "</tr>\n<tr>"
for _ in range(2):
s += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
s += "</tr>\n</thead>\n<tbody>\n"

for p in pairs:
rc = _ratio_class(p["ratio"])
Expand All @@ -540,10 +653,26 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:
s += f"<td>{_human_time(p['ref_mean'])}</td>"
s += f"<td>{_human_ops(p['zd_ops'])}</td>"
s += f"<td>{_human_ops(p['ref_ops'])}</td>"
s += _tail_cells(
p["zd_cv"],
p["zd_p95"],
p["zd_stddev"],
p["zd_min"],
p["zd_max"],
p["zd_rounds"],
)
s += _tail_cells(
p["ref_cv"],
p["ref_p95"],
p["ref_stddev"],
p["ref_min"],
p["ref_max"],
p["ref_rounds"],
)
s += f'<td class="ratio-cell {rc}">{_ratio_text(p["ratio"])}</td>'
s += "</tr>\n"

s += "</tbody></table>\n"
s += "</tbody></table></div>\n"

# --- Chart: group by operation, show zerodep vs best-reference ops/s ---
# Deduplicate operations, pick best reference per operation
Expand Down Expand Up @@ -610,17 +739,27 @@ def _generate_html(comparisons: list[dict], meta: dict) -> str:

if standalone:
s += "<h3>Standalone benchmarks</h3>\n"
s += '<table class="standalone-table">\n<thead><tr>'
s += "<th>Operation</th><th>Variant</th><th>Mean</th><th>ops/s</th>"
s += '<div class="table-wrap"><table class="standalone-table">\n<thead><tr>'
s += "<th>Operation</th><th>Variant</th>"
s += "<th>Mean</th><th>ops/s</th>"
s += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
s += "</tr></thead>\n<tbody>\n"
for st in standalone:
s += "<tr>"
s += f"<td>{st['operation']}</td>"
s += f"<td>{st['variant']}</td>"
s += f"<td>{_human_time(st['mean'])}</td>"
s += f"<td>{_human_ops(st['ops'])}</td>"
s += _tail_cells(
st["cv"],
st["p95"],
st["stddev"],
st["min"],
st["max"],
st["rounds"],
)
s += "</tr>\n"
s += "</tbody></table>\n"
s += "</tbody></table></div>\n"

s += "</div>\n"
sections.append(s)
Expand Down Expand Up @@ -962,12 +1101,22 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:
body = f"<h2>{module}</h2>\n"

if pairs:
body += "<table>\n<thead><tr>"
body += "<th>Operation</th><th>zerodep</th>"
body += "<th>Reference</th>"
body += "<th>zerodep time</th><th>Ref time</th>"
body += "<th>zerodep ops/s</th><th>Ref ops/s</th>"
body += "<th>Ratio</th></tr></thead>\n<tbody>\n"
body += '<div class="table-wrap"><table>\n<thead>'
body += "<tr>"
body += '<th rowspan="2">Operation</th>'
body += '<th rowspan="2">zerodep</th>'
body += '<th rowspan="2">Reference</th>'
body += '<th rowspan="2">zd mean</th>'
body += '<th rowspan="2">Ref mean</th>'
body += '<th rowspan="2">zd ops/s</th>'
body += '<th rowspan="2">Ref ops/s</th>'
body += '<th colspan="4" style="text-align:center">zerodep tail latency</th>'
body += '<th colspan="4" style="text-align:center">Ref tail latency</th>'
body += '<th rowspan="2">Ratio</th>'
body += "</tr>\n<tr>"
for _ in range(2):
body += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
body += "</tr>\n</thead>\n<tbody>\n"

for p in pairs:
rc = _ratio_class(p["ratio"])
Expand All @@ -979,9 +1128,25 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:
body += f"<td>{_human_time(p['ref_mean'])}</td>"
body += f"<td>{_human_ops(p['zd_ops'])}</td>"
body += f"<td>{_human_ops(p['ref_ops'])}</td>"
body += _tail_cells(
p["zd_cv"],
p["zd_p95"],
p["zd_stddev"],
p["zd_min"],
p["zd_max"],
p["zd_rounds"],
)
body += _tail_cells(
p["ref_cv"],
p["ref_p95"],
p["ref_stddev"],
p["ref_min"],
p["ref_max"],
p["ref_rounds"],
)
body += f'<td class="ratio-cell {rc}">{_ratio_text(p["ratio"])}</td>'
body += "</tr>\n"
body += "</tbody></table>\n"
body += "</tbody></table></div>\n"

op_best: dict[str, dict] = {}
for p in pairs:
Expand Down Expand Up @@ -1040,18 +1205,27 @@ def _generate_module_page(mod_data: dict, meta: dict) -> str | None:

if standalone:
body += "<h3>Standalone benchmarks</h3>\n"
body += '<table class="standalone-table">\n<thead><tr>'
body += '<div class="table-wrap"><table class="standalone-table">\n<thead><tr>'
body += "<th>Operation</th><th>Variant</th>"
body += "<th>Mean</th><th>ops/s</th>"
body += "<th>Min</th><th>Max</th><th>StdDev</th><th>P95</th>"
body += "</tr></thead>\n<tbody>\n"
for st in standalone:
body += "<tr>"
body += f"<td>{st['operation']}</td>"
body += f"<td>{st['variant']}</td>"
body += f"<td>{_human_time(st['mean'])}</td>"
body += f"<td>{_human_ops(st['ops'])}</td>"
body += _tail_cells(
st["cv"],
st["p95"],
st["stddev"],
st["min"],
st["max"],
st["rounds"],
)
body += "</tr>\n"
body += "</tbody></table>\n"
body += "</tbody></table></div>\n"

meta_line = (
f"Version: {version} | Commit: {commit_short} | {timestamp} "
Expand Down
Loading