# Source: bench_optimize_mart.py, from the 0bserver07/StackUnderflow repository on GitHub (main branch).
"""Bench the 4 ``optimize`` detectors: ``message_tool_mart`` vs raw scan.

Runs each detector twice on a side-loaded copy of the user's store:

1. ``mart`` path — ``message_tool_mart`` fully populated (what ships).
2. ``raw`` path — same store with ``message_tool_mart`` (and, best-effort,
   ``tool_mart``) *emptied*, so the detector falls through to the
   raw-``messages`` scan.

Reports ms per detector for both paths plus the speedup. Run with::

    python bench_optimize_mart.py /tmp/wt-optimize/test-store.db
"""

from __future__ import annotations

import shutil
import sys
import tempfile
import time
from contextlib import closing
from pathlib import Path

from stackunderflow.reports import optimize as optimize_mod
from stackunderflow.reports.optimize import (
    _detect_bash_output_limits,
    _detect_ghost_agents,
    _detect_junk_reads,
    _detect_low_read_edit_ratio,
)
from stackunderflow.reports.scope import Scope
from stackunderflow.store import db


def _bench_detector(label, fn, conn):
    """Time one detector invocation.

    Calls ``fn(conn)`` exactly once, measuring wall-clock time with
    ``time.perf_counter``.

    Returns:
        A ``(label, elapsed_ms, n_findings)`` tuple, where ``elapsed_ms`` is
        the call duration in milliseconds and ``n_findings`` is ``len()`` of
        whatever ``fn`` returned.
    """
    started = time.perf_counter()
    result = fn(conn)
    stopped = time.perf_counter()
    elapsed_ms = (stopped - started) * 1000
    return (label, elapsed_ms, len(result))


def _run_all(conn, scope):
    """Benchmark all four ``optimize`` detectors against ``conn``.

    Every detector is invoked with ``scope=scope`` and timed through
    ``_bench_detector``.

    Returns:
        A list of ``(label, elapsed_ms, n_findings)`` tuples in a fixed order:
        junk_reads, bash_output_limits, low_read_edit, ghost_agents.
    """
    detectors = (
        ("junk_reads", _detect_junk_reads),
        ("bash_output_limits", _detect_bash_output_limits),
        ("low_read_edit", _detect_low_read_edit_ratio),
        ("ghost_agents", _detect_ghost_agents),
    )
    timings = []
    for label, detector in detectors:
        # Bind the current detector as a default argument so the callable
        # never depends on the loop variable's later value.
        def timed_call(c, detector=detector):
            return detector(c, scope=scope)

        timings.append(_bench_detector(label, timed_call, conn))
    return timings


def _print_results(results):
    """Print one aligned ``label / ms / findings`` line per benchmark result."""
    for label, dt, n in results:
        print(f"  {label:<22s}  {dt:8.1f} ms   findings={n}")


def main():
    """Benchmark the optimize detectors on the mart path and on the raw path.

    Expects exactly one command-line argument: the path to a store database.
    The source file is never modified. Each phase works on its own copy
    inside a private temporary directory that is removed when the phase ends
    (even on failure), so only one copy exists on disk at a time:

    * mart path: the copy is benchmarked as-is;
    * raw path: ``message_tool_mart`` and, best-effort, ``tool_mart`` are
      emptied in the copy before benchmarking.

    Prints per-detector timings for both paths, then the raw/mart speedup.

    Raises:
        SystemExit: with code 2 when the argument count is wrong or the
            given path is not an existing file.
    """
    if len(sys.argv) != 2:
        print("usage: python bench_optimize_mart.py <store.db>", file=sys.stderr)
        raise SystemExit(2)
    src = Path(sys.argv[1])
    if not src.is_file():
        print(f"no such file: {src}", file=sys.stderr)
        raise SystemExit(2)

    # Bench ghost_agents with synthetic registered agents so neither path
    # short-circuits at the "no agents to ghost" guard. We don't probe the
    # real ~/.claude/agents/ directory — that varies per machine and is
    # outside the project blast radius this script wants to measure.
    # NOTE: this replaces the module attribute for the rest of the process
    # and is not restored afterwards.
    optimize_mod._registered_agents = lambda: [
        ("explorer-bench", Path("/tmp/explorer-bench.md")),
        ("reviewer-bench", Path("/tmp/reviewer-bench.md")),
        ("writer-bench", Path("/tmp/writer-bench.md")),
    ]

    # All-time scope so we exercise the whole mart / messages table.
    scope = Scope(label="all", since=None, until=None)

    print(f"source: {src}")
    print(f"size: {src.stat().st_size / 1e9:.1f} GB")

    # ── A) mart path ───────────────────────────────────────────────────
    # A fresh temporary directory guarantees no stale database or sidecar
    # files from an earlier (possibly aborted) run sit next to the copy.
    with tempfile.TemporaryDirectory(prefix="bench-mart-") as workdir:
        mart_db = Path(workdir) / "bench-mart.db"
        shutil.copy2(src, mart_db)
        # closing() releases the connection even if a detector raises.
        with closing(db.connect(mart_db)) as conn:
            n_mt = conn.execute("SELECT COUNT(*) AS n FROM message_tool_mart").fetchone()["n"]
            n_msg = conn.execute("SELECT COUNT(*) AS n FROM messages").fetchone()["n"]
            print(f"message_tool_mart rows: {n_mt:,}")
            print(f"messages rows:          {n_msg:,}")
            print()
            print("=== mart path (message_tool_mart populated) ===")
            mart_results = _run_all(conn, scope)
            _print_results(mart_results)

    # ── B) raw path ────────────────────────────────────────────────────
    with tempfile.TemporaryDirectory(prefix="bench-raw-") as workdir:
        raw_db = Path(workdir) / "bench-raw.db"
        shutil.copy2(src, raw_db)
        with closing(db.connect(raw_db)) as conn:
            conn.execute("DELETE FROM message_tool_mart")
            # ALSO empty tool_mart so the Wave 5 short-circuit doesn't intercept
            # — we want the full raw-json fallback to run for an honest comparison.
            # Still best-effort, but a failure is reported rather than hidden,
            # because it changes what the "raw" numbers actually measure.
            try:
                conn.execute("DELETE FROM tool_mart")
            except Exception as exc:  # noqa: BLE001
                tool_mart_state = "NOT emptied"
                print(f"warning: could not empty tool_mart: {exc}", file=sys.stderr)
            else:
                tool_mart_state = "emptied"
            conn.commit()
            print()
            print(f"=== raw path (message_tool_mart emptied, tool_mart {tool_mart_state}) ===")
            raw_results = _run_all(conn, scope)
            _print_results(raw_results)

    # ── C) summary ─────────────────────────────────────────────────────
    print()
    print("=== speedup (raw / mart) ===")
    for (label, mart_dt, _), (_, raw_dt, _) in zip(mart_results, raw_results, strict=True):
        if mart_dt > 0:
            ratio = raw_dt / mart_dt
            print(f"  {label:<22s}  raw={raw_dt:8.1f}ms  mart={mart_dt:8.1f}ms  speedup={ratio:6.1f}x")
        else:
            # A zero mart timing would divide by zero; report it as infinite.
            print(f"  {label:<22s}  raw={raw_dt:8.1f}ms  mart={mart_dt:8.1f}ms  speedup=inf")


# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()