diff --git a/tracker/api.py b/tracker/api.py index d3edbd8..0325d67 100644 --- a/tracker/api.py +++ b/tracker/api.py @@ -11,7 +11,7 @@ from .db import DEFAULT_DB_PATH, connect, init from .ingest import run as run_ingest -from .pricing import _lookup as _price_lookup +from .pricing import _lookup as _price_lookup, reload as _price_reload from .recompute_costs import run as run_recompute ROOT = Path(__file__).resolve().parent.parent @@ -147,6 +147,27 @@ def stats( {join}""", params).fetchone() totals = dict(totals_row) + # Cost breakdown by bucket. Group tokens by (tool, model), apply per-bucket rates, + # then sum. Lets the UI show "where the $ went" (cache reads almost always dominate). + by_tm = c.execute( + f"""SELECT m.tool, m.model, + SUM(m.input_tokens) AS in_tok, + SUM(m.output_tokens) AS out_tok, + SUM(m.cache_read) AS cr_tok, + SUM(m.cache_write_5m) AS cw5_tok, + SUM(m.cache_write_1h) AS cw1_tok + {join} + GROUP BY m.tool, m.model""", params).fetchall() + cb = {"input": 0.0, "output": 0.0, "cache_read": 0.0, "cache_write_5m": 0.0, "cache_write_1h": 0.0} + for r in by_tm: + p = _price_lookup(r["tool"], r["model"]) + cb["input"] += (r["in_tok"] or 0) * p.get("input", 0) / 1_000_000 + cb["output"] += (r["out_tok"] or 0) * p.get("output", 0) / 1_000_000 + cb["cache_read"] += (r["cr_tok"] or 0) * p.get("cache_read", 0) / 1_000_000 + cb["cache_write_5m"] += (r["cw5_tok"] or 0) * p.get("cache_write_5m", 0) / 1_000_000 + cb["cache_write_1h"] += (r["cw1_tok"] or 0) * p.get("cache_write_1h", 0) / 1_000_000 + totals["cost_breakdown"] = {k: round(v, 4) for k, v in cb.items()} + # Active hours: sum over sessions of (max(ts) - min(ts)) within the filter window. # This excludes pure idle gaps between sessions and gives a more meaningful rate. active_row = c.execute( diff --git a/web/app.js b/web/app.js index d5df9e0..2092bba 100644 --- a/web/app.js +++ b/web/app.js @@ -176,11 +176,18 @@ function renderCards(totals) { const freshTokens = tokens_in + tokens_out + tokens_cw5 + tokens_cw1; const allTokens = freshTokens + tokens_hit; const cacheShare = allTokens ? tokens_hit / allTokens : 0; + const cb = totals.cost_breakdown || {}; + const cwTotal = (cb.cache_write_5m || 0) + (cb.cache_write_1h || 0); + const pct = (v) => cost ? Math.round((v / cost) * 100) + "%" : "—"; const cards = [ { label: "est cost", value: fmt.usd(cost), accent: true }, { label: "$ / active hour", value: fmt.usd(cph), accent: true, sub: "Σ session spans" }, + { label: "cost: cache read", value: fmt.usd(cb.cache_read), sub: pct(cb.cache_read || 0) + " of total" }, + { label: "cost: cache write", value: fmt.usd(cwTotal), sub: pct(cwTotal) + " of total · 5m+1h" }, + { label: "cost: output", value: fmt.usd(cb.output), sub: pct(cb.output || 0) + " of total · incl reasoning" }, + { label: "cost: fresh input", value: fmt.usd(cb.input), sub: pct(cb.input || 0) + " of total" }, + { label: "cache share (tokens)", value: Math.round(cacheShare * 100) + "%", sub: fmt.n(tokens_hit) + " cache hits" }, { label: "fresh + output tokens", value: fmt.short(freshTokens), sub: fmt.n(freshTokens) }, - { label: "cache share", value: Math.round(cacheShare * 100) + "%", sub: fmt.n(tokens_hit) + " cache hits" }, { label: "sessions", value: fmt.n(totals.sessions) }, { label: "messages", value: fmt.n(totals.msgs) }, { label: "active hours", value: ah < 1 ? ah.toFixed(2) : ah.toFixed(1) },