Skip to content

Commit e372c8b

Browse files
committed
feat(webapp): add prompt-cache metrics to Models and AI metrics
The Your models tab gets a cache-savings column and per-model cached-tokens and cache-hit-rate views; the AI metrics dashboard gets a caching section (hit rate, cached tokens, estimated savings, and hit rate by model). Also makes all of the Your models charts time series for consistency.
1 parent b5a7a56 commit e372c8b

4 files changed

Lines changed: 121 additions & 18 deletions

File tree

.server-changes/models-page-usage-tabs.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ area: webapp
33
type: feature
44
---
55

6-
The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, and trend sparklines over a selectable time range) alongside the full model library, which is ordered by provider relevance and release date.
6+
The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, prompt-cache savings, and trend sparklines over a selectable time range) alongside the full model library, ordered by provider relevance and release date. The AI metrics dashboard also gains a caching section with cache hit rate, cached tokens, and estimated savings.

apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,13 @@ const llmDashboard: BuiltInDashboard = {
252252
{ i: "llm-cost-user", x: 6, y: 92, w: 6, h: 13 },
253253
// Efficiency section
254254
{ i: "llm-title-efficiency", x: 0, y: 105, w: 12, h: 2, minH: 2, maxH: 2 },
255-
{ i: "llm-cost-operation", x: 0, y: 107, w: 6, h: 13 },
256-
{ i: "llm-cache-util", x: 6, y: 107, w: 6, h: 13 },
255+
{ i: "llm-cost-operation", x: 0, y: 107, w: 12, h: 13 },
256+
// Caching section
257+
{ i: "llm-title-caching", x: 0, y: 120, w: 12, h: 2, minH: 2, maxH: 2 },
258+
{ i: "llm-cache-hit", x: 0, y: 122, w: 6, h: 13 },
259+
{ i: "llm-cache-tokens", x: 6, y: 122, w: 6, h: 13 },
260+
{ i: "llm-cache-savings", x: 0, y: 135, w: 6, h: 13 },
261+
{ i: "llm-cache-by-model", x: 6, y: 135, w: 6, h: 13 },
257262
],
258263
widgets: {
259264
"llm-cost": {
@@ -487,10 +492,11 @@ const llmDashboard: BuiltInDashboard = {
487492
aggregation: "sum",
488493
},
489494
},
490-
"llm-cache-util": {
491-
title: "Cache utilization",
495+
"llm-title-caching": { title: "Caching", query: "", display: { type: "title" } },
496+
"llm-cache-hit": {
497+
title: "Cache hit rate over time",
492498
query:
493-
"SELECT\r\n timeBucket(),\r\n round(countIf(cached_read_tokens > 0) * 100.0 / count(), 1) AS cache_hit_pct,\r\n round(avg(cached_read_tokens), 0) AS avg_cached_tokens\r\nFROM\r\n llm_metrics\r\nGROUP BY\r\n timeBucket\r\nORDER BY\r\n timeBucket",
499+
"SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
494500
display: {
495501
type: "chart",
496502
chartType: "line",
@@ -503,6 +509,44 @@ const llmDashboard: BuiltInDashboard = {
503509
aggregation: "avg",
504510
},
505511
},
512+
"llm-cache-tokens": {
513+
title: "Cached tokens over time",
514+
query:
515+
"SELECT timeBucket(), sum(cached_read_tokens) AS cache_reads, sum(cache_creation_tokens) AS cache_writes FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket",
516+
display: {
517+
type: "chart",
518+
chartType: "bar",
519+
xAxisColumn: "timebucket",
520+
yAxisColumns: ["cache_reads", "cache_writes"],
521+
groupByColumn: null,
522+
stacked: true,
523+
sortByColumn: null,
524+
sortDirection: "asc",
525+
aggregation: "sum",
526+
},
527+
},
528+
"llm-cache-savings": {
529+
title: "Cache savings over time",
530+
query:
531+
"SELECT timeBucket(), round(sum(cached_read_tokens) * (sum(input_cost) / (sum(input_tokens) + 1)) - sum(cached_read_cost), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket",
532+
display: {
533+
type: "chart",
534+
chartType: "bar",
535+
xAxisColumn: "timebucket",
536+
yAxisColumns: ["cache_savings"],
537+
groupByColumn: null,
538+
stacked: false,
539+
sortByColumn: null,
540+
sortDirection: "asc",
541+
aggregation: "sum",
542+
},
543+
},
544+
"llm-cache-by-model": {
545+
title: "Cache hit rate by model",
546+
query:
547+
"SELECT response_model, round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20",
548+
display: { type: "table", prettyFormatting: true, sorting: [] },
549+
},
506550
},
507551
},
508552
};

apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,12 @@ export type ProjectModelUsageItem = {
229229
totalTokens: number;
230230
avgTtfc: number;
231231
avgTps: number;
232+
/** Input tokens (used as the denominator for the cache read rate). */
233+
inputTokens: number;
234+
/** Input tokens served from the provider's prompt cache. */
235+
cachedReadTokens: number;
236+
/** Actual (discounted) cost of those cached read tokens. */
237+
cachedReadCost: number;
232238
};
233239

234240
// --- ClickHouse schemas for user metrics ---
@@ -256,6 +262,9 @@ const ProjectModelUsageRow = z.object({
256262
total_tokens: z.coerce.number(),
257263
avg_ttfc: z.coerce.number(),
258264
avg_tps: z.coerce.number(),
265+
input_tokens: z.coerce.number(),
266+
cached_read_tokens: z.coerce.number(),
267+
cached_read_cost: z.coerce.number(),
259268
});
260269

261270
const ModelSparklineRow = z.object({
@@ -661,7 +670,10 @@ export class ModelRegistryPresenter extends BasePresenter {
661670
sum(total_cost) AS total_cost,
662671
sum(total_tokens) AS total_tokens,
663672
round(avg(ms_to_first_chunk), 1) AS avg_ttfc,
664-
round(avg(tokens_per_second), 1) AS avg_tps
673+
round(avg(tokens_per_second), 1) AS avg_tps,
674+
sum(input_tokens) AS input_tokens,
675+
sum(usage_details['input_cached_tokens']) AS cached_read_tokens,
676+
sum(cost_details['input_cached_tokens']) AS cached_read_cost
665677
FROM trigger_dev.llm_metrics_v1
666678
WHERE project_id = {projectId: String}
667679
AND environment_id = {environmentId: String}
@@ -698,6 +710,9 @@ export class ModelRegistryPresenter extends BasePresenter {
698710
totalTokens: r.total_tokens,
699711
avgTtfc: r.avg_ttfc,
700712
avgTps: r.avg_tps,
713+
inputTokens: r.input_tokens,
714+
cachedReadTokens: r.cached_read_tokens,
715+
cachedReadCost: r.cached_read_cost,
701716
}));
702717
}
703718

apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,17 @@ function DetailYourUsageTab({
11251125
{...widgetProps}
11261126
/>
11271127
</div>
1128+
<div className="h-[120px]">
1129+
<MetricWidget
1130+
widgetKey={`${modelName}-user-cached-tokens`}
1131+
title="Cached tokens"
1132+
query={`SELECT sum(cached_read_tokens) AS cached_tokens FROM llm_metrics WHERE response_model = '${escapeTSQL(
1133+
modelName
1134+
)}'`}
1135+
config={bignumberConfig("cached_tokens", { aggregation: "sum", abbreviate: true })}
1136+
{...widgetProps}
1137+
/>
1138+
</div>
11281139

11291140
<div className="h-[400px]">
11301141
<MetricWidget
@@ -1156,6 +1167,22 @@ function DetailYourUsageTab({
11561167
{...widgetProps}
11571168
/>
11581169
</div>
1170+
<div className="h-[400px]">
1171+
<MetricWidget
1172+
widgetKey={`${modelName}-user-cache-hit`}
1173+
title="Cache hit rate over time"
1174+
query={`SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics WHERE response_model = '${escapeTSQL(
1175+
modelName
1176+
)}' GROUP BY timeBucket ORDER BY timeBucket`}
1177+
config={chartConfig({
1178+
chartType: "line",
1179+
xAxisColumn: "timebucket",
1180+
yAxisColumns: ["cache_hit_pct"],
1181+
aggregation: "avg",
1182+
})}
1183+
{...widgetProps}
1184+
/>
1185+
</div>
11591186
<div className="h-[400px]">
11601187
<MetricWidget
11611188
widgetKey={`${modelName}-user-tasks`}
@@ -1246,10 +1273,10 @@ function YourModelsTab({
12461273
</div>
12471274
<div className="h-[312px]">
12481275
<MetricWidget
1249-
widgetKey="your-models-calls-by-model"
1250-
title="Calls by model"
1251-
query={`SELECT response_model, count() AS calls FROM llm_metrics GROUP BY response_model ORDER BY calls DESC LIMIT 10`}
1252-
config={chartConfig({ chartType: "bar", xAxisColumn: "response_model", yAxisColumns: ["calls"] })}
1276+
widgetKey="your-models-calls-over-time"
1277+
title="Calls over time"
1278+
query={`SELECT timeBucket(), count() AS calls FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket`}
1279+
config={chartConfig({ chartType: "bar", xAxisColumn: "timebucket", yAxisColumns: ["calls"] })}
12531280
{...widgetProps}
12541281
/>
12551282
</div>
@@ -1270,22 +1297,25 @@ function YourModelsTab({
12701297
<Table className="table-fixed">
12711298
<TableHeader>
12721299
<TableRow>
1273-
<TableHeaderCell className="w-[20%]">Model</TableHeaderCell>
1274-
<TableHeaderCell className="w-[13%]">Provider</TableHeaderCell>
1275-
<TableHeaderCell className="w-[9%]" alignment="right">
1300+
<TableHeaderCell className="w-[18%]">Model</TableHeaderCell>
1301+
<TableHeaderCell className="w-[12%]">Provider</TableHeaderCell>
1302+
<TableHeaderCell className="w-[8%]" alignment="right">
12761303
Calls
12771304
</TableHeaderCell>
1278-
<TableHeaderCell className="w-[9%]" alignment="right">
1305+
<TableHeaderCell className="w-[8%]" alignment="right">
12791306
Cost
12801307
</TableHeaderCell>
12811308
<TableHeaderCell className="w-[10%]" alignment="right">
1309+
Cache savings
1310+
</TableHeaderCell>
1311+
<TableHeaderCell className="w-[9%]" alignment="right">
12821312
Avg TTFC
12831313
</TableHeaderCell>
1284-
<TableHeaderCell className="w-[12%]" alignment="right">
1314+
<TableHeaderCell className="w-[11%]" alignment="right">
12851315
Avg tokens/sec
12861316
</TableHeaderCell>
1287-
<TableHeaderCell className="w-[13.5%]">Calls trend</TableHeaderCell>
1288-
<TableHeaderCell className="w-[13.5%]">Tokens trend</TableHeaderCell>
1317+
<TableHeaderCell className="w-[12%]">Calls trend</TableHeaderCell>
1318+
<TableHeaderCell className="w-[12%]">Tokens trend</TableHeaderCell>
12891319
</TableRow>
12901320
</TableHeader>
12911321
<TableBody>
@@ -1294,6 +1324,13 @@ function YourModelsTab({
12941324
const provider = catalogItem?.provider ?? u.genAiSystem;
12951325
const displayId = catalogItem?.displayId ?? `${provider}:${u.responseModel}`;
12961326
const select = catalogItem ? () => onSelectModel(catalogItem) : undefined;
1327+
// Savings = cached reads valued at the normal input rate minus what
1328+
// they actually cost. Needs the model's input price from the catalog.
1329+
const inputPrice = catalogItem?.inputPrice ?? null;
1330+
const cacheSavings =
1331+
inputPrice != null && u.cachedReadTokens > 0
1332+
? Math.max(0, u.cachedReadTokens * inputPrice - u.cachedReadCost)
1333+
: null;
12971334
return (
12981335
<TableRow
12991336
key={u.responseModel}
@@ -1314,6 +1351,13 @@ function YourModelsTab({
13141351
<TableCell onClick={select} alignment="right" className="tabular-nums">
13151352
{formatModelCost(u.totalCost)}
13161353
</TableCell>
1354+
<TableCell
1355+
onClick={select}
1356+
alignment="right"
1357+
className="tabular-nums text-emerald-400/80"
1358+
>
1359+
{cacheSavings != null ? formatModelCost(cacheSavings) : "—"}
1360+
</TableCell>
13171361
<TableCell onClick={select} alignment="right" className="tabular-nums">
13181362
{u.avgTtfc > 0 ? `${u.avgTtfc.toFixed(0)}ms` : "—"}
13191363
</TableCell>

0 commit comments

Comments
 (0)