diff --git a/.server-changes/models-page-usage-tabs.md b/.server-changes/models-page-usage-tabs.md new file mode 100644 index 00000000000..6b37b45dd20 --- /dev/null +++ b/.server-changes/models-page-usage-tabs.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +The Models page now has a Your models tab showing your project's model usage (cost, calls, latency, prompt-cache savings, and trend sparklines over a selectable time range) alongside the full model library, ordered by provider relevance and release date. The AI metrics dashboard also gains a caching section with cache hit rate, cached tokens, and estimated savings. diff --git a/apps/webapp/app/assets/icons/AiProviderIcons.tsx b/apps/webapp/app/assets/icons/AiProviderIcons.tsx index 85a01b98d63..2be3fe38ed7 100644 --- a/apps/webapp/app/assets/icons/AiProviderIcons.tsx +++ b/apps/webapp/app/assets/icons/AiProviderIcons.tsx @@ -46,8 +46,8 @@ export function LlamaIcon({ className }: IconProps) { xmlns="http://www.w3.org/2000/svg" > @@ -58,10 +58,10 @@ export function LlamaIcon({ className }: IconProps) { export function DeepseekIcon({ className }: IconProps) { return ( - + @@ -99,8 +99,8 @@ export function PerplexityIcon({ className }: IconProps) { return ( @@ -112,32 +112,32 @@ export function CerebrasIcon({ className }: IconProps) { return ( diff --git a/apps/webapp/app/components/primitives/UsageSparkline.tsx b/apps/webapp/app/components/primitives/UsageSparkline.tsx new file mode 100644 index 00000000000..2ffc1936a1d --- /dev/null +++ b/apps/webapp/app/components/primitives/UsageSparkline.tsx @@ -0,0 +1,123 @@ +import { + Bar, + BarChart, + ReferenceLine, + ResponsiveContainer, + Tooltip, + YAxis, + type TooltipProps, +} from "recharts"; +import { cn } from "~/utils/cn"; +import { formatDateTime } from "./DateTime"; +import { Header3 } from "./Headers"; +import TooltipPortal from "./TooltipPortal"; + +type UsageDatum = { date: Date; count: number }; + +type UnitLabel = { singular: string; plural: string }; + +export type UsageSparklineProps = { + /** Equal-width time buckets, oldest first. */ + data?: number[]; + /** Epoch ms of the first bucket's start. When omitted, the last bucket is anchored to now. */ + bucketStartMs?: number; + /** Width of each bucket in ms. Defaults to one hour. */ + bucketIntervalMs?: number; + /** Bar colour. Defaults to blue. */ + color?: string; + /** Unit shown in the tooltip (e.g. calls, tokens). */ + unitLabel?: UnitLabel; + /** Format the trailing total. Defaults to `toLocaleString`. */ + formatTotal?: (total: number) => string; + /** Class for the trailing total label. */ + totalClassName?: string; +}; + +/** + * Inline 24h sparkline for list rows. Renders a small bar chart plus a trailing + * total, or an em-dash when there's no data. Shared by the prompts and models + * lists — keep it presentational (the caller supplies the zero-filled buckets). + */ +export function UsageSparkline({ + data, + bucketStartMs, + bucketIntervalMs, + color = "#3B82F6", + unitLabel = { singular: "call", plural: "calls" }, + formatTotal, + totalClassName = "text-blue-400", +}: UsageSparklineProps) { + if (!data || data.every((v) => v === 0)) { + return ; + } + + const total = data.reduce((a, b) => a + b, 0); + const max = Math.max(...data); + + // Map each bucket to a dated point so the tooltip can show the window it + // represents. Buckets are `intervalMs` wide; if the caller didn't pass the + // first bucket's start, anchor the last bucket to now (hourly default). + const intervalMs = bucketIntervalMs ?? 3600_000; + const startMs = bucketStartMs ?? Date.now() - (data.length - 1) * intervalMs; + const chartData: UsageDatum[] = data.map((count, i) => ({ + date: new Date(startMs + i * intervalMs), + count, + })); + + return ( +
+
+ + + + } + allowEscapeViewBox={{ x: true, y: true }} + wrapperStyle={{ zIndex: 1000 }} + animationDuration={0} + /> + + + {max > 0 && ( + + )} + + +
+ + {formatTotal ? formatTotal(total) : total.toLocaleString()} + +
+ ); +} + +function UsageSparklineTooltip({ + active, + payload, + unitLabel, +}: TooltipProps & { unitLabel: UnitLabel }) { + if (!active || !payload || payload.length === 0) return null; + const entry = payload[0].payload as UsageDatum; + const date = entry.date instanceof Date ? entry.date : new Date(entry.date); + const formattedDate = formatDateTime(date, "UTC", [], false, true); + return ( + +
+ {formattedDate} +
+ {entry.count.toLocaleString()}{" "} + + {entry.count === 1 ? unitLabel.singular : unitLabel.plural} + +
+
+
+ ); +} diff --git a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts index 06b5ee2d406..03561ee7e20 100644 --- a/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts +++ b/apps/webapp/app/presenters/v3/BuiltInDashboards.server.ts @@ -252,8 +252,13 @@ const llmDashboard: BuiltInDashboard = { { i: "llm-cost-user", x: 6, y: 92, w: 6, h: 13 }, // Efficiency section { i: "llm-title-efficiency", x: 0, y: 105, w: 12, h: 2, minH: 2, maxH: 2 }, - { i: "llm-cost-operation", x: 0, y: 107, w: 6, h: 13 }, - { i: "llm-cache-util", x: 6, y: 107, w: 6, h: 13 }, + { i: "llm-cost-operation", x: 0, y: 107, w: 12, h: 13 }, + // Caching section + { i: "llm-title-caching", x: 0, y: 120, w: 12, h: 2, minH: 2, maxH: 2 }, + { i: "llm-cache-hit", x: 0, y: 122, w: 6, h: 13 }, + { i: "llm-cache-tokens", x: 6, y: 122, w: 6, h: 13 }, + { i: "llm-cache-savings", x: 0, y: 135, w: 6, h: 13 }, + { i: "llm-cache-by-model", x: 6, y: 135, w: 6, h: 13 }, ], widgets: { "llm-cost": { @@ -487,10 +492,11 @@ const llmDashboard: BuiltInDashboard = { aggregation: "sum", }, }, - "llm-cache-util": { - title: "Cache utilization", + "llm-title-caching": { title: "Caching", query: "", display: { type: "title" } }, + "llm-cache-hit": { + title: "Cache hit rate over time", query: - "SELECT\r\n timeBucket(),\r\n round(countIf(cached_read_tokens > 0) * 100.0 / count(), 1) AS cache_hit_pct,\r\n round(avg(cached_read_tokens), 0) AS avg_cached_tokens\r\nFROM\r\n llm_metrics\r\nGROUP BY\r\n timeBucket\r\nORDER BY\r\n timeBucket", + "SELECT timeBucket(), round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", display: { type: "chart", chartType: "line", @@ -503,6 +509,44 @@ const llmDashboard: BuiltInDashboard = { aggregation: "avg", }, }, + "llm-cache-tokens": { + title: "Cached tokens over time", + query: + "SELECT timeBucket(), sum(cached_read_tokens) AS cache_reads, sum(cache_creation_tokens) AS cache_writes FROM llm_metrics GROUP BY timeBucket ORDER BY timeBucket", + display: { + type: "chart", + chartType: "bar", + xAxisColumn: "timebucket", + yAxisColumns: ["cache_reads", "cache_writes"], + groupByColumn: null, + stacked: true, + sortByColumn: null, + sortDirection: "asc", + aggregation: "sum", + }, + }, + "llm-cache-savings": { + title: "Cache savings over time", + query: + "SELECT timeBucket(), round(sum(cached_read_tokens) * (sum(input_cost) / (sum(input_tokens) + 1)) - sum(cached_read_cost), 4) AS cache_savings FROM llm_metrics WHERE cached_read_tokens > 0 GROUP BY timeBucket ORDER BY timeBucket", + display: { + type: "chart", + chartType: "bar", + xAxisColumn: "timebucket", + yAxisColumns: ["cache_savings"], + groupByColumn: null, + stacked: false, + sortByColumn: null, + sortDirection: "asc", + aggregation: "sum", + }, + }, + "llm-cache-by-model": { + title: "Cache hit rate by model", + query: + "SELECT response_model, round(sum(cached_read_tokens) * 100.0 / (sum(input_tokens) + sum(cached_read_tokens)), 1) AS cache_hit_pct, sum(cached_read_tokens) AS cached_tokens FROM llm_metrics GROUP BY response_model ORDER BY cached_tokens DESC LIMIT 20", + display: { type: "table", prettyFormatting: true, sorting: [] }, + }, }, }, }; diff --git a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts index 16a0aa75046..b8565b87011 100644 --- a/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ModelRegistryPresenter.server.ts @@ -52,6 +52,64 @@ export function formatModelId(provider: string, modelName: string): string { return `${provider}:${modelName}`; } +/** + * Hardcoded provider display priority (most relevant first). Providers not in + * this list fall back to alphabetical order after the listed ones. Within a + * provider, models are always sorted by release date (newest first). + */ +const PROVIDER_IMPORTANCE = [ + "anthropic", + "openai", + "google", + "xai", + "meta", + "mistral", + "deepseek", +]; + +function providerRank(provider: string): number { + const index = PROVIDER_IMPORTANCE.indexOf(provider); + return index === -1 ? PROVIDER_IMPORTANCE.length : index; +} + +/** + * Pick a sparkline bucket size (in seconds) for a given range so the rendered + * sparkline stays a readable ~24-52 bars. Tuned for the small inline charts in + * the "Your models" list — coarser than the full-size dashboard charts. + */ +function sparklineBucketSeconds(rangeMs: number): number { + const MIN = 60; + const HOUR = 3600; + const DAY = 86400; + const ms = (s: number) => s * 1000; + if (rangeMs <= ms(HOUR)) return 2 * MIN; + if (rangeMs <= ms(3 * HOUR)) return 5 * MIN; + if (rangeMs <= ms(6 * HOUR)) return 15 * MIN; + if (rangeMs <= ms(DAY)) return HOUR; + if (rangeMs <= ms(3 * DAY)) return 2 * HOUR; + if (rangeMs <= ms(7 * DAY)) return 6 * HOUR; + if (rangeMs <= ms(14 * DAY)) return 12 * HOUR; + if (rangeMs <= ms(30 * DAY)) return DAY; + if (rangeMs <= ms(90 * DAY)) return 3 * DAY; + return 7 * DAY; +} + +/** + * Generate the ordered bucket-start keys for [from, to] at the given interval, + * epoch-aligned in UTC to exactly match ClickHouse's + * `toStartOfInterval(col, INTERVAL n SECOND)` output strings ("YYYY-MM-DD HH:MM:SS"). + */ +function sparklineBucketKeys(from: Date, to: Date, intervalSeconds: number): string[] { + const intervalMs = intervalSeconds * 1000; + const start = Math.floor(from.getTime() / intervalMs) * intervalMs; + const end = Math.floor(to.getTime() / intervalMs) * intervalMs; + const keys: string[] = []; + for (let t = start; t <= end; t += intervalMs) { + keys.push(new Date(t).toISOString().slice(0, 19).replace("T", " ")); + } + return keys; +} + // --- Types --- export type ModelCatalogItem = { @@ -162,6 +220,23 @@ export type PopularModel = { ttfcP50: number; }; +/** A model with usage in a specific project/environment (the "Your models" list). */ +export type ProjectModelUsageItem = { + responseModel: string; + genAiSystem: string; + calls: number; + totalCost: number; + totalTokens: number; + avgTtfc: number; + avgTps: number; + /** Input tokens (used as the denominator for the cache read rate). */ + inputTokens: number; + /** Input tokens served from the provider's prompt cache. */ + cachedReadTokens: number; + /** Actual (discounted) cost of those cached read tokens. */ + cachedReadCost: number; +}; + // --- ClickHouse schemas for user metrics --- const UserMetricsSummaryRow = z.object({ @@ -179,6 +254,25 @@ const UserTaskBreakdownRow = z.object({ cost: z.coerce.number(), }); +const ProjectModelUsageRow = z.object({ + response_model: z.string(), + gen_ai_system: z.string(), + calls: z.coerce.number(), + total_cost: z.coerce.number(), + total_tokens: z.coerce.number(), + avg_ttfc: z.coerce.number(), + avg_tps: z.coerce.number(), + input_tokens: z.coerce.number(), + cached_read_tokens: z.coerce.number(), + cached_read_cost: z.coerce.number(), +}); + +const ModelSparklineRow = z.object({ + response_model: z.string(), + bucket: z.string(), + val: z.coerce.number(), +}); + // --- Presenter --- export class ModelRegistryPresenter extends BasePresenter { @@ -296,7 +390,12 @@ export class ModelRegistryPresenter extends BasePresenter { } return Array.from(groups.entries()) - .sort(([a], [b]) => a.localeCompare(b)) + .sort(([a], [b]) => { + const rankA = providerRank(a); + const rankB = providerRank(b); + if (rankA !== rankB) return rankA - rankB; + return a.localeCompare(b); + }) .map(([provider, models]) => ({ provider, models: models.sort((a, b) => { @@ -549,4 +648,171 @@ export class ModelRegistryPresenter extends BasePresenter { ttfcP50: r.ttfc_p50, })); } + + /** + * Models that had usage in a specific project/environment over the window, + * with aggregate metrics. This is the tenant-scoped "Your models" list (as + * opposed to the cross-tenant getPopularModels). + */ + async getProjectModelUsage( + projectId: string, + environmentId: string, + startTime: Date, + endTime: Date + ): Promise { + const queryFn = this.clickhouse.reader.query({ + name: "modelRegistryProjectUsage", + query: ` + SELECT + response_model, + any(gen_ai_system) AS gen_ai_system, + count() AS calls, + sum(total_cost) AS total_cost, + sum(total_tokens) AS total_tokens, + round(avg(ms_to_first_chunk), 1) AS avg_ttfc, + round(avg(tokens_per_second), 1) AS avg_tps, + sum(input_tokens) AS input_tokens, + sum(usage_details['input_cached_tokens']) AS cached_read_tokens, + sum(cost_details['input_cached_tokens']) AS cached_read_cost + FROM trigger_dev.llm_metrics_v1 + WHERE project_id = {projectId: String} + AND environment_id = {environmentId: String} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + AND response_model != '' + GROUP BY response_model + ORDER BY calls DESC + LIMIT 100 + `, + params: z.object({ + projectId: z.string(), + environmentId: z.string(), + startTime: z.string(), + endTime: z.string(), + }), + schema: ProjectModelUsageRow, + }); + + const [error, rows] = await queryFn({ + projectId, + environmentId, + startTime: formatDateForCH(startTime), + endTime: formatDateForCH(endTime), + }); + + if (error || !rows) return []; + + return rows.map((r) => ({ + responseModel: r.response_model, + genAiSystem: r.gen_ai_system, + calls: r.calls, + totalCost: r.total_cost, + totalTokens: r.total_tokens, + avgTtfc: r.avg_ttfc, + avgTps: r.avg_tps, + inputTokens: r.input_tokens, + cachedReadTokens: r.cached_read_tokens, + cachedReadCost: r.cached_read_cost, + })); + } + + /** + * Call-count and total-token sparklines per response_model over [from, to], + * matching the window the "Your models" charts and table use. The bucket size + * adapts to the range (see sparklineBucketSeconds) so a sparkline stays a + * readable ~24-52 bars regardless of the selected period. Zero-filled. + */ + async getModelUsageSparklines( + environmentId: string, + responseModels: string[], + from: Date, + to: Date + ): Promise<{ + calls: Record; + tokens: Record; + bucketIntervalMs: number; + bucketStartMs: number; + }> { + const intervalSeconds = sparklineBucketSeconds(to.getTime() - from.getTime()); + const intervalMs = intervalSeconds * 1000; + // Epoch-aligned start of the first bucket, matching sparklineBucketKeys and + // ClickHouse toStartOfInterval. Returned so the sparkline tooltip can label + // each bar with its true time rather than assuming hourly buckets. + const bucketStartMs = Math.floor(from.getTime() / intervalMs) * intervalMs; + + if (responseModels.length === 0) { + return { calls: {}, tokens: {}, bucketIntervalMs: intervalMs, bucketStartMs }; + } + + const bucketKeys = sparklineBucketKeys(from, to, intervalSeconds); + + // intervalSeconds is a server-derived integer from a fixed ladder, so it's + // safe to inline. Epoch-aligned SECOND buckets match the JS keys above. + const buildQuery = (valueExpr: string, name: string) => + this.clickhouse.reader.query({ + name, + query: ` + SELECT + response_model, + toStartOfInterval(start_time, INTERVAL ${intervalSeconds} SECOND) AS bucket, + ${valueExpr} AS val + FROM trigger_dev.llm_metrics_v1 + WHERE environment_id = {environmentId: String} + AND response_model IN {responseModels: Array(String)} + AND start_time >= {startTime: String} + AND start_time <= {endTime: String} + GROUP BY response_model, bucket + ORDER BY response_model, bucket + `, + params: z.object({ + environmentId: z.string(), + responseModels: z.array(z.string()), + startTime: z.string(), + endTime: z.string(), + }), + schema: ModelSparklineRow, + }); + + const queryParams = { + environmentId, + responseModels, + startTime: formatDateForCH(from), + endTime: formatDateForCH(to), + }; + + const [callsResult, tokensResult] = await Promise.all([ + buildQuery("count()", "modelCallSparklines")(queryParams), + buildQuery("sum(total_tokens)", "modelTokenSparklines")(queryParams), + ]); + + return { + calls: this.#buildSparklineMap(callsResult, responseModels, bucketKeys), + tokens: this.#buildSparklineMap(tokensResult, responseModels, bucketKeys), + bucketIntervalMs: intervalMs, + bucketStartMs, + }; + } + + /** Convert a sparkline query result to a zero-filled bucket map. */ + #buildSparklineMap( + queryResult: + | [Error, null] + | [null, { response_model: string; bucket: string; val: number }[]], + keys: string[], + bucketKeys: string[] + ): Record { + const [error, rows] = queryResult; + if (error || !rows) return {}; + + const rowMap = new Map(); + for (const row of rows) { + rowMap.set(`${row.response_model}|${row.bucket}`, row.val); + } + + const result: Record = {}; + for (const key of keys) { + result[key] = bucketKeys.map((b) => rowMap.get(`${key}|${b}`) ?? 0); + } + return result; + } } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index 8785c9a2dc2..a3a60b88e37 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -1,11 +1,17 @@ import { AdjustmentsHorizontalIcon, + ArrowTopRightOnSquareIcon, CheckIcon, CubeIcon, XMarkIcon, } from "@heroicons/react/20/solid"; import * as Ariakit from "@ariakit/react"; -import { Form, type MetaFunction, useFetcher } from "@remix-run/react"; +import { + Form, + type MetaFunction, + type ShouldRevalidateFunctionArgs, + useFetcher, +} from "@remix-run/react"; import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { AnimatePresence, motion } from "framer-motion"; import { useEffect, useMemo, useRef, useState } from "react"; @@ -27,7 +33,7 @@ import { InlineCode } from "~/components/code/InlineCode"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; import { Badge } from "~/components/primitives/Badge"; -import { Button } from "~/components/primitives/Buttons"; +import { Button, LinkButton } from "~/components/primitives/Buttons"; import { Callout } from "~/components/primitives/Callout"; import { Checkbox } from "~/components/primitives/Checkbox"; import { DateTime } from "~/components/primitives/DateTime"; @@ -61,7 +67,13 @@ import { TableRow, } from "~/components/primitives/Table"; import { TabButton, TabContainer } from "~/components/primitives/Tabs"; -import { appliedSummary } from "~/components/runs/v3/SharedFilters"; +import { + appliedSummary, + TimeFilter, + type TimeFilterApplyValues, + timeFilterFromTo, +} from "~/components/runs/v3/SharedFilters"; +import { parseFiniteInt } from "~/utils/searchParams"; import { useSearchParams } from "~/hooks/useSearchParam"; import { useShortcutKeys } from "~/hooks/useShortcutKeys"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; @@ -71,6 +83,7 @@ import { type ModelCatalogItem, type ModelComparisonItem, type PopularModel, + type ProjectModelUsageItem, ModelRegistryPresenter, } from "~/presenters/v3/ModelRegistryPresenter.server"; import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; @@ -78,7 +91,7 @@ import { requireUserId } from "~/services/session.server"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; -import { EnvironmentParamSchema, v3ModelComparePath } from "~/utils/pathBuilder"; +import { EnvironmentParamSchema, v3BuiltInDashboardPath, v3ModelComparePath } from "~/utils/pathBuilder"; import { formatModelPrice, formatTokenCount, @@ -88,6 +101,7 @@ import { } from "~/utils/modelFormatters"; import { formatNumberCompact } from "~/utils/numberFormatter"; import { Spinner } from "~/components/primitives/Spinner"; +import { UsageSparkline } from "~/components/primitives/UsageSparkline"; import { MetricWidget } from "~/routes/resources.metric"; import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget"; @@ -116,9 +130,32 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const presenter = new ModelRegistryPresenter(clickhouse); const catalog = await presenter.getModelCatalog(); - const now = new Date(); - const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); - const popularModels = await presenter.getPopularModels(sevenDaysAgo, now, 50); + // Shared time range for the "Your models" tab (charts, usage table, sparklines). + // Mirrors the agent detail page: URL-driven period / from / to via TimeFilter. + const url = new URL(request.url); + const period = url.searchParams.get("period") ?? undefined; + const from = parseFiniteInt(url.searchParams.get("from")); + const to = parseFiniteInt(url.searchParams.get("to")); + const time = timeFilterFromTo({ period, from, to, defaultPeriod: "7d" }); + + // popularModels powers the library tab's cross-tenant p50 TTFC column — a + // stable "typical latency" reference, so it always uses a fixed 7-day window + // independent of the Your models time selector (the library tab has none). + const popularTo = new Date(); + const popularFrom = new Date(popularTo.getTime() - 7 * 24 * 60 * 60 * 1000); + + // projectUsage = tenant-scoped models with usage in this env (the "Your models" tab). + const [popularModels, projectUsage] = await Promise.all([ + presenter.getPopularModels(popularFrom, popularTo, 50), + presenter.getProjectModelUsage(project.id, environment.id, time.from, time.to), + ]); + + const usageSparklines = await presenter.getModelUsageSparklines( + environment.id, + projectUsage.map((u) => u.responseModel), + time.from, + time.to + ); const allProviders = catalog.map((g) => g.provider); const allFeatures = Array.from( @@ -128,6 +165,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { return typedjson({ catalog, popularModels, + projectUsage, + usageSparklines, allProviders, allFeatures, organizationId: project.organizationId, @@ -136,6 +175,30 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { }); }; +export function shouldRevalidate({ + currentUrl, + nextUrl, + defaultShouldRevalidate, +}: ShouldRevalidateFunctionArgs) { + // The active tab is persisted in the URL (?tab=), but no loader data depends + // on it — so switching tabs must not refetch. Any other change (a different + // project/environment in the path, or a period/from/to param) revalidates as + // normal, since the loader data is scoped to the path params + time range. + const normalize = (url: URL) => { + const params = new URLSearchParams(url.search); + params.delete("tab"); + params.sort(); + return params.toString(); + }; + if ( + currentUrl.pathname === nextUrl.pathname && + normalize(currentUrl) === normalize(nextUrl) + ) { + return false; + } + return defaultShouldRevalidate; +} + const providerIcons: Record JSX.Element> = { openai: OpenAIIcon, anthropic: AnthropicIcon, @@ -154,6 +217,16 @@ function providerIcon(slug: string) { return ; } +const NEW_MODEL_WINDOW_DAYS = 7; + +/** True if the model was released within the last NEW_MODEL_WINDOW_DAYS. */ +function isNewModel(releaseDate: string | null): boolean { + if (!releaseDate) return false; + const released = new Date(releaseDate).getTime(); + if (Number.isNaN(released)) return false; + return Date.now() - released <= NEW_MODEL_WINDOW_DAYS * 24 * 60 * 60 * 1000; +} + // --- Filter Components --- const providerShortcut = { key: "p" }; @@ -468,7 +541,10 @@ function ModelsList({ /> - {model.displayId} + + {model.displayId} + {isNewModel(model.releaseDate) && New} + @@ -768,14 +844,16 @@ function chartConfig(opts: { xAxisColumn: string; yAxisColumns: string[]; aggregation?: "sum" | "avg"; + stacked?: boolean; + groupByColumn?: string | null; }): QueryWidgetConfig { return { type: "chart", chartType: opts.chartType, xAxisColumn: opts.xAxisColumn, yAxisColumns: opts.yAxisColumns, - groupByColumn: null, - stacked: false, + groupByColumn: opts.groupByColumn ?? null, + stacked: opts.stacked ?? false, sortByColumn: null, sortDirection: "asc", aggregation: opts.aggregation ?? "sum", @@ -784,17 +862,21 @@ function chartConfig(opts: { type DetailTab = "overview" | "usage"; +type ModelsTab = "yours" | "library"; + function ModelDetailPanel({ model, organizationId, projectId, environmentId, + aiMetricsBasePath, onClose, }: { model: ModelCatalogItem; organizationId: string; projectId: string; environmentId: string; + aiMetricsBasePath: string; onClose: () => void; }) { const [tab, setTab] = useState("overview"); @@ -840,6 +922,7 @@ function ModelDetailPanel({ organizationId={organizationId} projectId={projectId} environmentId={environmentId} + aiMetricsBasePath={aiMetricsBasePath} /> )} @@ -947,28 +1030,61 @@ function DetailYourUsageTab({ organizationId, projectId, environmentId, + aiMetricsBasePath, }: { modelName: string; organizationId: string; projectId: string; environmentId: string; + aiMetricsBasePath: string; }) { + // Inspector-local range, independent of the page-level "Your models" range. + const [range, setRange] = useState({ period: "7d" }); + const widgetProps = { organizationId, projectId, environmentId, scope: "environment" as const, - period: "7d", - from: null, - to: null, + period: range.from && range.to ? null : range.period ?? "7d", + from: range.from ?? null, + to: range.to ?? null, }; + // Deep-link to the AI metrics dashboard pre-filtered to this model, carrying + // the inspector's current range so the dashboard opens on the same window. + const dashboardParams = new URLSearchParams({ models: modelName }); + if (range.from && range.to) { + dashboardParams.set("from", range.from); + dashboardParams.set("to", range.to); + } else if (range.period) { + dashboardParams.set("period", range.period); + } + const aiMetricsHref = `${aiMetricsBasePath}?${dashboardParams.toString()}`; + return (
+
+ + + View in AI metrics + +
+
+ +
+
+ +
; + tokenSparklines: Record; + bucketStartMs: number; + bucketIntervalMs: number; + organizationId: string; + projectId: string; + environmentId: string; + period: string | null; + from: string | null; + to: string | null; + modelLookup: Map; + selectedModelId: string | null; + onSelectModel: (model: ModelCatalogItem) => void; + onGoToLibrary: () => void; +}) { + // Drive the charts off the same URL-selected range as the table + sparklines. + // period and from/to are mutually exclusive (TimeFilter enforces this). + const widgetProps = { + organizationId, + projectId, + environmentId, + scope: "environment" as const, + period: from && to ? null : period ?? "7d", + from, + to, + }; + + return ( +
+
+
+ +
+
+ +
+
+ +
+
+ +
+ {usage.length === 0 ? ( +
+

+ No model usage in this environment yet. Models you call from your tasks will appear here + with usage metrics. +

+ +
+ ) : ( + + + + Model + Provider + + Calls + + + Cost + + + Cache savings + + + Avg TTFC + + + Avg tokens/sec + + Calls trend + Tokens trend + + + + {usage.map((u) => { + const catalogItem = modelLookup.get(u.responseModel); + const provider = catalogItem?.provider ?? u.genAiSystem; + const displayId = catalogItem?.displayId ?? `${provider}:${u.responseModel}`; + const select = catalogItem ? () => onSelectModel(catalogItem) : undefined; + // Savings = cached reads valued at the normal input rate minus what + // they actually cost. Needs the model's input price from the catalog. + const inputPrice = catalogItem?.inputPrice ?? null; + const cacheSavings = + inputPrice != null && u.cachedReadTokens > 0 + ? Math.max(0, u.cachedReadTokens * inputPrice - u.cachedReadCost) + : null; + return ( + + + {displayId} + + + + {providerIcon(provider)} + {formatProviderName(provider)} + + + + {formatNumberCompact(u.calls)} + + + {formatModelCost(u.totalCost)} + + + {cacheSavings != null ? formatModelCost(cacheSavings) : "—"} + + + {u.avgTtfc > 0 ? `${u.avgTtfc.toFixed(0)}ms` : "—"} + + + {u.avgTps > 0 ? u.avgTps.toFixed(0) : "—"} + + + + + + formatNumberCompact(t)} + totalClassName="text-emerald-400" + /> + + + ); + })} + +
+ )} +
+
+ ); +} + // --- Main Page --- export default function ModelsPage() { const { catalog, popularModels, + projectUsage, + usageSparklines, allProviders, allFeatures, organizationId, projectId, environmentId, } = useTypedLoaderData(); - const { values: searchValues, value: searchValue } = useSearchParams(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + const aiMetricsBasePath = v3BuiltInDashboardPath(organization, project, environment, "llm"); + const { values: searchValues, value: searchValue, replace } = useSearchParams(); const search = searchValue("search") ?? ""; const selectedProviders = searchValues("providers"); const selectedFeatures = searchValues("features"); + const periodParam = searchValue("period") ?? null; + const fromParam = searchValue("from") ?? null; + const toParam = searchValue("to") ?? null; + // Active tab is persisted in the URL (?tab=) so it survives refresh and is + // shareable. Defaults to "yours" when there's usage, else "library". + const tabParam = searchValue("tab"); + const view: ModelsTab = + tabParam === "library" + ? "library" + : tabParam === "yours" + ? "yours" + : projectUsage.length > 0 + ? "yours" + : "library"; + const setView = (next: ModelsTab) => replace({ tab: next }); const [compareSet, setCompareSet] = useState>(new Set()); const [showAllDetails, setShowAllDetails] = useState(false); const [compareOpen, setCompareOpen] = useState(false); @@ -1117,6 +1476,19 @@ export default function ModelsPage() { const compareModels = useMemo(() => Array.from(compareSet), [compareSet]); const allModels = useMemo(() => catalog.flatMap((g) => g.models), [catalog]); + // Resolve a used response_model (base or dated variant) to its catalog card, + // so a "Your models" row can open the same detail inspector as the library. + const modelLookup = useMemo(() => { + const map = new Map(); + for (const model of allModels) { + map.set(model.modelName, model); + for (const variant of model.variants) { + map.set(variant.modelName, model); + } + } + return map; + }, [allModels]); + return ( @@ -1126,24 +1498,69 @@ export default function ModelsPage() {
- setCompareOpen(true)} - showAllDetails={showAllDetails} - onToggleAllDetails={(checked) => setShowAllDetails(checked)} - /> - +
+ + setView("yours")} + > + Your models + + setView("library")} + > + Model library + + + {view === "yours" && ( +
+ +
+ )} +
+ {view === "yours" ? ( + setView("library")} + /> + ) : ( +
+ setCompareOpen(true)} + showAllDetails={showAllDetails} + onToggleAllDetails={(checked) => setShowAllDetails(checked)} + /> + +
+ )}
setSelectedModel(null)} /> )}