Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion apps/web/src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ export interface ModelEndpoints {
export type ModelEndpointKey = keyof ModelEndpoints;

/**
 * Token categories that are metered and priced independently.
 * `input_cache_write_1h` is kept separate from `input_cache_write` so the two
 * cache-write dimensions can carry different rates.
 */
export type BillingDimension =
  | 'input'
  | 'input_cache_read'
  | 'input_cache_write'
  | 'input_cache_write_1h'
  | 'input_image'
  | 'output'
  | 'output_image';

/** USD per million tokens, keyed by billing dimension. Every dimension is optional. */
export type ModelPricing = Partial<Record<BillingDimension, number>>;

export interface UpstreamModelConfig {
upstreamModelId: string;
Expand Down
11 changes: 6 additions & 5 deletions apps/web/src/components/upstream-edit/ModelEditor.vue
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { computed } from 'vue';
import EndpointsField from './EndpointsField.vue';
import FlagOverridesEditor from './FlagOverridesEditor.vue';
import { configOf, defaultEndpointsForKind, publicIdOf, titleFor, type Row } from './modelRows.ts';
import type { FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
import type { BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
import { Button, Input, Select, Switch } from '@floway-dev/ui';

const props = defineProps<{
Expand Down Expand Up @@ -37,14 +37,15 @@ const kindOptions: { value: ModelKind; label: string }[] = [
// Form-field label for each pricing dimension. All prices are USD per million
// tokens ($/MTok). The two cache-write dimensions are labelled 5m and 1h.
const PRICING_LABELS: Record<string, string> = {
  input: 'Input ($/MTok)',
  input_cache_read: 'Cache Read ($/MTok)',
  input_cache_write: 'Cache Write 5m ($/MTok)',
  input_cache_write_1h: 'Cache Write 1h ($/MTok)',
  input_image: 'Image Input ($/MTok)',
  output: 'Output ($/MTok)',
  output_image: 'Image Output ($/MTok)',
};

// Pricing dimensions listed for each model kind, in order.
// Only chat models include the cache read/write dimensions.
const PRICING_BY_KIND: Record<ModelKind, BillingDimension[]> = {
  chat: ['input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'output'],
  embedding: ['input'],
  image: ['input', 'input_image', 'output', 'output_image'],
};
Expand Down Expand Up @@ -81,7 +82,7 @@ const updateLimit = (
patch({ limits: Object.keys(limits).length > 0 ? limits : undefined });
};

const updateCost = (key: keyof ModelPricing, raw: string | number | null | undefined) => {
const updateCost = (key: BillingDimension, raw: string | number | null | undefined) => {
if (!config.value) return;
const cost = { ...(config.value.cost ?? {}) } as Record<string, unknown>;
const num = parseOptionalNumber(raw);
Expand Down
16 changes: 8 additions & 8 deletions apps/web/src/pages/dashboard/usage.vue
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ import { defineBasicLoader } from 'unplugin-vue-router/data-loaders/basic';
import { computed, ref, watch } from 'vue';

import { callApi, useApi, type ApiClient } from '../../api/client.ts';
import type { BillingDimension } from '../../api/types.ts';
import ChartCanvas from '../../components/charts/ChartCanvas.vue';
import { bucketKeyForUtcHour, chartColor, chartFont, chartXAxisTick, dashboardBuckets, dashboardRangeQuery, type DashboardRange } from '../../components/charts/dashboard-chart.ts';
import UsageSummaryMetric from '../../components/usage/UsageSummaryMetric.vue';
import { useModelsStore } from '../../composables/useModels.ts';
import { useAuthStore } from '../../stores/auth.ts';
import { OverlayScrollbars, Spinner } from '@floway-dev/ui';

type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_image' | 'output' | 'output_image';

interface DisplayUsageRecord {
keyId: string;
keyName?: string;
Expand Down Expand Up @@ -114,6 +113,7 @@ type Metric =
type Range = DashboardRange;

/** Token count recorded for one billing dimension; a missing entry counts as zero. */
const dim = (r: DisplayUsageRecord, k: BillingDimension): number => {
  const count = r.tokens[k];
  return count ?? 0;
};

/** Combined cache-write tokens: `input_cache_write` plus `input_cache_write_1h`. */
const cacheWrite = (r: DisplayUsageRecord): number => {
  const base = dim(r, 'input_cache_write');
  const oneHour = dim(r, 'input_cache_write_1h');
  return base + oneHour;
};

const api = useApi();
const auth = useAuthStore();
Expand Down Expand Up @@ -190,7 +190,7 @@ const tokenSummary = computed(() => {
input += dim(r, 'input');
output += dim(r, 'output');
cacheRead += dim(r, 'input_cache_read');
cacheCreation += dim(r, 'input_cache_write');
cacheCreation += cacheWrite(r);
inputImage += dim(r, 'input_image');
outputImage += dim(r, 'output_image');
}
Expand Down Expand Up @@ -240,12 +240,12 @@ const metricValue = (r: DisplayUsageRecord, metric: Metric): number => {
switch (metric) {
case 'requests': return r.requests;
case 'cost': return r.cost;
case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image') + dim(r, 'output_image');
case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image');
case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + cacheWrite(r) + dim(r, 'input_image') + dim(r, 'output_image');
case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + cacheWrite(r) + dim(r, 'input_image');
case 'output': return dim(r, 'output') + dim(r, 'output_image');
case 'prefill': return dim(r, 'input') + dim(r, 'input_cache_write') + dim(r, 'input_image');
case 'prefill': return dim(r, 'input') + cacheWrite(r) + dim(r, 'input_image');
case 'cached': return dim(r, 'input_cache_read');
case 'cacheCreation': return dim(r, 'input_cache_write');
case 'cacheCreation': return cacheWrite(r);
case 'cachedRate':
case 'cacheHitRate':
return 0;
Expand Down Expand Up @@ -339,7 +339,7 @@ const aggregateTokenRecords = (records: readonly DisplayUsageRecord[], groupKey:
detail.input += dim(r, 'input');
detail.output += dim(r, 'output');
detail.cacheRead += dim(r, 'input_cache_read');
detail.cacheCreation += dim(r, 'input_cache_write');
detail.cacheCreation += dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h');
detail.inputImage += dim(r, 'input_image');
detail.outputImage += dim(r, 'output_image');
detail.cost += r.cost;
Expand Down
49 changes: 49 additions & 0 deletions packages/gateway/migrations/0034_usage_per_ttl_and_tier.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- Add `tier` (Anthropic `usage.speed`, OpenAI `usage.service_tier`) to usage
-- and usage_requests, and `input_cache_write_1h` to the dimension CHECK list.
-- Existing rows backfill with `tier = NULL` so historical aggregations compute
-- identically. SQLite cannot extend a CHECK constraint or a UNIQUE INDEX in
-- place over a new column, so both tables are rebuilt.

-- Step 1: build the replacement `usage` table under a temporary name. It
-- carries the columns copied below plus a new nullable `tier`, and its
-- dimension CHECK now admits 'input_cache_write_1h'.
CREATE TABLE usage_new (
key_id TEXT NOT NULL,
model TEXT NOT NULL,
upstream TEXT,
model_key TEXT NOT NULL,
hour TEXT NOT NULL,
tier TEXT,
dimension TEXT NOT NULL CHECK (dimension IN (
'input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image'
)),
tokens INTEGER NOT NULL DEFAULT 0,
unit_price REAL
);

-- Step 2: copy every existing row across, setting tier = NULL.
INSERT INTO usage_new (key_id, model, upstream, model_key, hour, tier, dimension, tokens, unit_price)
SELECT key_id, model, upstream, model_key, hour, NULL, dimension, tokens, unit_price FROM usage;

-- Step 3: swap the rebuilt table into place under the original name.
DROP TABLE usage;
ALTER TABLE usage_new RENAME TO usage;

-- Step 4: create the indexes on the rebuilt table.
-- Identity is (key, model, upstream, model_key, hour, tier, dimension).
-- COALESCE maps NULL upstream/tier to '' because SQLite treats NULLs as
-- distinct in a UNIQUE index; without it, rows with a NULL tier or upstream
-- would never collide and could be duplicated.
CREATE UNIQUE INDEX idx_usage_dimension_identity
ON usage (key_id, model, COALESCE(upstream, ''), model_key, hour, COALESCE(tier, ''), dimension);
-- Plain index on hour; presumably serves time-range reads — confirm against the query layer.
CREATE INDEX idx_usage_dimension_hour ON usage (hour);

-- Step 1: build the replacement `usage_requests` table under a temporary
-- name. It carries the columns copied below plus a new nullable `tier`.
CREATE TABLE usage_requests_new (
key_id TEXT NOT NULL,
model TEXT NOT NULL,
upstream TEXT,
model_key TEXT NOT NULL,
hour TEXT NOT NULL,
tier TEXT,
requests INTEGER NOT NULL DEFAULT 0
);

-- Step 2: copy every existing row across, setting tier = NULL.
INSERT INTO usage_requests_new (key_id, model, upstream, model_key, hour, tier, requests)
SELECT key_id, model, upstream, model_key, hour, NULL, requests FROM usage_requests;

-- Step 3: swap the rebuilt table into place under the original name.
DROP TABLE usage_requests;
ALTER TABLE usage_requests_new RENAME TO usage_requests;

-- Step 4: create the indexes on the rebuilt table.
-- Identity is (key, model, upstream, model_key, hour, tier). COALESCE maps
-- NULL upstream/tier to '' because SQLite treats NULLs as distinct in a
-- UNIQUE index.
CREATE UNIQUE INDEX idx_usage_requests_identity
ON usage_requests (key_id, model, COALESCE(upstream, ''), model_key, hour, COALESCE(tier, ''));
-- Plain index on hour; presumably serves time-range reads — confirm against the query layer.
CREATE INDEX idx_usage_requests_hour ON usage_requests (hour);
5 changes: 5 additions & 0 deletions packages/gateway/src/app-control_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ test('/api/token-usage scopes to the actor\'s keys when called with an API key',
upstream: null,
modelKey: 'claude-sonnet-4',
hour: '2026-03-15T10',
tier: null,
requests: 2,
tokens: { input: 10, output: 5, input_cache_read: 4, input_cache_write: 1 },
cost: null,
Expand All @@ -118,6 +119,7 @@ test('/api/token-usage scopes to the actor\'s keys when called with an API key',
upstream: null,
modelKey: 'gpt-5',
hour: '2026-03-15T11',
tier: null,
requests: 1,
tokens: { input: 20, output: 8, input_cache_read: 6, input_cache_write: 2 },
cost: null,
Expand Down Expand Up @@ -155,6 +157,7 @@ test('/api/token-usage in self-by-key mode includes per-key metadata for the act
upstream: null,
modelKey: 'gpt-5',
hour: '2026-03-16T10',
tier: null,
requests: 1,
tokens: { input: 20, output: 8 },
cost: null,
Expand Down Expand Up @@ -182,6 +185,7 @@ test('/api/token-usage all-by-user view aggregates across keys per user', async
upstream: null,
modelKey: 'gpt-5',
hour: '2026-03-15T10',
tier: null,
requests: 1,
tokens: { input: 10, output: 5 },
cost: null,
Expand Down Expand Up @@ -213,6 +217,7 @@ test('/api/token-usage merges Claude variants into backend base model records',
keyId: apiKey.id,
hour: '2026-03-17T10',
upstream: 'copilot:1',
tier: null,
requests: 1,
tokens: { input: 10, output: 5, input_cache_read: 2, input_cache_write: 1 },
};
Expand Down
4 changes: 4 additions & 0 deletions packages/gateway/src/control-plane/data-transfer/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,9 @@ const parseUsageRecords = (value: unknown): { type: 'ok'; records: UsageRecord[]
if (typeof record.upstream === 'string' && isLegacyUpstreamIdentity(record.upstream)) {
return { type: 'invalid', index: i, error: 'upstream must use a raw upstream id, not a legacy provider-prefixed identity' };
}
if (record.tier !== undefined && record.tier !== null && typeof record.tier !== 'string') {
return { type: 'invalid', index: i, error: 'record has invalid tier (must be a string or null)' };
}
const tokensResult = parseImportedTokens(record.tokens);
if (tokensResult.type === 'invalid') return { type: 'invalid', index: i, error: 'record has invalid token dimension counts' };
const costResult = parseImportedCost(record.cost);
Expand All @@ -412,6 +415,7 @@ const parseUsageRecords = (value: unknown): { type: 'ok'; records: UsageRecord[]
upstream: record.upstream as string | null,
modelKey: record.modelKey,
hour: record.hour,
tier: (record.tier as string | null | undefined) ?? null,
requests: record.requests,
tokens: tokensResult.tokens,
cost: costResult.cost,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ const USAGE_1: UsageRecord = {
upstream: 'up_copilot_a',
modelKey: 'claude-opus-4.7',
hour: '2026-01-01T10',
tier: null,
requests: 5,
tokens: { input: 1000, output: 500, input_cache_read: 120, input_cache_write: 80 },
cost: null,
Expand All @@ -188,6 +189,7 @@ const USAGE_2: UsageRecord = {
upstream: 'up_azure_a',
modelKey: 'gpt-prod',
hour: '2026-01-01T11',
tier: null,
requests: 3,
tokens: { input: 2000, output: 800, input_cache_read: 200, input_cache_write: 50 },
cost: null,
Expand Down
1 change: 1 addition & 0 deletions packages/gateway/src/control-plane/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ const upstreamModelSchema = z.object({
output: z.number().optional(),
input_cache_read: z.number().optional(),
input_cache_write: z.number().optional(),
input_cache_write_1h: z.number().optional(),
input_image: z.number().optional(),
output_image: z.number().optional(),
}).optional(),
Expand Down
9 changes: 6 additions & 3 deletions packages/gateway/src/control-plane/token-usage/aggregate.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { UsageRecord } from '../../repo/types.ts';
import { BILLING_DIMENSIONS, type BillingDimension, unitPriceForDimension } from '@floway-dev/protocols/common';
import { BILLING_DIMENSIONS, type BillingDimension, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';

export interface DisplayUsageRecord {
keyId: string;
Expand All @@ -22,13 +22,16 @@ export interface DisplayUsageByUserRecord {

// Cost is pure addition over the dimension rows: Σ tokens × unit_price / 1e6.
// No subtraction is needed because the counts are disjoint and each dimension
// already carries its own resolved unit price snapshot.
// already carries its own resolved unit price snapshot. The bucket's tier
// folds into pricing first so per-tier overrides (Anthropic fast mode,
// OpenAI priority/flex) replace base rates before the dimension lookup.
const recordCostUsd = (record: UsageRecord): number => {
const effective = resolveEffectivePricing(record.cost, record.tier);
let total = 0;
for (const dimension of BILLING_DIMENSIONS) {
const tokens = record.tokens[dimension] ?? 0;
if (tokens === 0) continue;
const unitPrice = unitPriceForDimension(record.cost, dimension);
const unitPrice = unitPriceForDimension(effective, dimension);
if (unitPrice !== null) total += tokens * unitPrice;
}
return total / 1e6;
Expand Down
40 changes: 40 additions & 0 deletions packages/gateway/src/control-plane/token-usage/aggregate_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const baseRecord = (overrides: Partial<UsageRecord>): UsageRecord => ({
model: 'claude-opus-4-7',
upstream: 'up_copilot',
modelKey: 'claude-opus-4-7',
tier: null,
requests: 1,
tokens: { input: 100, output: 50 },
cost: opus47Pricing,
Expand Down Expand Up @@ -83,3 +84,42 @@ test('aggregateUsageForDisplay charges image dimensions separately', () => {
// 10 + 5 + 40 + 30 = $85.
assertAlmostEquals(out[0].cost, 85, 1e-9);
});

test('aggregateUsageForDisplay applies the per-tier override when the bucket carries a tier', () => {
  // Base rates: $5 input / $25 output per MTok. The `fast` tier overrides them with $10 / $50.
  const pricing: ModelPricing = {
    input: 5,
    output: 25,
    tiers: { fast: { input: 10, output: 50 } },
  };
  const fastBucket = baseRecord({
    tier: 'fast',
    cost: pricing,
    tokens: { input: 1_000_000, output: 1_000_000 },
  });
  const untieredBucket = baseRecord({
    tier: null,
    cost: pricing,
    tokens: { input: 1_000_000, output: 1_000_000 },
  });

  // Fast bucket: 1M × $10 + 1M × $50 = $60.
  const fastResult = aggregateUsageForDisplay([fastBucket]);
  assertAlmostEquals(fastResult[0].cost, 60, 1e-9);

  // Untiered bucket stays on base rates: 1M × $5 + 1M × $25 = $30.
  const untieredResult = aggregateUsageForDisplay([untieredBucket]);
  assertAlmostEquals(untieredResult[0].cost, 30, 1e-9);
});

test('aggregateUsageForDisplay leaves base pricing alone when the tier has no override entry', () => {
  // Only `fast` has an override; the record below carries `priority`.
  const pricing: ModelPricing = {
    input: 5,
    output: 25,
    tiers: { fast: { input: 10, output: 50 } },
  };
  const priorityBucket = baseRecord({
    tier: 'priority',
    cost: pricing,
    tokens: { input: 1_000_000 },
  });

  const result = aggregateUsageForDisplay([priorityBucket]);

  // No matching tier entry, so the base $5 input rate applies: 1M × $5 = $5.
  assertAlmostEquals(result[0].cost, 5, 1e-9);
});

test('aggregateUsageForDisplay prices the input_cache_write_1h dimension via the 1h-specific rate', () => {
  // The two cache-write dimensions carry different rates; only the 1h one has tokens here.
  const pricing: ModelPricing = {
    input: 5,
    input_cache_write: 6.25,
    input_cache_write_1h: 10,
    output: 25,
  };
  const oneHourOnly = baseRecord({ cost: pricing, tokens: { input_cache_write_1h: 1_000_000 } });

  const result = aggregateUsageForDisplay([oneHourOnly]);

  // 1M × $10 = $10, not the $6.25 `input_cache_write` rate.
  assertAlmostEquals(result[0].cost, 10, 1e-9);
});
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const seedUsage = async (
upstream: 'up_test',
modelKey: model,
hour,
tier: null,
requests,
tokens: { input: 100, output: 50 },
cost: null,
Expand Down
12 changes: 8 additions & 4 deletions packages/gateway/src/data-plane/llm/chat-completions/respond.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { streamSSE } from 'hono/streaming';

import { CHAT_COMPLETIONS_MISSING_TERMINAL_MESSAGE, collectChatCompletionsProtocolEventsToResult } from './events/to-result.ts';
import { chatCompletionsProtocolFrameToSSEFrame } from './events/to-sse.ts';
import { tokenUsage } from '../../shared/telemetry/usage.ts';
import { normalizeOpenAiServiceTier, tokenUsage } from '../../shared/telemetry/usage.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import { SourceStreamState, eventResultMetadata, plainResultToResponse, recordPerformance, recordUsage } from '../shared/respond.ts';
import { type StreamCompletion, writeSSEFrames } from '../shared/stream/sse.ts';
Expand Down Expand Up @@ -44,7 +44,7 @@ export const respondChatCompletions = async (
try {
const response = await collectChatCompletionsProtocolEventsToResult(frames);
const metadata = await eventResultMetadata(result);
const usage = response.usage ? tokenUsageFromChatCompletionsUsage(response.usage) : null;
const usage = response.usage ? tokenUsageFromChatCompletionsUsage(response.usage, response.service_tier) : null;
await recordUsage(ctx, metadata.modelIdentity, usage);
recordPerformance(ctx, metadata.performance, state.failed);
return { success: true, response: Response.json(response) };
Expand Down Expand Up @@ -80,14 +80,18 @@ export const respondChatCompletions = async (

// OpenAI Chat usage reports prompt_tokens inclusive of cached and
// cache-creation tokens; subtract them to recover the disjoint bare input.
// The top-level `service_tier` echoes the actual processing tier; surface it
// as the `tier` slot so per-tier pricing overrides resolve at recording time.
//
// `serviceTier` is optional so callers with no tier information can omit it.
// The `tier` key is attached only when normalization returns a non-null value.
const tokenUsageFromChatCompletionsUsage = (u: NonNullable<ChatCompletionsResult['usage']>, serviceTier?: string | null) => {
  // Missing detail fields count as zero.
  const cacheRead = u.prompt_tokens_details?.cached_tokens ?? 0;
  const cacheWrite = u.prompt_tokens_details?.cache_creation_input_tokens ?? 0;
  // NOTE(review): presumably returns null for an absent or default tier — confirm in telemetry/usage.ts.
  const tier = normalizeOpenAiServiceTier(serviceTier);
  return tokenUsage({
    input: u.prompt_tokens - cacheRead - cacheWrite,
    input_cache_read: cacheRead,
    input_cache_write: cacheWrite,
    output: u.completion_tokens,
    // Leave `tier` off entirely rather than passing an explicit null.
    ...(tier !== null ? { tier } : {}),
  });
};

Expand Down Expand Up @@ -118,7 +122,7 @@ const observeChatCompletionsFrames = async function* (frames: AsyncIterable<Prot
const failed = isChatCompletionsFailureFrame(frame);
if (failed) state.failed = true;
if (observeUsage) {
state.rememberUsage(frame.type === 'event' && Array.isArray(frame.event.choices) && frame.event.choices.length === 0 && frame.event.usage ? tokenUsageFromChatCompletionsUsage(frame.event.usage) : null);
state.rememberUsage(frame.type === 'event' && Array.isArray(frame.event.choices) && frame.event.choices.length === 0 && frame.event.usage ? tokenUsageFromChatCompletionsUsage(frame.event.usage, frame.event.service_tier) : null);
}
if (isChatCompletionsTerminalFrame(frame) && !failed) state.completed = true;
yield frame;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ const applyMessagesUsage = (usage: MessagesUsage, update: Partial<MessagesUsage>
if (update.cache_read_input_tokens != null) {
usage.cache_read_input_tokens = update.cache_read_input_tokens;
}
if (update.cache_creation != null) usage.cache_creation = update.cache_creation;
if (update.service_tier != null) usage.service_tier = update.service_tier;
if (update.speed != null) usage.speed = update.speed;
if (update.server_tool_use != null) {
usage.server_tool_use = update.server_tool_use;
}
Expand Down
Loading