Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion apps/web/src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ export interface ModelEndpoints {
export type ModelEndpointKey = keyof ModelEndpoints;

// Every dimension a request can be billed on. The two cache-write dimensions
// (`input_cache_write` and `input_cache_write_1h`) are separate members so
// each can carry its own rate.
export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_cache_write_1h' | 'input_image' | 'output' | 'output_image';

// USD per million tokens, keyed by billing dimension. Every rate is optional.
export type ModelPricing = Partial<Record<BillingDimension, number>>;

export interface UpstreamModelConfig {
upstreamModelId: string;
Expand Down
9 changes: 5 additions & 4 deletions apps/web/src/components/upstream-edit/ModelEditor.vue
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { computed } from 'vue';
import EndpointsField from './EndpointsField.vue';
import FlagOverridesEditor from './FlagOverridesEditor.vue';
import { configOf, defaultEndpointsForKind, publicIdOf, titleFor, type Row } from './modelRows.ts';
import type { FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
import type { BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
import { Button, Input, Select, Switch } from '@floway-dev/ui';

const props = defineProps<{
Expand Down Expand Up @@ -37,14 +37,15 @@ const kindOptions: { value: ModelKind; label: string }[] = [
// Display label for each pricing dimension. Every label carries the `$/MTok`
// unit suffix; the two cache-write dimensions are told apart by a 5m / 1h tag.
// Each key appears exactly once: a repeated key is a compile error and would
// silently shadow the earlier label at runtime.
const PRICING_LABELS: Record<string, string> = {
  input: 'Input ($/MTok)',
  input_cache_read: 'Cache Read ($/MTok)',
  input_cache_write: 'Cache Write 5m ($/MTok)',
  input_cache_write_1h: 'Cache Write 1h ($/MTok)',
  input_image: 'Image Input ($/MTok)',
  output: 'Output ($/MTok)',
  output_image: 'Image Output ($/MTok)',
};

// Pricing dimensions listed for each model kind. Chat models get both
// cache-write dimensions (`input_cache_write` and `input_cache_write_1h`);
// embedding models only have an input rate; image models add the `_image`
// variants on both sides.
const PRICING_BY_KIND: Record<ModelKind, BillingDimension[]> = {
  chat: ['input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'output'],
  embedding: ['input'],
  image: ['input', 'input_image', 'output', 'output_image'],
};
Expand Down
15 changes: 7 additions & 8 deletions apps/web/src/pages/dashboard/usage.vue
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ import { defineBasicLoader } from 'unplugin-vue-router/data-loaders/basic';
import { computed, ref, watch } from 'vue';

import { callApi, useApi, type ApiClient } from '../../api/client.ts';
import type { BillingDimension } from '../../api/types.ts';
import ChartCanvas from '../../components/charts/ChartCanvas.vue';
import { bucketKeyForUtcHour, chartColor, chartFont, chartXAxisTick, dashboardBuckets, dashboardRangeQuery, type DashboardRange } from '../../components/charts/dashboard-chart.ts';
import UsageSummaryMetric from '../../components/usage/UsageSummaryMetric.vue';
import { useModelsStore } from '../../composables/useModels.ts';
import { useAuthStore } from '../../stores/auth.ts';
import { OverlayScrollbars, Spinner } from '@floway-dev/ui';

type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_image' | 'output' | 'output_image';

interface DisplayUsageRecord {
keyId: string;
keyName?: string;
Expand Down Expand Up @@ -190,7 +189,7 @@ const tokenSummary = computed(() => {
input += dim(r, 'input');
output += dim(r, 'output');
cacheRead += dim(r, 'input_cache_read');
cacheCreation += dim(r, 'input_cache_write');
cacheCreation += dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h');
inputImage += dim(r, 'input_image');
outputImage += dim(r, 'output_image');
}
Expand Down Expand Up @@ -240,12 +239,12 @@ const metricValue = (r: DisplayUsageRecord, metric: Metric): number => {
switch (metric) {
case 'requests': return r.requests;
case 'cost': return r.cost;
case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image') + dim(r, 'output_image');
case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image');
case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h') + dim(r, 'input_image') + dim(r, 'output_image');
case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h') + dim(r, 'input_image');
case 'output': return dim(r, 'output') + dim(r, 'output_image');
case 'prefill': return dim(r, 'input') + dim(r, 'input_cache_write') + dim(r, 'input_image');
case 'prefill': return dim(r, 'input') + dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h') + dim(r, 'input_image');
case 'cached': return dim(r, 'input_cache_read');
case 'cacheCreation': return dim(r, 'input_cache_write');
case 'cacheCreation': return dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h');
case 'cachedRate':
case 'cacheHitRate':
return 0;
Expand Down Expand Up @@ -339,7 +338,7 @@ const aggregateTokenRecords = (records: readonly DisplayUsageRecord[], groupKey:
detail.input += dim(r, 'input');
detail.output += dim(r, 'output');
detail.cacheRead += dim(r, 'input_cache_read');
detail.cacheCreation += dim(r, 'input_cache_write');
detail.cacheCreation += dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h');
detail.inputImage += dim(r, 'input_image');
detail.outputImage += dim(r, 'output_image');
detail.cost += r.cost;
Expand Down
33 changes: 33 additions & 0 deletions packages/gateway/migrations/0035_usage_input_cache_write_1h.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Widen the `usage.dimension` CHECK list to admit `input_cache_write_1h`.
--
-- Anthropic's `extended-cache-ttl-2025-04-11` beta surfaces 1-hour cache
-- writes under `usage.cache_creation.ephemeral_1h_input_tokens`. Until now
-- we folded both 5m and 1h writes into the same `input_cache_write` bucket,
-- which under-bills 1h writes (priced at input × 2 vs. input × 1.25 for 5m).
-- Adding the dimension as a disjoint bucket requires recreating `usage`
-- because SQLite cannot alter a CHECK constraint in place.
--
-- `usage_requests` is untouched: it does not carry a dimension column.

-- Step 1: build the replacement table with the widened CHECK list.
-- NOTE(review): the remaining columns are presumably copied verbatim from the
-- current `usage` definition — confirm against the migration that created it.
CREATE TABLE usage_new (
key_id TEXT NOT NULL,
model TEXT NOT NULL,
upstream TEXT,
model_key TEXT NOT NULL,
hour TEXT NOT NULL,
dimension TEXT NOT NULL CHECK (dimension IN (
'input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image'
)),
tokens INTEGER NOT NULL DEFAULT 0,
unit_price REAL
);

-- Step 2: copy every existing row across. Columns are named explicitly on
-- both sides so the copy does not depend on column order.
INSERT INTO usage_new (key_id, model, upstream, model_key, hour, dimension, tokens, unit_price)
SELECT key_id, model, upstream, model_key, hour, dimension, tokens, unit_price FROM usage;

-- Step 3: swap the replacement in under the original name.
DROP TABLE usage;
ALTER TABLE usage_new RENAME TO usage;

-- Step 4: create the indexes on the renamed table. SQLite removes a table's
-- indexes together with the table, so nothing carries over from step 3.
-- NOTE(review): these presumably mirror the indexes the old table had —
-- confirm names and columns against the earlier migration.
--
-- `upstream` is nullable and SQLite treats NULLs as distinct in a UNIQUE
-- index; COALESCE folds NULL to '' so two rows that differ only by a missing
-- upstream still collide on the identity key.
CREATE UNIQUE INDEX idx_usage_dimension_identity
ON usage (key_id, model, COALESCE(upstream, ''), model_key, hour, dimension);
CREATE INDEX idx_usage_dimension_hour ON usage (hour);
1 change: 1 addition & 0 deletions packages/gateway/src/control-plane/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ const upstreamModelSchema = z.object({
output: z.number().optional(),
input_cache_read: z.number().optional(),
input_cache_write: z.number().optional(),
input_cache_write_1h: z.number().optional(),
input_image: z.number().optional(),
output_image: z.number().optional(),
}).optional(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ const applyMessagesUsage = (usage: MessagesUsage, update: Partial<MessagesUsage>
if (update.cache_read_input_tokens != null) {
usage.cache_read_input_tokens = update.cache_read_input_tokens;
}
if (update.cache_creation != null) usage.cache_creation = update.cache_creation;
if (update.service_tier != null) usage.service_tier = update.service_tier;
if (update.server_tool_use != null) {
usage.server_tool_use = update.server_tool_use;
Expand Down
19 changes: 14 additions & 5 deletions packages/gateway/src/data-plane/llm/messages/respond.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,23 @@ export const respondMessages = async (
};

// Anthropic already reports disjoint token counts: input_tokens excludes the
// cache figures. Map them straight onto the billing dimensions without
// summing.
//
// Cache writes are split per TTL when the upstream emits the `cache_creation`
// sub-object (extended-cache-ttl-2025-04-11): `ephemeral_5m_input_tokens`
// feeds `input_cache_write` and `ephemeral_1h_input_tokens` feeds
// `input_cache_write_1h`. The flat `cache_creation_input_tokens` is the sum of
// both, so it only stands in for a missing 5m count, and then only with the
// part the 1h count has not already claimed:
//   - no sub-object            -> the whole flat figure lands on the 5m bucket
//   - sub-object with 1h only  -> flat minus 1h lands on the 5m bucket, so the
//                                 1h tokens are never billed on both dimensions
//   - sub-object with 5m set   -> the flat figure is ignored
const tokenUsageFromMessagesUsage = (u: MessagesUsageLike) => {
  const cacheWrite1h = u.cache_creation?.ephemeral_1h_input_tokens ?? 0;
  const cacheWriteRolledUp = u.cache_creation_input_tokens ?? 0;
  // Clamp at zero: an upstream whose flat total is missing or smaller than the
  // 1h count must not produce a negative 5m figure.
  const cacheWrite5m = u.cache_creation?.ephemeral_5m_input_tokens
    ?? Math.max(0, cacheWriteRolledUp - cacheWrite1h);
  return tokenUsage({
    input: u.input_tokens ?? 0,
    input_cache_read: u.cache_read_input_tokens ?? 0,
    input_cache_write: cacheWrite5m,
    input_cache_write_1h: cacheWrite1h,
    output: u.output_tokens,
  });
};

export const createMessagesStreamUsageState = () => ({
current: tokenUsage({}),
Expand All @@ -102,7 +111,7 @@ export const tokenUsageFromMessagesFrame = (frame: ProtocolFrame<MessagesStreamE
// cache reads; the input accounting still arrived, so the flag must reflect
// every input-side dimension, not bare input alone — otherwise a later
// delta carrying input_tokens re-merges and drops the cache counts.
state.gotInputFromStart ||= (state.current.input ?? 0) + (state.current.input_cache_read ?? 0) + (state.current.input_cache_write ?? 0) > 0;
state.gotInputFromStart ||= (state.current.input ?? 0) + (state.current.input_cache_read ?? 0) + (state.current.input_cache_write ?? 0) + (state.current.input_cache_write_1h ?? 0) > 0;
}
if (event.type === 'message_delta' && event.usage) {
if (!state.gotInputFromStart && event.usage.input_tokens !== undefined) {
Expand Down
66 changes: 66 additions & 0 deletions packages/gateway/src/data-plane/llm/messages/respond_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,69 @@ test('Messages stream usage keeps cache-only start when a later delta carries in
output: 50,
});
});

test('Messages stream usage splits cache_creation per-TTL when the sub-object is present', () => {
  const usageState = createMessagesStreamUsageState();

  // `cache_creation_input_tokens` (9) is the rolled-up total of the two
  // per-TTL counts (4 + 5) and is meant only as a fallback. With the
  // sub-object present the split has to win; counting the flat total as well
  // would double-count this row.
  const startEvent = {
    type: 'message_start',
    message: {
      id: 'msg_1',
      type: 'message',
      role: 'assistant',
      content: [],
      model: 'claude-opus-4-8',
      stop_reason: null,
      stop_sequence: null,
      usage: {
        input_tokens: 12,
        output_tokens: 1,
        cache_creation_input_tokens: 9,
        cache_creation: { ephemeral_5m_input_tokens: 4, ephemeral_1h_input_tokens: 5 },
        cache_read_input_tokens: 3,
      },
    },
  } satisfies MessagesStreamEvent;
  tokenUsageFromMessagesFrame(eventFrame(startEvent), usageState);

  // The usage returned for the stop frame carries the 5m and 1h writes on
  // their own dimensions.
  const finalUsage = tokenUsageFromMessagesFrame(stop(), usageState);
  assertEquals(finalUsage, {
    input: 12,
    input_cache_read: 3,
    input_cache_write: 4,
    input_cache_write_1h: 5,
    output: 1,
  });
});

test('Messages stream usage falls back to the rolled-up cache_creation when the sub-object is absent', () => {
  const usageState = createMessagesStreamUsageState();

  // Only the flat `cache_creation_input_tokens` is reported here; there is no
  // `cache_creation` sub-object to split on.
  const startEvent = {
    type: 'message_start',
    message: {
      id: 'msg_1',
      type: 'message',
      role: 'assistant',
      content: [],
      model: 'claude-sonnet-4-6',
      stop_reason: null,
      stop_sequence: null,
      usage: { input_tokens: 12, output_tokens: 1, cache_creation_input_tokens: 9, cache_read_input_tokens: 3 },
    },
  } satisfies MessagesStreamEvent;
  tokenUsageFromMessagesFrame(eventFrame(startEvent), usageState);

  // The whole flat figure is expected on `input_cache_write`.
  const finalUsage = tokenUsageFromMessagesFrame(stop(), usageState);
  assertEquals(finalUsage, {
    input: 12,
    input_cache_read: 3,
    input_cache_write: 9,
    output: 1,
  });
});
23 changes: 16 additions & 7 deletions packages/protocols/src/common/models.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
// Disjoint billing dimensions a single request can be charged on. Every count
// keyed by these is non-overlapping: a prompt token is counted under exactly
// one of `input`, `input_cache_read`, `input_cache_write`,
// `input_cache_write_1h`, or `input_image`, never several at once.
//
// Convention borrowed from models.dev and LiteLLM: bare `input`/`output` mean
// the text modality AND act as the fallback rate for any modality without a
// dedicated rate; the `_image` variants are the image modality. There are no
// image cache dimensions on purpose — a live probe of Azure gpt-image-2
// confirmed its usage object never emits cached fields.
//
// `input_cache_write` is the 5-minute (default) TTL bucket; `input_cache_write_1h`
// is the explicit 1-hour bucket Anthropic surfaces under
// `cache_creation.ephemeral_1h_input_tokens` (extended-cache-ttl-2025-04-11).
// They are disjoint subsets of `cache_creation_input_tokens`.
export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_cache_write_1h' | 'input_image' | 'output' | 'output_image';

// Iteration form of BillingDimension; the type union is the source of truth.
// Keep the two in step: the annotation rejects unknown entries but does not
// flag a union member that is missing from the list.
export const BILLING_DIMENSIONS: readonly BillingDimension[] = ['input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image'];

// Per-model pricing in USD per million tokens, aligned with the sst/models.dev
// `Cost` schema (https://github.com/sst/models.dev/blob/main/packages/core/src/schema.ts).
Expand All @@ -22,9 +27,11 @@ export type ModelPricing = Partial<Record<BillingDimension, number>>;

// Resolve the USD-per-million-tokens unit price for one dimension against a
// pricing snapshot, applying the LiteLLM-style fallback chain: a modality with
// no dedicated rate falls back to the bare text rate, and cached input falls
// back to uncached input. Returns null when even the fallback base is absent
// (or the whole snapshot is null), which aggregation treats as cost 0.
// no dedicated rate falls back to the bare text rate, cached input falls back
// to uncached input, and the 1-hour cache write falls back to the 5-minute
// cache write before reaching uncached input. Returns null when even the
// fallback base is absent (or the whole snapshot is null), which aggregation
// treats as cost 0.
export const unitPriceForDimension = (pricing: ModelPricing | null, dimension: BillingDimension): number | null => {
if (!pricing) return null;
switch (dimension) {
Expand All @@ -34,6 +41,8 @@ export const unitPriceForDimension = (pricing: ModelPricing | null, dimension: B
return pricing.input_cache_read ?? pricing.input ?? null;
case 'input_cache_write':
return pricing.input_cache_write ?? pricing.input ?? null;
case 'input_cache_write_1h':
return pricing.input_cache_write_1h ?? pricing.input_cache_write ?? pricing.input ?? null;
case 'input_image':
return pricing.input_image ?? pricing.input ?? null;
case 'output':
Expand Down
35 changes: 35 additions & 0 deletions packages/protocols/src/common/models_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { test } from 'vitest';

import { unitPriceForDimension } from './models.ts';
import { assertEquals } from '../test-assert.ts';

test('unitPriceForDimension returns null when pricing snapshot is null', () => {
  // With no snapshot at all, no dimension has a price.
  for (const dimension of ['input', 'input_cache_write_1h'] as const) {
    assertEquals(unitPriceForDimension(null, dimension), null);
  }
});

test('unitPriceForDimension prefers the dimension-specific rate', () => {
  const rates = { input: 1, input_cache_read: 0.1, input_cache_write: 1.25, input_cache_write_1h: 2, output: 5 };
  // Each dimension with its own rate resolves to that rate, not a fallback.
  const expectations = [
    ['input', 1],
    ['input_cache_read', 0.1],
    ['input_cache_write', 1.25],
    ['input_cache_write_1h', 2],
    ['output', 5],
  ] as const;
  for (const [dimension, expected] of expectations) {
    assertEquals(unitPriceForDimension(rates, dimension), expected);
  }
});

test('unitPriceForDimension falls input_cache_write_1h back to input_cache_write before reaching input', () => {
  // Fallback chain is 1h -> 5m -> input: with only the 5m rate defined, the
  // 1h dimension borrows it instead of dropping to the bare input rate.
  const resolved = unitPriceForDimension({ input: 1, input_cache_write: 1.25 }, 'input_cache_write_1h');
  assertEquals(resolved, 1.25);
});

test('unitPriceForDimension falls input_cache_write_1h all the way back to input when neither cache write is set', () => {
  // No cache-write rate of either TTL: the bare input rate is the last resort.
  const resolved = unitPriceForDimension({ input: 1 }, 'input_cache_write_1h');
  assertEquals(resolved, 1);
});

test('unitPriceForDimension returns null when the fallback chain is empty', () => {
  // Neither snapshot defines any rate on the 1h -> 5m -> input chain.
  const emptySnapshot = {};
  const outputOnly = { output: 5 };
  assertEquals(unitPriceForDimension(emptySnapshot, 'input_cache_write_1h'), null);
  assertEquals(unitPriceForDimension(outputOnly, 'input_cache_write_1h'), null);
});
12 changes: 12 additions & 0 deletions packages/protocols/src/messages/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,14 @@ export interface MessagesUsage {
output_tokens: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
// Per-TTL split for cache writes introduced by extended-cache-ttl-2025-04-11.
// Each `ephemeral_*` field is a disjoint subset of `cache_creation_input_tokens`
// (the legacy flat field is the sum of both); upstreams that have not opted
// into the beta omit `cache_creation` entirely and emit only the flat field.
cache_creation?: {
ephemeral_5m_input_tokens?: number;
ephemeral_1h_input_tokens?: number;
};
service_tier?: 'standard' | 'priority' | 'batch';
server_tool_use?: MessagesUsageServerToolUse;
}
Expand Down Expand Up @@ -300,6 +308,10 @@ export interface MessagesMessageDeltaEvent {
output_tokens: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
cache_creation?: {
ephemeral_5m_input_tokens?: number;
ephemeral_1h_input_tokens?: number;
};
server_tool_use?: MessagesUsageServerToolUse;
};
}
Expand Down
6 changes: 2 additions & 4 deletions packages/provider-custom/src/fetch-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import type { CustomUpstreamConfig } from './config.ts';
import { customFetchModels } from './fetch.ts';
import type { ModelKind, ModelPricing } from '@floway-dev/protocols/common';
import { BILLING_DIMENSIONS, type ModelKind, type ModelPricing } from '@floway-dev/protocols/common';
import { fetchUpstreamModels, type Fetcher } from '@floway-dev/provider';

export interface CustomRawModel {
Expand Down Expand Up @@ -58,14 +58,12 @@ const parseLimits = (value: unknown): CustomRawModel['limits'] => {
return Object.keys(limits).length > 0 ? limits : undefined;
};

const PRICING_DIMENSIONS: readonly (keyof ModelPricing)[] = ['input', 'input_cache_read', 'input_cache_write', 'input_image', 'output', 'output_image'];

const parseCost = (value: unknown): ModelPricing | undefined => {
// Admit any subset of billing dimensions advertised on the upstream's
// /v1/models cost block; drop the whole block when none are present.
if (!isRecord(value)) return undefined;
const cost: ModelPricing = {};
for (const dimension of PRICING_DIMENSIONS) {
for (const dimension of BILLING_DIMENSIONS) {
const rate = optionalNumberField(value[dimension]);
if (rate !== undefined) cost[dimension] = rate;
}
Expand Down
Loading