From 2b7dc4d5fa29b8b2c6ab17902a65cb2915ab9f12 Mon Sep 17 00:00:00 2001 From: Menci Date: Sat, 20 Jun 2026 03:58:25 +0800 Subject: [PATCH 1/2] feat(protocols,gateway): input_cache_write_1h dimension and per-TTL cache parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthropic's `extended-cache-ttl-2025-04-11` beta surfaces 1-hour cache writes under `usage.cache_creation.ephemeral_1h_input_tokens`. Until now we folded both 5m and 1h writes into the same `input_cache_write` bucket, under-billing 1h writes (priced at input × 2 vs. input × 1.25 for 5m). Adds `input_cache_write_1h` as a disjoint billing dimension with the fallback chain 1h -> 5m -> input, and teaches the Messages parser to read `cache_creation.ephemeral_5m_input_tokens` / `ephemeral_1h_input_tokens` separately when the sub-object is present; falls back to the flat `cache_creation_input_tokens` when not. Threads the new dimension through the surfaces that hardcode the billing dimension list — the upstream-model zod schema, the `pricingField` / custom `/v1/models` cost parsers (now driven off `BILLING_DIMENSIONS` to follow any future additions), the dashboard usage page (Cache Write column folds both TTLs), and the model editor (separate 5m and 1h input fields so operators on custom upstreams can price each). 
--- apps/web/src/api/types.ts | 3 +- .../components/upstream-edit/ModelEditor.vue | 9 +-- apps/web/src/pages/dashboard/usage.vue | 15 ++--- packages/gateway/src/control-plane/schemas.ts | 1 + .../llm/messages/events/reassemble.ts | 1 + .../src/data-plane/llm/messages/respond.ts | 19 ++++-- .../data-plane/llm/messages/respond_test.ts | 66 +++++++++++++++++++ packages/protocols/src/common/models.ts | 23 +++++-- packages/protocols/src/common/models_test.ts | 35 ++++++++++ packages/protocols/src/messages/index.ts | 12 ++++ packages/provider-custom/src/fetch-models.ts | 6 +- packages/provider/src/model-config.ts | 6 +- 12 files changed, 163 insertions(+), 33 deletions(-) create mode 100644 packages/protocols/src/common/models_test.ts diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index 6b91b7ea7..069233d2f 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -22,7 +22,8 @@ export interface ModelEndpoints { export type ModelEndpointKey = keyof ModelEndpoints; // USD per million tokens, keyed by billing dimension. 
-export type ModelPricing = Partial>; +export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_cache_write_1h' | 'input_image' | 'output' | 'output_image'; +export type ModelPricing = Partial>; export interface UpstreamModelConfig { upstreamModelId: string; diff --git a/apps/web/src/components/upstream-edit/ModelEditor.vue b/apps/web/src/components/upstream-edit/ModelEditor.vue index 3d8e18ec7..aa70330cd 100644 --- a/apps/web/src/components/upstream-edit/ModelEditor.vue +++ b/apps/web/src/components/upstream-edit/ModelEditor.vue @@ -4,7 +4,7 @@ import { computed } from 'vue'; import EndpointsField from './EndpointsField.vue'; import FlagOverridesEditor from './FlagOverridesEditor.vue'; import { configOf, defaultEndpointsForKind, publicIdOf, titleFor, type Row } from './modelRows.ts'; -import type { FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts'; +import type { BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts'; import { Button, Input, Select, Switch } from '@floway-dev/ui'; const props = defineProps<{ @@ -37,14 +37,15 @@ const kindOptions: { value: ModelKind; label: string }[] = [ const PRICING_LABELS: Record = { input: 'Input ($/MTok)', input_cache_read: 'Cache Read ($/MTok)', - input_cache_write: 'Cache Write ($/MTok)', + input_cache_write: 'Cache Write 5m ($/MTok)', + input_cache_write_1h: 'Cache Write 1h ($/MTok)', input_image: 'Image Input ($/MTok)', output: 'Output ($/MTok)', output_image: 'Image Output ($/MTok)', }; -const PRICING_BY_KIND: Record = { - chat: ['input', 'input_cache_read', 'input_cache_write', 'output'], +const PRICING_BY_KIND: Record = { + chat: ['input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'output'], embedding: ['input'], image: ['input', 'input_image', 'output', 'output_image'], }; diff --git a/apps/web/src/pages/dashboard/usage.vue 
b/apps/web/src/pages/dashboard/usage.vue index ff4909763..2e206639f 100644 --- a/apps/web/src/pages/dashboard/usage.vue +++ b/apps/web/src/pages/dashboard/usage.vue @@ -6,6 +6,7 @@ import { defineBasicLoader } from 'unplugin-vue-router/data-loaders/basic'; import { computed, ref, watch } from 'vue'; import { callApi, useApi, type ApiClient } from '../../api/client.ts'; +import type { BillingDimension } from '../../api/types.ts'; import ChartCanvas from '../../components/charts/ChartCanvas.vue'; import { bucketKeyForUtcHour, chartColor, chartFont, chartXAxisTick, dashboardBuckets, dashboardRangeQuery, type DashboardRange } from '../../components/charts/dashboard-chart.ts'; import UsageSummaryMetric from '../../components/usage/UsageSummaryMetric.vue'; @@ -13,8 +14,6 @@ import { useModelsStore } from '../../composables/useModels.ts'; import { useAuthStore } from '../../stores/auth.ts'; import { OverlayScrollbars, Spinner } from '@floway-dev/ui'; -type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_image' | 'output' | 'output_image'; - interface DisplayUsageRecord { keyId: string; keyName?: string; @@ -190,7 +189,7 @@ const tokenSummary = computed(() => { input += dim(r, 'input'); output += dim(r, 'output'); cacheRead += dim(r, 'input_cache_read'); - cacheCreation += dim(r, 'input_cache_write'); + cacheCreation += dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h'); inputImage += dim(r, 'input_image'); outputImage += dim(r, 'output_image'); } @@ -240,12 +239,12 @@ const metricValue = (r: DisplayUsageRecord, metric: Metric): number => { switch (metric) { case 'requests': return r.requests; case 'cost': return r.cost; - case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image') + dim(r, 'output_image'); - case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image'); + case 'total': return 
dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h') + dim(r, 'input_image') + dim(r, 'output_image'); + case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h') + dim(r, 'input_image'); case 'output': return dim(r, 'output') + dim(r, 'output_image'); - case 'prefill': return dim(r, 'input') + dim(r, 'input_cache_write') + dim(r, 'input_image'); + case 'prefill': return dim(r, 'input') + dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h') + dim(r, 'input_image'); case 'cached': return dim(r, 'input_cache_read'); - case 'cacheCreation': return dim(r, 'input_cache_write'); + case 'cacheCreation': return dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h'); case 'cachedRate': case 'cacheHitRate': return 0; @@ -339,7 +338,7 @@ const aggregateTokenRecords = (records: readonly DisplayUsageRecord[], groupKey: detail.input += dim(r, 'input'); detail.output += dim(r, 'output'); detail.cacheRead += dim(r, 'input_cache_read'); - detail.cacheCreation += dim(r, 'input_cache_write'); + detail.cacheCreation += dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h'); detail.inputImage += dim(r, 'input_image'); detail.outputImage += dim(r, 'output_image'); detail.cost += r.cost; diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts index 341ca347d..cc428903d 100644 --- a/packages/gateway/src/control-plane/schemas.ts +++ b/packages/gateway/src/control-plane/schemas.ts @@ -74,6 +74,7 @@ const upstreamModelSchema = z.object({ output: z.number().optional(), input_cache_read: z.number().optional(), input_cache_write: z.number().optional(), + input_cache_write_1h: z.number().optional(), input_image: z.number().optional(), output_image: z.number().optional(), }).optional(), diff --git a/packages/gateway/src/data-plane/llm/messages/events/reassemble.ts 
b/packages/gateway/src/data-plane/llm/messages/events/reassemble.ts index 70b6c026b..71cea87e6 100644 --- a/packages/gateway/src/data-plane/llm/messages/events/reassemble.ts +++ b/packages/gateway/src/data-plane/llm/messages/events/reassemble.ts @@ -100,6 +100,7 @@ const applyMessagesUsage = (usage: MessagesUsage, update: Partial if (update.cache_read_input_tokens != null) { usage.cache_read_input_tokens = update.cache_read_input_tokens; } + if (update.cache_creation != null) usage.cache_creation = update.cache_creation; if (update.service_tier != null) usage.service_tier = update.service_tier; if (update.server_tool_use != null) { usage.server_tool_use = update.server_tool_use; diff --git a/packages/gateway/src/data-plane/llm/messages/respond.ts b/packages/gateway/src/data-plane/llm/messages/respond.ts index d679428aa..0008704cf 100644 --- a/packages/gateway/src/data-plane/llm/messages/respond.ts +++ b/packages/gateway/src/data-plane/llm/messages/respond.ts @@ -77,14 +77,23 @@ export const respondMessages = async ( }; // Anthropic already reports disjoint token counts: input_tokens excludes the -// cache figures. Map them straight onto the billing dimensions without summing. -const tokenUsageFromMessagesUsage = (u: MessagesUsageLike) => - tokenUsage({ +// cache figures. Map them straight onto the billing dimensions without +// summing. When the upstream emits the `cache_creation` sub-object +// (extended-cache-ttl-2025-04-11), split the per-TTL counts onto the 5m and +// 1h dimensions; the flat `cache_creation_input_tokens` is the sum and is +// consulted for the 5m bucket whenever `ephemeral_5m_input_tokens` is absent. NOTE(review): a sub-object carrying only the 1h count would then land in both buckets — confirm upstreams always emit both fields. +const tokenUsageFromMessagesUsage = (u: MessagesUsageLike) => { + const cacheWrite5m = u.cache_creation?.ephemeral_5m_input_tokens; + const cacheWrite1h = u.cache_creation?.ephemeral_1h_input_tokens; + const cacheWriteRolledUp = u.cache_creation_input_tokens ?? 0; + return tokenUsage({ input: u.input_tokens ?? 0, input_cache_read: u.cache_read_input_tokens ?? 
0, - input_cache_write: u.cache_creation_input_tokens ?? 0, + input_cache_write: cacheWrite5m ?? cacheWriteRolledUp, + input_cache_write_1h: cacheWrite1h ?? 0, output: u.output_tokens, }); +}; export const createMessagesStreamUsageState = () => ({ current: tokenUsage({}), @@ -102,7 +111,7 @@ export const tokenUsageFromMessagesFrame = (frame: ProtocolFrame 0; + state.gotInputFromStart ||= (state.current.input ?? 0) + (state.current.input_cache_read ?? 0) + (state.current.input_cache_write ?? 0) + (state.current.input_cache_write_1h ?? 0) > 0; } if (event.type === 'message_delta' && event.usage) { if (!state.gotInputFromStart && event.usage.input_tokens !== undefined) { diff --git a/packages/gateway/src/data-plane/llm/messages/respond_test.ts b/packages/gateway/src/data-plane/llm/messages/respond_test.ts index e59bd40be..91b186293 100644 --- a/packages/gateway/src/data-plane/llm/messages/respond_test.ts +++ b/packages/gateway/src/data-plane/llm/messages/respond_test.ts @@ -139,3 +139,69 @@ test('Messages stream usage keeps cache-only start when a later delta carries in output: 50, }); }); + +test('Messages stream usage splits cache_creation per-TTL when the sub-object is present', () => { + const state = createMessagesStreamUsageState(); + + tokenUsageFromMessagesFrame( + eventFrame({ + type: 'message_start', + message: { + id: 'msg_1', + type: 'message', + role: 'assistant', + content: [], + model: 'claude-opus-4-8', + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 12, + output_tokens: 1, + // The flat field is the sum of both sub-buckets and is consulted + // only as a fallback. With the sub-object present the per-TTL split + // must take precedence — otherwise this row would double-count. 
+ cache_creation_input_tokens: 9, + cache_creation: { ephemeral_5m_input_tokens: 4, ephemeral_1h_input_tokens: 5 }, + cache_read_input_tokens: 3, + }, + }, + } satisfies MessagesStreamEvent), + state, + ); + + assertEquals(tokenUsageFromMessagesFrame(stop(), state), { + input: 12, + input_cache_read: 3, + input_cache_write: 4, + input_cache_write_1h: 5, + output: 1, + }); +}); + +test('Messages stream usage falls back to the rolled-up cache_creation when the sub-object is absent', () => { + const state = createMessagesStreamUsageState(); + + tokenUsageFromMessagesFrame( + eventFrame({ + type: 'message_start', + message: { + id: 'msg_1', + type: 'message', + role: 'assistant', + content: [], + model: 'claude-sonnet-4-6', + stop_reason: null, + stop_sequence: null, + usage: { input_tokens: 12, output_tokens: 1, cache_creation_input_tokens: 9, cache_read_input_tokens: 3 }, + }, + } satisfies MessagesStreamEvent), + state, + ); + + assertEquals(tokenUsageFromMessagesFrame(stop(), state), { + input: 12, + input_cache_read: 3, + input_cache_write: 9, + output: 1, + }); +}); diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts index 9634cc05e..a26fe8f7b 100644 --- a/packages/protocols/src/common/models.ts +++ b/packages/protocols/src/common/models.ts @@ -1,17 +1,22 @@ // Disjoint billing dimensions a single request can be charged on. Every count // keyed by these is non-overlapping: a prompt token is counted under exactly -// one of `input`, `input_cache_read`, `input_cache_write`, or `input_image`, -// never several at once. +// one of `input`, `input_cache_read`, `input_cache_write`, +// `input_cache_write_1h`, or `input_image`, never several at once. // // Convention borrowed from models.dev and LiteLLM: bare `input`/`output` mean // the text modality AND act as the fallback rate for any modality without a // dedicated rate; the `_image` variants are the image modality. 
There are no // image cache dimensions on purpose — a live probe of Azure gpt-image-2 // confirmed its usage object never emits cached fields. -export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_image' | 'output' | 'output_image'; +// +// `input_cache_write` is the 5-minute (default) TTL bucket; `input_cache_write_1h` +// is the explicit 1-hour bucket Anthropic surfaces under +// `cache_creation.ephemeral_1h_input_tokens` (extended-cache-ttl-2025-04-11). +// They are disjoint subsets of `cache_creation_input_tokens`. +export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_cache_write_1h' | 'input_image' | 'output' | 'output_image'; // Iteration form of BillingDimension; the type union is the source of truth. -export const BILLING_DIMENSIONS: readonly BillingDimension[] = ['input', 'input_cache_read', 'input_cache_write', 'input_image', 'output', 'output_image']; +export const BILLING_DIMENSIONS: readonly BillingDimension[] = ['input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image']; // Per-model pricing in USD per million tokens, aligned with the sst/models.dev // `Cost` schema (https://github.com/sst/models.dev/blob/main/packages/core/src/schema.ts). @@ -22,9 +27,11 @@ export type ModelPricing = Partial>; // Resolve the USD-per-million-tokens unit price for one dimension against a // pricing snapshot, applying the LiteLLM-style fallback chain: a modality with -// no dedicated rate falls back to the bare text rate, and cached input falls -// back to uncached input. Returns null when even the fallback base is absent -// (or the whole snapshot is null), which aggregation treats as cost 0. +// no dedicated rate falls back to the bare text rate, cached input falls back +// to uncached input, and the 1-hour cache write falls back to the 5-minute +// cache write before reaching uncached input. 
Returns null when even the +// fallback base is absent (or the whole snapshot is null), which aggregation +// treats as cost 0. export const unitPriceForDimension = (pricing: ModelPricing | null, dimension: BillingDimension): number | null => { if (!pricing) return null; switch (dimension) { @@ -34,6 +41,8 @@ export const unitPriceForDimension = (pricing: ModelPricing | null, dimension: B return pricing.input_cache_read ?? pricing.input ?? null; case 'input_cache_write': return pricing.input_cache_write ?? pricing.input ?? null; + case 'input_cache_write_1h': + return pricing.input_cache_write_1h ?? pricing.input_cache_write ?? pricing.input ?? null; case 'input_image': return pricing.input_image ?? pricing.input ?? null; case 'output': diff --git a/packages/protocols/src/common/models_test.ts b/packages/protocols/src/common/models_test.ts new file mode 100644 index 000000000..ffe10d8c3 --- /dev/null +++ b/packages/protocols/src/common/models_test.ts @@ -0,0 +1,35 @@ +import { test } from 'vitest'; + +import { unitPriceForDimension } from './models.ts'; +import { assertEquals } from '../test-assert.ts'; + +test('unitPriceForDimension returns null when pricing snapshot is null', () => { + assertEquals(unitPriceForDimension(null, 'input'), null); + assertEquals(unitPriceForDimension(null, 'input_cache_write_1h'), null); +}); + +test('unitPriceForDimension prefers the dimension-specific rate', () => { + const pricing = { input: 1, input_cache_read: 0.1, input_cache_write: 1.25, input_cache_write_1h: 2, output: 5 }; + assertEquals(unitPriceForDimension(pricing, 'input'), 1); + assertEquals(unitPriceForDimension(pricing, 'input_cache_read'), 0.1); + assertEquals(unitPriceForDimension(pricing, 'input_cache_write'), 1.25); + assertEquals(unitPriceForDimension(pricing, 'input_cache_write_1h'), 2); + assertEquals(unitPriceForDimension(pricing, 'output'), 5); +}); + +test('unitPriceForDimension falls input_cache_write_1h back to input_cache_write before reaching input', () 
=> { + // 1h -> 5m -> input. When only 5m is defined, 1h reuses the 5m rate + // rather than skipping straight to the bare input rate. + const pricing = { input: 1, input_cache_write: 1.25 }; + assertEquals(unitPriceForDimension(pricing, 'input_cache_write_1h'), 1.25); +}); + +test('unitPriceForDimension falls input_cache_write_1h all the way back to input when neither cache write is set', () => { + const pricing = { input: 1 }; + assertEquals(unitPriceForDimension(pricing, 'input_cache_write_1h'), 1); +}); + +test('unitPriceForDimension returns null when the fallback chain is empty', () => { + assertEquals(unitPriceForDimension({}, 'input_cache_write_1h'), null); + assertEquals(unitPriceForDimension({ output: 5 }, 'input_cache_write_1h'), null); +}); diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts index dc4c02435..094955a5c 100644 --- a/packages/protocols/src/messages/index.ts +++ b/packages/protocols/src/messages/index.ts @@ -225,6 +225,14 @@ export interface MessagesUsage { output_tokens: number; cache_creation_input_tokens?: number; cache_read_input_tokens?: number; + // Per-TTL split for cache writes introduced by extended-cache-ttl-2025-04-11. + // Each `ephemeral_*` field is a disjoint subset of `cache_creation_input_tokens` + // (the legacy flat field is the sum of both); upstreams that have not opted + // into the beta omit `cache_creation` entirely and emit only the flat field. 
+ cache_creation?: { + ephemeral_5m_input_tokens?: number; + ephemeral_1h_input_tokens?: number; + }; service_tier?: 'standard' | 'priority' | 'batch'; server_tool_use?: MessagesUsageServerToolUse; } @@ -300,6 +308,10 @@ export interface MessagesMessageDeltaEvent { output_tokens: number; cache_creation_input_tokens?: number; cache_read_input_tokens?: number; + cache_creation?: { + ephemeral_5m_input_tokens?: number; + ephemeral_1h_input_tokens?: number; + }; server_tool_use?: MessagesUsageServerToolUse; }; } diff --git a/packages/provider-custom/src/fetch-models.ts b/packages/provider-custom/src/fetch-models.ts index ab9ddefa5..40d567740 100644 --- a/packages/provider-custom/src/fetch-models.ts +++ b/packages/provider-custom/src/fetch-models.ts @@ -11,7 +11,7 @@ import type { CustomUpstreamConfig } from './config.ts'; import { customFetchModels } from './fetch.ts'; -import type { ModelKind, ModelPricing } from '@floway-dev/protocols/common'; +import { BILLING_DIMENSIONS, type ModelKind, type ModelPricing } from '@floway-dev/protocols/common'; import { fetchUpstreamModels, type Fetcher } from '@floway-dev/provider'; export interface CustomRawModel { @@ -58,14 +58,12 @@ const parseLimits = (value: unknown): CustomRawModel['limits'] => { return Object.keys(limits).length > 0 ? limits : undefined; }; -const PRICING_DIMENSIONS: readonly (keyof ModelPricing)[] = ['input', 'input_cache_read', 'input_cache_write', 'input_image', 'output', 'output_image']; - const parseCost = (value: unknown): ModelPricing | undefined => { // Admit any subset of billing dimensions advertised on the upstream's // /v1/models cost block; drop the whole block when none are present. 
if (!isRecord(value)) return undefined; const cost: ModelPricing = {}; - for (const dimension of PRICING_DIMENSIONS) { + for (const dimension of BILLING_DIMENSIONS) { const rate = optionalNumberField(value[dimension]); if (rate !== undefined) cost[dimension] = rate; } diff --git a/packages/provider/src/model-config.ts b/packages/provider/src/model-config.ts index c3804052a..da1692d40 100644 --- a/packages/provider/src/model-config.ts +++ b/packages/provider/src/model-config.ts @@ -1,5 +1,5 @@ import { isKnownFlagId } from './flags.ts'; -import type { ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing } from '@floway-dev/protocols/common'; +import { BILLING_DIMENSIONS, type ModelEndpointKey, type ModelEndpoints, type ModelKind, type ModelPricing } from '@floway-dev/protocols/common'; import { kindForEndpoints } from '@floway-dev/protocols/common'; export interface UpstreamModelLimits { @@ -120,13 +120,11 @@ const nonNegativeNumberField = (value: unknown, label: string): number => { return value; }; -const PRICING_DIMENSIONS: readonly (keyof ModelPricing)[] = ['input', 'input_cache_read', 'input_cache_write', 'input_image', 'output', 'output_image']; - export const pricingField = (value: unknown, label: string): ModelPricing | undefined => { const record = optionalMetadataRecord(value, label); if (!record) return undefined; const pricing: ModelPricing = {}; - for (const dimension of PRICING_DIMENSIONS) { + for (const dimension of BILLING_DIMENSIONS) { if (record[dimension] !== undefined) pricing[dimension] = nonNegativeNumberField(record[dimension], `${label}.${dimension}`); } return Object.keys(pricing).length > 0 ? 
pricing : undefined; From 92685bdb09bfdd2f3cb776872560c1605c274041 Mon Sep 17 00:00:00 2001 From: Menci Date: Sat, 20 Jun 2026 03:58:36 +0800 Subject: [PATCH 2/2] feat(gateway): migration 0035 widens dimension CHECK for input_cache_write_1h SQLite cannot alter a CHECK constraint in place, so widening the `usage.dimension` list to admit `input_cache_write_1h` requires recreating the table. `usage_requests` is untouched: it has no dimension column. Existing rows backfill cleanly into the recreated table; aggregation treats them identically to before. --- .../0035_usage_input_cache_write_1h.sql | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 packages/gateway/migrations/0035_usage_input_cache_write_1h.sql diff --git a/packages/gateway/migrations/0035_usage_input_cache_write_1h.sql b/packages/gateway/migrations/0035_usage_input_cache_write_1h.sql new file mode 100644 index 000000000..ebdc25e18 --- /dev/null +++ b/packages/gateway/migrations/0035_usage_input_cache_write_1h.sql @@ -0,0 +1,33 @@ +-- Widen the `usage.dimension` CHECK list to admit `input_cache_write_1h`. +-- +-- Anthropic's `extended-cache-ttl-2025-04-11` beta surfaces 1-hour cache +-- writes under `usage.cache_creation.ephemeral_1h_input_tokens`. Until now +-- we folded both 5m and 1h writes into the same `input_cache_write` bucket, +-- which under-bills 1h writes (priced at input × 2 vs. input × 1.25 for 5m). +-- Adding the dimension as a disjoint bucket requires recreating `usage` +-- because SQLite cannot alter a CHECK constraint in place. +-- +-- `usage_requests` is untouched: it does not carry a dimension column. 
+ +CREATE TABLE usage_new ( + key_id TEXT NOT NULL, + model TEXT NOT NULL, + upstream TEXT, + model_key TEXT NOT NULL, + hour TEXT NOT NULL, + dimension TEXT NOT NULL CHECK (dimension IN ( + 'input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image' + )), + tokens INTEGER NOT NULL DEFAULT 0, + unit_price REAL +); + +INSERT INTO usage_new (key_id, model, upstream, model_key, hour, dimension, tokens, unit_price) + SELECT key_id, model, upstream, model_key, hour, dimension, tokens, unit_price FROM usage; + +DROP TABLE usage; +ALTER TABLE usage_new RENAME TO usage; + +CREATE UNIQUE INDEX idx_usage_dimension_identity + ON usage (key_id, model, COALESCE(upstream, ''), model_key, hour, dimension); +CREATE INDEX idx_usage_dimension_hour ON usage (hour);