Menci · Menci · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
@@ -22,7 +22,8 @@ export interface ModelEndpoints {
 export type ModelEndpointKey = keyof ModelEndpoints;
 
 // USD per million tokens, keyed by billing dimension.
-export type ModelPricing = Partial<Record<'input' | 'input_cache_read' | 'input_cache_write' | 'input_image' | 'output' | 'output_image', number>>;
+export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_cache_write_1h' | 'input_image' | 'output' | 'output_image';
+export type ModelPricing = Partial<Record<BillingDimension, number>>;
 
 export interface UpstreamModelConfig {
   upstreamModelId: string;

diff --git a/apps/web/src/components/upstream-edit/ModelEditor.vue b/apps/web/src/components/upstream-edit/ModelEditor.vue
@@ -4,7 +4,7 @@ import { computed } from 'vue';
 import EndpointsField from './EndpointsField.vue';
 import FlagOverridesEditor from './FlagOverridesEditor.vue';
 import { configOf, defaultEndpointsForKind, publicIdOf, titleFor, type Row } from './modelRows.ts';
-import type { FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
+import type { BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
 import { Button, Input, Select, Switch } from '@floway-dev/ui';
 
 const props = defineProps<{
@@ -37,14 +37,15 @@ const kindOptions: { value: ModelKind; label: string }[] = [
 const PRICING_LABELS: Record<string, string> = {
   input: 'Input ($/MTok)',
   input_cache_read: 'Cache Read ($/MTok)',
-  input_cache_write: 'Cache Write ($/MTok)',
+  input_cache_write: 'Cache Write 5m ($/MTok)',
+  input_cache_write_1h: 'Cache Write 1h ($/MTok)',
   input_image: 'Image Input ($/MTok)',
   output: 'Output ($/MTok)',
   output_image: 'Image Output ($/MTok)',
 };
 
-const PRICING_BY_KIND: Record<ModelKind, (keyof ModelPricing)[]> = {
-  chat: ['input', 'input_cache_read', 'input_cache_write', 'output'],
+const PRICING_BY_KIND: Record<ModelKind, BillingDimension[]> = {
+  chat: ['input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'output'],
   embedding: ['input'],
   image: ['input', 'input_image', 'output', 'output_image'],
 };
@@ -81,7 +82,7 @@ const updateLimit = (
   patch({ limits: Object.keys(limits).length > 0 ? limits : undefined });
 };
 
-const updateCost = (key: keyof ModelPricing, raw: string | number | null | undefined) => {
+const updateCost = (key: BillingDimension, raw: string | number | null | undefined) => {
   if (!config.value) return;
   const cost = { ...(config.value.cost ?? {}) } as Record<string, unknown>;
   const num = parseOptionalNumber(raw);

diff --git a/apps/web/src/pages/dashboard/usage.vue b/apps/web/src/pages/dashboard/usage.vue
@@ -6,15 +6,14 @@ import { defineBasicLoader } from 'unplugin-vue-router/data-loaders/basic';
 import { computed, ref, watch } from 'vue';
 
 import { callApi, useApi, type ApiClient } from '../../api/client.ts';
+import type { BillingDimension } from '../../api/types.ts';
 import ChartCanvas from '../../components/charts/ChartCanvas.vue';
 import { bucketKeyForUtcHour, chartColor, chartFont, chartXAxisTick, dashboardBuckets, dashboardRangeQuery, type DashboardRange } from '../../components/charts/dashboard-chart.ts';
 import UsageSummaryMetric from '../../components/usage/UsageSummaryMetric.vue';
 import { useModelsStore } from '../../composables/useModels.ts';
 import { useAuthStore } from '../../stores/auth.ts';
 import { OverlayScrollbars, Spinner } from '@floway-dev/ui';
 
-type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_image' | 'output' | 'output_image';
-
 interface DisplayUsageRecord {
   keyId: string;
   keyName?: string;
@@ -114,6 +113,7 @@ type Metric =
 type Range = DashboardRange;
 
 const dim = (r: DisplayUsageRecord, k: BillingDimension): number => r.tokens[k] ?? 0;
+const cacheWrite = (r: DisplayUsageRecord): number => dim(r, 'input_cache_write') + dim(r, 'input_cache_write_1h');
 
 const api = useApi();
 const auth = useAuthStore();
@@ -190,7 +190,7 @@ const tokenSummary = computed(() => {
     input += dim(r, 'input');
     output += dim(r, 'output');
     cacheRead += dim(r, 'input_cache_read');
-    cacheCreation += dim(r, 'input_cache_write');
+    cacheCreation += cacheWrite(r);
     inputImage += dim(r, 'input_image');
     outputImage += dim(r, 'output_image');
   }
@@ -240,12 +240,12 @@ const metricValue = (r: DisplayUsageRecord, metric: Metric): number => {
   switch (metric) {
   case 'requests': return r.requests;
   case 'cost': return r.cost;
-  case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image') + dim(r, 'output_image');
-  case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + dim(r, 'input_cache_write') + dim(r, 'input_image');
+  case 'total': return dim(r, 'input') + dim(r, 'output') + dim(r, 'input_cache_read') + cacheWrite(r) + dim(r, 'input_image') + dim(r, 'output_image');
+  case 'input': return dim(r, 'input') + dim(r, 'input_cache_read') + cacheWrite(r) + dim(r, 'input_image');
   case 'output': return dim(r, 'output') + dim(r, 'output_image');
-  case 'prefill': return dim(r, 'input') + dim(r, 'input_cache_write') + dim(r, 'input_image');
+  case 'prefill': return dim(r, 'input') + cacheWrite(r) + dim(r, 'input_image');
   case 'cached': return dim(r, 'input_cache_read');
-  case 'cacheCreation': return dim(r, 'input_cache_write');
+  case 'cacheCreation': return cacheWrite(r);
   case 'cachedRate':
   case 'cacheHitRate':
     return 0;
@@ -339,7 +339,7 @@ const aggregateTokenRecords = (records: readonly DisplayUsageRecord[], groupKey:
     detail.input += dim(r, 'input');
     detail.output += dim(r, 'output');
     detail.cacheRead += dim(r, 'input_cache_read');
-    detail.cacheCreation += dim(r, 'input_cache_write');
+    detail.cacheCreation += cacheWrite(r);
     detail.inputImage += dim(r, 'input_image');
     detail.outputImage += dim(r, 'output_image');
     detail.cost += r.cost;

diff --git a/packages/gateway/migrations/0034_usage_per_ttl_and_tier.sql b/packages/gateway/migrations/0034_usage_per_ttl_and_tier.sql
@@ -0,0 +1,49 @@
+-- Add `tier` (Anthropic `usage.speed`, OpenAI top-level `service_tier`) to usage
+-- and usage_requests, and `input_cache_write_1h` to the dimension CHECK list.
+-- Existing rows backfill with `tier = NULL` so historical aggregations compute
+-- identically. SQLite cannot extend a CHECK constraint or a UNIQUE INDEX in
+-- place over a new column, so both tables are rebuilt.
+
+CREATE TABLE usage_new (
+  key_id TEXT NOT NULL,
+  model TEXT NOT NULL,
+  upstream TEXT,
+  model_key TEXT NOT NULL,
+  hour TEXT NOT NULL,
+  tier TEXT,
+  dimension TEXT NOT NULL CHECK (dimension IN (
+    'input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image'
+  )),
+  tokens INTEGER NOT NULL DEFAULT 0,
+  unit_price REAL
+);
+
+INSERT INTO usage_new (key_id, model, upstream, model_key, hour, tier, dimension, tokens, unit_price)
+  SELECT key_id, model, upstream, model_key, hour, NULL, dimension, tokens, unit_price FROM usage;
+
+DROP TABLE usage;
+ALTER TABLE usage_new RENAME TO usage;
+
+CREATE UNIQUE INDEX idx_usage_dimension_identity
+  ON usage (key_id, model, COALESCE(upstream, ''), model_key, hour, COALESCE(tier, ''), dimension);
+CREATE INDEX idx_usage_dimension_hour ON usage (hour);
+
+CREATE TABLE usage_requests_new (
+  key_id TEXT NOT NULL,
+  model TEXT NOT NULL,
+  upstream TEXT,
+  model_key TEXT NOT NULL,
+  hour TEXT NOT NULL,
+  tier TEXT,
+  requests INTEGER NOT NULL DEFAULT 0
+);
+
+INSERT INTO usage_requests_new (key_id, model, upstream, model_key, hour, tier, requests)
+  SELECT key_id, model, upstream, model_key, hour, NULL, requests FROM usage_requests;
+
+DROP TABLE usage_requests;
+ALTER TABLE usage_requests_new RENAME TO usage_requests;
+
+CREATE UNIQUE INDEX idx_usage_requests_identity
+  ON usage_requests (key_id, model, COALESCE(upstream, ''), model_key, hour, COALESCE(tier, ''));
+CREATE INDEX idx_usage_requests_hour ON usage_requests (hour);
diff --git a/packages/gateway/src/app-control_test.ts b/packages/gateway/src/app-control_test.ts
@@ -108,6 +108,7 @@ test('/api/token-usage scopes to the actor\'s keys when called with an API key',
     upstream: null,
     modelKey: 'claude-sonnet-4',
     hour: '2026-03-15T10',
+    tier: null,
     requests: 2,
     tokens: { input: 10, output: 5, input_cache_read: 4, input_cache_write: 1 },
     cost: null,
@@ -118,6 +119,7 @@ test('/api/token-usage scopes to the actor\'s keys when called with an API key',
     upstream: null,
     modelKey: 'gpt-5',
     hour: '2026-03-15T11',
+    tier: null,
     requests: 1,
     tokens: { input: 20, output: 8, input_cache_read: 6, input_cache_write: 2 },
     cost: null,
@@ -155,6 +157,7 @@ test('/api/token-usage in self-by-key mode includes per-key metadata for the act
     upstream: null,
     modelKey: 'gpt-5',
     hour: '2026-03-16T10',
+    tier: null,
     requests: 1,
     tokens: { input: 20, output: 8 },
     cost: null,
@@ -182,6 +185,7 @@ test('/api/token-usage all-by-user view aggregates across keys per user', async
     upstream: null,
     modelKey: 'gpt-5',
     hour: '2026-03-15T10',
+    tier: null,
     requests: 1,
     tokens: { input: 10, output: 5 },
     cost: null,
@@ -213,6 +217,7 @@ test('/api/token-usage merges Claude variants into backend base model records',
     keyId: apiKey.id,
     hour: '2026-03-17T10',
     upstream: 'copilot:1',
+    tier: null,
     requests: 1,
     tokens: { input: 10, output: 5, input_cache_read: 2, input_cache_write: 1 },
   };

diff --git a/packages/gateway/src/control-plane/data-transfer/routes.ts b/packages/gateway/src/control-plane/data-transfer/routes.ts
@@ -402,6 +402,9 @@ const parseUsageRecords = (value: unknown): { type: 'ok'; records: UsageRecord[]
     if (typeof record.upstream === 'string' && isLegacyUpstreamIdentity(record.upstream)) {
       return { type: 'invalid', index: i, error: 'upstream must use a raw upstream id, not a legacy provider-prefixed identity' };
     }
+    if (record.tier !== undefined && record.tier !== null && typeof record.tier !== 'string') {
+      return { type: 'invalid', index: i, error: 'record has invalid tier (must be a string or null)' };
+    }
     const tokensResult = parseImportedTokens(record.tokens);
     if (tokensResult.type === 'invalid') return { type: 'invalid', index: i, error: 'record has invalid token dimension counts' };
     const costResult = parseImportedCost(record.cost);
@@ -412,6 +415,7 @@ const parseUsageRecords = (value: unknown): { type: 'ok'; records: UsageRecord[]
       upstream: record.upstream as string | null,
       modelKey: record.modelKey,
       hour: record.hour,
+      tier: typeof record.tier === 'string' && record.tier !== '' ? record.tier : null, // '' and NULL share one slot in the COALESCE(tier, '') identity indexes
       requests: record.requests,
       tokens: tokensResult.tokens,
       cost: costResult.cost,

diff --git a/packages/gateway/src/control-plane/data-transfer/routes_test.ts b/packages/gateway/src/control-plane/data-transfer/routes_test.ts
@@ -177,6 +177,7 @@ const USAGE_1: UsageRecord = {
   upstream: 'up_copilot_a',
   modelKey: 'claude-opus-4.7',
   hour: '2026-01-01T10',
+  tier: null,
   requests: 5,
   tokens: { input: 1000, output: 500, input_cache_read: 120, input_cache_write: 80 },
   cost: null,
@@ -188,6 +189,7 @@ const USAGE_2: UsageRecord = {
   upstream: 'up_azure_a',
   modelKey: 'gpt-prod',
   hour: '2026-01-01T11',
+  tier: null,
   requests: 3,
   tokens: { input: 2000, output: 800, input_cache_read: 200, input_cache_write: 50 },
   cost: null,

diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
@@ -74,6 +74,7 @@ const upstreamModelSchema = z.object({
     output: z.number().optional(),
     input_cache_read: z.number().optional(),
     input_cache_write: z.number().optional(),
+    input_cache_write_1h: z.number().optional(),
     input_image: z.number().optional(),
     output_image: z.number().optional(),
   }).optional(),

diff --git a/packages/gateway/src/control-plane/token-usage/aggregate.ts b/packages/gateway/src/control-plane/token-usage/aggregate.ts
@@ -1,5 +1,5 @@
 import type { UsageRecord } from '../../repo/types.ts';
-import { BILLING_DIMENSIONS, type BillingDimension, unitPriceForDimension } from '@floway-dev/protocols/common';
+import { BILLING_DIMENSIONS, type BillingDimension, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
 
 export interface DisplayUsageRecord {
   keyId: string;
@@ -22,13 +22,16 @@ export interface DisplayUsageByUserRecord {
 
 // Cost is pure addition over the dimension rows: Σ tokens × unit_price / 1e6.
 // No subtraction is needed because the counts are disjoint and each dimension
-// already carries its own resolved unit price snapshot.
+// already carries its own resolved unit price snapshot. The bucket's tier
+// folds into pricing first so per-tier overrides (Anthropic fast mode,
+// OpenAI priority/flex) replace base rates before the dimension lookup.
 const recordCostUsd = (record: UsageRecord): number => {
+  const effective = resolveEffectivePricing(record.cost, record.tier);
   let total = 0;
   for (const dimension of BILLING_DIMENSIONS) {
     const tokens = record.tokens[dimension] ?? 0;
     if (tokens === 0) continue;
-    const unitPrice = unitPriceForDimension(record.cost, dimension);
+    const unitPrice = unitPriceForDimension(effective, dimension);
     if (unitPrice !== null) total += tokens * unitPrice;
   }
   return total / 1e6;

diff --git a/packages/gateway/src/control-plane/token-usage/aggregate_test.ts b/packages/gateway/src/control-plane/token-usage/aggregate_test.ts
@@ -14,6 +14,7 @@ const baseRecord = (overrides: Partial<UsageRecord>): UsageRecord => ({
   model: 'claude-opus-4-7',
   upstream: 'up_copilot',
   modelKey: 'claude-opus-4-7',
+  tier: null,
   requests: 1,
   tokens: { input: 100, output: 50 },
   cost: opus47Pricing,
@@ -83,3 +84,42 @@ test('aggregateUsageForDisplay charges image dimensions separately', () => {
   // 10 + 5 + 40 + 30 = $85.
   assertAlmostEquals(out[0].cost, 85, 1e-9);
 });
+
+test('aggregateUsageForDisplay applies the per-tier override when the bucket carries a tier', () => {
+  // Opus 4.8 standard: $5 input / $25 output. Fast: $10 / $50.
+  const cost: ModelPricing = {
+    input: 5,
+    output: 25,
+    tiers: { fast: { input: 10, output: 50 } },
+  };
+  const fastRow = baseRecord({ tier: 'fast', cost, tokens: { input: 1_000_000, output: 1_000_000 } });
+  const standardRow = baseRecord({ tier: null, cost, tokens: { input: 1_000_000, output: 1_000_000 } });
+
+  const fastOut = aggregateUsageForDisplay([fastRow]);
+  // 1M * $10 + 1M * $50 = $60.
+  assertAlmostEquals(fastOut[0].cost, 60, 1e-9);
+
+  const standardOut = aggregateUsageForDisplay([standardRow]);
+  // 1M * $5 + 1M * $25 = $30.
+  assertAlmostEquals(standardOut[0].cost, 30, 1e-9);
+});
+
+test('aggregateUsageForDisplay leaves base pricing alone when the tier has no override entry', () => {
+  const cost: ModelPricing = {
+    input: 5,
+    output: 25,
+    tiers: { fast: { input: 10, output: 50 } },
+  };
+  const out = aggregateUsageForDisplay([baseRecord({ tier: 'priority', cost, tokens: { input: 1_000_000 } })]);
+  // Unknown tier → falls back to base $5 input. 1M * $5 = $5.
+  assertAlmostEquals(out[0].cost, 5, 1e-9);
+});
+
+test('aggregateUsageForDisplay prices the input_cache_write_1h dimension via the 1h-specific rate', () => {
+  const cost: ModelPricing = { input: 5, input_cache_write: 6.25, input_cache_write_1h: 10, output: 25 };
+  const out = aggregateUsageForDisplay([
+    baseRecord({ cost, tokens: { input_cache_write_1h: 1_000_000 } }),
+  ]);
+  // 1M * $10 = $10.
+  assertAlmostEquals(out[0].cost, 10, 1e-9);
+});
diff --git a/packages/gateway/src/control-plane/token-usage/routes_test.ts b/packages/gateway/src/control-plane/token-usage/routes_test.ts
@@ -16,6 +16,7 @@ const seedUsage = async (
     upstream: 'up_test',
     modelKey: model,
     hour,
+    tier: null,
     requests,
     tokens: { input: 100, output: 50 },
     cost: null,

diff --git a/packages/gateway/src/data-plane/llm/chat-completions/respond.ts b/packages/gateway/src/data-plane/llm/chat-completions/respond.ts
@@ -3,7 +3,7 @@ import { streamSSE } from 'hono/streaming';
 
 import { CHAT_COMPLETIONS_MISSING_TERMINAL_MESSAGE, collectChatCompletionsProtocolEventsToResult } from './events/to-result.ts';
 import { chatCompletionsProtocolFrameToSSEFrame } from './events/to-sse.ts';
-import { tokenUsage } from '../../shared/telemetry/usage.ts';
+import { normalizeOpenAiServiceTier, tokenUsage } from '../../shared/telemetry/usage.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { SourceStreamState, eventResultMetadata, plainResultToResponse, recordPerformance, recordUsage } from '../shared/respond.ts';
 import { type StreamCompletion, writeSSEFrames } from '../shared/stream/sse.ts';
@@ -44,7 +44,7 @@ export const respondChatCompletions = async (
     try {
       const response = await collectChatCompletionsProtocolEventsToResult(frames);
       const metadata = await eventResultMetadata(result);
-      const usage = response.usage ? tokenUsageFromChatCompletionsUsage(response.usage) : null;
+      const usage = response.usage ? tokenUsageFromChatCompletionsUsage(response.usage, response.service_tier) : null;
       await recordUsage(ctx, metadata.modelIdentity, usage);
       recordPerformance(ctx, metadata.performance, state.failed);
       return { success: true, response: Response.json(response) };
@@ -80,14 +80,18 @@ export const respondChatCompletions = async (
 
 // OpenAI Chat usage reports prompt_tokens inclusive of cached and
 // cache-creation tokens; subtract them to recover the disjoint bare input.
-const tokenUsageFromChatCompletionsUsage = (u: NonNullable<ChatCompletionsResult['usage']>) => {
+// The top-level `service_tier` echoes the actual processing tier; surface it
+// as the `tier` slot so per-tier pricing overrides resolve at recording time.
+const tokenUsageFromChatCompletionsUsage = (u: NonNullable<ChatCompletionsResult['usage']>, serviceTier: string | null | undefined) => {
   const cacheRead = u.prompt_tokens_details?.cached_tokens ?? 0;
   const cacheWrite = u.prompt_tokens_details?.cache_creation_input_tokens ?? 0;
+  const tier = normalizeOpenAiServiceTier(serviceTier);
   return tokenUsage({
     input: u.prompt_tokens - cacheRead - cacheWrite,
     input_cache_read: cacheRead,
     input_cache_write: cacheWrite,
     output: u.completion_tokens,
+    ...(tier !== null ? { tier } : {}),
   });
 };
 
@@ -118,7 +122,7 @@ const observeChatCompletionsFrames = async function* (frames: AsyncIterable<Prot
     const failed = isChatCompletionsFailureFrame(frame);
     if (failed) state.failed = true;
     if (observeUsage) {
-      state.rememberUsage(frame.type === 'event' && Array.isArray(frame.event.choices) && frame.event.choices.length === 0 && frame.event.usage ? tokenUsageFromChatCompletionsUsage(frame.event.usage) : null);
+      state.rememberUsage(frame.type === 'event' && Array.isArray(frame.event.choices) && frame.event.choices.length === 0 && frame.event.usage ? tokenUsageFromChatCompletionsUsage(frame.event.usage, frame.event.service_tier) : null);
     }
     if (isChatCompletionsTerminalFrame(frame) && !failed) state.completed = true;
     yield frame;

diff --git a/packages/gateway/src/data-plane/llm/messages/events/reassemble.ts b/packages/gateway/src/data-plane/llm/messages/events/reassemble.ts
@@ -100,7 +100,9 @@ const applyMessagesUsage = (usage: MessagesUsage, update: Partial<MessagesUsage>
   if (update.cache_read_input_tokens != null) {
     usage.cache_read_input_tokens = update.cache_read_input_tokens;
   }
+  if (update.cache_creation != null) usage.cache_creation = update.cache_creation;
   if (update.service_tier != null) usage.service_tier = update.service_tier;
+  if (update.speed != null) usage.speed = update.speed;
   if (update.server_tool_use != null) {
     usage.server_tool_use = update.server_tool_use;
   }