Menci · Menci · Jun 22, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
@@ -21,9 +21,11 @@ export interface ModelEndpoints {
 
 export type ModelEndpointKey = keyof ModelEndpoints;
 
-// USD per million tokens, keyed by billing dimension.
-export type BillingDimension = 'input' | 'input_cache_read' | 'input_cache_write' | 'input_cache_write_1h' | 'input_image' | 'output' | 'output_image';
-export type ModelPricing = Partial<Record<BillingDimension, number>>;
+// USD per million tokens, keyed by billing dimension. Imported from the
+// gateway so the dashboard's pricing form stays locked to the same definition
+// the backend writes against — same pattern as `ProxyRecord` below.
+import type { BillingDimension, ModelPricing } from '@floway-dev/gateway/control-plane/pricing/types';
+export type { BillingDimension, ModelPricing };
 
 export interface UpstreamModelConfig {
   upstreamModelId: string;

diff --git a/apps/web/src/components/upstream-edit/ModelEditor.vue b/apps/web/src/components/upstream-edit/ModelEditor.vue
diff --git a/packages/gateway/migrations/0036_usage_tier_column.sql b/packages/gateway/migrations/0036_usage_tier_column.sql
@@ -0,0 +1,56 @@
+-- Add the per-request service tier column to `usage` + `usage_requests`.
+--
+-- `tier` is the upstream-stamped service-tier marker (Anthropic `usage.speed`,
+-- OpenAI top-level `service_tier`). It participates in bucket identity so a model
+-- billed at multiple tiers in one hour aggregates as separate buckets with
+-- distinct unit prices; recording writes NULL for base-tier requests and a
+-- non-empty string otherwise. The unique index uses `COALESCE(tier, '')`
+-- because SQLite treats NULLs as distinct under UNIQUE.
+--
+-- SQLite cannot add a column to the middle of a UNIQUE INDEX in place, so
+-- both tables are recreated. Existing rows backfill `tier = NULL`, which the
+-- aggregator treats as base pricing — historical buckets compute identically.
+
+CREATE TABLE usage_new (
+  key_id TEXT NOT NULL,
+  model TEXT NOT NULL,
+  upstream TEXT,
+  model_key TEXT NOT NULL,
+  hour TEXT NOT NULL,
+  tier TEXT,
+  dimension TEXT NOT NULL CHECK (dimension IN (
+    'input', 'input_cache_read', 'input_cache_write', 'input_cache_write_1h', 'input_image', 'output', 'output_image'
+  )),
+  tokens INTEGER NOT NULL DEFAULT 0,
+  unit_price REAL
+);
+
+INSERT INTO usage_new (key_id, model, upstream, model_key, hour, tier, dimension, tokens, unit_price)
+  SELECT key_id, model, upstream, model_key, hour, NULL, dimension, tokens, unit_price FROM usage;
+
+DROP TABLE usage;
+ALTER TABLE usage_new RENAME TO usage;
+
+CREATE UNIQUE INDEX idx_usage_dimension_identity
+  ON usage (key_id, model, COALESCE(upstream, ''), model_key, hour, COALESCE(tier, ''), dimension);
+CREATE INDEX idx_usage_dimension_hour ON usage (hour);
+
+CREATE TABLE usage_requests_new (
+  key_id TEXT NOT NULL,
+  model TEXT NOT NULL,
+  upstream TEXT,
+  model_key TEXT NOT NULL,
+  hour TEXT NOT NULL,
+  tier TEXT,
+  requests INTEGER NOT NULL DEFAULT 0
+);
+
+INSERT INTO usage_requests_new (key_id, model, upstream, model_key, hour, tier, requests)
+  SELECT key_id, model, upstream, model_key, hour, NULL, requests FROM usage_requests;
+
+DROP TABLE usage_requests;
+ALTER TABLE usage_requests_new RENAME TO usage_requests;
+
+CREATE UNIQUE INDEX idx_usage_requests_identity
+  ON usage_requests (key_id, model, COALESCE(upstream, ''), model_key, hour, COALESCE(tier, ''));
+CREATE INDEX idx_usage_requests_hour ON usage_requests (hour);
diff --git a/packages/gateway/package.json b/packages/gateway/package.json
@@ -7,6 +7,7 @@
     ".": { "import": "./src/index.ts", "types": "./src/index.ts" },
     "./app-type": { "types": "./src/app.ts" },
     "./control-plane/proxies/serialize": { "types": "./src/control-plane/proxies/serialize.ts" },
+    "./control-plane/pricing/types": { "types": "./src/control-plane/pricing/types.ts" },
     "./data-plane/tools/web-search/types": {
       "import": "./src/data-plane/tools/web-search/types.ts",
       "types": "./src/data-plane/tools/web-search/types.ts"

diff --git a/packages/gateway/src/app-control_test.ts b/packages/gateway/src/app-control_test.ts
@@ -108,6 +108,7 @@ test('/api/token-usage scopes to the actor\'s keys when called with an API key',
     upstream: null,
     modelKey: 'claude-sonnet-4',
     hour: '2026-03-15T10',
+    tier: null,
     requests: 2,
     tokens: { input: 10, output: 5, input_cache_read: 4, input_cache_write: 1 },
     cost: null,
@@ -118,6 +119,7 @@ test('/api/token-usage scopes to the actor\'s keys when called with an API key',
     upstream: null,
     modelKey: 'gpt-5',
     hour: '2026-03-15T11',
+    tier: null,
     requests: 1,
     tokens: { input: 20, output: 8, input_cache_read: 6, input_cache_write: 2 },
     cost: null,
@@ -155,6 +157,7 @@ test('/api/token-usage in self-by-key mode includes per-key metadata for the act
     upstream: null,
     modelKey: 'gpt-5',
     hour: '2026-03-16T10',
+    tier: null,
     requests: 1,
     tokens: { input: 20, output: 8 },
     cost: null,
@@ -182,6 +185,7 @@ test('/api/token-usage all-by-user view aggregates across keys per user', async
     upstream: null,
     modelKey: 'gpt-5',
     hour: '2026-03-15T10',
+    tier: null,
     requests: 1,
     tokens: { input: 10, output: 5 },
     cost: null,
@@ -213,6 +217,7 @@ test('/api/token-usage merges Claude variants into backend base model records',
     keyId: apiKey.id,
     hour: '2026-03-17T10',
     upstream: 'copilot:1',
+    tier: null,
     requests: 1,
     tokens: { input: 10, output: 5, input_cache_read: 2, input_cache_write: 1 },
   };

diff --git a/packages/gateway/src/control-plane/data-transfer/routes.ts b/packages/gateway/src/control-plane/data-transfer/routes.ts
@@ -402,6 +402,16 @@ const parseUsageRecords = (value: unknown): { type: 'ok'; records: UsageRecord[]
     if (typeof record.upstream === 'string' && isLegacyUpstreamIdentity(record.upstream)) {
       return { type: 'invalid', index: i, error: 'upstream must use a raw upstream id, not a legacy provider-prefixed identity' };
     }
+    if (record.tier !== undefined && record.tier !== null && typeof record.tier !== 'string') {
+      return { type: 'invalid', index: i, error: 'tier, when present, must be a string or null' };
+    }
+    if (record.tier === '') {
+      return { type: 'invalid', index: i, error: 'tier must be a non-empty string or null/absent' };
+    }
+    // Empty-string is rejected rather than normalized to null: the unique
+    // index folds NULL/'' under COALESCE, so a '' import would silently
+    // merge with base-tier rows.
+    const tier: string | null = typeof record.tier === 'string' ? record.tier : null;
     const tokensResult = parseImportedTokens(record.tokens);
     if (tokensResult.type === 'invalid') return { type: 'invalid', index: i, error: 'record has invalid token dimension counts' };
     const costResult = parseImportedCost(record.cost);
@@ -412,6 +422,7 @@ const parseUsageRecords = (value: unknown): { type: 'ok'; records: UsageRecord[]
       upstream: record.upstream as string | null,
       modelKey: record.modelKey,
       hour: record.hour,
+      tier,
       requests: record.requests,
       tokens: tokensResult.tokens,
       cost: costResult.cost,

diff --git a/packages/gateway/src/control-plane/data-transfer/routes_test.ts b/packages/gateway/src/control-plane/data-transfer/routes_test.ts
@@ -177,6 +177,7 @@ const USAGE_1: UsageRecord = {
   upstream: 'up_copilot_a',
   modelKey: 'claude-opus-4.7',
   hour: '2026-01-01T10',
+  tier: 'fast',
   requests: 5,
   tokens: { input: 1000, output: 500, input_cache_read: 120, input_cache_write: 80 },
   cost: null,
@@ -188,6 +189,7 @@ const USAGE_2: UsageRecord = {
   upstream: 'up_azure_a',
   modelKey: 'gpt-prod',
   hour: '2026-01-01T11',
+  tier: null,
   requests: 3,
   tokens: { input: 2000, output: 800, input_cache_read: 200, input_cache_write: 50 },
   cost: null,

diff --git a/packages/gateway/src/control-plane/pricing/types.ts b/packages/gateway/src/control-plane/pricing/types.ts
@@ -0,0 +1 @@
+export type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
@@ -60,6 +60,18 @@ const modelEndpointsSchema = z.object({
   imagesEdits: z.object({}).optional(),
 });
 
+// Shared between base pricing and per-tier overlays so the two always carry
+// the same dimension set.
+const pricingDimensionShape = {
+  input: z.number().nonnegative().optional(),
+  output: z.number().nonnegative().optional(),
+  input_cache_read: z.number().nonnegative().optional(),
+  input_cache_write: z.number().nonnegative().optional(),
+  input_cache_write_1h: z.number().nonnegative().optional(),
+  input_image: z.number().nonnegative().optional(),
+  output_image: z.number().nonnegative().optional(),
+};
+
 // Mirrors the runtime UpstreamModelConfig in @floway-dev/provider.
 // Azure and custom upstreams share this per-model entry; the canonical
 // per-model endpoint validation lives in the runtime validator.
@@ -70,13 +82,15 @@ const upstreamModelSchema = z.object({
   endpoints: modelEndpointsSchema,
   display_name: z.string().optional(),
   cost: z.object({
-    input: z.number().optional(),
-    output: z.number().optional(),
-    input_cache_read: z.number().optional(),
-    input_cache_write: z.number().optional(),
-    input_cache_write_1h: z.number().optional(),
-    input_image: z.number().optional(),
-    output_image: z.number().optional(),
+    ...pricingDimensionShape,
+    // See ModelPricing.tiers in @floway-dev/protocols/common for semantics.
+    tiers: z.record(
+      z.string().min(1),
+      z.object(pricingDimensionShape).refine(
+        t => Object.values(t).some(v => v !== undefined),
+        { message: 'tier overlay must declare at least one rate' },
+      ),
+    ).optional(),
   }).optional(),
   flagOverrides: z.object({
     enabled: z.boolean(),

diff --git a/packages/gateway/src/control-plane/token-usage/aggregate.ts b/packages/gateway/src/control-plane/token-usage/aggregate.ts
@@ -22,7 +22,10 @@ export interface DisplayUsageByUserRecord {
 
 // Cost is pure addition over the dimension rows: Σ tokens × unit_price / 1e6.
 // No subtraction is needed because the counts are disjoint and each dimension
-// already carries its own resolved unit price snapshot.
+// already carries its own resolved unit price snapshot. `record.cost` here
+// is rebuilt per row from the per-dimension `unit_price` columns, and the
+// repo writer has already folded the bucket's tier into those prices — so
+// the dimension lookup is a direct hit, with no tier resolution at read time.
 const recordCostUsd = (record: UsageRecord): number => {
   let total = 0;
   for (const dimension of BILLING_DIMENSIONS) {

diff --git a/packages/gateway/src/control-plane/token-usage/aggregate_test.ts b/packages/gateway/src/control-plane/token-usage/aggregate_test.ts
@@ -14,6 +14,7 @@ const baseRecord = (overrides: Partial<UsageRecord>): UsageRecord => ({
   model: 'claude-opus-4-7',
   upstream: 'up_copilot',
   modelKey: 'claude-opus-4-7',
+  tier: null,
   requests: 1,
   tokens: { input: 100, output: 50 },
   cost: opus47Pricing,
@@ -83,3 +84,30 @@ test('aggregateUsageForDisplay charges image dimensions separately', () => {
   // 10 + 5 + 40 + 30 = $85.
   assertAlmostEquals(out[0].cost, 85, 1e-9);
 });
+
+test('aggregateUsageForDisplay reads unit prices from the already-folded cost the repo writer hands back', () => {
+  // The repo write path (`repo/sql.ts:dimensionRows`, `repo/memory.ts:dimensionEntries`)
+  // resolves the bucket's tier into per-dimension unit prices BEFORE storing,
+  // so by the time aggregate sees a UsageRecord the `cost` field is already
+  // the effective pricing for that bucket's tier and tier resolution is a
+  // no-op. The two records below (one fast tier, one base tier) model the post-write shape.
+  // Opus 4.8: standard $5 / $25, fast $10 / $50.
+  const fastRow = baseRecord({
+    tier: 'fast',
+    cost: { input: 10, output: 50 },
+    tokens: { input: 1_000_000, output: 1_000_000 },
+  });
+  const standardRow = baseRecord({
+    tier: null,
+    cost: { input: 5, output: 25 },
+    tokens: { input: 1_000_000, output: 1_000_000 },
+  });
+
+  const fastOut = aggregateUsageForDisplay([fastRow]);
+  // 1M * $10 + 1M * $50 = $60.
+  assertAlmostEquals(fastOut[0].cost, 60, 1e-9);
+
+  const standardOut = aggregateUsageForDisplay([standardRow]);
+  // 1M * $5 + 1M * $25 = $30.
+  assertAlmostEquals(standardOut[0].cost, 30, 1e-9);
+});
diff --git a/packages/gateway/src/control-plane/token-usage/routes_test.ts b/packages/gateway/src/control-plane/token-usage/routes_test.ts
@@ -16,6 +16,7 @@ const seedUsage = async (
     upstream: 'up_test',
     modelKey: model,
     hour,
+    tier: null,
     requests,
     tokens: { input: 100, output: 50 },
     cost: null,

diff --git a/packages/gateway/src/data-plane/llm/chat-completions/respond.ts b/packages/gateway/src/data-plane/llm/chat-completions/respond.ts
@@ -3,11 +3,11 @@ import { streamSSE } from 'hono/streaming';
 
 import { CHAT_COMPLETIONS_MISSING_TERMINAL_MESSAGE, collectChatCompletionsProtocolEventsToResult } from './events/to-result.ts';
 import { chatCompletionsProtocolFrameToSSEFrame } from './events/to-sse.ts';
-import { tokenUsage } from '../../shared/telemetry/usage.ts';
+import { tokenUsageFromChatCompletionsUsage } from './usage.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { SourceStreamState, eventResultMetadata, forwardUpstreamHeaders, mergeForwardedUpstreamHeaders, plainResultToResponse, recordPerformance, recordUsage } from '../shared/respond.ts';
 import { type StreamCompletion, writeSSEFrames } from '../shared/stream/sse.ts';
-import type { ChatCompletionsStreamEvent, ChatCompletionsResult } from '@floway-dev/protocols/chat-completions';
+import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { chatCompletionsErrorPayloadMessage } from '@floway-dev/protocols/chat-completions';
 import { type ProtocolFrame, sseCommentFrame, sseFrame } from '@floway-dev/protocols/common';
 import { type ExecuteResult, type PlainResult, type InternalDebugError, toInternalDebugError } from '@floway-dev/provider';
@@ -44,7 +44,7 @@ export const respondChatCompletions = async (
     try {
       const response = await collectChatCompletionsProtocolEventsToResult(frames);
       const metadata = await eventResultMetadata(result);
-      const usage = response.usage ? tokenUsageFromChatCompletionsUsage(response.usage) : null;
+      const usage = response.usage ? tokenUsageFromChatCompletionsUsage(response.usage, response.service_tier) : null;
       await recordUsage(ctx, metadata.modelIdentity, usage);
       recordPerformance(ctx, metadata.performance, state.failed);
       return { success: true, response: Response.json(response, { headers: mergeForwardedUpstreamHeaders(undefined, result.headers) }) };
@@ -77,21 +77,6 @@ export const respondChatCompletions = async (
   return { success: true, response };
 };
 
-// --- token usage ---
-
-// OpenAI Chat usage reports prompt_tokens inclusive of cached and
-// cache-creation tokens; subtract them to recover the disjoint bare input.
-const tokenUsageFromChatCompletionsUsage = (u: NonNullable<ChatCompletionsResult['usage']>) => {
-  const cacheRead = u.prompt_tokens_details?.cached_tokens ?? 0;
-  const cacheWrite = u.prompt_tokens_details?.cache_creation_input_tokens ?? 0;
-  return tokenUsage({
-    input: u.prompt_tokens - cacheRead - cacheWrite,
-    input_cache_read: cacheRead,
-    input_cache_write: cacheWrite,
-    output: u.completion_tokens,
-  });
-};
-
 // --- error rendering ---
 
 const internalChatCompletionsErrorPayload = (error: InternalDebugError) => ({
@@ -119,7 +104,7 @@ const observeChatCompletionsFrames = async function* (frames: AsyncIterable<Prot
     const failed = isChatCompletionsFailureFrame(frame);
     if (failed) state.failed = true;
     if (observeUsage) {
-      state.rememberUsage(frame.type === 'event' && Array.isArray(frame.event.choices) && frame.event.choices.length === 0 && frame.event.usage ? tokenUsageFromChatCompletionsUsage(frame.event.usage) : null);
+      state.rememberUsage(frame.type === 'event' && Array.isArray(frame.event.choices) && frame.event.choices.length === 0 && frame.event.usage ? tokenUsageFromChatCompletionsUsage(frame.event.usage, frame.event.service_tier) : null);
     }
     if (isChatCompletionsTerminalFrame(frame) && !failed) state.completed = true;
     yield frame;

diff --git a/packages/gateway/src/data-plane/llm/chat-completions/usage.ts b/packages/gateway/src/data-plane/llm/chat-completions/usage.ts
@@ -0,0 +1,19 @@
+import { billableServiceTier, tokenUsage } from '../../shared/telemetry/usage.ts';
+import type { ChatCompletionsResult } from '@floway-dev/protocols/chat-completions';
+
+// OpenAI Chat usage reports prompt_tokens inclusive of cached and
+// cache-creation tokens; subtract them to recover the disjoint bare input.
+// The top-level `service_tier` echoes the actual processing tier; surface it
+// via `billableServiceTier` so per-tier pricing overrides resolve at
+// recording time. https://developers.openai.com/api/docs/guides/priority-processing
+export const tokenUsageFromChatCompletionsUsage = (u: NonNullable<ChatCompletionsResult['usage']>, serviceTier: string | null | undefined) => {
+  const cacheRead = u.prompt_tokens_details?.cached_tokens ?? 0;
+  const cacheWrite = u.prompt_tokens_details?.cache_creation_input_tokens ?? 0;
+  return tokenUsage({
+    input: u.prompt_tokens - cacheRead - cacheWrite,
+    input_cache_read: cacheRead,
+    input_cache_write: cacheWrite,
+    output: u.completion_tokens,
+    tier: billableServiceTier(serviceTier),
+  });
+};
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		export type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';