diff --git a/src/app/v1/_lib/proxy/response-handler.ts b/src/app/v1/_lib/proxy/response-handler.ts index e2dd19edc..7f0b4c7d0 100644 --- a/src/app/v1/_lib/proxy/response-handler.ts +++ b/src/app/v1/_lib/proxy/response-handler.ts @@ -1257,8 +1257,11 @@ export class ProxyResponseHandler { if (resolvedPricing) { ensurePricingResolutionSpecialSetting(session, resolvedPricing); const longContextPricing = - matchLongContextPricing(billableUsageMetrics, resolvedPricing.priceData) - ?.pricing ?? null; + matchLongContextPricing( + billableUsageMetrics, + resolvedPricing.priceData, + priorityServiceTierApplied ? "priority" : null + )?.pricing ?? null; const cost = calculateRequestCost( billableUsageMetrics, resolvedPricing.priceData, @@ -2373,8 +2376,11 @@ export class ProxyResponseHandler { if (resolvedPricing) { ensurePricingResolutionSpecialSetting(session, resolvedPricing); const longContextPricing = - matchLongContextPricing(billableUsageForCost, resolvedPricing.priceData) - ?.pricing ?? null; + matchLongContextPricing( + billableUsageForCost, + resolvedPricing.priceData, + priorityServiceTierApplied ? "priority" : null + )?.pricing ?? null; const cost = calculateRequestCost( billableUsageForCost, resolvedPricing.priceData, @@ -3466,7 +3472,11 @@ async function updateRequestCostFromUsage( } const longContextPricing = - matchLongContextPricing(usage, resolvedPricing.priceData)?.pricing ?? null; + matchLongContextPricing( + usage, + resolvedPricing.priceData, + priorityServiceTierApplied ? "priority" : null + )?.pricing ?? null; const cost = calculateRequestCost( usage, resolvedPricing.priceData, @@ -3669,8 +3679,11 @@ export async function finalizeRequestStats( if (resolvedPricing) { ensurePricingResolutionSpecialSetting(session, resolvedPricing); const longContextPricing = - matchLongContextPricing(billableNormalizedUsage, resolvedPricing.priceData)?.pricing ?? - null; + matchLongContextPricing( + billableNormalizedUsage, + resolvedPricing.priceData, + priorityServiceTierApplied ? "priority" : null + )?.pricing ?? null; const cost = calculateRequestCost( billableNormalizedUsage, resolvedPricing.priceData, @@ -3778,7 +3791,11 @@ async function trackCostToRedis( ensurePricingResolutionSpecialSetting(session, resolvedPricing); const longContextPricing = longContextPricingOverride === undefined - ? (matchLongContextPricing(usage, resolvedPricing.priceData)?.pricing ?? null) + ? (matchLongContextPricing( + usage, + resolvedPricing.priceData, + priorityServiceTierApplied ? "priority" : null + )?.pricing ?? null) : longContextPricingOverride; const cost = calculateRequestCost( diff --git a/src/lib/utils/cost-calculation.ts b/src/lib/utils/cost-calculation.ts index 5bfee1c26..7ee803952 100644 --- a/src/lib/utils/cost-calculation.ts +++ b/src/lib/utils/cost-calculation.ts @@ -1,5 +1,5 @@ import { CONTEXT_1M_TOKEN_THRESHOLD } from "@/lib/special-attributes"; -import type { ModelPriceData } from "@/types/model-price"; +import type { ModelPriceData, ServiceTierPricing } from "@/types/model-price"; import { COST_SCALE, Decimal, toDecimal } from "./currency"; const OPENAI_LONG_CONTEXT_TOKEN_THRESHOLD = 272000; @@ -40,6 +40,8 @@ type RequestCostBreakdownOptions = Omit< "multiplier" | "groupMultiplier" >; +type ServiceTierName = "priority"; + export interface LongContextPricingMatch { thresholdTokens: number; scope: "request" | "session"; @@ -111,6 +113,47 @@ function multiplyCost(quantity: number | undefined, unitCost: number | undefined return qtyDecimal.mul(costDecimal); } +function getServiceTierPricing( + priceData: ModelPriceData, + serviceTier?: ServiceTierName | null +): ServiceTierPricing | null { + if (!serviceTier) { + return null; + } + + const pricing = priceData.service_tier_pricing; + if (!pricing || typeof pricing !== "object" || Array.isArray(pricing)) { + return null; + } + + const tierPricing = pricing[serviceTier]; + if (!tierPricing || typeof tierPricing !== "object" || Array.isArray(tierPricing)) { + return null; + } + + return tierPricing; +} + +function getPriorityServiceTierPricing( + priceData: ModelPriceData, + priorityServiceTierApplied: boolean +): ServiceTierPricing | null { + return priorityServiceTierApplied ? getServiceTierPricing(priceData, "priority") : null; +} + +function resolvePriorityAwareRate( + priorityServiceTierApplied: boolean, + serviceTierRate: number | undefined, + legacyPriorityRate: number | undefined, + baseRate: number | undefined +): number | undefined { + if (!priorityServiceTierApplied) { + return baseRate; + } + + return serviceTierRate ?? legacyPriorityRate ?? baseRate; +} + function resolveLongContextThreshold(priceData: ModelPriceData): number { const has272kFields = typeof priceData.input_cost_per_token_above_272k_tokens === "number" || @@ -131,9 +174,11 @@ function resolveLongContextThreshold(priceData: ModelPriceData): number { } export function resolveLongContextPricing( - priceData: ModelPriceData + priceData: ModelPriceData, + serviceTier?: ServiceTierName | null ): ResolvedLongContextPricing | null { - const pricing = priceData.long_context_pricing; + const serviceTierPricing = getServiceTierPricing(priceData, serviceTier); + const pricing = serviceTierPricing?.long_context_pricing ?? priceData.long_context_pricing; if (!pricing) { return null; } @@ -143,17 +188,35 @@ export function resolveLongContextPricing( return null; } - const baseInputCost = priceData.input_cost_per_token; - const baseOutputCost = priceData.output_cost_per_token; + const priorityServiceTierApplied = serviceTier === "priority"; + const baseInputCost = resolvePriorityAwareRate( + priorityServiceTierApplied, + serviceTierPricing?.input_cost_per_token, + priceData.input_cost_per_token_priority, + priceData.input_cost_per_token + ); + const baseOutputCost = resolvePriorityAwareRate( + priorityServiceTierApplied, + serviceTierPricing?.output_cost_per_token, + priceData.output_cost_per_token_priority, + priceData.output_cost_per_token + ); const baseCacheCreation5mCost = + serviceTierPricing?.cache_creation_input_token_cost ?? priceData.cache_creation_input_token_cost ?? (baseInputCost != null ? baseInputCost * 1.25 : undefined); const baseCacheCreation1hCost = + serviceTierPricing?.cache_creation_input_token_cost_above_1hr ?? priceData.cache_creation_input_token_cost_above_1hr ?? (baseInputCost != null ? baseInputCost * 2 : undefined) ?? baseCacheCreation5mCost; const baseCacheReadCost = - priceData.cache_read_input_token_cost ?? + resolvePriorityAwareRate( + priorityServiceTierApplied, + serviceTierPricing?.cache_read_input_token_cost, + priceData.cache_read_input_token_cost_priority, + priceData.cache_read_input_token_cost + ) ?? (baseInputCost != null ? baseInputCost * 0.1 : baseOutputCost != null @@ -301,11 +364,18 @@ function resolvePriorityAwareLongContextRate( above272kPriority?: number; above200k?: number; above200kPriority?: number; + serviceTierAbove272k?: number; + serviceTierAbove200k?: number; } ): number | undefined { if (priorityServiceTierApplied) { return ( - fields.above272kPriority ?? fields.above200kPriority ?? fields.above272k ?? fields.above200k + fields.serviceTierAbove272k ?? + fields.serviceTierAbove200k ?? + fields.above272kPriority ?? + fields.above200kPriority ?? + fields.above272k ?? + fields.above200k ); } @@ -332,9 +402,10 @@ export function getLongContextTriggerInputTokens( export function matchLongContextPricing( usage: UsageMetrics, - priceData: ModelPriceData + priceData: ModelPriceData, + serviceTier?: ServiceTierName | null ): LongContextPricingMatch | null { - const pricing = resolveLongContextPricing(priceData); + const pricing = resolveLongContextPricing(priceData, serviceTier); if (!pricing) { return null; } @@ -387,17 +458,24 @@ export function calculateRequestCostBreakdown( const baseInputCostPerToken = priceData.input_cost_per_token; const baseOutputCostPerToken = priceData.output_cost_per_token; - const inputCostPerToken = - options.priorityServiceTierApplied && - typeof priceData.input_cost_per_token_priority === "number" - ? priceData.input_cost_per_token_priority - : baseInputCostPerToken; - const outputCostPerToken = - options.priorityServiceTierApplied && - typeof priceData.output_cost_per_token_priority === "number" - ? priceData.output_cost_per_token_priority - : baseOutputCostPerToken; - const inputCostPerRequest = priceData.input_cost_per_request; + const priorityTierPricing = getPriorityServiceTierPricing( + priceData, + options.priorityServiceTierApplied + ); + const inputCostPerToken = resolvePriorityAwareRate( + options.priorityServiceTierApplied, + priorityTierPricing?.input_cost_per_token, + priceData.input_cost_per_token_priority, + baseInputCostPerToken + ); + const outputCostPerToken = resolvePriorityAwareRate( + options.priorityServiceTierApplied, + priorityTierPricing?.output_cost_per_token, + priceData.output_cost_per_token_priority, + baseOutputCostPerToken + ); + const inputCostPerRequest = + priorityTierPricing?.input_cost_per_request ?? priceData.input_cost_per_request; const longContextPricing = options.longContextPricing; // Per-request cost -> input bucket @@ -413,23 +491,27 @@ export function calculateRequestCostBreakdown( } const cacheCreation5mCost = + priorityTierPricing?.cache_creation_input_token_cost ?? priceData.cache_creation_input_token_cost ?? - (baseInputCostPerToken != null ? baseInputCostPerToken * 1.25 : undefined); + (inputCostPerToken != null ? inputCostPerToken * 1.25 : undefined); const cacheCreation1hCost = + priorityTierPricing?.cache_creation_input_token_cost_above_1hr ?? priceData.cache_creation_input_token_cost_above_1hr ?? - (baseInputCostPerToken != null ? baseInputCostPerToken * 2 : undefined) ?? + (inputCostPerToken != null ? inputCostPerToken * 2 : undefined) ?? cacheCreation5mCost; const cacheReadCost = - (options.priorityServiceTierApplied && - typeof priceData.cache_read_input_token_cost_priority === "number" - ? priceData.cache_read_input_token_cost_priority - : priceData.cache_read_input_token_cost) ?? - (baseInputCostPerToken != null - ? baseInputCostPerToken * 0.1 - : baseOutputCostPerToken != null - ? baseOutputCostPerToken * 0.1 + resolvePriorityAwareRate( + options.priorityServiceTierApplied, + priorityTierPricing?.cache_read_input_token_cost, + priceData.cache_read_input_token_cost_priority, + priceData.cache_read_input_token_cost + ) ?? + (inputCostPerToken != null + ? inputCostPerToken * 0.1 + : outputCostPerToken != null + ? outputCostPerToken * 0.1 : undefined); // Derive cache creation tokens by TTL @@ -457,6 +539,8 @@ export function calculateRequestCostBreakdown( above272kPriority: priceData.input_cost_per_token_above_272k_tokens_priority, above200k: priceData.input_cost_per_token_above_200k_tokens, above200kPriority: priceData.input_cost_per_token_above_200k_tokens_priority, + serviceTierAbove272k: priorityTierPricing?.input_cost_per_token_above_272k_tokens, + serviceTierAbove200k: priorityTierPricing?.input_cost_per_token_above_200k_tokens, } ); const outputAboveThreshold = resolvePriorityAwareLongContextRate( @@ -466,12 +550,18 @@ export function calculateRequestCostBreakdown( above272kPriority: priceData.output_cost_per_token_above_272k_tokens_priority, above200k: priceData.output_cost_per_token_above_200k_tokens, above200kPriority: priceData.output_cost_per_token_above_200k_tokens_priority, + serviceTierAbove272k: priorityTierPricing?.output_cost_per_token_above_272k_tokens, + serviceTierAbove200k: priorityTierPricing?.output_cost_per_token_above_200k_tokens, } ); const cacheCreationAboveThreshold = + priorityTierPricing?.cache_creation_input_token_cost_above_272k_tokens ?? + priorityTierPricing?.cache_creation_input_token_cost_above_200k_tokens ?? priceData.cache_creation_input_token_cost_above_272k_tokens ?? priceData.cache_creation_input_token_cost_above_200k_tokens; const cacheCreation1hAboveThreshold = + priorityTierPricing?.cache_creation_input_token_cost_above_1hr_above_272k_tokens ?? + priorityTierPricing?.cache_creation_input_token_cost_above_1hr_above_200k_tokens ?? priceData.cache_creation_input_token_cost_above_1hr_above_272k_tokens ?? priceData.cache_creation_input_token_cost_above_1hr_above_200k_tokens ?? cacheCreationAboveThreshold; @@ -482,6 +572,8 @@ export function calculateRequestCostBreakdown( above272kPriority: priceData.cache_read_input_token_cost_above_272k_tokens_priority, above200k: priceData.cache_read_input_token_cost_above_200k_tokens, above200kPriority: priceData.cache_read_input_token_cost_above_200k_tokens_priority, + serviceTierAbove272k: priorityTierPricing?.cache_read_input_token_cost_above_272k_tokens, + serviceTierAbove200k: priorityTierPricing?.cache_read_input_token_cost_above_200k_tokens, } ); const longContextThreshold = resolveLongContextThreshold(priceData); @@ -656,17 +748,24 @@ export function calculateRequestCost( const baseInputCostPerToken = priceData.input_cost_per_token; const baseOutputCostPerToken = priceData.output_cost_per_token; - const inputCostPerToken = - options.priorityServiceTierApplied && - typeof priceData.input_cost_per_token_priority === "number" - ? priceData.input_cost_per_token_priority - : baseInputCostPerToken; - const outputCostPerToken = - options.priorityServiceTierApplied && - typeof priceData.output_cost_per_token_priority === "number" - ? priceData.output_cost_per_token_priority - : baseOutputCostPerToken; - const inputCostPerRequest = priceData.input_cost_per_request; + const priorityTierPricing = getPriorityServiceTierPricing( + priceData, + options.priorityServiceTierApplied + ); + const inputCostPerToken = resolvePriorityAwareRate( + options.priorityServiceTierApplied, + priorityTierPricing?.input_cost_per_token, + priceData.input_cost_per_token_priority, + baseInputCostPerToken + ); + const outputCostPerToken = resolvePriorityAwareRate( + options.priorityServiceTierApplied, + priorityTierPricing?.output_cost_per_token, + priceData.output_cost_per_token_priority, + baseOutputCostPerToken + ); + const inputCostPerRequest = + priorityTierPricing?.input_cost_per_request ?? priceData.input_cost_per_request; const longContextPricing = options.longContextPricing; if ( @@ -681,23 +780,27 @@ export function calculateRequestCost( } const cacheCreation5mCost = + priorityTierPricing?.cache_creation_input_token_cost ?? priceData.cache_creation_input_token_cost ?? - (baseInputCostPerToken != null ? baseInputCostPerToken * 1.25 : undefined); + (inputCostPerToken != null ? inputCostPerToken * 1.25 : undefined); const cacheCreation1hCost = + priorityTierPricing?.cache_creation_input_token_cost_above_1hr ?? priceData.cache_creation_input_token_cost_above_1hr ?? - (baseInputCostPerToken != null ? baseInputCostPerToken * 2 : undefined) ?? + (inputCostPerToken != null ? inputCostPerToken * 2 : undefined) ?? cacheCreation5mCost; const cacheReadCost = - (options.priorityServiceTierApplied && - typeof priceData.cache_read_input_token_cost_priority === "number" - ? priceData.cache_read_input_token_cost_priority - : priceData.cache_read_input_token_cost) ?? - (baseInputCostPerToken != null - ? baseInputCostPerToken * 0.1 - : baseOutputCostPerToken != null - ? baseOutputCostPerToken * 0.1 + resolvePriorityAwareRate( + options.priorityServiceTierApplied, + priorityTierPricing?.cache_read_input_token_cost, + priceData.cache_read_input_token_cost_priority, + priceData.cache_read_input_token_cost + ) ?? + (inputCostPerToken != null + ? inputCostPerToken * 0.1 + : outputCostPerToken != null + ? outputCostPerToken * 0.1 : undefined); // Derive cache creation tokens by TTL @@ -725,6 +828,8 @@ export function calculateRequestCost( above272kPriority: priceData.input_cost_per_token_above_272k_tokens_priority, above200k: priceData.input_cost_per_token_above_200k_tokens, above200kPriority: priceData.input_cost_per_token_above_200k_tokens_priority, + serviceTierAbove272k: priorityTierPricing?.input_cost_per_token_above_272k_tokens, + serviceTierAbove200k: priorityTierPricing?.input_cost_per_token_above_200k_tokens, } ); const outputAboveThreshold = resolvePriorityAwareLongContextRate( @@ -734,12 +839,18 @@ export function calculateRequestCost( above272kPriority: priceData.output_cost_per_token_above_272k_tokens_priority, above200k: priceData.output_cost_per_token_above_200k_tokens, above200kPriority: priceData.output_cost_per_token_above_200k_tokens_priority, + serviceTierAbove272k: priorityTierPricing?.output_cost_per_token_above_272k_tokens, + serviceTierAbove200k: priorityTierPricing?.output_cost_per_token_above_200k_tokens, } ); const cacheCreationAboveThreshold = + priorityTierPricing?.cache_creation_input_token_cost_above_272k_tokens ?? + priorityTierPricing?.cache_creation_input_token_cost_above_200k_tokens ?? priceData.cache_creation_input_token_cost_above_272k_tokens ?? priceData.cache_creation_input_token_cost_above_200k_tokens; const cacheCreation1hAboveThreshold = + priorityTierPricing?.cache_creation_input_token_cost_above_1hr_above_272k_tokens ?? + priorityTierPricing?.cache_creation_input_token_cost_above_1hr_above_200k_tokens ?? priceData.cache_creation_input_token_cost_above_1hr_above_272k_tokens ?? priceData.cache_creation_input_token_cost_above_1hr_above_200k_tokens ?? cacheCreationAboveThreshold; @@ -750,6 +861,8 @@ export function calculateRequestCost( above272kPriority: priceData.cache_read_input_token_cost_above_272k_tokens_priority, above200k: priceData.cache_read_input_token_cost_above_200k_tokens, above200kPriority: priceData.cache_read_input_token_cost_above_200k_tokens_priority, + serviceTierAbove272k: priorityTierPricing?.cache_read_input_token_cost_above_272k_tokens, + serviceTierAbove200k: priorityTierPricing?.cache_read_input_token_cost_above_200k_tokens, } ); const longContextThreshold = resolveLongContextThreshold(priceData); diff --git a/src/lib/utils/model-price-fields.ts b/src/lib/utils/model-price-fields.ts index dea76b635..24fab89d9 100644 --- a/src/lib/utils/model-price-fields.ts +++ b/src/lib/utils/model-price-fields.ts @@ -59,6 +59,11 @@ const SUPPORTED_LONG_CONTEXT_KEYS = new Set([ "cache_read_input_token_cost", ]); +const SUPPORTED_SERVICE_TIER_KEYS = new Set([ + ...SUPPORTED_TOP_LEVEL_BILLING_KEYS, + "long_context_pricing", +]); + const CORE_TOP_LEVEL_FIELDS = new Set([ "mode", "display_name", @@ -124,7 +129,8 @@ export function isPriceLikeFieldPath(path: string): boolean { function classifyField(path: string, key: string): ModelPriceFieldKind { if ( SUPPORTED_TOP_LEVEL_BILLING_KEYS.has(key) || - (path.startsWith("long_context_pricing.") && SUPPORTED_LONG_CONTEXT_KEYS.has(key)) + (path.startsWith("long_context_pricing.") && SUPPORTED_LONG_CONTEXT_KEYS.has(key)) || + (path.startsWith("service_tier_pricing.") && SUPPORTED_SERVICE_TIER_KEYS.has(key)) ) { return "supported"; } @@ -150,7 +156,11 @@ function isCoreField( return true; } - return CORE_TOP_LEVEL_FIELDS.has(key) || path.startsWith("long_context_pricing."); + return ( + CORE_TOP_LEVEL_FIELDS.has(key) || + path.startsWith("long_context_pricing.") || + path.startsWith("service_tier_pricing.") + ); } function pushEntries( diff --git a/src/lib/utils/pricing-resolution.ts b/src/lib/utils/pricing-resolution.ts index 4f3f7d4e0..d1d656476 100644 --- a/src/lib/utils/pricing-resolution.ts +++ b/src/lib/utils/pricing-resolution.ts @@ -68,7 +68,7 @@ const PROVIDER_DETAIL_FIELDS = [ "input_cost_per_image_token", ] as const; -const DETAIL_SCORE_OBJECT_FIELDS = ["long_context_pricing"] as const; +const DETAIL_SCORE_OBJECT_FIELDS = ["long_context_pricing", "service_tier_pricing"] as const; const DETAIL_TIE_BREAK_ORDER = [ "openrouter", diff --git a/src/proxy.ts b/src/proxy.ts index 65b1b4628..86471fed3 100644 --- a/src/proxy.ts +++ b/src/proxy.ts @@ -1,6 +1,6 @@ import { type NextRequest, NextResponse } from "next/server"; import createMiddleware from "next-intl/middleware"; -import { type Locale, localeCookieName } from "@/i18n/config"; +import { defaultLocale, type Locale, localeCookieName } from "@/i18n/config"; import { getLocaleFromValue, normalizePathnameForLocaleNavigation } from "@/i18n/pathname"; import { routing } from "@/i18n/routing"; import { AUTH_COOKIE_NAME } from "@/lib/auth"; @@ -18,6 +18,7 @@ const PUBLIC_PATH_PATTERNS = [ ]; const API_PROXY_PATH = "/v1"; +const SYSTEM_STATUS_ALIAS_PATH = "/system-status"; function matchesPublicPath(pathname: string, pattern: string) { return pathname === pattern || pathname.startsWith(`${pattern}/`); @@ -46,6 +47,21 @@ function proxyHandler(request: NextRequest) { return NextResponse.next(); } + if (pathname === SYSTEM_STATUS_ALIAS_PATH) { + const url = request.nextUrl.clone(); + url.pathname = `/${defaultLocale}/status`; + return NextResponse.redirect(url); + } + + const localePrefixedSystemStatusAlias = routing.locales.find( + (locale) => pathname === `/${locale}${SYSTEM_STATUS_ALIAS_PATH}` + ); + if (localePrefixedSystemStatusAlias) { + const url = request.nextUrl.clone(); + url.pathname = `/${localePrefixedSystemStatusAlias}/status`; + return NextResponse.redirect(url); + } + const isLocalePrefixedPublicStatusPath = routing.locales.some( (locale) => pathname === `/${locale}/status` || pathname.startsWith(`/${locale}/status/`) ); diff --git a/src/types/model-price.ts b/src/types/model-price.ts index 734897aac..035a98759 100644 --- a/src/types/model-price.ts +++ b/src/types/model-price.ts @@ -18,6 +18,27 @@ export interface LongContextPricing { cache_read_input_token_cost?: number; } +export interface ServiceTierPricing { + input_cost_per_token?: number; + output_cost_per_token?: number; + cache_creation_input_token_cost?: number; + cache_creation_input_token_cost_above_1hr?: number; + cache_read_input_token_cost?: number; + input_cost_per_token_above_200k_tokens?: number; + output_cost_per_token_above_200k_tokens?: number; + cache_creation_input_token_cost_above_200k_tokens?: number; + cache_read_input_token_cost_above_200k_tokens?: number; + cache_creation_input_token_cost_above_1hr_above_200k_tokens?: number; + input_cost_per_token_above_272k_tokens?: number; + output_cost_per_token_above_272k_tokens?: number; + cache_creation_input_token_cost_above_272k_tokens?: number; + cache_read_input_token_cost_above_272k_tokens?: number; + cache_creation_input_token_cost_above_1hr_above_272k_tokens?: number; + input_cost_per_request?: number; + long_context_pricing?: LongContextPricing; + [key: string]: unknown; +} + export interface ModelPriceData { // 基础价格信息 input_cost_per_token?: number; @@ -73,6 +94,9 @@ export interface ModelPriceData { // 长上下文价格(例如 GPT-5.4 超过 272K 后的 premium 费率) long_context_pricing?: LongContextPricing; + // 服务等级定价(例如 OpenAI service_tier="priority") + service_tier_pricing?: Record; + // 模型能力信息 display_name?: string; litellm_provider?: string; diff --git a/tests/integration/billing-model-source.test.ts b/tests/integration/billing-model-source.test.ts index d9ef3e2d9..f543de375 100644 --- a/tests/integration/billing-model-source.test.ts +++ b/tests/integration/billing-model-source.test.ts @@ -630,6 +630,74 @@ describe("Billing model source - Redis session cost vs DB cost", () => { expect(rateLimitCosts[0]).toBe(64); }); + it("codex fast: uses model-specific service_tier_pricing priority rates", async () => { + vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected")); + vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined); + vi.mocked(updateMessageRequestDuration).mockResolvedValue(undefined); + vi.mocked(SessionManager.storeSessionResponse).mockResolvedValue(undefined); + vi.mocked(RateLimitService.trackUserDailyCost).mockResolvedValue(undefined); + vi.mocked(SessionTracker.refreshSession).mockResolvedValue(undefined); + + vi.mocked(findLatestPriceByModel).mockImplementation(async (modelName: string) => { + if (modelName === "gpt-5.5") { + return makePriceRecord(modelName, { + mode: "responses", + model_family: "gpt", + litellm_provider: "chatgpt", + pricing: { + openai: { + input_cost_per_token: 0.000005, + output_cost_per_token: 0.00003, + cache_read_input_token_cost: 0.0000005, + input_cost_per_token_priority: 0.00001, + output_cost_per_token_priority: 0.00006, + cache_read_input_token_cost_priority: 0.000001, + service_tier_pricing: { + priority: { + input_cost_per_token: 0.0000125, + output_cost_per_token: 0.000075, + cache_read_input_token_cost: 0.00000125, + }, + }, + }, + }, + }); + } + return null; + }); + + const dbCosts: string[] = []; + vi.mocked(updateMessageRequestCostWithBreakdown).mockImplementation( + async (_id: number, costUsd: unknown) => { + dbCosts.push(String(costUsd)); + } + ); + const rateLimitCosts = captureRateLimitCosts(); + + const session = createSession({ + originalModel: "gpt-5.5", + redirectedModel: "gpt-5.5", + sessionId: "sess-gpt55-priority-service-tier-pricing", + messageId: 3202, + providerOverrides: { + name: "ChatGPT", + url: "https://chatgpt.com/backend-api/codex", + providerType: "codex", + }, + requestMessage: { service_tier: "priority" }, + }); + + const response = createNonStreamResponse({ + input_tokens: 1_000_000, + output_tokens: 1_000_000, + }); + await ProxyResponseHandler.dispatch(session, response); + await drainAsyncTasks(); + + expect(dbCosts[0]).toBe("87.5"); + expect(rateLimitCosts[0]).toBe(87.5); + }); + it("codex fast: uses long-context priority pricing when request is priority and response omits service_tier", async () => { vi.mocked(getSystemSettings).mockResolvedValue(makeSystemSettings("redirected")); vi.mocked(updateMessageRequestDetails).mockResolvedValue(undefined); diff --git a/tests/unit/lib/cost-calculation-breakdown.test.ts b/tests/unit/lib/cost-calculation-breakdown.test.ts index 8ea9a47dd..33266a021 100644 --- a/tests/unit/lib/cost-calculation-breakdown.test.ts +++ b/tests/unit/lib/cost-calculation-breakdown.test.ts @@ -134,6 +134,35 @@ describe("calculateRequestCostBreakdown", () => { expect(result.total).toBe(1904154); }); + test("uses service_tier_pricing.priority in breakdown before legacy priority fields", () => { + const result = calculateRequestCostBreakdown( + { + input_tokens: 2, + output_tokens: 3, + cache_read_input_tokens: 5, + }, + makePriceData({ + input_cost_per_token_priority: 2, + output_cost_per_token_priority: 20, + cache_read_input_token_cost_priority: 0.2, + service_tier_pricing: { + priority: { + input_cost_per_token: 3, + output_cost_per_token: 30, + cache_read_input_token_cost: 0.3, + }, + }, + }), + false, + true + ); + + expect(result.input).toBe(6); + expect(result.output).toBe(90); + expect(result.cache_read).toBe(1.5); + expect(result.total).toBe(97.5); + }); + test("falls back to regular long-context pricing in breakdown when priority long-context fields are absent", () => { const result = calculateRequestCostBreakdown( { diff --git a/tests/unit/lib/cost-calculation-priority.test.ts b/tests/unit/lib/cost-calculation-priority.test.ts index 0342b94be..1794f2420 100644 --- a/tests/unit/lib/cost-calculation-priority.test.ts +++ b/tests/unit/lib/cost-calculation-priority.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "vitest"; -import { calculateRequestCost } from "@/lib/utils/cost-calculation"; +import { calculateRequestCost, matchLongContextPricing } from "@/lib/utils/cost-calculation"; import type { ModelPriceData } from "@/types/model-price"; function makePriceData(overrides: Partial = {}): ModelPriceData { @@ -16,6 +16,79 @@ function makePriceData(overrides: Partial = {}): ModelPriceData } describe("calculateRequestCost priority service tier", () => { + test("uses service_tier_pricing.priority before legacy priority fields", () => { + const cost = calculateRequestCost( + { input_tokens: 2, output_tokens: 3, cache_read_input_tokens: 5 }, + makePriceData({ + service_tier_pricing: { + priority: { + input_cost_per_token: 3, + output_cost_per_token: 30, + cache_read_input_token_cost: 0.3, + }, + }, + }), + 1, + false, + true + ); + + expect(Number(cost.toString())).toBe(97.5); + }); + + test("keeps service_tier_pricing.priority scoped to priority requests", () => { + const cost = calculateRequestCost( + { input_tokens: 2, output_tokens: 3, cache_read_input_tokens: 5 }, + makePriceData({ + service_tier_pricing: { + priority: { + input_cost_per_token: 3, + output_cost_per_token: 30, + cache_read_input_token_cost: 0.3, + }, + }, + }), + 1, + false, + false + ); + + expect(Number(cost.toString())).toBe(32.5); + }); + + test("allows different models to define different priority tier prices", () => { + const usage = { input_tokens: 1, output_tokens: 1, cache_read_input_tokens: 1 }; + const gpt55Cost = calculateRequestCost( + usage, + makePriceData({ + service_tier_pricing: { + priority: { + input_cost_per_token: 0.0000125, + output_cost_per_token: 0.000075, + cache_read_input_token_cost: 0.00000125, + }, + }, + }), + { priorityServiceTierApplied: true } + ); + const gpt54Cost = calculateRequestCost( + usage, + makePriceData({ + service_tier_pricing: { + priority: { + input_cost_per_token: 0.000005, + output_cost_per_token: 0.00003, + cache_read_input_token_cost: 0.0000005, + }, + }, + }), + { priorityServiceTierApplied: true } + ); + + expect(gpt55Cost.toNumber()).toBe(0.00008875); + expect(gpt54Cost.toNumber()).toBe(0.0000355); + }); + test("uses priority pricing fields when priority service tier is applied", () => { const cost = calculateRequestCost( { input_tokens: 2, output_tokens: 3, cache_read_input_tokens: 5 }, @@ -115,4 +188,32 @@ describe("calculateRequestCost priority service tier", () => { expect(Number(cost.toString())).toBe(1904147); }); + + test("uses service_tier_pricing.priority long_context_pricing when matched", () => { + const usage = { + input_tokens: 101, + output_tokens: 2, + }; + const priceData = makePriceData({ + service_tier_pricing: { + priority: { + input_cost_per_token: 4, + output_cost_per_token: 40, + long_context_pricing: { + threshold_tokens: 100, + input_multiplier: 2, + output_multiplier: 2, + }, + }, + }, + }); + const match = matchLongContextPricing(usage, priceData, "priority"); + const cost = calculateRequestCost(usage, priceData, { + priorityServiceTierApplied: true, + longContextPricing: match?.pricing ?? null, + }); + + expect(match).not.toBeNull(); + expect(Number(cost.toString())).toBe(968); + }); }); diff --git a/tests/unit/lib/model-price-fields.test.ts b/tests/unit/lib/model-price-fields.test.ts index ef7647b84..a917f8cb0 100644 --- a/tests/unit/lib/model-price-fields.test.ts +++ b/tests/unit/lib/model-price-fields.test.ts @@ -23,6 +23,13 @@ describe("model-price-fields", () => { threshold_tokens: 128000, input_cost_per_token: 0.000005, }, + service_tier_pricing: { + priority: { + input_cost_per_token: 0.0000125, + output_cost_per_token: 0.000075, + cache_read_input_token_cost: 0.00000125, + }, + }, pricing: { openai: { input_cost_per_token: 0.0000025, @@ -45,6 +52,10 @@ describe("model-price-fields", () => { expect( entries.find((entry) => entry.path === "long_context_pricing.input_cost_per_token")?.kind ).toBe("supported"); + expect( + entries.find((entry) => entry.path === "service_tier_pricing.priority.input_cost_per_token") + ?.kind + ).toBe("supported"); }); test("extracts editable extra price data by excluding managed fields", () => { diff --git a/tests/unit/lib/price-data-price-like-fields.test.ts b/tests/unit/lib/price-data-price-like-fields.test.ts index 440e4427e..4e46538b3 100644 --- a/tests/unit/lib/price-data-price-like-fields.test.ts +++ b/tests/unit/lib/price-data-price-like-fields.test.ts @@ -23,6 +23,18 @@ describe("hasValidPriceData: generic price-like fields", () => { ).toBe(true); }); + test("treats service tier pricing as valid price data", () => { + expect( + hasValidPriceData({ + service_tier_pricing: { + priority: { + input_cost_per_token: 0.0000125, + }, + }, + }) + ).toBe(true); + }); + test("ignores non price-like numeric metadata", () => { expect( hasValidPriceData({ diff --git a/tests/unit/lib/utils/pricing-resolution.test.ts b/tests/unit/lib/utils/pricing-resolution.test.ts index d9196449e..0ff1d884d 100644 --- a/tests/unit/lib/utils/pricing-resolution.test.ts +++ b/tests/unit/lib/utils/pricing-resolution.test.ts @@ -225,4 +225,50 @@ describe("resolvePricingForModelRecords", () => { input_cost_per_token: 0.000005, }); }); + + test("provider detail scoring counts service_tier_pricing", () => { + const cloudRecord = makeRecord("gpt-5.5", { + mode: "responses", + model_family: "gpt", + pricing: { + fallback: { + input_cost_per_token: 0.000005, + output_cost_per_token: 0.00003, + }, + openai: { + input_cost_per_token: 0.000005, + output_cost_per_token: 0.00003, + service_tier_pricing: { + priority: { + input_cost_per_token: 0.0000125, + output_cost_per_token: 0.000075, + cache_read_input_token_cost: 0.00000125, + }, + }, + }, + }, + }); + + const resolved = resolvePricingForModelRecords({ + provider: { + id: 5, + name: "OpenAI", + url: "https://api.openai.com/v1/responses", + } as never, + primaryModelName: "gpt-5.5", + fallbackModelName: null, + primaryRecord: cloudRecord, + fallbackRecord: null, + }); + + expect(resolved).not.toBeNull(); + expect(resolved?.resolvedPricingProviderKey).toBe("openai"); + expect(resolved?.priceData.service_tier_pricing).toEqual({ + priority: { + input_cost_per_token: 0.0000125, + output_cost_per_token: 0.000075, + cache_read_input_token_cost: 0.00000125, + }, + }); + }); });