Skip to content

Commit 17f342c

Browse files
feat(providers): hosted-key support for LLM providers (flag-gated, no rate limiting)
1 parent f238184 commit 17f342c

22 files changed

Lines changed: 615 additions & 107 deletions

File tree

apps/sim/lib/api-key/byok.ts

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import { and, asc, eq } from 'drizzle-orm'
55
import { getRotatingApiKey } from '@/lib/core/config/api-keys'
66
import { env } from '@/lib/core/config/env'
77
import { isHosted } from '@/lib/core/config/env-flags'
8+
import { isFeatureEnabled } from '@/lib/core/config/feature-flags'
9+
import { getHostedKeyRateLimiter } from '@/lib/core/rate-limiter'
810
import { decryptSecret } from '@/lib/core/security/encryption'
911
import { getWorkspaceById } from '@/lib/workspaces/permissions/utils'
10-
import { getHostedModels } from '@/providers/models'
12+
import { getHostedModels, getProviderHosting } from '@/providers/models'
1113
import { PROVIDER_PLACEHOLDER_KEY } from '@/providers/utils'
1214
import { useProvidersStore } from '@/stores/providers/store'
1315
import type { BYOKProviderId } from '@/tools/types'
@@ -90,12 +92,60 @@ export async function getBYOKKey(
9092
}
9193
}
9294

95+
/**
 * Result of resolving which API key to use for a provider call; this is the
 * resolved value of the promise returned by `getApiKeyWithBYOK`.
 *
 * - `apiKey`: the resolved key value.
 * - `isBYOK`: `false` when a hosted platform key was acquired; presumably `true`
 *   when the key came from the workspace's own BYOK entry — confirm against
 *   `getBYOKKey`.
 */
export interface ApiKeyResolution {
96+
apiKey: string
97+
isBYOK: boolean
98+
/** Env var name of the platform key used (only when a hosted-key-pool key was acquired). */
99+
hostedKeyEnvVar?: string
100+
}
101+
93102
export async function getApiKeyWithBYOK(
94103
provider: string,
95104
model: string,
96105
workspaceId: string | undefined | null,
97-
userProvidedKey?: string
98-
): Promise<{ apiKey: string; isBYOK: boolean }> {
106+
userProvidedKey?: string,
107+
userId?: string | null
108+
): Promise<ApiKeyResolution> {
109+
// Unified hosted-key path (flag-gated). For any provider with a hosting config:
110+
// BYOK workspace key wins; otherwise acquire a platform key through the shared
111+
// hosted-key framework with no rate limiting. Falls through to the legacy
112+
// per-provider logic when the flag is off or no platform keys are configured,
113+
// keeping flag-off behavior identical.
114+
if (isHosted && workspaceId) {
115+
const hosting = getProviderHosting(provider)
116+
if (hosting && (await isFeatureEnabled('hosted-key-llm', { userId }))) {
117+
const byokResult = await getBYOKKey(workspaceId, hosting.byokProviderId)
118+
if (byokResult) {
119+
logger.info('Using BYOK key (hosted-key-llm)', { provider, model, workspaceId })
120+
return byokResult
121+
}
122+
123+
const acquired = await getHostedKeyRateLimiter().acquireKey(
124+
hosting.byokProviderId,
125+
hosting.envKeyPrefix,
126+
{ mode: 'none' },
127+
workspaceId
128+
)
129+
if (acquired.success && acquired.key) {
130+
logger.info('Using hosted platform key (hosted-key-llm)', {
131+
provider,
132+
model,
133+
workspaceId,
134+
key: acquired.envVarName,
135+
})
136+
return {
137+
apiKey: acquired.key,
138+
isBYOK: false,
139+
hostedKeyEnvVar: acquired.envVarName,
140+
}
141+
}
142+
logger.debug('No hosted platform keys configured, falling back to legacy path', {
143+
provider,
144+
model,
145+
})
146+
}
147+
}
148+
99149
const isOllamaModel =
100150
provider === 'ollama' || useProvidersStore.getState().providers.ollama.models.includes(model)
101151
if (isOllamaModel) {
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/**
2+
* @vitest-environment node
3+
*/
4+
import { beforeEach, describe, expect, it, vi } from 'vitest'
5+
6+
const { mockRecordUsed, mockRecordCostCharged } = vi.hoisted(() => ({
7+
mockRecordUsed: vi.fn(),
8+
mockRecordCostCharged: vi.fn(),
9+
}))
10+
11+
vi.mock('@/lib/monitoring/metrics', () => ({
12+
hostedKeyMetrics: {
13+
recordUsed: mockRecordUsed,
14+
recordCostCharged: mockRecordCostCharged,
15+
},
16+
}))
17+
18+
import {
19+
calculateHostedCost,
20+
classifyHostedKeyFailure,
21+
emitHostedKeyUsage,
22+
} from '@/lib/api-key/hosted-cost'
23+
24+
describe('calculateHostedCost (tool pricing)', () => {
25+
it('per_request returns the flat fee', () => {
26+
expect(calculateHostedCost({ type: 'per_request', cost: 0.005 }, {}, {})).toEqual({
27+
cost: 0.005,
28+
})
29+
})
30+
31+
it('custom returns a numeric getCost result', () => {
32+
const pricing = { type: 'custom' as const, getCost: () => 0.42 }
33+
expect(calculateHostedCost(pricing, {}, {})).toEqual({ cost: 0.42 })
34+
})
35+
36+
it('custom passes through a structured getCost result with metadata', () => {
37+
const pricing = {
38+
type: 'custom' as const,
39+
getCost: () => ({ cost: 1.5, metadata: { units: 3 } }),
40+
}
41+
expect(calculateHostedCost(pricing, {}, {})).toEqual({ cost: 1.5, metadata: { units: 3 } })
42+
})
43+
44+
it('forwards params and response to custom getCost', () => {
45+
const getCost = vi.fn(() => 1)
46+
const params = { a: 1 }
47+
const response = { b: 2 }
48+
calculateHostedCost({ type: 'custom', getCost }, params, response)
49+
expect(getCost).toHaveBeenCalledWith(params, response)
50+
})
51+
})
52+
53+
describe('classifyHostedKeyFailure', () => {
54+
it('classifies structured SDK errors by status', () => {
55+
expect(classifyHostedKeyFailure({ status: 429 })).toBe('rate_limited')
56+
expect(classifyHostedKeyFailure({ status: 503 })).toBe('rate_limited')
57+
expect(classifyHostedKeyFailure({ status: 401 })).toBe('auth')
58+
expect(classifyHostedKeyFailure({ status: 403, message: 'quota exceeded' })).toBe(
59+
'rate_limited'
60+
)
61+
expect(classifyHostedKeyFailure({ status: 500 })).toBe('other')
62+
})
63+
64+
it('classifies message-embedded status (provider errors with no .status)', () => {
65+
// Regression: the previous `\bunauthor\b` regex never matched "Unauthorized".
66+
expect(classifyHostedKeyFailure(new Error('Unauthorized'))).toBe('auth')
67+
expect(classifyHostedKeyFailure(new Error('OpenAI API error (401): bad key'))).toBe('auth')
68+
expect(classifyHostedKeyFailure(new Error('Forbidden'))).toBe('auth')
69+
expect(classifyHostedKeyFailure(new Error('Invalid API key provided'))).toBe('auth')
70+
expect(classifyHostedKeyFailure(new Error('API error (429): rate limit'))).toBe('rate_limited')
71+
expect(classifyHostedKeyFailure(new Error('Internal Server Error (500)'))).toBe('other')
72+
})
73+
})
74+
75+
describe('emitHostedKeyUsage', () => {
76+
beforeEach(() => {
77+
vi.clearAllMocks()
78+
})
79+
80+
it('records both usage and cost with the provider/tool/key labels', () => {
81+
emitHostedKeyUsage({
82+
provider: 'openai',
83+
tool: 'gpt-4o',
84+
key: 'OPENAI_API_KEY_2',
85+
costTotal: 0.03,
86+
})
87+
88+
expect(mockRecordUsed).toHaveBeenCalledWith({
89+
provider: 'openai',
90+
tool: 'gpt-4o',
91+
key: 'OPENAI_API_KEY_2',
92+
})
93+
expect(mockRecordCostCharged).toHaveBeenCalledWith(0.03, { provider: 'openai', tool: 'gpt-4o' })
94+
})
95+
})
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import { hostedKeyMetrics } from '@/lib/monitoring/metrics'
2+
import type { ToolHostingPricing } from '@/tools/types'
3+
4+
/**
 * Return shape of `calculateHostedCost`: the computed cost plus any optional
 * metadata supplied by the pricing config.
 */
export interface HostedCostResult {
5+
/** Total billable cost in dollars. */
6+
cost: number
7+
/** Optional metadata about the cost (e.g. provider breakdown from `custom` pricing). */
8+
metadata?: Record<string, unknown>
9+
}
10+
11+
/**
 * Computes the billable cost of a single hosted-key **tool** call from the
 * pricing config the tool declares.
 *
 * - `per_request`: the configured flat `cost` is returned as-is.
 * - `custom`: `getCost(params, response)` is invoked; a bare number is wrapped
 *   as `{ cost }`, while a structured result is passed through unchanged.
 *
 * LLM providers do NOT use this: their cost is token-based and computed
 * directly via {@link import('@/providers/utils').calculateCost}.
 *
 * @param pricing - Pricing config declared by the tool.
 * @param params - Tool call parameters, forwarded to a `custom` `getCost`.
 * @param response - Tool call response, forwarded to a `custom` `getCost`.
 * @returns The cost in dollars, plus metadata when a `custom` pricer supplies it.
 * @throws Error when `pricing.type` is not a recognized pricing type.
 */
export function calculateHostedCost(
  pricing: ToolHostingPricing,
  params: Record<string, unknown>,
  response: Record<string, unknown>
): HostedCostResult {
  if (pricing.type === 'per_request') {
    return { cost: pricing.cost }
  }

  if (pricing.type === 'custom') {
    const computed = pricing.getCost(params, response)
    if (typeof computed === 'number') {
      return { cost: computed }
    }
    return computed
  }

  // Exhaustiveness guard: adding a new pricing variant without handling it
  // above turns this assignment into a compile error.
  const unhandled: never = pricing
  throw new Error(`Unknown pricing type: ${(unhandled as ToolHostingPricing).type}`)
}
37+
38+
/**
 * Classify a thrown value into a hosted-key failure reason for metrics.
 *
 * Handles structured SDK errors (numeric `.status`), provider errors that embed
 * the status in the message string (e.g. `API error (401): ...`), and bare
 * thrown strings. Some providers signal quota/rate-limit via 401/403 plus a
 * descriptive message, so those count as `rate_limited`, not `auth`.
 *
 * This function never throws: a non-string `.message` is treated as empty, and
 * a non-numeric `.status` is treated as absent so the message-based fallback
 * still runs.
 *
 * @param error - Anything caught from a provider/tool call (`Error`, SDK error
 *   object, string, `null`, ...).
 * @returns `'rate_limited'` for 429/503 or quota/rate-limit wording, `'auth'`
 *   for 401/403 or unauthorized/forbidden/invalid-key wording, else `'other'`.
 */
export function classifyHostedKeyFailure(error: unknown): 'rate_limited' | 'auth' | 'other' {
  // Only a real number counts as a structured status; the declared shapes of
  // caught values cannot be trusted at runtime.
  const rawStatus = (error as { status?: unknown } | null | undefined)?.status
  const status = typeof rawStatus === 'number' ? rawStatus : undefined

  // A thrown string is its own message; a non-string `.message` (object/number
  // payloads) must not reach `.toLowerCase()`.
  const rawMessage =
    typeof error === 'string'
      ? error
      : (error as { message?: unknown } | null | undefined)?.message
  const message = typeof rawMessage === 'string' ? rawMessage.toLowerCase() : ''

  const mentionsQuota = message.includes('quota') || message.includes('rate limit')

  if (status === 429 || status === 503) return 'rate_limited'
  if (status === 401 || status === 403) {
    return mentionsQuota ? 'rate_limited' : 'auth'
  }

  // No structured status (e.g. provider errors that embed it in the message).
  if (status === undefined) {
    if (mentionsQuota || /\b(429|503)\b/.test(message)) {
      return 'rate_limited'
    }
    if (
      /\b(401|403)\b/.test(message) ||
      message.includes('unauthor') ||
      message.includes('forbidden') ||
      message.includes('invalid api key')
    ) {
      return 'auth'
    }
  }
  return 'other'
}
72+
73+
/**
 * Emits hosted-key usage telemetry for a completed call: one "used" record
 * labelled with provider/tool/key, and one "cost charged" record labelled with
 * provider/tool.
 *
 * CloudWatch only — never a billing write. `recordCostCharged` self-guards on
 * `costTotal > 0`. The `tool` label carries the tool id for tools, or the model
 * id for LLM calls.
 *
 * @param labels - Metric labels plus the total cost of the call.
 */
export function emitHostedKeyUsage(labels: {
  provider: string
  tool: string
  key: string
  costTotal: number
}): void {
  const { provider, tool, key, costTotal } = labels

  hostedKeyMetrics.recordUsed({ provider, tool, key })
  hostedKeyMetrics.recordCostCharged(costTotal, { provider, tool })
}

apps/sim/lib/core/config/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ export const env = createEnv({
409409
DISABLE_INVITATIONS: z.boolean().optional(), // Disable workspace invitations globally (for self-hosted deployments)
410410
DISABLE_PUBLIC_API: z.boolean().optional(), // Disable public API access globally (for self-hosted deployments)
411411
MOTHERSHIP_BETA_FEATURES: z.boolean().optional(), // Enable beta Mothership planning/changelog artifact surfaces
412+
HOSTED_KEY_LLM: z.boolean().optional(), // Route hosted LLM calls through the hosted-key framework (acquire + centralized cost + metrics), no rate limiting
412413

413414
// Development Tools
414415
REACT_GRAB_ENABLED: z.boolean().optional(), // Enable React Grab for UI element debugging in Cursor/AI agents (dev only)

apps/sim/lib/core/config/feature-flags.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ const FEATURE_FLAGS = {
7474
'user context — use enabled:true for global rollout rather than per-user targeting.',
7575
fallback: 'MOTHERSHIP_BETA_FEATURES',
7676
},
77+
'hosted-key-llm': {
78+
description:
79+
'Route hosted LLM provider calls through the hosted-key framework (acquire + centralized ' +
80+
'cost + metrics), with no rate limiting. Off = legacy getRotatingApiKey path. Evaluated ' +
81+
'server-side with userId only (no orgId in the provider request), so roll out globally or ' +
82+
'per-userId.',
83+
fallback: 'HOSTED_KEY_LLM',
84+
},
7785
} satisfies Record<string, FeatureFlagDefinition>
7886

7987
/**

apps/sim/lib/core/rate-limiter/hosted-key/hosted-key-rate-limiter.test.ts

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,13 @@ describe('HostedKeyRateLimiter', () => {
8585
}
8686
mockAdapter.consumeTokens.mockResolvedValue(allowedResult)
8787

88-
process.env.EXA_API_KEY_COUNT = undefined
89-
process.env.EXA_API_KEY_1 = undefined
90-
process.env.EXA_API_KEY_2 = undefined
91-
process.env.EXA_API_KEY_3 = undefined
88+
// Empty string is falsy, so no key resolves. (Assigning `undefined` would
89+
// leave the string "undefined" (Node stringifies process.env values), which the
90+
// `_1.._N` probe — used when `_COUNT` is absent — would treat as present.)
91+
process.env.EXA_API_KEY_COUNT = ''
92+
process.env.EXA_API_KEY_1 = ''
93+
process.env.EXA_API_KEY_2 = ''
94+
process.env.EXA_API_KEY_3 = ''
9295

9396
const result = await rateLimiter.acquireKey(
9497
testProvider,
@@ -101,6 +104,38 @@ describe('HostedKeyRateLimiter', () => {
101104
expect(result.error).toContain('No hosted keys configured')
102105
})
103106

107+
it('mode: none returns a key without touching the queue or token bucket', async () => {
108+
const result = await rateLimiter.acquireKey(
109+
testProvider,
110+
envKeyPrefix,
111+
{ mode: 'none' },
112+
'workspace-1'
113+
)
114+
115+
expect(result.success).toBe(true)
116+
expect(result.key).toBe('test-key-1')
117+
expect(result.envVarName).toBe('EXA_API_KEY_1')
118+
expect(mockQueue.enqueue).not.toHaveBeenCalled()
119+
expect(mockAdapter.consumeTokens).not.toHaveBeenCalled()
120+
})
121+
122+
it('mode: none still reports an error when no keys are configured', async () => {
123+
process.env.EXA_API_KEY_COUNT = ''
124+
process.env.EXA_API_KEY_1 = ''
125+
process.env.EXA_API_KEY_2 = ''
126+
process.env.EXA_API_KEY_3 = ''
127+
128+
const result = await rateLimiter.acquireKey(
129+
testProvider,
130+
envKeyPrefix,
131+
{ mode: 'none' },
132+
'workspace-1'
133+
)
134+
135+
expect(result.success).toBe(false)
136+
expect(mockQueue.enqueue).not.toHaveBeenCalled()
137+
})
138+
104139
it('should rate limit billing actor when wait exceeds the queue cap', async () => {
105140
// resetAt past the 5-minute cap forces the wait loop to bail immediately.
106141
const rateLimitedResult: ConsumeResult = {

0 commit comments

Comments
 (0)