simstudioai
diff --git a/‎apps/sim/lib/api-key/byok.ts‎
Lines changed: 53 additions & 3 deletions b/‎apps/sim/lib/api-key/byok.ts‎
Lines changed: 53 additions & 3 deletions
diff --git a/‎apps/sim/lib/api-key/hosted-cost.test.ts‎
Lines changed: 95 additions & 0 deletions b/‎apps/sim/lib/api-key/hosted-cost.test.ts‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎apps/sim/lib/api-key/hosted-cost.ts‎
Lines changed: 93 additions & 0 deletions b/‎apps/sim/lib/api-key/hosted-cost.ts‎
Lines changed: 93 additions & 0 deletions
diff --git a/‎apps/sim/lib/core/config/env.ts‎
Lines changed: 1 addition & 0 deletions b/‎apps/sim/lib/core/config/env.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎apps/sim/lib/core/config/feature-flags.ts‎
Lines changed: 8 additions & 0 deletions b/‎apps/sim/lib/core/config/feature-flags.ts‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎apps/sim/lib/core/rate-limiter/hosted-key/hosted-key-rate-limiter.test.ts‎
Lines changed: 39 additions & 4 deletions b/‎apps/sim/lib/core/rate-limiter/hosted-key/hosted-key-rate-limiter.test.ts‎
Lines changed: 39 additions & 4 deletions
@@ -5,9 +5,11 @@ import { and, asc, eq } from 'drizzle-orm'
 import { getRotatingApiKey } from '@/lib/core/config/api-keys'
 import { env } from '@/lib/core/config/env'
 import { isHosted } from '@/lib/core/config/env-flags'
+import { isFeatureEnabled } from '@/lib/core/config/feature-flags'
+import { getHostedKeyRateLimiter } from '@/lib/core/rate-limiter'
 import { decryptSecret } from '@/lib/core/security/encryption'
 import { getWorkspaceById } from '@/lib/workspaces/permissions/utils'
-import { getHostedModels } from '@/providers/models'
+import { getHostedModels, getProviderHosting } from '@/providers/models'
 import { PROVIDER_PLACEHOLDER_KEY } from '@/providers/utils'
 import { useProvidersStore } from '@/stores/providers/store'
 import type { BYOKProviderId } from '@/tools/types'
@@ -90,12 +92,60 @@ export async function getBYOKKey(
   }
 }
 
+/**
+ * Outcome of resolving which API key a provider call should use; this is the
+ * resolved type of the promise returned by `getApiKeyWithBYOK`.
+ */
+export interface ApiKeyResolution {
+  /** The key to use for the provider call. */
+  apiKey: string
+  /**
+   * `false` when a hosted platform key was acquired.
+   * NOTE(review): presumably `true` only for workspace BYOK keys — confirm against `getBYOKKey`.
+   */
+  isBYOK: boolean
+  /** Env var name of the platform key used (only when a hosted-key-pool key was acquired). */
+  hostedKeyEnvVar?: string
+}
+
 export async function getApiKeyWithBYOK(
   provider: string,
   model: string,
   workspaceId: string | undefined | null,
-  userProvidedKey?: string
-): Promise<{ apiKey: string; isBYOK: boolean }> {
+  userProvidedKey?: string,
+  userId?: string | null
+): Promise<ApiKeyResolution> {
+  // Unified hosted-key path (flag-gated). For any provider with a hosting config:
+  // BYOK workspace key wins; otherwise acquire a platform key through the shared
+  // hosted-key framework with no rate limiting. Falls through to the legacy
+  // per-provider logic when the flag is off or no platform keys are configured,
+  // keeping flag-off behavior identical.
+  if (isHosted && workspaceId) {
+    const hosting = getProviderHosting(provider)
+    if (hosting && (await isFeatureEnabled('hosted-key-llm', { userId }))) {
+      const byokResult = await getBYOKKey(workspaceId, hosting.byokProviderId)
+      if (byokResult) {
+        logger.info('Using BYOK key (hosted-key-llm)', { provider, model, workspaceId })
+        return byokResult
+      }
+
+      const acquired = await getHostedKeyRateLimiter().acquireKey(
+        hosting.byokProviderId,
+        hosting.envKeyPrefix,
+        { mode: 'none' },
+        workspaceId
+      )
+      if (acquired.success && acquired.key) {
+        logger.info('Using hosted platform key (hosted-key-llm)', {
+          provider,
+          model,
+          workspaceId,
+          key: acquired.envVarName,
+        })
+        return {
+          apiKey: acquired.key,
+          isBYOK: false,
+          hostedKeyEnvVar: acquired.envVarName,
+        }
+      }
+      logger.debug('No hosted platform keys configured, falling back to legacy path', {
+        provider,
+        model,
+      })
+    }
+  }
+
   const isOllamaModel =
     provider === 'ollama' || useProvidersStore.getState().providers.ollama.models.includes(model)
   if (isOllamaModel) {
 
@@ -0,0 +1,95 @@
+/**
+ * @vitest-environment node
+ */
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+// `vi.mock` calls are hoisted above the imports, so the mock functions their
+// factory closes over are created through `vi.hoisted` to be available when
+// the factory runs.
+const { mockRecordUsed, mockRecordCostCharged } = vi.hoisted(() => ({
+  mockRecordUsed: vi.fn(),
+  mockRecordCostCharged: vi.fn(),
+}))
+
+// Swap the metrics module for the two spies above so the tests can assert on
+// what `emitHostedKeyUsage` records.
+vi.mock('@/lib/monitoring/metrics', () => ({
+  hostedKeyMetrics: {
+    recordUsed: mockRecordUsed,
+    recordCostCharged: mockRecordCostCharged,
+  },
+}))
+
+import {
+  calculateHostedCost,
+  classifyHostedKeyFailure,
+  emitHostedKeyUsage,
+} from '@/lib/api-key/hosted-cost'
+
+describe('calculateHostedCost (tool pricing)', () => {
+  // Flat-fee pricing: the configured cost comes back untouched.
+  it('per_request returns the flat fee', () => {
+    const flatFee = { type: 'per_request' as const, cost: 0.005 }
+    const result = calculateHostedCost(flatFee, {}, {})
+    expect(result).toEqual({ cost: 0.005 })
+  })
+
+  // A bare number from `getCost` is wrapped into `{ cost }`.
+  it('custom returns a numeric getCost result', () => {
+    const result = calculateHostedCost({ type: 'custom', getCost: () => 0.42 }, {}, {})
+    expect(result).toEqual({ cost: 0.42 })
+  })
+
+  // A structured result is returned as-is, metadata included.
+  it('custom passes through a structured getCost result with metadata', () => {
+    const structured = { cost: 1.5, metadata: { units: 3 } }
+    const result = calculateHostedCost({ type: 'custom', getCost: () => structured }, {}, {})
+    expect(result).toEqual({ cost: 1.5, metadata: { units: 3 } })
+  })
+
+  // The custom callback receives exactly the params/response it was given.
+  it('forwards params and response to custom getCost', () => {
+    const costSpy = vi.fn(() => 1)
+    const inputParams = { a: 1 }
+    const toolResponse = { b: 2 }
+    calculateHostedCost({ type: 'custom', getCost: costSpy }, inputParams, toolResponse)
+    expect(costSpy).toHaveBeenCalledWith(inputParams, toolResponse)
+  })
+})
+
+describe('classifyHostedKeyFailure', () => {
+  it('classifies structured SDK errors by status', () => {
+    // [thrown error, expected classification]
+    const structuredCases: Array<[{ status: number; message?: string }, string]> = [
+      [{ status: 429 }, 'rate_limited'],
+      [{ status: 503 }, 'rate_limited'],
+      [{ status: 401 }, 'auth'],
+      [{ status: 403, message: 'quota exceeded' }, 'rate_limited'],
+      [{ status: 500 }, 'other'],
+    ]
+    for (const [thrown, expected] of structuredCases) {
+      expect(classifyHostedKeyFailure(thrown)).toBe(expected)
+    }
+  })
+
+  it('classifies message-embedded status (provider errors with no .status)', () => {
+    // Regression: the previous `\bunauthor\b` regex never matched "Unauthorized".
+    const messageCases: Array<[string, string]> = [
+      ['Unauthorized', 'auth'],
+      ['OpenAI API error (401): bad key', 'auth'],
+      ['Forbidden', 'auth'],
+      ['Invalid API key provided', 'auth'],
+      ['API error (429): rate limit', 'rate_limited'],
+      ['Internal Server Error (500)', 'other'],
+    ]
+    for (const [text, expected] of messageCases) {
+      expect(classifyHostedKeyFailure(new Error(text))).toBe(expected)
+    }
+  })
+})
+
+describe('emitHostedKeyUsage', () => {
+  // Start every case with clean call histories on the metric spies.
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('records both usage and cost with the provider/tool/key labels', () => {
+    const provider = 'openai'
+    const tool = 'gpt-4o'
+    const key = 'OPENAI_API_KEY_2'
+
+    emitHostedKeyUsage({ provider, tool, key, costTotal: 0.03 })
+
+    // Usage carries the key label; the cost metric does not.
+    expect(mockRecordUsed).toHaveBeenCalledWith({ provider, tool, key })
+    expect(mockRecordCostCharged).toHaveBeenCalledWith(0.03, { provider, tool })
+  })
+})
@@ -0,0 +1,93 @@
+import { hostedKeyMetrics } from '@/lib/monitoring/metrics'
+import type { ToolHostingPricing } from '@/tools/types'
+
+/** Return shape of `calculateHostedCost`: a dollar amount plus optional detail. */
+export interface HostedCostResult {
+  /** Total billable cost in dollars. */
+  cost: number
+  /** Optional metadata about the cost (e.g. provider breakdown from `custom` pricing). */
+  metadata?: Record<string, unknown>
+}
+
+/**
+ * Compute the billable cost of a hosted-key **tool** call from its declared
+ * pricing. Not used for LLM providers, whose cost is token-based and computed
+ * via {@link import('@/providers/utils').calculateCost}.
+ *
+ * @param pricing - The tool's pricing config: flat `per_request` or `custom`.
+ * @param params - Tool call parameters, forwarded to `custom` pricing.
+ * @param response - Tool call response, forwarded to `custom` pricing.
+ * @returns `{ cost }` for a flat fee or a numeric custom result; the custom
+ *   result itself when it is already structured.
+ * @throws Error when `pricing.type` is not a known pricing type.
+ */
+export function calculateHostedCost(
+  pricing: ToolHostingPricing,
+  params: Record<string, unknown>,
+  response: Record<string, unknown>
+): HostedCostResult {
+  if (pricing.type === 'per_request') {
+    return { cost: pricing.cost }
+  }
+
+  if (pricing.type === 'custom') {
+    const computed = pricing.getCost(params, response)
+    if (typeof computed === 'number') {
+      return { cost: computed }
+    }
+    return computed
+  }
+
+  // Compile-time exhaustiveness guard: a new pricing variant fails to type-check here.
+  const unreachable: never = pricing
+  throw new Error(`Unknown pricing type: ${(unreachable as ToolHostingPricing).type}`)
+}
+
+/**
+ * Classify a thrown error into a hosted-key failure reason for metrics. Handles
+ * both structured SDK errors (numeric `.status`) and provider errors that embed
+ * the status in the message string (e.g. `API error (401): ...`). Some providers
+ * signal quota/rate-limit via 401/403 + a descriptive message, so those count as
+ * `rate_limited`, not `auth`.
+ *
+ * Never throws, whatever the shape of `error`:
+ * - a `.status` that is not a number (missing, `null`, or a string code) is
+ *   treated as absent, so classification falls back to the message text;
+ * - a `.message` that is not a string is treated as empty;
+ * - a thrown plain string is used as the message itself.
+ *
+ * @param error - Whatever was caught; may be any value.
+ * @returns `'rate_limited'`, `'auth'`, or `'other'` when nothing matches.
+ */
+export function classifyHostedKeyFailure(error: unknown): 'rate_limited' | 'auth' | 'other' {
+  const rawStatus = (error as { status?: unknown } | null | undefined)?.status
+  // Only a numeric status is trusted as an HTTP status code.
+  const status = typeof rawStatus === 'number' ? rawStatus : undefined
+
+  const rawMessage =
+    typeof error === 'string'
+      ? error
+      : (error as { message?: unknown } | null | undefined)?.message
+  // Guard the `.toLowerCase()` call: `.message` is not guaranteed to be a string.
+  const message = typeof rawMessage === 'string' ? rawMessage.toLowerCase() : ''
+
+  if (status === 429 || status === 503) return 'rate_limited'
+  if (status === 401 || status === 403) {
+    return message.includes('quota') || message.includes('rate limit') ? 'rate_limited' : 'auth'
+  }
+
+  // No usable structured status (e.g. provider errors that embed it in the message).
+  if (status === undefined) {
+    if (
+      message.includes('quota') ||
+      message.includes('rate limit') ||
+      /\b(429|503)\b/.test(message)
+    )
+      return 'rate_limited'
+    if (
+      /\b(401|403)\b/.test(message) ||
+      message.includes('unauthor') ||
+      message.includes('forbidden') ||
+      message.includes('invalid api key')
+    )
+      return 'auth'
+  }
+  return 'other'
+}
+
+/**
+ * Report a completed hosted-key call to the metrics layer. This is telemetry
+ * only (CloudWatch), never a billing write. `recordCostCharged` self-guards on
+ * `costTotal > 0`.
+ *
+ * @param labels - `provider`, `tool` (the tool id for tools, or the model id
+ *   for LLM calls), the `key` that was used, and the call's `costTotal`.
+ */
+export function emitHostedKeyUsage(labels: {
+  provider: string
+  tool: string
+  key: string
+  costTotal: number
+}): void {
+  const { provider, tool, key, costTotal } = labels
+
+  // The usage metric is labelled with the key; the cost metric is not.
+  hostedKeyMetrics.recordUsed({ provider, tool, key })
+  hostedKeyMetrics.recordCostCharged(costTotal, { provider, tool })
+}
@@ -409,6 +409,7 @@ export const env = createEnv({
     DISABLE_INVITATIONS:                   z.boolean().optional(),                 // Disable workspace invitations globally (for self-hosted deployments)
     DISABLE_PUBLIC_API:                    z.boolean().optional(),                 // Disable public API access globally (for self-hosted deployments)
     MOTHERSHIP_BETA_FEATURES:              z.boolean().optional(),                 // Enable beta Mothership planning/changelog artifact surfaces
+    HOSTED_KEY_LLM:                        z.boolean().optional(),                 // Route hosted LLM calls through the hosted-key framework (acquire + centralized cost + metrics), no rate limiting
 
     // Development Tools
     REACT_GRAB_ENABLED:                    z.boolean().optional(),                 // Enable React Grab for UI element debugging in Cursor/AI agents (dev only)
 
@@ -74,6 +74,14 @@ const FEATURE_FLAGS = {
       'user context — use enabled:true for global rollout rather than per-user targeting.',
     fallback: 'MOTHERSHIP_BETA_FEATURES',
   },
+  'hosted-key-llm': {
+    description:
+      'Route hosted LLM provider calls through the hosted-key framework (acquire + centralized ' +
+      'cost + metrics), with no rate limiting. Off = legacy getRotatingApiKey path. Evaluated ' +
+      'server-side with userId only (no orgId in the provider request), so roll out globally or ' +
+      'per-userId.',
+    fallback: 'HOSTED_KEY_LLM',
+  },
 } satisfies Record<string, FeatureFlagDefinition>
 
 /**
 
@@ -85,10 +85,13 @@ describe('HostedKeyRateLimiter', () => {
       }
       mockAdapter.consumeTokens.mockResolvedValue(allowedResult)
 
-      process.env.EXA_API_KEY_COUNT = undefined
-      process.env.EXA_API_KEY_1 = undefined
-      process.env.EXA_API_KEY_2 = undefined
-      process.env.EXA_API_KEY_3 = undefined
+      // Empty string is falsy, so no key resolves. (Assigning `undefined` would
+      // leave the string "undefined" under vitest's env handling, which the
+      // `_1.._N` probe — used when `_COUNT` is absent — would treat as present.)
+      process.env.EXA_API_KEY_COUNT = ''
+      process.env.EXA_API_KEY_1 = ''
+      process.env.EXA_API_KEY_2 = ''
+      process.env.EXA_API_KEY_3 = ''
 
       const result = await rateLimiter.acquireKey(
         testProvider,
@@ -101,6 +104,38 @@ describe('HostedKeyRateLimiter', () => {
       expect(result.error).toContain('No hosted keys configured')
     })
 
+    it('mode: none returns a key without touching the queue or token bucket', async () => {
+      const unlimited = { mode: 'none' } as const
+      const acquired = await rateLimiter.acquireKey(
+        testProvider,
+        envKeyPrefix,
+        unlimited,
+        'workspace-1'
+      )
+
+      // A key comes back together with the env var it was read from.
+      expect(acquired).toMatchObject({
+        success: true,
+        key: 'test-key-1',
+        envVarName: 'EXA_API_KEY_1',
+      })
+      // Neither rate-limiting collaborator is consulted in this mode.
+      expect(mockQueue.enqueue).not.toHaveBeenCalled()
+      expect(mockAdapter.consumeTokens).not.toHaveBeenCalled()
+    })
+
+    it('mode: none still reports an error when no keys are configured', async () => {
+      // Blank the count and every numbered slot; an empty string is falsy, so
+      // no key resolves.
+      for (const suffix of ['COUNT', '1', '2', '3']) {
+        process.env[`EXA_API_KEY_${suffix}`] = ''
+      }
+
+      const outcome = await rateLimiter.acquireKey(
+        testProvider,
+        envKeyPrefix,
+        { mode: 'none' },
+        'workspace-1'
+      )
+
+      expect(outcome.success).toBe(false)
+      expect(mockQueue.enqueue).not.toHaveBeenCalled()
+    })
+
     it('should rate limit billing actor when wait exceeds the queue cap', async () => {
       // resetAt past the 5-minute cap forces the wait loop to bail immediately.
       const rateLimitedResult: ConsumeResult = {