|
1 | 1 | import type { PerplexityChatParams, PerplexityChatResponse } from '@/tools/perplexity/types' |
2 | 2 | import type { ToolConfig } from '@/tools/types' |
3 | 3 |
|
/**
 * Per-token rates by model from https://docs.perplexity.ai/guides/pricing
 * Per-request fees assume Low context size (the API default).
 *
 * - `inputPerM` / `outputPerM`: rate per one million prompt / completion tokens.
 * - `requestPer1K`: fee per one thousand requests.
 *
 * NOTE(review): the pricing page appears to list extra billable items for
 * sonar-deep-research (citation tokens, reasoning tokens, search queries)
 * that this table does not model — confirm whether they should be charged.
 */
const MODEL_PRICING: Record<string, { inputPerM: number; outputPerM: number; requestPer1K: number }> =
  {
    'sonar-deep-research': { inputPerM: 2, outputPerM: 8, requestPer1K: 0 },
    'sonar-reasoning-pro': { inputPerM: 2, outputPerM: 8, requestPer1K: 6 },
    'sonar-pro': { inputPerM: 3, outputPerM: 15, requestPer1K: 6 },
    sonar: { inputPerM: 1, outputPerM: 1, requestPer1K: 5 },
  }

/**
 * Resolves the pricing entry for a model identifier.
 *
 * The identifier is matched case-insensitively against the keys of
 * `MODEL_PRICING` by substring, and the longest matching key wins. Every key
 * contains "sonar", so picking the longest match (rather than the first one in
 * declaration order) keeps the lookup correct even if the table is reordered
 * or extended.
 *
 * @param model - Model identifier, e.g. `sonar-pro`.
 * @returns The pricing of the most specific matching key; the `sonar` entry
 *   when nothing matches or when `model` is empty or not a string.
 */
function getModelPricing(model: string) {
  // Callers may pass a force-cast value; treat anything that is not a string as "no match".
  const normalized = typeof model === 'string' ? model.toLowerCase() : ''

  let bestKey = ''
  for (const key of Object.keys(MODEL_PRICING)) {
    if (key.length > bestKey.length && normalized.includes(key)) {
      bestKey = key
    }
  }

  // bestKey stays '' when nothing matched; '' is not a table key, so the default applies.
  return MODEL_PRICING[bestKey] ?? MODEL_PRICING.sonar
}
| 22 | + |
4 | 23 | export const chatTool: ToolConfig<PerplexityChatParams, PerplexityChatResponse> = { |
5 | 24 | id: 'perplexity_chat', |
6 | 25 | name: 'Perplexity Chat', |
@@ -48,6 +67,41 @@ export const chatTool: ToolConfig<PerplexityChatParams, PerplexityChatResponse> |
48 | 67 | }, |
49 | 68 | }, |
50 | 69 |
|
hosting: {
  envKeyPrefix: 'PERPLEXITY_API_KEY',
  apiKeyParam: 'apiKey',
  byokProviderId: 'perplexity',
  pricing: {
    type: 'custom',
    /**
     * Computes the cost of one chat completion from the token usage reported
     * in the response plus a flat per-request fee.
     *
     * The model is taken from the response when present, otherwise from the
     * request params, and its rates are looked up with `getModelPricing`.
     *
     * NOTE(review): the per-request fee is the table's Low-context rate no
     * matter what search context size the request used — confirm this is
     * intended.
     *
     * @param params - Tool params; only `model` is read.
     * @param output - Tool output; `usage` and `model` are read.
     * @returns The total cost and a metadata breakdown (model, token counts,
     *   token cost, request fee).
     * @throws Error when token usage is missing or not a finite non-negative
     *   number, or when no model identifier is available.
     */
    getCost: (params, output) => {
      const usage = output.usage as
        | { prompt_tokens?: number; completion_tokens?: number }
        | undefined
      const inputTokens = usage?.prompt_tokens
      const outputTokens = usage?.completion_tokens
      if (inputTokens == null || outputTokens == null) {
        throw new Error('Perplexity chat response missing token usage data')
      }
      // The cast above is unchecked: reject NaN, strings and negative counts
      // so they cannot produce a NaN or negative cost.
      if (
        !Number.isFinite(inputTokens) ||
        !Number.isFinite(outputTokens) ||
        inputTokens < 0 ||
        outputTokens < 0
      ) {
        throw new Error('Perplexity chat response has invalid token usage data')
      }

      // Prefer the model echoed by the API; fall back to the requested one.
      const model = (output.model as string | undefined) || params.model
      if (typeof model !== 'string' || model.length === 0) {
        throw new Error('Perplexity chat response missing model; cannot determine pricing')
      }
      const pricing = getModelPricing(model)

      const tokenCost =
        (inputTokens * pricing.inputPerM) / 1_000_000 +
        (outputTokens * pricing.outputPerM) / 1_000_000
      // requestPer1K is a fee per thousand requests; this call is a single request.
      const requestFee = pricing.requestPer1K / 1000
      const cost = tokenCost + requestFee

      return { cost, metadata: { model, inputTokens, outputTokens, tokenCost, requestFee } }
    },
  },
  rateLimit: {
    mode: 'per_request',
    requestsPerMinute: 20,
  },
  // NOTE(review): presumably suppresses a separate fixed-price usage entry
  // because the cost is computed by getCost — confirm against the hosting layer.
  skipFixedUsageLog: true,
},
| 104 | + |
51 | 105 | request: { |
52 | 106 | method: 'POST', |
53 | 107 | url: () => 'https://api.perplexity.ai/chat/completions', |
|
0 commit comments