@@ -138,13 +138,21 @@ async function injectHostedKeyIfNeeded(
138138}
139139
/**
 * Check if an error is a rate limit (throttling) or quota exhaustion error.
 *
 * Some providers (e.g. Perplexity) return 401/403 with "insufficient_quota"
 * instead of the standard 429, so for those two statuses the error message
 * is inspected as well.
 *
 * @param error - The caught value; may be of any type.
 * @returns `true` when `error` is an object whose `status` is 429 or 503, or
 *   whose `status` is 401/403 and whose string `message` contains "quota" or
 *   "rate limit" (case-insensitive); `false` otherwise.
 */
function isRateLimitError(error: unknown): boolean {
  if (!error || typeof error !== 'object') return false

  const { status, message } = error as { status?: unknown; message?: unknown }

  // 429 = Too Many Requests, 503 = Service Unavailable (sometimes used for rate limiting)
  if (status === 429 || status === 503) return true

  if (status === 401 || status === 403) {
    // `message` is read off an untyped error object: only treat it as text when
    // it really is a string, so a structured payload cannot make this check throw.
    if (typeof message !== 'string') return false

    const normalized = message.toLowerCase()
    return normalized.includes('quota') || normalized.includes('rate limit')
  }

  return false
}
@@ -277,7 +285,7 @@ async function processHostedKeyCost(
277285
278286 if ( ! userId ) return { cost, metadata }
279287
280- const skipLog = ! ! ctx ?. skipFixedUsageLog
288+ const skipLog = ! ! ctx ?. skipFixedUsageLog || ! ! tool . hosting ?. skipFixedUsageLog
281289 if ( ! skipLog ) {
282290 try {
283291 await logFixedUsage ( {
@@ -377,6 +385,13 @@ async function applyHostedKeyCostToResult(
377385) : Promise < void > {
378386 await reportCustomDimensionUsage ( tool , params , finalResult . output , executionContext , requestId )
379387
388+ if ( tool . hosting ?. skipFixedUsageLog ) {
389+ const ctx = params . _context as Record < string , unknown > | undefined
390+ if ( ctx ) {
391+ ctx . skipFixedUsageLog = true
392+ }
393+ }
394+
380395 const { cost : hostedKeyCost , metadata } = await processHostedKeyCost (
381396 tool ,
382397 params ,
0 commit comments