// Patch summary. The diff below is kept byte-identical; it appears to have lost its
// original line breaks and indentation, so restore those from the source patch before
// trying to apply it.
//
// 1. src/clients/retry.js
//    - DEFAULT_CONFIG.retryableErrors gains 'ECONNREFUSED'.
//    - isRetryable() now also returns true when error.cause.code is listed in
//      config.retryableErrors. Per the inline comment in the hunk, this targets
//      connection errors that Node's undici wraps in a TypeError.
//
// 2. src/orchestrator/index.js
//    - The call to invokeModel(cleanPayload) is wrapped in try/catch.
//    - An error is classified as a connection error when modelError.cause.code or
//      modelError.code equals 'ECONNREFUSED', or when modelError.message contains
//      'fetch failed'.
//    - For such errors the hunk logs via logger.error and returns an object with
//      response.status 503, an error body of type "provider_unreachable", plus steps,
//      durationMs (Date.now() - start) and terminationReason "provider_unreachable".
//    - Any other error is rethrown unchanged.
//
// NOTE(review): the 'fetch failed' message match is broader than ECONNREFUSED, yet the
//   log line says "connection refused" -- confirm that other fetch failures (e.g. DNS or
//   timeout) are meant to be reported this way.
// NOTE(review): terminationReason is set both inside `response` and at the top level of
//   the returned object -- confirm the nested copy is intentional.
// NOTE(review): the catch block reads modelError.cause / modelError.code without
//   guarding modelError itself -- presumably invokeModel only throws Error objects; verify.
diff --git a/src/clients/retry.js b/src/clients/retry.js index 2178206..5d90654 100644 --- a/src/clients/retry.js +++ b/src/clients/retry.js @@ -10,7 +10,7 @@ const DEFAULT_CONFIG = { backoffMultiplier: 2, jitterFactor: 0.1, // 10% jitter retryableStatuses: [429, 500, 502, 503, 504], - retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH'], + retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH', 'ECONNREFUSED'], }; /** @@ -44,6 +44,11 @@ function isRetryable(error, response, config) { return true; } + // Check nested cause (Node undici wraps connection errors as TypeError) + if (error && error.cause?.code && config.retryableErrors.includes(error.cause.code)) { + return true; + } + // Check for network errors if (error && (error.name === 'FetchError' || error.name === 'AbortError')) { return true; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index d553b69..f46f98d 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -1694,7 +1694,33 @@ IMPORTANT TOOL USAGE RULES: }); } - const databricksResponse = await invokeModel(cleanPayload); + let databricksResponse; + try { + databricksResponse = await invokeModel(cleanPayload); + } catch (modelError) { + const isConnectionError = modelError.cause?.code === 'ECONNREFUSED' + || modelError.message?.includes('fetch failed') + || modelError.code === 'ECONNREFUSED'; + if (isConnectionError) { + logger.error(`Provider ${providerType} is unreachable (connection refused). Is it running?`); + return { + response: { + status: 503, + body: { + error: { + type: "provider_unreachable", + message: `Provider ${providerType} is unreachable. Is the service running?`, + }, + }, + terminationReason: "provider_unreachable", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "provider_unreachable", + }; + } + throw modelError; + } // Extract and log actual token usage const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false