Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ COPY . *.env ./
RUN yarn install --no-progress && \
yarn build-release

CMD node --experimental-json-modules build/main.js
CMD node --max-old-space-size=1024 --max-semi-space-size=128 --optimize-for-size --gc-interval=100 --expose-gc --experimental-json-modules build/main.js
11 changes: 10 additions & 1 deletion src/db/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,19 @@ export const connectDB = async (onConnected: () => any = defaultFn): Promise<voi
mongoose.connection.on(
'error', (e) => {
logger.error('MongoDB connection error', e)
process.exit(1)
// Don't exit immediately, let the app try to reconnect
// process.exit(1)
}
)

mongoose.connection.on('disconnected', () => {
logger.warn('MongoDB disconnected. Attempting to reconnect...')
})

mongoose.connection.on('reconnected', () => {
logger.info('MongoDB reconnected successfully')
})

await mongoose.connect(
`${scheme}://${user}:${pass}@${server}/${dbName}?authSource=${authDb}&tls=${tlsFlag}&replicaSet=${rsName}`,
{ autoIndex: true }
Expand Down
82 changes: 60 additions & 22 deletions src/db/utils/jobs/migration/SirvClient.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import axios from 'axios'
import { CircuitBreaker, retryWithBackoff } from '../../../../utils/CircuitBreaker'

const SIRV_CONFIG = {
clientId: process.env.SIRV_CLIENT_ID_RO ?? null,
Expand All @@ -9,9 +10,27 @@ const client = axios.create({
baseURL: 'https://api.sirv.com/v2',
headers: {
'content-type': 'application/json'
}
},
timeout: 30000 // 30 second timeout
})

// Add axios interceptors for better error handling
client.interceptors.response.use(
response => response,
async error => {
console.error('Sirv API error:', {
status: error.response?.status,
statusText: error.response?.statusText,
data: error.response?.data,
config: {
method: error.config?.method,
url: error.config?.url
}
})
return await Promise.reject(error)
}
)

const headers = {
'content-type': 'application/json'
}
Expand All @@ -21,23 +40,33 @@ interface TokenParamsType {
clientSecret: string | null
}

// Circuit breaker for Sirv API calls
const sirvCircuitBreaker = new CircuitBreaker({
failureThreshold: 3,
resetTimeout: 60000, // 1 minute
monitoringPeriod: 10000 // 10 seconds
})

const getToken = async (): Promise<string | null> => {
const params: TokenParamsType = {
clientId: SIRV_CONFIG.clientId,
clientSecret: SIRV_CONFIG.clientSecret
}

try {
const res = await client.post(
'/token',
params)
const res = await sirvCircuitBreaker.execute(async () => {
return await retryWithBackoff(async () => {
return await client.post('/token', params)
}, 3, 1000, 5000)
})

if (res.status === 200) {
return res.data.token
}
} catch (e) {
console.error(e)
process.exit(1)
console.error('Failed to get Sirv token after retries:', e)
// Don't exit process - let the app continue without Sirv functionality
return null
}
return null
}
Expand All @@ -57,22 +86,31 @@ interface FileMetadaata {
* @returns
*/
export const getFileInfo = async (filename: string): Promise<FileMetadaata> => {
const res = await client.get(
'/files/stat?filename=' + encodeURIComponent(filename),
{
headers: {
...headers,
Authorization: `bearer ${token}`
}
}
)

if (res.status === 200) {
const { ctime, mtime } = res.data
return ({
btime: new Date(ctime),
mtime: new Date(mtime)
try {
const res = await sirvCircuitBreaker.execute(async () => {
return await retryWithBackoff(async () => {
return await client.get(
'/files/stat?filename=' + encodeURIComponent(filename),
{
headers: {
...headers,
Authorization: `bearer ${token}`
}
}
)
}, 3, 1000, 5000)
})

if (res.status === 200) {
const { ctime, mtime } = res.data
return ({
btime: new Date(ctime),
mtime: new Date(mtime)
})
}
throw new Error('Sirv API.getFileInfo() error: ' + String(res.statusText))
} catch (e) {
console.error('Failed to get file info after retries:', e)
throw e
}
throw new Error('Sirv API.getFileInfo() error' + res.statusText)
}
56 changes: 54 additions & 2 deletions src/main.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,57 @@
import { connectDB, defaultPostConnect } from './db/index.js'
import { createServer } from './server.js'
import { errorMonitor, setupGlobalErrorHandlers } from './utils/ErrorMonitor.js'

await connectDB(defaultPostConnect)
await createServer()
// Setup enhanced error monitoring
setupGlobalErrorHandlers()

// Enhanced error handling with graceful shutdown
let isShuttingDown = false

process.on('uncaughtException', (error) => {
console.error('Uncaught Exception:', error)
errorMonitor.logError(error, 'UNCAUGHT_EXCEPTION')

if (!isShuttingDown) {
isShuttingDown = true
// Give some time for cleanup before exiting
setTimeout(() => {
console.log('Final error stats:', errorMonitor.getStats())
process.exit(1)
}, 5000)
}
})

process.on('unhandledRejection', (reason, promise) => {
console.error('Unhandled Rejection at:', promise, 'reason:', reason)
const error = reason instanceof Error ? reason : new Error(String(reason))
errorMonitor.logError(error, 'UNHANDLED_REJECTION', { promise })

// Don't exit immediately on unhandled rejections in production
// Log the error and continue running
if (process.env.NODE_ENV !== 'production') {
if (!isShuttingDown) {
isShuttingDown = true
setTimeout(() => process.exit(1), 5000)
}
}
})

process.on('SIGTERM', () => {
console.log('SIGTERM received, shutting down gracefully')
process.exit(0)
})

process.on('SIGINT', () => {
console.log('SIGINT received, shutting down gracefully')
process.exit(0)
})

try {
await connectDB(defaultPostConnect)
await createServer()
console.log('🚀 Server started successfully')
} catch (error) {
console.error('Failed to start server:', error)
process.exit(1)
}
19 changes: 17 additions & 2 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,31 @@ export async function createServer (): Promise<{ app: express.Application, serve
schema,
plugins: [ApolloServerPluginDrainHttpServer({ httpServer })],
cache: new InMemoryLRUCache({
max: 100
max: 50,
maxSize: 1024 * 1024 * 10
})
})
// server must be started before applying middleware
await server.start()

const context = process.env.LOCAL_DEV_BYPASS_AUTH === 'true' ? localDevBypassAuthContext : createContext

app.get('/health', (req, res) => {
const memUsage = process.memoryUsage()
res.json({
status: 'ok',
timestamp: new Date().toISOString(),
memory: {
rss: `${Math.round(memUsage.rss / 1024 / 1024)}MB`,
heapTotal: `${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
heapUsed: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB`,
external: `${Math.round(memUsage.external / 1024 / 1024)}MB`
}
})
})

app.use('/',
bodyParser.json({ limit: '10mb' }),
bodyParser.json({ limit: '5mb' }),
cors<cors.CorsRequest>(),
express.json(),
expressMiddleware(server, {
Expand Down
120 changes: 120 additions & 0 deletions src/utils/CircuitBreaker.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/**
* Circuit breaker pattern implementation for handling network failures
*/

/** Lifecycle states of the circuit breaker. */
export enum CircuitState {
  /** Normal operation — calls pass through and failures are counted. */
  CLOSED = 'CLOSED',
  /** Failure threshold reached — calls are rejected until the reset timeout elapses. */
  OPEN = 'OPEN',
  /** Trial mode after the reset timeout — a success closes the circuit again, a failure sends it back to OPEN. */
  HALF_OPEN = 'HALF_OPEN'
}

/** Tuning parameters for a CircuitBreaker instance. */
export interface CircuitBreakerOptions {
  /** Number of consecutive failures (the count resets on success) before the circuit opens. */
  failureThreshold: number
  /** How long (ms) the circuit stays OPEN before a HALF_OPEN trial call is allowed. */
  resetTimeout: number
  /** Sampling window in ms — NOTE(review): currently not read anywhere in CircuitBreaker; confirm whether it is still needed. */
  monitoringPeriod: number
}

/**
 * Minimal circuit breaker: after `failureThreshold` consecutive failures the
 * circuit opens and rejects calls immediately; after `resetTimeout` ms a single
 * trial call is allowed (HALF_OPEN) — its success closes the circuit, its
 * failure re-opens it.
 */
export class CircuitBreaker {
  private state: CircuitState = CircuitState.CLOSED
  private failureCount: number = 0
  private lastFailureTime?: number
  private successCount: number = 0

  constructor (
    // NOTE: `monitoringPeriod` is accepted for API compatibility but is not
    // currently consulted by any method of this class.
    private readonly options: CircuitBreakerOptions = {
      failureThreshold: 5,
      resetTimeout: 60000, // 1 minute
      monitoringPeriod: 10000 // 10 seconds
    }
  ) {}

  /**
   * Run `operation` through the breaker.
   *
   * @param operation async callable to protect
   * @returns the operation's result on success
   * @throws Error('Circuit breaker is OPEN - operation not allowed') when the
   *   circuit is open and the reset timeout has not yet elapsed; otherwise
   *   rethrows whatever `operation` throws.
   */
  async execute<T>(operation: () => Promise<T>): Promise<T> {
    if (this.state === CircuitState.OPEN) {
      if (this.shouldAttemptReset()) {
        this.state = CircuitState.HALF_OPEN
      } else {
        throw new Error('Circuit breaker is OPEN - operation not allowed')
      }
    }

    try {
      const result = await operation()
      this.onSuccess()
      return result
    } catch (error) {
      this.onFailure()
      throw error
    }
  }

  /** Record a success: clear the failure streak and close a half-open circuit. */
  private onSuccess (): void {
    this.failureCount = 0
    if (this.state === CircuitState.HALF_OPEN) {
      this.state = CircuitState.CLOSED
    }
    this.successCount++
  }

  /** Record a failure and open the circuit when warranted. */
  private onFailure (): void {
    this.failureCount++
    this.lastFailureTime = Date.now()

    // A failed trial call re-opens the circuit immediately. (Previously this
    // relied on failureCount still being >= threshold from before the trial —
    // same outcome, but the transition is now explicit.)
    if (this.state === CircuitState.HALF_OPEN ||
        this.failureCount >= this.options.failureThreshold) {
      this.state = CircuitState.OPEN
    }
  }

  /** True once the OPEN circuit has cooled down long enough to allow a trial. */
  private shouldAttemptReset (): boolean {
    return (
      this.lastFailureTime != null &&
      Date.now() - this.lastFailureTime >= this.options.resetTimeout
    )
  }

  /** Current state, mainly for monitoring/tests. */
  getState (): CircuitState {
    return this.state
  }

  /** Snapshot of internal counters for health reporting. */
  getStats (): { state: CircuitState, failureCount: number, successCount: number, lastFailureTime?: number } {
    return {
      state: this.state,
      failureCount: this.failureCount,
      successCount: this.successCount,
      lastFailureTime: this.lastFailureTime
    }
  }
}

/**
 * Retry an async operation with exponential backoff.
 *
 * @param operation async callable to attempt
 * @param maxRetries total number of attempts (not retries *after* the first)
 * @param initialDelay delay (ms) before the second attempt; doubles each attempt
 * @param maxDelay cap (ms) on the delay between attempts
 * @returns the first successful result of `operation`
 * @throws the last failure (always an Error instance) once all attempts are exhausted
 */
export async function retryWithBackoff<T> (
  operation: () => Promise<T>,
  maxRetries: number = 3,
  initialDelay: number = 1000,
  maxDelay: number = 10000
): Promise<T> {
  let lastError: Error | undefined

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await operation()
    } catch (error) {
      // Normalize non-Error throws (the previous `error as Error` cast was an
      // unchecked assertion) so callers always receive a real Error.
      lastError = error instanceof Error ? error : new Error(String(error))

      if (attempt === maxRetries) {
        break
      }

      // Exponential backoff: initialDelay * 2^(attempt-1), capped at maxDelay.
      const delay = Math.min(
        initialDelay * Math.pow(2, attempt - 1),
        maxDelay
      )

      console.warn(`Operation failed (attempt ${attempt}/${maxRetries}), retrying in ${delay}ms:`, error)
      await new Promise(resolve => setTimeout(resolve, delay))
    }
  }

  throw lastError ?? new Error('Operation failed after all retry attempts')
}
Loading
Loading