11import { Injectable } from '@nestjs/common'
22import { statfs } from 'fs/promises'
33import { cpus , loadavg , totalmem } from 'os'
4- import { DiskHealthIndicator , HealthCheckError , HealthCheckResult , HealthCheckService , HttpHealthIndicator , MemoryHealthIndicator , MongooseHealthIndicator } from '@nestjs/terminus'
4+ import { DiskHealthIndicator , HealthCheckError , HealthCheckResult , HealthCheckService , HealthIndicatorResult , HttpHealthIndicator , MemoryHealthIndicator , MongooseHealthIndicator } from '@nestjs/terminus'
55
// Bytes per mebibyte; byte counts (process.memoryUsage(), totalmem()) are divided by this to report MB.
66const MEMORY_MULTIPLIER = 1024 * 1024
// Bytes per gibibyte (1024^3). NOTE(review): usage is outside this view; presumably the disk checks, confirm.
77const GIGABYTE_MULTIPLIER = 1024 * 1024 * 1024
// NOTE(review): presumably the per-core 1-minute load above which the CPU check reports "down"; confirm in checkCpu.
88const CPU_LOAD_THRESHOLD = 0.85
// Expressed as a fraction (0.95), not a 0-100 percentage, despite the name.
// NOTE(review): presumably the disk usage ratio passed to the storage check; confirm.
99const DISK_THRESHOLD_PERCENT = 0.95
10- const HEAP_MEMORY_THRESHOLD_MB = 512
11- const RSS_MEMORY_THRESHOLD_MB = 512
// True unless NODE_ENV is exactly 'production', so an unset NODE_ENV counts as development.
10+ const IS_DEV = process . env . NODE_ENV !== 'production'
11+
/**
 * Reads an environment variable and interprets it as a strictly positive integer.
 *
 * The raw text must consist solely of decimal digits (an optional leading `+`
 * and surrounding whitespace are tolerated). `Number.parseInt` on its own
 * silently accepts trailing garbage ("512MB" -> 512) and truncates decimals
 * ("1.5" -> 1), so the text is validated before it is converted.
 *
 * @param key - Name of the environment variable to read.
 * @returns The parsed value, or `null` when the variable is unset, empty,
 *          malformed, zero, or too large to be represented exactly.
 */
const readPositiveIntegerEnv = (key: string): number | null => {
  const rawValue = process.env[key]?.trim()
  if (!rawValue || !/^\+?\d+$/.test(rawValue)) {
    return null
  }

  const parsedValue = Number.parseInt(rawValue, 10)
  // Rejects 0 as well as digit strings long enough to overflow to Infinity
  // or to lose integer precision.
  if (!Number.isSafeInteger(parsedValue) || parsedValue <= 0) {
    return null
  }

  return parsedValue
}
25+
/**
 * Resolves a threshold (in megabytes) from the environment, falling back to
 * built-in defaults.
 *
 * Lookup order, first valid positive integer wins:
 *  1. `baseKey` itself;
 *  2. `${baseKey}_DEV` or `${baseKey}_PROD`, chosen by `IS_DEV`;
 *  3. `defaults.dev` or `defaults.prod`, chosen by `IS_DEV`.
 *
 * @param baseKey - Environment variable name without the environment suffix.
 * @param defaults - Fallback values used when no valid override is set.
 * @returns The resolved threshold.
 */
const resolveThresholdMb = (baseKey: string, defaults: { dev: number; prod: number }): number => {
  const envSuffix = IS_DEV ? 'DEV' : 'PROD'
  return (
    readPositiveIntegerEnv(baseKey) ??
    // The key must be built without any whitespace, otherwise the
    // environment-specific override can never match a real variable name.
    readPositiveIntegerEnv(`${baseKey}_${envSuffix}`) ??
    (IS_DEV ? defaults.dev : defaults.prod)
  )
}
34+
// Heap threshold in MB: overridable through SESAME_HEALTH_HEAP_THRESHOLD_MB (or its _DEV/_PROD variant); defaults to 1024 in dev, 512 in prod.
35+ const HEAP_MEMORY_THRESHOLD_MB = resolveThresholdMb ( 'SESAME_HEALTH_HEAP_THRESHOLD_MB' , { dev : 1024 , prod : 512 } )
// RSS threshold in MB: overridable through SESAME_HEALTH_RSS_THRESHOLD_MB (or its _DEV/_PROD variant); defaults to 3072 in dev, 1024 in prod.
36+ const RSS_MEMORY_THRESHOLD_MB = resolveThresholdMb ( 'SESAME_HEALTH_RSS_THRESHOLD_MB' , { dev : 3072 , prod : 1024 } )
// Number of native-memory samples kept in the rolling history; also the minimum sample count before drift is evaluated.
37+ const NATIVE_MEMORY_DERIVE_MIN_SAMPLES = 6
// Minimum growth in MB across the sample window for strictly increasing native memory to be reported as "down".
38+ const NATIVE_MEMORY_DERIVE_MIN_GROWTH_MB = resolveThresholdMb ( 'SESAME_HEALTH_NATIVE_DERIVE_MIN_GROWTH_MB' , { dev : 256 , prod : 128 } )
1239
1340export type HealthSnapshotPayload = HealthCheckResult & {
1441 system : {
1542 memory : {
1643 heapUsedMb : number
1744 heapTotalMb : number
1845 rssMb : number
46+ externalMb : number
47+ arrayBuffersMb : number
48+ nativeMb : number
1949 totalSystemMemoryMb : number
2050 }
2151 cpu : {
@@ -42,6 +72,8 @@ export type HealthSnapshotPayload = HealthCheckResult & {
4272
4373@Injectable ( )
4474export class HealthSnapshotService {
75+ private nativeMemoryHistory : number [ ] = [ ]
76+
4577 public constructor (
4678 private readonly health : HealthCheckService ,
4779 private readonly mongoose : MongooseHealthIndicator ,
@@ -51,26 +83,34 @@ export class HealthSnapshotService {
5183 ) { }
5284
5385 public async collectSnapshot ( ) : Promise < HealthSnapshotPayload > {
54- const healthResult = await this . health . check ( [
55- ( ) => this . checkMongoose ( ) ,
56- ( ) => this . http . pingCheck ( 'http-github' , 'https://github.com' ) ,
57- ( ) => this . checkStorage ( ) ,
58- ( ) => this . checkMemoryHeap ( ) ,
59- ( ) => this . checkMemoryRss ( ) ,
60- ( ) => this . checkCpu ( ) ,
61- ] )
86+ const healthResult = await this . collectHealthResult ( )
6287
6388 const memoryUsage = process . memoryUsage ( )
6489 const cpuCount = Math . max ( cpus ( ) . length , 1 )
6590 const [ load1m , load5m , load15m ] = loadavg ( )
91+ const externalMb = Number ( ( memoryUsage . external / MEMORY_MULTIPLIER ) . toFixed ( 2 ) )
92+ const arrayBuffersMb = Number ( ( memoryUsage . arrayBuffers / MEMORY_MULTIPLIER ) . toFixed ( 2 ) )
93+ const nativeMb = Number ( ( externalMb + arrayBuffersMb ) . toFixed ( 2 ) )
94+ const memoryNativeIndicator = this . buildMemoryNativeIndicator ( nativeMb , externalMb , arrayBuffersMb )
95+
96+ const details = {
97+ ...( ( healthResult . details || { } ) as HealthIndicatorResult ) ,
98+ memory_native : memoryNativeIndicator ,
99+ }
100+ const hasAnyDown = Object . values ( details ) . some ( ( indicator ) => indicator ?. status === 'down' )
66101
67102 return {
68103 ...healthResult ,
104+ status : hasAnyDown ? 'error' : 'ok' ,
105+ details,
69106 system : {
70107 memory : {
71108 heapUsedMb : Number ( ( memoryUsage . heapUsed / MEMORY_MULTIPLIER ) . toFixed ( 2 ) ) ,
72109 heapTotalMb : Number ( ( memoryUsage . heapTotal / MEMORY_MULTIPLIER ) . toFixed ( 2 ) ) ,
73110 rssMb : Number ( ( memoryUsage . rss / MEMORY_MULTIPLIER ) . toFixed ( 2 ) ) ,
111+ externalMb,
112+ arrayBuffersMb,
113+ nativeMb,
74114 totalSystemMemoryMb : Number ( ( totalmem ( ) / MEMORY_MULTIPLIER ) . toFixed ( 2 ) ) ,
75115 } ,
76116 cpu : {
@@ -96,6 +136,79 @@ export class HealthSnapshotService {
96136 }
97137 }
98138
/**
 * Runs all health indicators and resolves with a result even when some of
 * them fail, so the caller can still build a snapshot.
 *
 * Two failure shapes are converted into a result instead of being rethrown:
 *  - a raw `HealthCheckError`, whose causes become the `error`/`details` maps;
 *  - an exception exposing `getResponse()` whose payload already is a health
 *    check result (per the @nestjs/terminus documentation,
 *    `HealthCheckService.check` reports failing indicators this way).
 *
 * @returns The aggregated health check result.
 * @throws Any error that does not carry health check information.
 */
private async collectHealthResult(): Promise<HealthCheckResult> {
  try {
    return await this.health.check([
      () => this.checkMongoose(),
      () => this.http.pingCheck('http-github', 'https://github.com'),
      () => this.checkStorage(),
      () => this.checkMemoryHeap(),
      () => this.checkMemoryRss(),
      () => this.checkCpu(),
    ])
  } catch (error) {
    if (error instanceof HealthCheckError) {
      const details = this.extractHealthErrorDetails(error)
      return {
        status: 'error',
        info: {},
        error: details,
        details,
      }
    }

    // Duck-typed on purpose: avoids depending on a specific HTTP exception
    // class while still recovering the result wrapped by the health service.
    const getResponse = (error as { getResponse?: unknown } | null | undefined)?.getResponse
    if (typeof getResponse === 'function') {
      const response: unknown = getResponse.call(error)
      if (typeof response === 'object' && response !== null) {
        const candidate = response as { status?: unknown; details?: unknown }
        if (
          typeof candidate.status === 'string' &&
          typeof candidate.details === 'object' &&
          candidate.details !== null
        ) {
          return response as HealthCheckResult
        }
      }
    }

    throw error
  }
}
163+
/**
 * Converts a `HealthCheckError` into an indicator result map.
 *
 * @param error - The health check error to inspect.
 * @returns `error.causes` when it is a non-empty object; otherwise a single
 *          `unknown` entry marked "down" that carries the error message.
 */
private extractHealthErrorDetails(error: HealthCheckError): HealthIndicatorResult {
  const { causes } = error
  const hasUsableCauses =
    Boolean(causes) && typeof causes === 'object' && Object.keys(causes).length > 0

  if (!hasUsableCauses) {
    // Nothing structured to report: fall back to a synthetic indicator.
    return {
      unknown: {
        status: 'down',
        message: error.message,
      },
    }
  }

  return causes as HealthIndicatorResult
}
177+
/**
 * Records the latest native memory sample and derives a drift indicator from
 * the rolling history.
 *
 * The history keeps at most NATIVE_MEMORY_DERIVE_MIN_SAMPLES values. The
 * indicator is "down" only when the window is full, every sample is strictly
 * greater than the previous one, and the growth between the oldest and the
 * newest sample reaches NATIVE_MEMORY_DERIVE_MIN_GROWTH_MB.
 *
 * @param nativeMb - Current native memory figure in MB (appended to the history).
 * @param externalMb - Current external memory in MB (reported as-is).
 * @param arrayBuffersMb - Current array buffer memory in MB (reported as-is).
 * @returns The indicator payload with status, raw figures and growth data.
 */
private buildMemoryNativeIndicator(nativeMb: number, externalMb: number, arrayBuffersMb: number): {
  status: 'up' | 'down'
  nativeMb: number
  externalMb: number
  arrayBuffersMb: number
  growthMb: number
  growthThresholdMb: number
  sampleCount: number
} {
  this.nativeMemoryHistory.push(nativeMb)
  if (this.nativeMemoryHistory.length > NATIVE_MEMORY_DERIVE_MIN_SAMPLES) {
    this.nativeMemoryHistory.shift()
  }

  const sampleCount = this.nativeMemoryHistory.length
  // `??` rather than `||`: a baseline of exactly 0 MB is a valid sample and
  // must not be replaced by the current value (which would hide all growth).
  const firstValue = this.nativeMemoryHistory[0] ?? nativeMb
  const growthMb = Number((nativeMb - firstValue).toFixed(2))
  const hasEnoughSamples = sampleCount >= NATIVE_MEMORY_DERIVE_MIN_SAMPLES
  const isStrictlyIncreasing =
    hasEnoughSamples &&
    this.nativeMemoryHistory.every((value, index, values) => {
      if (index === 0) {
        return true
      }
      const previous = values[index - 1]
      return previous !== undefined && value > previous
    })
  const isDrifting = isStrictlyIncreasing && growthMb >= NATIVE_MEMORY_DERIVE_MIN_GROWTH_MB

  return {
    status: isDrifting ? 'down' : 'up',
    nativeMb,
    externalMb,
    arrayBuffersMb,
    growthMb,
    growthThresholdMb: NATIVE_MEMORY_DERIVE_MIN_GROWTH_MB,
    sampleCount,
  }
}
211+
99212 private checkCpu ( ) : Record < string , { status : 'up' | 'down' ; load1mPerCore : number ; threshold : number ; cores : number } > {
100213 const cpuCount = Math . max ( cpus ( ) . length , 1 )
101214 const perCoreLoad = loadavg ( ) [ 0 ] / cpuCount
0 commit comments