diff --git a/.changeset/quieter-sustained-aggregation.md b/.changeset/quieter-sustained-aggregation.md new file mode 100644 index 0000000..8b5540e --- /dev/null +++ b/.changeset/quieter-sustained-aggregation.md @@ -0,0 +1,11 @@ +--- +'@iqai/alert-logger': minor +--- + +Make sustained alerting quieter and more informative by: + +- adding rate-aware early handoff from ramp to sustained mode +- changing the default sustained update interval from 5 minutes to 15 minutes +- adding `aggregation.periodCount` for per-update deltas while keeping `suppressedSince` for compatibility +- exposing `aggregation.rampExitRatePerSecond` and `aggregation.rampExitRateWindowMs` configuration knobs +- updating sustained formatter output to show both per-period and total counts diff --git a/README.md b/README.md index 4831f21..3f8d1cd 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Stop drowning in alert storms. `@iqai/alert-logger` groups repeated errors using ## ✨ Features - **Unified API** — `logger.error('msg', error, { fields })` routes to every configured adapter -- **Exponential suppression** — alerts fire at 1, 2, 4, 8, 16, 32, 64... 
then switch to periodic digests +- **Rate-aware suppression** — alerts ramp quickly, then switch to quieter periodic updates when an incident is clearly ongoing - **Resolution detection** — get a "resolved" message when an error stops occurring - **Error fingerprinting** — same bug from different requests groups automatically (strips IDs, timestamps, UUIDs) - **Multi-channel routing** — route by severity level or custom tags to different channels @@ -152,12 +152,14 @@ When the same error fires repeatedly, the library doesn't spam your channel: | Phase | Trigger | What gets sent | |-------|---------|----------------| | **Onset** | 1st occurrence | Full alert with stack trace, fields, tags | -| **Ramp** | 2nd, 4th, 8th, 16th, 32nd, 64th | Compact: `"Payment failed (x8 — 4 suppressed)"` | -| **Sustained** | >64 in window | Digest every 5min: `"x4,812 in last 5m"` | +| **Ramp** | 2nd, 4th, 8th, 16th, 32nd, 64th until rate/count handoff | Compact: `"Payment failed (x8 — 4 suppressed)"` | +| **Sustained** | >64 total, or current rate crosses threshold after at least one ramp alert | Digest every 15min: `"x37 since last update · x412 total"` | | **Resolution** | 0 hits for 2min | `"Resolved: Payment failed — 12,847 total over 23m"` | Errors are grouped by **fingerprint** — the library strips variable parts (IDs, timestamps, UUIDs, hex addresses) from the error message and hashes it with the top stack frames. Same bug, different request = same group. +By default, the rate check uses a 1-minute sliding window and exits ramp early at `0.5` events/sec after the first ramp checkpoint has been sent. + ## 🌍 Per-Environment Config Same codebase, different behavior per environment. 
Dev won't bug you as much as prod: @@ -169,7 +171,7 @@ AlertLogger.init({ environments: { production: { levels: ['warning', 'critical'], - aggregation: { digestIntervalMs: 5 * 60_000 }, + aggregation: { digestIntervalMs: 15 * 60_000 }, }, staging: { levels: ['critical'], // only errors, no warnings @@ -177,7 +179,11 @@ AlertLogger.init({ }, development: { levels: ['critical'], - aggregation: { rampThreshold: 8, digestIntervalMs: 30 * 60_000 }, + aggregation: { + rampThreshold: 8, + rampExitRatePerSecond: 0.25, + digestIntervalMs: 30 * 60_000, + }, }, }, }) @@ -279,8 +285,10 @@ AlertLogger.init({ // Aggregation tuning aggregation: { - rampThreshold: 64, // switch from ramp to digest phase - digestIntervalMs: 5 * 60_000, // how often to send digests + rampThreshold: 64, // count-based handoff into sustained mode + rampExitRatePerSecond: 0.5, // early sustained handoff after a ramp alert + rampExitRateWindowMs: 60_000, // sliding window used for current-rate calculation + digestIntervalMs: 15 * 60_000, // how often to send sustained updates resolutionCooldownMs: 2 * 60_000, // silence before "resolved" }, diff --git a/src/adapters/console/console-adapter.ts b/src/adapters/console/console-adapter.ts index 2337bbd..1cd39d9 100644 --- a/src/adapters/console/console-adapter.ts +++ b/src/adapters/console/console-adapter.ts @@ -56,7 +56,9 @@ export class ConsoleAdapter implements AlertAdapter { lines.push(` fields: ${pairs}`) } - lines.push(` count: ${aggregation.count} | phase: ${aggregation.phase}`) + lines.push( + ` count: ${aggregation.count} | periodCount: ${aggregation.periodCount} | phase: ${aggregation.phase}`, + ) return lines.join('\n') } @@ -72,6 +74,7 @@ export class ConsoleAdapter implements AlertAdapter { aggregation: { phase: alert.aggregation.phase, count: alert.aggregation.count, + periodCount: alert.aggregation.periodCount, }, }) } diff --git a/src/adapters/discord/discord-adapter.test.ts b/src/adapters/discord/discord-adapter.test.ts index 
7da0512..eb92699 100644 --- a/src/adapters/discord/discord-adapter.test.ts +++ b/src/adapters/discord/discord-adapter.test.ts @@ -15,6 +15,7 @@ function makeAlert(overrides: Partial = {}): FormattedAlert { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/adapters/discord/formatter.test.ts b/src/adapters/discord/formatter.test.ts index b1f475e..e41ccaf 100644 --- a/src/adapters/discord/formatter.test.ts +++ b/src/adapters/discord/formatter.test.ts @@ -14,6 +14,7 @@ function makeAlert(overrides: Partial = {}): FormattedAlert { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), @@ -86,6 +87,7 @@ describe('formatDiscordEmbed', () => { phase: 'ramp', fingerprint: 'abc123', count: 10, + periodCount: 5, suppressedSince: 5, firstSeen: Date.now(), lastSeen: Date.now(), @@ -106,6 +108,7 @@ describe('formatDiscordEmbed', () => { phase: 'sustained', fingerprint: 'abc123', count: 200, + periodCount: 37, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), @@ -114,7 +117,8 @@ describe('formatDiscordEmbed', () => { }) const embed = formatDiscordEmbed(alert) - expect(embed.title).toContain('x200') + expect(embed.title).toContain('x37 since last update') + expect(embed.title).toContain('x200 total') expect(embed.title).toContain('peak rate: 3.7/s') }) }) @@ -127,6 +131,7 @@ describe('formatDiscordEmbed', () => { phase: 'resolution', fingerprint: 'abc123', count: 50, + periodCount: 0, suppressedSince: 0, firstSeen: now - 3_600_000, lastSeen: now, @@ -146,6 +151,7 @@ describe('formatDiscordEmbed', () => { phase: 'resolution', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/adapters/discord/formatter.ts b/src/adapters/discord/formatter.ts index 56efa1f..a6348a8 100644 --- a/src/adapters/discord/formatter.ts +++ 
b/src/adapters/discord/formatter.ts @@ -94,7 +94,7 @@ export function formatDiscordEmbed(alert: FormattedAlert): DiscordEmbed { case 'sustained': { const title = truncate( - `${badge} [${alert.level.toUpperCase()}] ${safeTitle} (x${aggregation.count} in last digest period \u00B7 peak rate: ${aggregation.peakRate.toFixed(1)}/s)`, + `${badge} [${alert.level.toUpperCase()}] ${safeTitle} (x${aggregation.periodCount} since last update \u00B7 x${aggregation.count} total \u00B7 peak rate: ${aggregation.peakRate.toFixed(1)}/s)`, 256, ) diff --git a/src/adapters/slack/formatter.test.ts b/src/adapters/slack/formatter.test.ts index 2766873..fb32b42 100644 --- a/src/adapters/slack/formatter.test.ts +++ b/src/adapters/slack/formatter.test.ts @@ -14,6 +14,7 @@ function makeAlert(overrides: Partial = {}): FormattedAlert { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), @@ -97,6 +98,7 @@ describe('formatSlackPayload', () => { phase: 'ramp', fingerprint: 'abc123', count: 10, + periodCount: 5, suppressedSince: 5, firstSeen: Date.now(), lastSeen: Date.now(), @@ -116,6 +118,7 @@ describe('formatSlackPayload', () => { phase: 'resolution', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), @@ -137,6 +140,7 @@ describe('formatSlackPayload', () => { phase: 'ramp', fingerprint: 'abc123', count: 10, + periodCount: 5, suppressedSince: 5, firstSeen: Date.now(), lastSeen: Date.now(), @@ -158,6 +162,7 @@ describe('formatSlackPayload', () => { phase: 'sustained', fingerprint: 'abc123', count: 200, + periodCount: 37, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), @@ -167,7 +172,8 @@ describe('formatSlackPayload', () => { const payload = formatSlackPayload(alert) const header = payload.attachments[0].blocks[0] - expect(header.text?.text).toContain('x200') + expect(header.text?.text).toContain('x37 since last update') + 
expect(header.text?.text).toContain('x200 total') expect(header.text?.text).toContain('peak: 3.7/s') }) }) @@ -180,6 +186,7 @@ describe('formatSlackPayload', () => { phase: 'resolution', fingerprint: 'abc123', count: 50, + periodCount: 0, suppressedSince: 0, firstSeen: now - 3_600_000, lastSeen: now, @@ -200,6 +207,7 @@ describe('formatSlackPayload', () => { phase: 'resolution', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/adapters/slack/formatter.ts b/src/adapters/slack/formatter.ts index 2c5353a..923f554 100644 --- a/src/adapters/slack/formatter.ts +++ b/src/adapters/slack/formatter.ts @@ -105,7 +105,7 @@ export function formatSlackPayload(alert: FormattedAlert): SlackPayload { case 'sustained': { const title = truncate( - `${badge} [${alert.level.toUpperCase()}] ${alert.title} (x${aggregation.count} \u00B7 peak: ${aggregation.peakRate.toFixed(1)}/s)`, + `${badge} [${alert.level.toUpperCase()}] ${alert.title} (x${aggregation.periodCount} since last update \u00B7 x${aggregation.count} total \u00B7 peak: ${aggregation.peakRate.toFixed(1)}/s)`, 150, ) diff --git a/src/adapters/slack/slack-adapter.test.ts b/src/adapters/slack/slack-adapter.test.ts index 27806c0..ee599ab 100644 --- a/src/adapters/slack/slack-adapter.test.ts +++ b/src/adapters/slack/slack-adapter.test.ts @@ -15,6 +15,7 @@ function makeAlert(overrides: Partial = {}): FormattedAlert { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/adapters/telegram/formatter.test.ts b/src/adapters/telegram/formatter.test.ts index 08155b8..0a00118 100644 --- a/src/adapters/telegram/formatter.test.ts +++ b/src/adapters/telegram/formatter.test.ts @@ -15,6 +15,7 @@ function makeAlert(overrides: Partial = {}): FormattedAlert { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), 
lastSeen: Date.now(), @@ -81,6 +82,7 @@ describe('formatTelegramMessage', () => { phase: 'ramp', fingerprint: 'abc123', count: 10, + periodCount: 5, suppressedSince: 5, firstSeen: Date.now(), lastSeen: Date.now(), @@ -101,6 +103,7 @@ describe('formatTelegramMessage', () => { phase: 'sustained', fingerprint: 'abc123', count: 200, + periodCount: 37, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), @@ -109,7 +112,8 @@ describe('formatTelegramMessage', () => { }) const msg = formatTelegramMessage(alert) - expect(msg).toContain('x200') + expect(msg).toContain('x37 since last update') + expect(msg).toContain('x200 total') expect(msg).toContain('peak: 3.7/s') }) }) @@ -122,6 +126,7 @@ describe('formatTelegramMessage', () => { phase: 'resolution', fingerprint: 'abc123', count: 50, + periodCount: 0, suppressedSince: 0, firstSeen: now - 3_600_000, lastSeen: now, @@ -142,6 +147,7 @@ describe('formatTelegramMessage', () => { phase: 'resolution', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/adapters/telegram/formatter.ts b/src/adapters/telegram/formatter.ts index 7927c48..81e3222 100644 --- a/src/adapters/telegram/formatter.ts +++ b/src/adapters/telegram/formatter.ts @@ -73,7 +73,7 @@ export function formatTelegramMessage(alert: FormattedAlert): string { case 'sustained': { const emoji = SEVERITY_EMOJI[alert.level] ?? 
SEVERITY_EMOJI.info parts.push( - `${emoji} ${badge} [${alert.level.toUpperCase()}] ${safeTitle} (x${aggregation.count} \u00B7 peak: ${aggregation.peakRate.toFixed(1)}/s)`, + `${emoji} ${badge} [${alert.level.toUpperCase()}] ${safeTitle} (x${aggregation.periodCount} since last update \u00B7 x${aggregation.count} total \u00B7 peak: ${aggregation.peakRate.toFixed(1)}/s)`, ) parts.push('', safeMessage) break diff --git a/src/adapters/telegram/telegram-adapter.test.ts b/src/adapters/telegram/telegram-adapter.test.ts index 2246c61..2b95f38 100644 --- a/src/adapters/telegram/telegram-adapter.test.ts +++ b/src/adapters/telegram/telegram-adapter.test.ts @@ -15,6 +15,7 @@ function makeAlert(overrides: Partial = {}): FormattedAlert { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/core/aggregator.test.ts b/src/core/aggregator.test.ts index 4ae07cd..94a7246 100644 --- a/src/core/aggregator.test.ts +++ b/src/core/aggregator.test.ts @@ -22,6 +22,7 @@ describe('Aggregator', () => { expect(result.shouldSend).toBe(true) expect(result.phase).toBe('onset') expect(result.count).toBe(1) + expect(result.periodCount).toBe(0) expect(result.fingerprint).toBe('err-1') expect(result.suppressedSince).toBe(0) }) @@ -37,16 +38,22 @@ describe('Aggregator', () => { describe('ramp phase', () => { it('sends at power-of-2 counts (2, 4, 8, 16, 32, 64)', () => { + const agg = new Aggregator({ + ...DEFAULT_AGGREGATION, + rampExitRatePerSecond: Number.POSITIVE_INFINITY, + }) const powerOfTwoCounts = [2, 4, 8, 16, 32, 64] for (let i = 1; i <= 64; i++) { - const result = aggregator.process('err-ramp') + const result = agg.process('err-ramp') if (powerOfTwoCounts.includes(i)) { expect(result.shouldSend).toBe(true) expect(result.phase).toBe('ramp') expect(result.count).toBe(i) } } + + agg.destroy() }) it('suppresses non-power-of-2 counts', () => { @@ -71,6 +78,7 @@ describe('Aggregator', () => { 
expect(result2.phase).toBe('ramp') expect(result2.shouldSend).toBe(true) expect(result2.count).toBe(2) + expect(result2.periodCount).toBe(1) }) }) @@ -91,16 +99,57 @@ describe('Aggregator', () => { expect(result.count).toBe(65) }) + it('enters sustained early after one ramp alert when rate threshold is exceeded', () => { + const config: AggregationConfig = { + rampThreshold: 64, + rampExitRatePerSecond: 0.6, + rampExitRateWindowMs: 60_000, + digestIntervalMs: 15 * 60_000, + resolutionCooldownMs: 5000, + } + const agg = new Aggregator(config) + + agg.process('err-rate') // onset + const ramp = agg.process('err-rate') // count=2, first ramp alert + expect(ramp.phase).toBe('ramp') + + let sustained: ReturnType | undefined + for (let i = 0; i < 34; i++) { + const result = agg.process('err-rate') + if (result.shouldSend && result.phase === 'sustained') { + sustained = result + break + } + } + + expect(sustained).toBeDefined() + const sustainedResult = sustained! + expect(sustainedResult.shouldSend).toBe(true) + expect(sustainedResult.phase).toBe('sustained') + expect(sustainedResult.count).toBeLessThan(config.rampThreshold) + expect(sustainedResult.periodCount).toBe(4) + expect(sustainedResult.suppressedSince).toBe(4) + + // After the handoff, further events stay quiet until digestIntervalMs elapses + const followUp = agg.process('err-rate') + expect(followUp.phase).toBe('sustained') + expect(followUp.shouldSend).toBe(false) + + agg.destroy() + }) + it('sends digest when digestIntervalMs has elapsed', () => { // Get past ramp phase processNTimes('err-digest', 65) - // Advance time past digestIntervalMs (default 5 minutes) + // Advance time past digestIntervalMs vi.advanceTimersByTime(DEFAULT_AGGREGATION.digestIntervalMs) const result = aggregator.process('err-digest') expect(result.shouldSend).toBe(true) expect(result.phase).toBe('sustained') + // 66 events in total minus the 30 already reported at the rate-based handoff + expect(result.periodCount).toBe(36) }) it('suppresses when digestIntervalMs has not elapsed', () => { @@ -135,6 +184,8 @@ describe('Aggregator', () => { it('reports correct suppressed count in sustained phase', () => { const config: AggregationConfig = { rampThreshold: 4, + rampExitRatePerSecond: Number.POSITIVE_INFINITY, + 
rampExitRateWindowMs: 60_000, digestIntervalMs: 1000, resolutionCooldownMs: 5000, } @@ -158,6 +209,7 @@ describe('Aggregator', () => { const result = agg.process('fp') expect(result.shouldSend).toBe(true) expect(result.phase).toBe('sustained') + expect(result.periodCount).toBe(4) expect(result.suppressedSince).toBe(4) agg.destroy() @@ -165,6 +217,7 @@ describe('Aggregator', () => { it('reports zero suppressedSince on onset', () => { const result = aggregator.process('fp-zero') + expect(result.periodCount).toBe(0) expect(result.suppressedSince).toBe(0) }) }) @@ -186,6 +239,31 @@ describe('Aggregator', () => { expect(resolved).toHaveLength(0) }) + it('resolves alerts that entered sustained via rate-based early exit', () => { + const config: AggregationConfig = { + rampThreshold: 64, + rampExitRatePerSecond: 0.5, + rampExitRateWindowMs: 60_000, + digestIntervalMs: 15 * 60_000, + resolutionCooldownMs: 5000, + } + const agg = new Aggregator(config) + + agg.process('err-rate-resolve') + agg.process('err-rate-resolve') + for (let i = 0; i < 29; i++) { + agg.process('err-rate-resolve') + } + + vi.advanceTimersByTime(5001) + + const resolved = agg.checkResolutions() + expect(resolved).toHaveLength(1) + expect(resolved[0].fingerprint).toBe('err-rate-resolve') + + agg.destroy() + }) + it('resolves alerts that exceeded rampThreshold (sustained crisis)', () => { // Process more than rampThreshold (default 64) processNTimes('err-resolve', 65) diff --git a/src/core/aggregator.ts b/src/core/aggregator.ts index ebfc8b7..1466d8d 100644 --- a/src/core/aggregator.ts +++ b/src/core/aggregator.ts @@ -5,6 +5,8 @@ export interface AggregationState { firstSeen: number lastSeen: number phase: AggregationPhase + everEnteredSustained: boolean + hasSentRampAlert: boolean peakRate: number lastAlertedAt: number lastAlertedCount: number @@ -15,6 +17,7 @@ export interface AggregationResult { shouldSend: boolean phase: AggregationPhase count: number + periodCount: number suppressedSince: number 
firstSeen: number lastSeen: number @@ -30,7 +33,6 @@ export interface ResolvedEntry { peakRate: number } -const RATE_WINDOW_MS = 60_000 const RESOLUTION_CHECK_INTERVAL_MS = 30_000 const EVICTION_GRACE_MS = 5 * 60_000 @@ -57,6 +59,8 @@ export class Aggregator { firstSeen: now, lastSeen: now, phase: 'onset', + everEnteredSustained: false, + hasSentRampAlert: false, peakRate: 0, lastAlertedAt: 0, lastAlertedCount: 0, @@ -70,9 +74,9 @@ export class Aggregator { // Update sliding rate window state.rateWindow.push(now) - const windowCutoff = now - RATE_WINDOW_MS + const windowCutoff = now - this.config.rampExitRateWindowMs state.rateWindow = state.rateWindow.filter((t) => t > windowCutoff) - const currentRate = state.rateWindow.length / (RATE_WINDOW_MS / 1000) + const currentRate = state.rateWindow.length / (this.config.rampExitRateWindowMs / 1000) if (currentRate > state.peakRate) { state.peakRate = currentRate } @@ -81,6 +85,7 @@ export class Aggregator { shouldSend: false, phase: state.phase, count: state.count, + periodCount: 0, suppressedSince: 0, firstSeen: state.firstSeen, lastSeen: state.lastSeen, @@ -98,25 +103,20 @@ export class Aggregator { return result } - // Phase: ramp (power-of-2 counts up to rampThreshold) - if (state.count <= this.config.rampThreshold && isPowerOfTwo(state.count)) { - state.phase = 'ramp' - result.shouldSend = true - result.phase = 'ramp' - result.suppressedSince = state.count - state.lastAlertedCount - state.lastAlertedAt = now - state.lastAlertedCount = state.count - return result - } + const shouldEnterSustainedByRate = + state.hasSentRampAlert && currentRate >= this.config.rampExitRatePerSecond - // Phase: sustained (count exceeds rampThreshold) - if (state.count > this.config.rampThreshold) { + if (state.count > this.config.rampThreshold || shouldEnterSustainedByRate) { state.phase = 'sustained' + // Only the first rate-based handoff alerts immediately; later updates wait for digestIntervalMs. + const isRateHandoff = !state.everEnteredSustained && state.count <= this.config.rampThreshold + state.everEnteredSustained = true result.phase = 'sustained' - if (now - state.lastAlertedAt >= this.config.digestIntervalMs) { + 
if (isRateHandoff || now - state.lastAlertedAt >= this.config.digestIntervalMs) { result.shouldSend = true - result.suppressedSince = state.count - state.lastAlertedCount + result.periodCount = state.count - state.lastAlertedCount + result.suppressedSince = result.periodCount state.lastAlertedAt = now state.lastAlertedCount = state.count } @@ -124,6 +124,19 @@ export class Aggregator { return result } + // Phase: ramp (power-of-2 counts up to rampThreshold) + if (isPowerOfTwo(state.count)) { + state.phase = 'ramp' + result.shouldSend = true + result.phase = 'ramp' + result.periodCount = state.count - state.lastAlertedCount + result.suppressedSince = result.periodCount + state.lastAlertedAt = now + state.lastAlertedCount = state.count + state.hasSentRampAlert = true + return result + } + // Still in ramp range but not a power of 2 -- suppress state.phase = 'ramp' result.phase = 'ramp' @@ -144,11 +157,8 @@ export class Aggregator { } if (now - state.lastSeen >= this.config.resolutionCooldownMs) { - // Only send resolution for alerts that reached the sustained phase - // (count > rampThreshold). A few sporadic failures aren't a "crisis" - // worth announcing as resolved — resolution is for ongoing incidents - // that generated a flood of alerts and then stopped. - if (state.count > this.config.rampThreshold) { + // Only send resolution for alerts that ever reached sustained mode. 
+ if (state.everEnteredSustained) { resolved.push({ fingerprint, count: state.count, diff --git a/src/core/alert-logger.ts b/src/core/alert-logger.ts index 039233f..a091697 100644 --- a/src/core/alert-logger.ts +++ b/src/core/alert-logger.ts @@ -133,6 +133,7 @@ export class AlertLogger { phase: result.phase, fingerprint: fp, count: result.count, + periodCount: result.periodCount, suppressedSince: result.suppressedSince, firstSeen: result.firstSeen, lastSeen: result.lastSeen, @@ -160,6 +161,7 @@ export class AlertLogger { phase: 'resolution', fingerprint: entry.fingerprint, count: entry.count, + periodCount: 0, suppressedSince: 0, firstSeen: entry.firstSeen, lastSeen: entry.lastSeen, @@ -194,6 +196,7 @@ export class AlertLogger { phase: 'resolution', fingerprint: `health-recovery-${adapterName}`, count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/core/health-manager.test.ts b/src/core/health-manager.test.ts index d45e298..c7bf966 100644 --- a/src/core/health-manager.test.ts +++ b/src/core/health-manager.test.ts @@ -38,6 +38,7 @@ function createAlert(overrides?: Partial): FormattedAlert { phase: 'onset', fingerprint: 'test-fp', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/core/queue-persistence.test.ts b/src/core/queue-persistence.test.ts index 7e1f4ca..acdfdff 100644 --- a/src/core/queue-persistence.test.ts +++ b/src/core/queue-persistence.test.ts @@ -24,6 +24,7 @@ function makeEntry(overrides?: Partial): QueueEntry { phase: 'onset', fingerprint: 'abc123', count: 1, + periodCount: 0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/core/retry-queue.test.ts b/src/core/retry-queue.test.ts index 4f9df89..c776ca6 100644 --- a/src/core/retry-queue.test.ts +++ b/src/core/retry-queue.test.ts @@ -15,6 +15,7 @@ function makeEntry(id: number): QueueEntry { phase: 'onset', fingerprint: `fp-${id}`, count: 1, + periodCount: 
0, suppressedSince: 0, firstSeen: Date.now(), lastSeen: Date.now(), diff --git a/src/core/types.ts b/src/core/types.ts index e8e3d09..24b9099 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -25,6 +25,7 @@ export interface AggregationMeta { phase: AggregationPhase fingerprint: string count: number + periodCount: number suppressedSince: number firstSeen: number lastSeen: number @@ -52,6 +53,8 @@ export interface NormalizerRule { export interface AggregationConfig { rampThreshold: number + rampExitRatePerSecond: number + rampExitRateWindowMs: number digestIntervalMs: number resolutionCooldownMs: number } @@ -109,7 +112,9 @@ export interface ResolvedConfig { export const DEFAULT_AGGREGATION: AggregationConfig = { rampThreshold: 64, - digestIntervalMs: 5 * 60_000, + rampExitRatePerSecond: 0.5, + rampExitRateWindowMs: 60_000, + digestIntervalMs: 15 * 60_000, resolutionCooldownMs: 2 * 60_000, }