Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/serving/api/alerts/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,13 @@ def next_escalation_step(
if step.level > alert.last_escalation_level and elapsed_minutes >= step.after_minutes
]
if due_steps:
return due_steps[-1]
# Advance exactly one level per evaluation tick — the lowest level above
# the current one — so every intermediate escalation target is paged.
# Returning the highest due step (due_steps[-1]) silently skipped the
# on-call recipients of intervening levels whenever two or more became
# due between ticks (sparse polling, restart catch-up).
# (audit_28_06_26.md §5 medium: escalation skips intermediate levels)
return min(due_steps, key=lambda step: step.level)
if (
len(alert.escalation) == 1
and alert.last_escalation_level == alert.escalation[0].level
Expand Down
58 changes: 57 additions & 1 deletion tests/unit/test_alert_escalation_delivery.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
import pytest

import src.serving.api.alerts.escalation as escalation
from src.serving.api.alerts.dispatcher import AlertEscalationStep, AlertRule
from src.serving.api.alerts.dispatcher import (
AlertEscalationStep,
AlertRule,
next_escalation_step,
)

_NOW = datetime(2026, 6, 28, 12, 0, tzinfo=UTC)

Expand Down Expand Up @@ -107,3 +111,55 @@ async def test_escalation_level_not_advanced_on_delivery_failure(

assert result.last_escalation_level == 1 # not advanced to 2 on failed delivery
assert triggered == 0


# --- next_escalation_step: no intermediate-level skip (audit_28_06_26.md §5 medium) ---

# Fire time shared by the next_escalation_step tests in this section.
_FIRED = datetime(2026, 6, 28, 11, 0, tzinfo=UTC)  # 60 min before _NOW


def _three_level_alert(last_level: int) -> AlertRule:
    """Build a firing alert with a three-step escalation ladder.

    The ladder has levels 1, 2 and 3 with ``after_minutes`` of 0, 10 and 20.
    The alert is created through ``_alert`` with ``fired_at=_FIRED`` and its
    ``last_escalation_level`` set to *last_level*.
    """
    level_one = AlertEscalationStep(
        level=1, after_minutes=0, webhook_url="https://h/l1"
    )
    level_two = AlertEscalationStep(
        level=2, after_minutes=10, webhook_url="https://h/l2"
    )
    level_three = AlertEscalationStep(
        level=3, after_minutes=20, webhook_url="https://h/l3"
    )
    return _alert(
        escalation=[level_one, level_two, level_three],
        fired_at=_FIRED,
        state="firing",
        last_escalation_level=last_level,
    )


def test_next_escalation_step_advances_one_level_at_a_time() -> None:
    """Each evaluation yields only the next level above the recorded one."""
    # _NOW is 60 minutes after _FIRED, so the 0/10/20-minute thresholds of all
    # three levels have passed. From level 1 the result must still be level 2,
    # not a jump to level 3 that would leave level 2's recipient unpaged.
    after_level_one = next_escalation_step(_three_level_alert(last_level=1), _NOW)
    assert after_level_one is not None
    assert after_level_one.level == 2
    assert after_level_one.webhook_url == "https://h/l2"

    # With level 2 recorded, the next evaluation moves on to level 3.
    after_level_two = next_escalation_step(_three_level_alert(last_level=2), _NOW)
    assert after_level_two is not None
    assert after_level_two.level == 3

    # At the top of the ladder there is no further step to return.
    after_level_three = next_escalation_step(_three_level_alert(last_level=3), _NOW)
    assert after_level_three is None


def test_next_escalation_step_picks_lowest_due_regardless_of_list_order() -> None:
    """The lowest due level is chosen even when the steps are listed unsorted."""
    level_three = AlertEscalationStep(
        level=3, after_minutes=20, webhook_url="https://h/l3"
    )
    level_two = AlertEscalationStep(
        level=2, after_minutes=10, webhook_url="https://h/l2"
    )
    # Level 3 is deliberately listed before level 2.
    unsorted_alert = _alert(
        escalation=[level_three, level_two],
        fired_at=_FIRED,
        state="firing",
        last_escalation_level=1,
    )

    chosen = next_escalation_step(unsorted_alert, _NOW)

    assert chosen is not None
    assert chosen.level == 2
Loading