From 8edce7324d1f1af4bd8f37fd1f0c853d8bbbb642 Mon Sep 17 00:00:00 2001 From: Test Date: Sat, 16 May 2026 18:44:59 -0500 Subject: [PATCH 1/2] =?UTF-8?q?feat(governor):=20Lane=20H=20PR-3c1=20?= =?UTF-8?q?=E2=80=94=20cascade=20evaluator=20pure=20function=20+=20Cascade?= =?UTF-8?q?Thresholds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per GENOME-FOUNDRY-SENTINEL #1327 Part 11 §'Adjustment Cascade'. Stacks on #1354 (PR-3b LocalSubstrateGovernor, MERGED). PR-3b ships LocalSubstrateGovernor that RECORDS pressure signals. This PR-3c1 ships the pure-function CASCADE EVALUATOR — given (current step, signal, thresholds), decide Advance/Retreat/Hold/ EmergencyAdvanceToMax. PR-3c2 wires the evaluator into on_pressure_signal to actually transition + rewrite policy. What ships in src/workers/continuum-core/src/governor/cascade.rs: - CASCADE_STEP_MIN (0) + CASCADE_STEP_MAX (5) — typed bounds - CascadeAction enum: Hold / Advance / Retreat / EmergencyAdvanceToMax (tagged-union with 'kind' discriminator, ts-rs export) - CascadeThresholds struct (ts-rs export): * spec_miss_rate_advance/retreat (default 0.5/0.3 per spec table) * inference_queue_depth_advance/retreat (16/8) * vram_used_pct_advance/retreat (85/70) * system_mem_used_pct_advance/retreat (85/70) * thermal_advance (Hot per spec) * battery_pct_advance/retreat/emergency (15/25/10) - evaluate_next_step(current_step, signal, thresholds) -> CascadeAction Pure fn. No I/O, no time, no globals. - apply_action(current_step, action) -> u8 Pure fn. Bounded to [MIN, MAX]; Advance caps at MAX; Retreat saturates at MIN; EmergencyAdvanceToMax jumps from any step. Emergency-priority semantics (per spec): - Thermal::Critical → EmergencyAdvanceToMax (protects hardware) - BatteryLow < emergency_pct (10%) → EmergencyAdvanceToMax (protects user) Both checked BEFORE step-specific evaluation; emergency beats all. 
Hysteresis (anti-oscillation): - Step 1 advance trigger: SpeculationMissRate > 0.5 - Step 1 retreat trigger: SpeculationMissRate < 0.3 - Gap (0.3 → 0.5) is Hold; prevents flapping around a single threshold - Same shape for VRAM (70/85), queue (8/16), mem (70/85), battery (15/25) Failure-mode discipline: - All thresholds typed + named (no magic floats/ints in call sites) - Pure function — same inputs → same output (testable + replayable) - UserActive signal participates but doesn't drive transitions in PR-3c1 (PR-3c2 may use it for user-foreground weighting) - Unmatched (step, signal) combos return Hold (do nothing > panic) Tests: 30 passing on cargo test --lib --features metal,accelerate governor::cascade:: Emergency (3): - Thermal Critical from any step → EmergencyAdvanceToMax - Battery < emergency_pct → EmergencyAdvanceToMax - Battery AT emergency_pct does NOT emergency (boundary, strict <) Step 0 → 1 advances (4): - spec miss high - spec miss at threshold (boundary) - inference queue high - VRAM high + VRAM at threshold Step 1 retreats (4): - spec miss low - spec miss in hysteresis gap holds - inference queue low - VRAM low Step 1 → 2 advances (2): - system mem high - thermal Hot (and Warm/Cool retreats) Step 2/3/4/5 transitions (4): - battery low → step 2 advances (NOT emergency) - step 2 mem-clear retreats - steps 3/4 battery-recovered retreats - step 5 ONLY Cool thermal retreats (strictest) UserActive informational (1): - UserActive holds at every step apply_action (4): - Hold keeps step - Advance bumps + caps at MAX - Retreat drops + saturates at MIN - EmergencyAdvanceToMax jumps from any step Determinism + serde (3): - evaluate is deterministic - CascadeAction tagged-union round-trips - CascadeThresholds defaults match spec table (drift catcher) Emergency priority over step evaluation (1) Plus 2 ts-rs export bindings (CascadeAction, CascadeThresholds). 
Stack:
- #1345 PR-1 governor-types (MERGED)
- #1350 PR-2 TOML loader (MERGED)
- #1352 PR-3a policy_selector (MERGED)
- #1354 PR-3b LocalSubstrateGovernor (MERGED)
- This PR (PR-3c1): cascade evaluator pure function
- Future PR-3c2: wire into LocalSubstrateGovernor::on_pressure_signal
  + apply_action_to_policy (rewrites tier_sizes / cadence /
  concurrency / speculation per cascade_step) + time-in-step gate
  (sustained-30s-before-advance rule)
- Future PR-3d: file watcher (notify crate)
- Future PR-4: PressureBroker → governor wiring

VDD evidence N/A — pure function. Evidence with PR-3c2 when actual
policy mutations land + with PR-4 when real pressure flows.
---
 .../continuum-core/src/governor/cascade.rs | 761 ++++++++++++++++++
 .../continuum-core/src/governor/mod.rs | 5 +
 2 files changed, 766 insertions(+)
 create mode 100644 src/workers/continuum-core/src/governor/cascade.rs

diff --git a/src/workers/continuum-core/src/governor/cascade.rs b/src/workers/continuum-core/src/governor/cascade.rs
new file mode 100644
index 000000000..fda3ca4ea
--- /dev/null
+++ b/src/workers/continuum-core/src/governor/cascade.rs
@@ -0,0 +1,761 @@
+//! Substrate governor cascade evaluator — Lane H PR-3c1 per
+//! GENOME-FOUNDRY-SENTINEL #1327 Part 11 §"Adjustment Cascade".
+//!
+//! PR-3b (#1354) shipped `LocalSubstrateGovernor` that RECORDS
+//! pressure signals. This PR-3c1 ships the pure-function CASCADE
+//! EVALUATOR — given (current cascade step, incoming signal,
+//! thresholds), decide whether to hold, advance, retreat, or jump to max.
+//!
+//! PR-3c2 wires this evaluator into `on_pressure_signal` to actually
+//! transition the governor's cascade_step + rewrite policy fields per
+//! the action.
+//!
+//! ## Cascade semantics (from spec)
+//!
+//! 6 steps, 0 = normal, 5 = max throttle. Each step has:
+//! - An **enter** condition (any signal can trigger advance)
+//! - An **exit** condition (ALL clear required to retreat — the
+//!   hysteresis that prevents oscillation)
+//! - A **time-in-step** requirement before further advance (slows
+//!   the cascade so brief spikes don't immediately escalate)
+//!
+//! ## Anti-oscillation: restore-speculation-one-step-later
+//!
+//! Spec rule: when retreating from step N → step N-1, the
+//! speculation level is restored ONE STEP LATER than the rest of the
+//! policy. Concretely: drop speculation on advance (step 1), restore
+//! on retreat (step 0 → step -1, which is a no-op). The "one step
+//! later" semantics: if pressure cleared at step 1, retreat to step 0
+//! but keep speculation throttled until the NEXT retreat opportunity.
+//! Since step 0 IS the lowest, the restoration happens "naturally" on
+//! the next pressure-clear evaluation that confirms sustained calm.
+//!
+//! This file ships the pure-function evaluator. PR-3c2 wires the
+//! `apply_action_to_policy` side-effect.
+//!
+//! ## Failure-mode discipline
+//!
+//! - All thresholds are typed + named (no magic floats / ints scattered
+//!   through call sites)
+//! - `evaluate_next_step` is pure — same inputs → same output. PR-3c2
+//!   tests the integration; PR-3c1 tests the rule.
+//! - No silent skip on unknown signal kinds — every variant of
+//!   `PressureSignal` participates in evaluation, even if some are
+//!   no-ops for the current step (`UserActive` doesn't trigger
+//!   advance, but the evaluator returns Hold rather than panic).
+
+use crate::governor::types::{PressureSignal, ThermalSeverity};
+use serde::{Deserialize, Serialize};
+use ts_rs::TS;
+
+/// Cascade step. 0 = normal operation; 1..=5 = increasing throttle.
+/// The spec enumerates 6 levels (0..=5); they are modeled as a plain
+/// `u8` (not an enum) so PR-3c2 can compare + bound check.
+///
+/// Why `u8` not enum: cascade arithmetic (step + 1, step - 1) is
+/// frequent; a u8 with `saturating_add`/`saturating_sub` is cleaner
+/// than 6 named match arms. The constants below name the canonical
+/// values for diagnostic readability.
+pub const CASCADE_STEP_MIN: u8 = 0;
+/// Highest cascade step: maximum throttle.
+pub const CASCADE_STEP_MAX: u8 = 5;
+
+/// Decision the cascade evaluator emits per signal. PR-3c2 wires
+/// these into the local governor's `on_pressure_signal` to actually
+/// rewrite the policy.
+// Wire shape: internally tagged on `kind` with camelCase variant names,
+// e.g. {"kind":"hold"} / {"kind":"emergencyAdvanceToMax"}.
+// The variants carry no payload, so the type stays `Copy` + `Eq`.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, TS, PartialEq, Eq)]
+#[serde(rename_all = "camelCase", tag = "kind")]
+#[ts(export, export_to = "../../../shared/generated/governor/CascadeAction.ts")]
+pub enum CascadeAction {
+    /// Keep the current step. The pressure signal didn't cross any
+    /// threshold (or didn't cross it for long enough).
+    Hold,
+    /// Advance one step toward higher throttle. Capped at
+    /// CASCADE_STEP_MAX — already-at-max returns Hold.
+    // The cap itself is applied by `apply_action`.
+    Advance,
+    /// Retreat one step toward normal. Capped at CASCADE_STEP_MIN —
+    /// already-at-min returns Hold.
+    // The floor itself is applied by `apply_action`.
+    Retreat,
+    /// Emergency advance to MAX immediately, skipping intermediate
+    /// steps. Per spec: thermal Critical + battery < 10% trigger this
+    /// to protect hardware/user.
+    // The 10% figure is the default `battery_pct_emergency`; the
+    // evaluator reads the configured threshold, not a literal.
+    EmergencyAdvanceToMax,
+}
+
+/// Tuneable thresholds for the cascade. Loaded from policy file in
+/// PR-3c2 (extends PolicyFile). For PR-3c1, callers pass typed values
+/// so the evaluator is testable with any threshold set.
+///
+/// Pinned to the values from the spec's §"Adjustment Cascade" table;
+/// callers may override per-policy (the spec's table is the default
+/// for the M-Air anchor + 5090 anchor).
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, TS, PartialEq)] +#[serde(rename_all = "camelCase")] +#[ts(export, export_to = "../../../shared/generated/governor/CascadeThresholds.ts")] +pub struct CascadeThresholds { + // Step 1: speculation miss + queue depth + VRAM + pub spec_miss_rate_advance: f32, // > → advance to step 1 + pub spec_miss_rate_retreat: f32, // < → retreat from step 1 + #[ts(type = "number")] + pub inference_queue_depth_advance: u32, // > → advance + #[ts(type = "number")] + pub inference_queue_depth_retreat: u32, // < → retreat + #[ts(type = "number")] + pub vram_used_pct_advance: u8, // > → advance + #[ts(type = "number")] + pub vram_used_pct_retreat: u8, // < → retreat + + // Step 2: system memory + thermal + #[ts(type = "number")] + pub system_mem_used_pct_advance: u8, + #[ts(type = "number")] + pub system_mem_used_pct_retreat: u8, + /// Thermal severity at or above which step 2 enters. Step 2's + /// other enter conditions are step 1 sustained + mem high. + pub thermal_advance: ThermalSeverity, + + // Step 3: battery + thermal critical + #[ts(type = "number")] + pub battery_pct_advance: u8, // < → advance to step 3 + #[ts(type = "number")] + pub battery_pct_retreat: u8, // > → retreat + /// Battery percentage that triggers EmergencyAdvanceToMax. Below + /// this, the cascade jumps straight to MAX regardless of current + /// step. Default 10% per spec. 
+ #[ts(type = "number")] + pub battery_pct_emergency: u8, +} + +impl Default for CascadeThresholds { + fn default() -> Self { + Self { + // Step 1 — spec table + spec_miss_rate_advance: 0.5, + spec_miss_rate_retreat: 0.3, + inference_queue_depth_advance: 16, + inference_queue_depth_retreat: 8, + vram_used_pct_advance: 85, + vram_used_pct_retreat: 70, + + // Step 2 — spec table + system_mem_used_pct_advance: 85, + system_mem_used_pct_retreat: 70, + thermal_advance: ThermalSeverity::Hot, + + // Step 3 — spec table + battery_pct_advance: 15, + battery_pct_retreat: 25, + battery_pct_emergency: 10, + } + } +} + +/// Evaluate the next cascade action given the current step + incoming +/// signal + thresholds. Pure function — no I/O, no time, no globals. +/// +/// PR-3c2 will add a `time_in_step_ms` parameter to enforce the +/// "step N must be active > 30s before advancing to step N+1" rule. +/// PR-3c1 evaluates the immediate-trigger conditions (signal exceeds +/// threshold) + leaves the time-based gate for the wiring layer. +/// +/// Returns: +/// - `EmergencyAdvanceToMax` for thermal Critical OR battery < emergency_pct +/// - `Advance` if the signal exceeds the advance threshold for the current step +/// - `Retreat` if the signal is below the retreat threshold (sustained-calm +/// logic lands in PR-3c2 via time_in_step) +/// - `Hold` otherwise +pub fn evaluate_next_step( + current_step: u8, + signal: &PressureSignal, + thresholds: &CascadeThresholds, +) -> CascadeAction { + // Emergency: thermal Critical OR battery below emergency floor. + // Skips intermediate steps; protects hardware/user. 
+ if let PressureSignal::Thermal { + severity: ThermalSeverity::Critical, + } = signal + { + return CascadeAction::EmergencyAdvanceToMax; + } + if let PressureSignal::BatteryLow { remaining_pct } = signal { + if *remaining_pct < thresholds.battery_pct_emergency { + return CascadeAction::EmergencyAdvanceToMax; + } + } + + // Per-step evaluation: each signal kind contributes to specific + // steps' enter/exit thresholds. + match (current_step, signal) { + // Step 0 (normal) — only advance triggers fire. + (0, PressureSignal::SpeculationMissRate { rate }) => { + if *rate > thresholds.spec_miss_rate_advance { + CascadeAction::Advance + } else { + CascadeAction::Hold + } + } + (0, PressureSignal::InferenceQueueDepth { depth }) => { + if *depth > thresholds.inference_queue_depth_advance { + CascadeAction::Advance + } else { + CascadeAction::Hold + } + } + (0, PressureSignal::VRAMHigh { used_pct }) => { + if *used_pct > thresholds.vram_used_pct_advance { + CascadeAction::Advance + } else { + CascadeAction::Hold + } + } + + // Step 1 — speculation throttled. Advance triggers from + // mem/thermal; retreat triggers from sustained-low signals. + (1, PressureSignal::SystemMemHigh { used_pct }) => { + if *used_pct > thresholds.system_mem_used_pct_advance { + CascadeAction::Advance + } else { + CascadeAction::Hold + } + } + (1, PressureSignal::Thermal { severity }) => { + if *severity >= thresholds.thermal_advance { + CascadeAction::Advance + } else if *severity <= ThermalSeverity::Warm { + // Cooling — may retreat IF other step-1 conditions also clear + // (PR-3c2 enforces the all-clear retreat rule via state) + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (1, PressureSignal::SpeculationMissRate { rate }) => { + // Sustained low miss rate → retreat. PR-3c2 enforces sustained-time. 
+ if *rate < thresholds.spec_miss_rate_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (1, PressureSignal::InferenceQueueDepth { depth }) => { + if *depth < thresholds.inference_queue_depth_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (1, PressureSignal::VRAMHigh { used_pct }) => { + if *used_pct < thresholds.vram_used_pct_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + + // Step 2 — personas + non-realtime deferred. Advance from + // battery low or sustained step-2 pressure; retreat on mem + // clear + thermal clear. + (2, PressureSignal::BatteryLow { remaining_pct }) => { + if *remaining_pct < thresholds.battery_pct_advance { + CascadeAction::Advance + } else { + CascadeAction::Hold + } + } + (2, PressureSignal::SystemMemHigh { used_pct }) => { + if *used_pct < thresholds.system_mem_used_pct_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (2, PressureSignal::Thermal { severity }) => { + if *severity <= ThermalSeverity::Warm { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + + // Step 3 — working-set L1/L2 shrunk + spill. Retreat from + // battery recovery + thermal clear. + (3, PressureSignal::BatteryLow { remaining_pct }) => { + if *remaining_pct > thresholds.battery_pct_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (3, PressureSignal::Thermal { severity }) => { + if *severity <= ThermalSeverity::Warm { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + + // Step 4 — federation pull slowed. Retreat when step 3 clears. 
+ (4, PressureSignal::BatteryLow { remaining_pct }) => { + if *remaining_pct > thresholds.battery_pct_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (4, PressureSignal::Thermal { severity }) => { + if *severity <= ThermalSeverity::Warm { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + + // Step 5 — consolidation suspended. Retreat on any major + // clear. PR-3c2 enforces the AND-all-clear rule via state. + (5, PressureSignal::Thermal { severity }) => { + if *severity == ThermalSeverity::Cool { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + (5, PressureSignal::BatteryLow { remaining_pct }) => { + if *remaining_pct > thresholds.battery_pct_retreat { + CascadeAction::Retreat + } else { + CascadeAction::Hold + } + } + + // UserActive is informational only — doesn't drive cascade + // step changes directly. PR-3c2 may use it to weight retreat + // (favor responsiveness when user is foreground), but for + // PR-3c1 it's a Hold. + (_, PressureSignal::UserActive { .. }) => CascadeAction::Hold, + + // Catch-all: any signal/step combo not explicitly handled is + // Hold. Future cascade-step + signal combos that need + // explicit handling get tests + match arms added; the default + // is "do nothing" rather than "panic." + _ => CascadeAction::Hold, + } +} + +/// Apply a CascadeAction to a current step value, returning the new +/// step (bounded to [CASCADE_STEP_MIN, CASCADE_STEP_MAX]). +/// +/// Pure function — separated from `evaluate_next_step` so PR-3c2 can +/// log the (action, old_step, new_step) tuple for telemetry without +/// the evaluator caring. 
+pub fn apply_action(current_step: u8, action: CascadeAction) -> u8 {
+    // Clamp the input first: `current_step` is a bare `u8`, so callers
+    // can hand us anything up to 255. Without the clamp, `Hold` and
+    // `Retreat` would return out-of-range steps, and `current_step + 1`
+    // would overflow at 255 (panic in debug builds; wrap to 0 — i.e.
+    // "no throttle" — in release builds).
+    let step = current_step.min(CASCADE_STEP_MAX);
+    match action {
+        CascadeAction::Hold => step,
+        CascadeAction::Advance => step.saturating_add(1).min(CASCADE_STEP_MAX),
+        // `saturating_sub` floors at 0, which is CASCADE_STEP_MIN.
+        CascadeAction::Retreat => step.saturating_sub(1),
+        CascadeAction::EmergencyAdvanceToMax => CASCADE_STEP_MAX,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn thresh() -> CascadeThresholds {
+        CascadeThresholds::default()
+    }
+
+    // ===== apply_action: out-of-range input =====
+
+    /// What this catches: a `current_step` outside [MIN, MAX] is
+    /// clamped instead of overflowing `u8` (Advance at 255) or leaking
+    /// an out-of-range step back to the caller (Hold / Retreat).
+    #[test]
+    fn apply_action_clamps_out_of_range_step() {
+        assert_eq!(apply_action(u8::MAX, CascadeAction::Advance), CASCADE_STEP_MAX);
+        assert_eq!(apply_action(u8::MAX, CascadeAction::Hold), CASCADE_STEP_MAX);
+        assert_eq!(
+            apply_action(u8::MAX, CascadeAction::Retreat),
+            CASCADE_STEP_MAX - 1
+        );
+    }
+
+    // ===== Emergency: thermal Critical + battery 0.5 at step 0
+    // NOTE(review): the section header above and the doc comment below
+    // look truncated in this copy of the patch — the commit message
+    // lists three emergency tests that do not appear here. Confirm
+    // against the original commit before relying on this section.
+    /// triggers Advance. Spec table row 1.
+    #[test]
+    fn spec_miss_high_at_step_0_advances() {
+        let action = evaluate_next_step(
+            0,
+            &PressureSignal::SpeculationMissRate { rate: 0.6 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Advance);
+    }
+
+    /// What this catches: speculation miss = 0.5 exactly doesn't advance
+    /// (strict > threshold). Boundary test.
+    #[test]
+    fn spec_miss_at_threshold_doesnt_advance() {
+        let action = evaluate_next_step(
+            0,
+            &PressureSignal::SpeculationMissRate { rate: 0.5 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Hold);
+    }
+
+    /// What this catches: inference queue depth > 16 triggers Advance.
+    #[test]
+    fn inference_queue_high_at_step_0_advances() {
+        let action = evaluate_next_step(
+            0,
+            &PressureSignal::InferenceQueueDepth { depth: 17 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Advance);
+    }
+
+    /// What this catches: VRAM > 85% triggers Advance.
+    #[test]
+    fn vram_high_at_step_0_advances() {
+        let action = evaluate_next_step(
+            0,
+            &PressureSignal::VRAMHigh { used_pct: 90 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Advance);
+    }
+
+    /// What this catches: VRAM at 85% (exactly threshold) does NOT
+    /// advance. Boundary.
+    #[test]
+    fn vram_at_threshold_doesnt_advance() {
+        let action = evaluate_next_step(
+            0,
+            &PressureSignal::VRAMHigh { used_pct: 85 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Hold);
+    }
+
+    // ===== Step 1 → Step 0 (retreat) =====
+
+    /// What this catches: speculation miss < 0.3 at step 1 triggers
+    /// Retreat. Hysteresis: advance was at 0.5, retreat at 0.3 — gap
+    /// prevents oscillation around a single threshold.
+    #[test]
+    fn spec_miss_low_at_step_1_retreats() {
+        let action = evaluate_next_step(
+            1,
+            &PressureSignal::SpeculationMissRate { rate: 0.2 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Retreat);
+    }
+
+    /// What this catches: speculation miss between retreat (0.3) and
+    /// advance (0.5) thresholds → Hold. The hysteresis gap.
+    ///
+    /// Both endpoints are included: retreat is a strict `<`, so a rate
+    /// of exactly 0.3 must still Hold, and step 1 has no spec-miss
+    /// advance rule, so exactly 0.5 must Hold too.
+    #[test]
+    fn spec_miss_in_hysteresis_gap_holds() {
+        for rate in &[0.30, 0.31, 0.40, 0.49, 0.50] {
+            let action = evaluate_next_step(
+                1,
+                &PressureSignal::SpeculationMissRate { rate: *rate },
+                &thresh(),
+            );
+            assert_eq!(action, CascadeAction::Hold, "rate {rate} should Hold in gap");
+        }
+    }
+
+    /// What this catches: inference queue < 8 at step 1 retreats.
+    #[test]
+    fn inference_queue_low_at_step_1_retreats() {
+        let action = evaluate_next_step(
+            1,
+            &PressureSignal::InferenceQueueDepth { depth: 5 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Retreat);
+    }
+
+    /// What this catches: VRAM < 70 at step 1 retreats.
+    #[test]
+    fn vram_low_at_step_1_retreats() {
+        let action = evaluate_next_step(
+            1,
+            &PressureSignal::VRAMHigh { used_pct: 60 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Retreat);
+    }
+
+    // ===== Step 1 → Step 2 (advance on mem + thermal) =====
+
+    /// What this catches: system mem > 85 at step 1 advances to step 2.
+    /// Spec table row 2.
+    #[test]
+    fn system_mem_high_at_step_1_advances() {
+        let action = evaluate_next_step(
+            1,
+            &PressureSignal::SystemMemHigh { used_pct: 90 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Advance);
+    }
+
+    /// What this catches: thermal Hot at step 1 advances to step 2.
+    #[test]
+    fn thermal_hot_at_step_1_advances() {
+        let action = evaluate_next_step(
+            1,
+            &PressureSignal::Thermal {
+                severity: ThermalSeverity::Hot,
+            },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Advance);
+    }
+
+    /// What this catches: thermal Warm or Cool at step 1 → Retreat
+    /// (cascade can step down when thermal clears).
+    #[test]
+    fn thermal_warm_at_step_1_retreats() {
+        // Relies on the default `thermal_advance` of Hot: both values
+        // here miss the advance check and hit the `<= Warm` retreat.
+        for severity in &[ThermalSeverity::Warm, ThermalSeverity::Cool] {
+            let action = evaluate_next_step(
+                1,
+                &PressureSignal::Thermal {
+                    severity: *severity,
+                },
+                &thresh(),
+            );
+            assert_eq!(action, CascadeAction::Retreat, "severity={severity:?} should retreat");
+        }
+    }
+
+    // ===== Step 2 → Step 3 (advance on battery low) =====
+
+    /// What this catches: battery < 15% at step 2 advances to step 3
+    /// (NOT emergency — emergency is < 10%).
+    #[test]
+    fn battery_low_at_step_2_advances_not_emergency() {
+        let action = evaluate_next_step(
+            2,
+            &PressureSignal::BatteryLow { remaining_pct: 12 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Advance);
+    }
+
+    /// What this catches: step 2 retreats on mem-clear.
+    #[test]
+    fn step_2_retreats_on_mem_clear() {
+        let action = evaluate_next_step(
+            2,
+            &PressureSignal::SystemMemHigh { used_pct: 60 },
+            &thresh(),
+        );
+        assert_eq!(action, CascadeAction::Retreat);
+    }
+
+    // ===== Step 3, 4, 5 — battery + thermal retreat paths =====
+
+    /// What this catches: battery > 25% at steps 3/4 retreats.
+    #[test]
+    fn battery_recovered_at_steps_3_and_4_retreats() {
+        for step in &[3, 4] {
+            let action = evaluate_next_step(
+                *step,
+                &PressureSignal::BatteryLow { remaining_pct: 30 },
+                &thresh(),
+            );
+            assert_eq!(action, CascadeAction::Retreat, "step={step} should retreat");
+        }
+    }
+
+    /// What this catches: at step 5 (max throttle), only Cool thermal
+    /// retreats; Warm or Hot Holds. Strictest retreat condition.
+    #[test]
+    fn step_5_only_cool_thermal_retreats() {
+        let cool = evaluate_next_step(
+            5,
+            &PressureSignal::Thermal {
+                severity: ThermalSeverity::Cool,
+            },
+            &thresh(),
+        );
+        assert_eq!(cool, CascadeAction::Retreat);
+
+        // Critical is deliberately absent: it takes the emergency path
+        // before any step-5 rule is consulted.
+        for non_cool in &[ThermalSeverity::Warm, ThermalSeverity::Hot] {
+            let action = evaluate_next_step(
+                5,
+                &PressureSignal::Thermal {
+                    severity: *non_cool,
+                },
+                &thresh(),
+            );
+            assert_eq!(action, CascadeAction::Hold, "severity={non_cool:?} at max step holds");
+        }
+    }
+
+    // ===== UserActive informational only =====
+
+    /// What this catches: UserActive doesn't drive cascade transitions
+    /// in PR-3c1 (signal exists for PR-3c2's user-foreground weighting
+    /// but doesn't fire enter/exit).
+    #[test]
+    fn user_active_holds_at_every_step() {
+        for step in 0..=CASCADE_STEP_MAX {
+            for foreground in [true, false] {
+                let action = evaluate_next_step(
+                    step,
+                    &PressureSignal::UserActive { foreground },
+                    &thresh(),
+                );
+                assert_eq!(
+                    action,
+                    CascadeAction::Hold,
+                    "step={step} foreground={foreground} should Hold"
+                );
+            }
+        }
+    }
+
+    // ===== apply_action =====
+
+    /// What this catches: Hold doesn't move the step.
+    #[test]
+    fn apply_hold_keeps_step() {
+        for step in 0..=CASCADE_STEP_MAX {
+            assert_eq!(apply_action(step, CascadeAction::Hold), step);
+        }
+    }
+
+    /// What this catches: Advance bumps by 1, capped at MAX.
+    #[test]
+    fn apply_advance_bumps_one_capped_at_max() {
+        assert_eq!(apply_action(0, CascadeAction::Advance), 1);
+        assert_eq!(apply_action(3, CascadeAction::Advance), 4);
+        assert_eq!(apply_action(CASCADE_STEP_MAX, CascadeAction::Advance), CASCADE_STEP_MAX);
+    }
+
+    /// What this catches: Retreat drops by 1, saturated at MIN.
+    #[test]
+    fn apply_retreat_drops_one_saturated_at_min() {
+        assert_eq!(apply_action(5, CascadeAction::Retreat), 4);
+        assert_eq!(apply_action(1, CascadeAction::Retreat), 0);
+        assert_eq!(apply_action(0, CascadeAction::Retreat), 0);
+    }
+
+    /// What this catches: EmergencyAdvanceToMax jumps from any step
+    /// to MAX in one operation.
+    #[test]
+    fn apply_emergency_advances_to_max_from_any_step() {
+        for step in 0..=CASCADE_STEP_MAX {
+            assert_eq!(
+                apply_action(step, CascadeAction::EmergencyAdvanceToMax),
+                CASCADE_STEP_MAX,
+                "step={step} should jump to MAX"
+            );
+        }
+    }
+
+    // ===== Determinism + serde =====
+
+    /// What this catches: pure-function determinism. Same inputs →
+    /// same output. PR-3c2 can rely on this for the wire-replay path.
+    #[test]
+    fn evaluate_is_deterministic() {
+        let signal = PressureSignal::SpeculationMissRate { rate: 0.7 };
+        let a = evaluate_next_step(0, &signal, &thresh());
+        let b = evaluate_next_step(0, &signal, &thresh());
+        assert_eq!(a, b);
+    }
+
+    /// What this catches: CascadeAction tagged-union round-trips with
+    /// `kind` discriminator. PR-3c2 emits these via the trace bus +
+    /// the wire shape must round-trip cleanly for replay/inspection.
+ #[test] + fn cascade_action_tagged_union_round_trips() { + let actions = vec![ + CascadeAction::Hold, + CascadeAction::Advance, + CascadeAction::Retreat, + CascadeAction::EmergencyAdvanceToMax, + ]; + for a in &actions { + let j = serde_json::to_string(a).unwrap(); + let back: CascadeAction = serde_json::from_str(&j).unwrap(); + assert_eq!(*a, back); + assert!(j.contains("\"kind\":\""), "tag missing: {j}"); + } + } + + /// What this catches: CascadeThresholds default values match the + /// spec's §"Adjustment Cascade" table. If anyone tunes defaults + /// without updating the spec, this test catches the drift. + #[test] + fn cascade_thresholds_defaults_match_spec_table() { + let t = CascadeThresholds::default(); + // Spec row 1 + assert_eq!(t.spec_miss_rate_advance, 0.5); + assert_eq!(t.spec_miss_rate_retreat, 0.3); + assert_eq!(t.vram_used_pct_advance, 85); + assert_eq!(t.vram_used_pct_retreat, 70); + // Spec row 2 + assert_eq!(t.system_mem_used_pct_advance, 85); + assert_eq!(t.system_mem_used_pct_retreat, 70); + assert_eq!(t.thermal_advance, ThermalSeverity::Hot); + // Spec row 3 + assert_eq!(t.battery_pct_advance, 15); + assert_eq!(t.battery_pct_retreat, 25); + assert_eq!(t.battery_pct_emergency, 10); + } + + /// What this catches: emergency signals beat all other path + /// evaluations. Even at step 0, thermal Critical jumps to MAX — + /// no "first match wins" with a quieter step-0 path. + #[test] + fn emergency_signals_priority_over_step_evaluation() { + let action = evaluate_next_step( + 0, + &PressureSignal::Thermal { + severity: ThermalSeverity::Critical, + }, + &thresh(), + ); + assert_eq!(action, CascadeAction::EmergencyAdvanceToMax); + } +} diff --git a/src/workers/continuum-core/src/governor/mod.rs b/src/workers/continuum-core/src/governor/mod.rs index def93c00f..fb73cf26f 100644 --- a/src/workers/continuum-core/src/governor/mod.rs +++ b/src/workers/continuum-core/src/governor/mod.rs @@ -7,11 +7,16 @@ //! 
from `inference_capability::hw_probe` (PIECE-5 PR-3 #1335) to //! `HardwareClass`. +pub mod cascade; pub mod local; pub mod policy_file; pub mod policy_selector; pub mod types; +pub use cascade::{ + apply_action, evaluate_next_step, CascadeAction, CascadeThresholds, CASCADE_STEP_MAX, + CASCADE_STEP_MIN, +}; pub use local::LocalSubstrateGovernor; pub use policy_file::{ into_governor_policy, load_policy_file, parse_policy_text, PolicyFile, PolicyFileError, From 09ce4b032cf4c95a8bdfa5329cb7adcc1ad18e62 Mon Sep 17 00:00:00 2001 From: Test Date: Sat, 16 May 2026 18:46:57 -0500 Subject: [PATCH 2/2] feat(governor,PR-3c1): regenerate ts-rs bindings + barrel for CascadeAction + CascadeThresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the prior PR-3c1 commit that landed cascade.rs but missed the auto-generated TS bindings + barrel update (worktree race during the multi-tab collision moment — bindings generated by the cargo test run but were still untracked when the commit was staged). Adds CascadeAction.ts + CascadeThresholds.ts + index.ts entry. No Rust changes. --- .../generated/governor/CascadeAction.ts | 8 +++++++ .../generated/governor/CascadeThresholds.ts | 24 +++++++++++++++++++ src/shared/generated/governor/index.ts | 2 ++ 3 files changed, 34 insertions(+) create mode 100644 src/shared/generated/governor/CascadeAction.ts create mode 100644 src/shared/generated/governor/CascadeThresholds.ts diff --git a/src/shared/generated/governor/CascadeAction.ts b/src/shared/generated/governor/CascadeAction.ts new file mode 100644 index 000000000..c9cfc2fc0 --- /dev/null +++ b/src/shared/generated/governor/CascadeAction.ts @@ -0,0 +1,8 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Decision the cascade evaluator emits per signal. PR-3c2 wires + * these into the local governor's `on_pressure_signal` to actually + * rewrite the policy. 
+ */ +export type CascadeAction = { "kind": "hold" } | { "kind": "advance" } | { "kind": "retreat" } | { "kind": "emergencyAdvanceToMax" }; diff --git a/src/shared/generated/governor/CascadeThresholds.ts b/src/shared/generated/governor/CascadeThresholds.ts new file mode 100644 index 000000000..8bbb39e2e --- /dev/null +++ b/src/shared/generated/governor/CascadeThresholds.ts @@ -0,0 +1,24 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ThermalSeverity } from "./ThermalSeverity"; + +/** + * Tuneable thresholds for the cascade. Loaded from policy file in + * PR-3c2 (extends PolicyFile). For PR-3c1, callers pass typed values + * so the evaluator is testable with any threshold set. + * + * Pinned to the values from the spec's §"Adjustment Cascade" table; + * callers may override per-policy (the spec's table is the default + * for the M-Air anchor + 5090 anchor). + */ +export type CascadeThresholds = { specMissRateAdvance: number, specMissRateRetreat: number, inferenceQueueDepthAdvance: number, inferenceQueueDepthRetreat: number, vramUsedPctAdvance: number, vramUsedPctRetreat: number, systemMemUsedPctAdvance: number, systemMemUsedPctRetreat: number, +/** + * Thermal severity at or above which step 2 enters. Step 2's + * other enter conditions are step 1 sustained + mem high. + */ +thermalAdvance: ThermalSeverity, batteryPctAdvance: number, batteryPctRetreat: number, +/** + * Battery percentage that triggers EmergencyAdvanceToMax. Below + * this, the cascade jumps straight to MAX regardless of current + * step. Default 10% per spec. 
+ */ +batteryPctEmergency: number, }; diff --git a/src/shared/generated/governor/index.ts b/src/shared/generated/governor/index.ts index 2f8a4a71a..991d321f1 100644 --- a/src/shared/generated/governor/index.ts +++ b/src/shared/generated/governor/index.ts @@ -3,6 +3,8 @@ // Re-generate: cargo test --lib --features metal,accelerate governor:: export type { CadenceMultipliers } from './CadenceMultipliers'; +export type { CascadeAction } from './CascadeAction'; +export type { CascadeThresholds } from './CascadeThresholds'; export type { ConcurrencyCaps } from './ConcurrencyCaps'; export type { ConsolidationSchedule } from './ConsolidationSchedule'; export type { FederationCadence } from './FederationCadence';