diff --git a/Cargo.lock b/Cargo.lock index 743cfe1206..e84810ede5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1557,6 +1557,7 @@ dependencies = [ "linkerd-app-core", "linkerd-app-test", "linkerd-distribute", + "linkerd-ewma", "linkerd-http-box", "linkerd-http-classify", "linkerd-http-prom", @@ -1564,6 +1565,7 @@ dependencies = [ "linkerd-http-route", "linkerd-identity", "linkerd-io", + "linkerd-load-biaser", "linkerd-meshtls", "linkerd-mock-http-body", "linkerd-opaq-route", @@ -2693,9 +2695,9 @@ dependencies = [ [[package]] name = "linkerd2-proxy-api" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9e3b341ca4992feaf43a4d2bdbfe2081aa3e2b9a503753544ce55242af6342" +checksum = "e0cd682795e8f91ea36e2131a2f7021da136c74f6d3cd3d9eabbf7842511042d" dependencies = [ "h2", "http", diff --git a/Cargo.toml b/Cargo.toml index bc2dfc2e95..3993cafc1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -137,7 +137,7 @@ default-features = false features = ["tokio", "tracing"] [workspace.dependencies.linkerd2-proxy-api] -version = "0.18.0" +version = "0.19.0" [workspace.dependencies.rand] version = "0.9" diff --git a/linkerd/app/core/src/classify.rs b/linkerd/app/core/src/classify.rs index 68e3d74a0e..249b120b35 100644 --- a/linkerd/app/core/src/classify.rs +++ b/linkerd/app/core/src/classify.rs @@ -202,7 +202,7 @@ impl classify::ClassifyEos for Eos { } } -fn grpc_code(hdrs: &http::HeaderMap) -> Option { +pub fn grpc_code(hdrs: &http::HeaderMap) -> Option { hdrs.get("grpc-status") .and_then(|v| v.to_str().ok()) .and_then(|s| s.parse::().ok()) diff --git a/linkerd/app/integration/src/policy.rs b/linkerd/app/integration/src/policy.rs index 6b2faa6d76..b3a31cb5ca 100644 --- a/linkerd/app/integration/src/policy.rs +++ b/linkerd/app/integration/src/policy.rs @@ -205,6 +205,7 @@ pub fn backend(dst: impl ToString) -> outbound::Backend { default_rtt: Some(Duration::from_millis(30).try_into().unwrap()), decay: Some(Duration::from_secs(10).try_into().unwrap()), })), + ejection: None, })), } } diff --git a/linkerd/app/outbound/Cargo.toml b/linkerd/app/outbound/Cargo.toml index 08b7739971..599adf70df 100644 --- a/linkerd/app/outbound/Cargo.toml +++ b/linkerd/app/outbound/Cargo.toml @@ -35,6 +35,8 @@ tower = { workspace = true, features = ["util"] } tracing = { workspace = true } linkerd-app-core = { path = "../core" } +linkerd-ewma = { path = "../../ewma" } +linkerd-load-biaser = { path = "../../load-biaser" } linkerd-app-test = { path = "../test", optional = true } linkerd-distribute = { path = "../../distribute" } linkerd-http-classify = { path = "../../http/classify" } diff --git a/linkerd/app/outbound/src/discover.rs b/linkerd/app/outbound/src/discover.rs index 54693d5f5c..e8cfffdf43 100644 --- a/linkerd/app/outbound/src/discover.rs +++ b/linkerd/app/outbound/src/discover.rs @@ -277,11 +277,15 @@ fn policy_for_backend( timeout, http1: policy::http::Http1 { routes: routes.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, http2: policy::http::Http2 { routes, - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, opaque, }; diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index 302f5f7f2d..9790fac923 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -1,51 +1,154 @@ +//! Circuit breaker infrastructure for HTTP endpoints. +//! +//! This module implements a unified circuit breaker with two complementary failure +//! tracking mechanisms: consecutive failures and success rate (EWMA-based). +//! +//! The first trips the breaker after N consecutive failures (typically 5xx statuses). +//! It uses exponential backoff before allowing probe requests. +//! +//! The second trips when the success rate drops below a threshold. It treats both 5xx +//! and 429 as failures to enable rate limiting awareness. +//! +//! Both mechanisms are tracked by a single [`UnifiedBreaker`] policy, in which any +//! one of both conditions can trip the circuit. +//! +//! ## Recovery +//! +//! During probation, a probe request must be _non-5xx AND non-429_ to succeed. +//! This ensures the endpoint isn't still rate limiting before reopening. +//! +//! ## Retry-After Integration +//! +//! When a 429 response includes a `Retry-After` header, the duration is captured and used +//! to extend the circuit breaker's backoff period. +//! +//! ## Cold-start +//! +//! Cold-start protection applies to the success rate mechanism. +//! +//! ## Usage +//! +//! The breaker is integrated into the load balancer's endpoint stack via [`NewRetryAfterGateSet`], +//! which replaces the standard `NewClassifyGateSet` to add Retry-After extraction. + use linkerd_app_core::{classify, proxy::http::classify::gate, svc}; use linkerd_proxy_client_policy::FailureAccrual; + +use tokio::time::Duration; use tracing::{trace_span, Instrument}; -mod consecutive_failures; +pub mod retry_after; +mod unified; +pub mod wrap_classify; + +#[cfg(test)] +mod integration_tests; + +pub use self::retry_after::{GrpcRetryPushbackStore, RetryAfterStore}; +use self::unified::{UnifiedBreaker, UnifiedBreakerConfig}; +pub use self::wrap_classify::{HasFailureAccrual, NewRetryAfterGateSet, RetryAfterGateParams}; -use self::consecutive_failures::ConsecutiveFailures; +/// Reason why the circuit breaker tripped. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TripReason { + /// Tripped due to N consecutive 5xx failures. + ConsecutiveFailures, + /// Tripped due to EWMA success rate dropping below threshold. + LowSuccessRate, +} + +/// Default EWMA decay window (used when success_rate is configured but has no explicit decay). +const DEFAULT_SUCCESS_RATE_DECAY: Duration = Duration::from_secs(10); /// Params configuring a circuit breaker stack. -#[derive(Copy, Clone, Debug)] +/// +/// Retry-After stores move hints from response classification to the +/// circuit breaker backoff logic. A new pair is built for each endpoint and +/// shared between that endpoint's classifier and its [`UnifiedBreaker`], so a +/// hint seen on one endpoint never extends the backoff of another. +#[derive(Clone, Debug)] pub(crate) struct Params { - pub(crate) accrual: FailureAccrual, + pub(crate) accrual: Option, pub(crate) channel_capacity: usize, + /// Shared store for HTTP Retry-After hints. + pub(crate) retry_after_store: RetryAfterStore, + /// Shared store for gRPC retry pushback hints. + pub(crate) grpc_retry_pushback_store: GrpcRetryPushbackStore, + /// Maximum Retry-After duration the proxy will honor (clamping cap). + pub(crate) max_duration: Duration, } impl svc::ExtractParam, T> for Params { fn extract_param(&self, _: &T) -> gate::Params { // Create a channel so that we can receive response summaries and // control the gate. - let (prms, gate, rsps) = gate::Params::channel(self.channel_capacity); + let (prms, gate_tx, rsps) = gate::Params::channel(self.channel_capacity); match self.accrual { - FailureAccrual::None => { + None => { // No failure accrual for this target; construct a gate // that will never close. tracing::trace!("No failure accrual policy enabled."); prms } - FailureAccrual::ConsecutiveFailures { - max_failures, - backoff, - } => { - tracing::trace!( - max_failures, - backoff = ?backoff, - "Using consecutive-failures failure accrual policy.", - ); + Some(ref accrual) => { + // When success_rate is Some, use configured EWMA policy values. + // When None (consecutive-only mode), disable EWMA by using + // threshold 0.0 (rate >= 0 always, so never trips) and + // min_requests usize::MAX (cold-start never passes). + let (success_rate_threshold, success_rate_decay, min_request_threshold) = + if let Some(sr) = accrual.success_rate { + if accrual.consecutive.max_failures == 0 { + tracing::trace!( + success_rate_threshold = %sr.threshold, + min_requests = sr.min_requests, + backoff = ?accrual.consecutive.backoff, + "Using circuit breaker with consecutive failures disabled and success rate tracking.", + ); + } else { + tracing::trace!( + max_failures = accrual.consecutive.max_failures, + backoff = ?accrual.consecutive.backoff, + success_rate_threshold = %sr.threshold, + min_requests = sr.min_requests, + "Using unified circuit breaker with consecutive failures and success rate tracking.", + ); + } + (sr.threshold, sr.decay, sr.min_requests as usize) + } else { + if accrual.consecutive.max_failures == 0 { + tracing::trace!( + backoff = ?accrual.consecutive.backoff, + "Using circuit breaker with consecutive failures disabled.", + ); + } else { + tracing::trace!( + max_failures = accrual.consecutive.max_failures, + backoff = ?accrual.consecutive.backoff, + "Using circuit breaker with consecutive failures only.", + ); + } + (0.0_f64, DEFAULT_SUCCESS_RATE_DECAY, usize::MAX) + }; + + // Spawn the unified breaker that handles both policies + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: accrual.consecutive.max_failures, + threshold: success_rate_threshold, + decay: success_rate_decay, + backoff: accrual.consecutive.backoff, + min_requests: min_request_threshold, + gate: gate_tx, + rsps, + retry_after_store: self.retry_after_store.clone(), + grpc_retry_pushback_store: self.grpc_retry_pushback_store.clone(), + max_duration: self.max_duration, + }); - // 1. If the configured number of consecutive failures are encountered, - // shut the gate. - // 2. After an ejection timeout, open the gate so that 1 request can be processed. - // 3. If that request succeeds, open the gate. If it fails, increase the - // ejection timeout and repeat. - let breaker = ConsecutiveFailures::new(max_failures, backoff, gate, rsps); tokio::spawn( breaker .run() - .instrument(trace_span!("consecutive_failures").or_current()), + .instrument(trace_span!("unified_breaker").or_current()), ); prms diff --git a/linkerd/app/outbound/src/http/breaker/consecutive_failures.rs b/linkerd/app/outbound/src/http/breaker/consecutive_failures.rs deleted file mode 100644 index cc980ed405..0000000000 --- a/linkerd/app/outbound/src/http/breaker/consecutive_failures.rs +++ /dev/null @@ -1,328 +0,0 @@ -use futures::stream::StreamExt; -use linkerd_app_core::{classify, exp_backoff::ExponentialBackoff, proxy::http::classify::gate}; -use tokio::sync::mpsc; - -pub struct ConsecutiveFailures { - max_failures: usize, - backoff: ExponentialBackoff, - gate: gate::Tx, - rsps: mpsc::Receiver, -} - -impl ConsecutiveFailures { - pub fn new( - max_failures: usize, - backoff: ExponentialBackoff, - gate: gate::Tx, - rsps: mpsc::Receiver, - ) -> Self { - Self { - max_failures, - backoff, - gate, - rsps, - } - } - - pub(super) async fn run(mut self) { - loop { - if self.open().await.is_err() { - return; - } - - tracing::info!("Consecutive failure-accrual breaker closed"); - if self.closed().await.is_err() { - return; - } - - tracing::info!("Consecutive failure-accrual breaker reopened"); - } - } - - /// Keep the breaker open until `max_failures` consecutive failures are - /// observed. - async fn open(&mut self) -> Result<(), ()> { - tracing::debug!("Open"); - self.gate.open().map_err(|_| ())?; - let mut failures = 0; - loop { - let class = tokio::select! { - rsp = self.rsps.recv() => rsp.ok_or(())?, - _ = self.gate.lost() => return Err(()), - }; - - tracing::trace!(?class, %failures, "Response"); - if class.is_success() { - failures = 0; - } else { - failures += 1; - if failures == self.max_failures { - return Ok(()); - } - } - } - } - - /// Keep the breaker closed for at least the initial backoff, and then, - /// once the timeout expires, go into probation to admit a single request - /// before reverting to the open state or continuing in the shut state. - async fn closed(&mut self) -> Result<(), ()> { - let mut backoff = self.backoff.stream(); - loop { - // The breaker is shut now. Wait until we can open it again. - tracing::debug!(backoff = ?backoff.duration(), "Shut"); - self.gate.shut().map_err(|_| ())?; - - loop { - tokio::select! { - _ = backoff.next() => break, - // Ignore responses while the breaker is shut, but - // terminate if the channel is closed. recv() on a - // closed channel returns None immediately, which would - // otherwise busy-spin this loop. - rsp = self.rsps.recv() => { rsp.ok_or(())?; }, - _ = self.gate.lost() => return Err(()), - } - } - - let class = self.probation().await?; - tracing::trace!(?class, "Response"); - if class.is_success() { - // Open! - return Ok(()); - } - } - } - - /// Wait for a response to determine whether the breaker should be opened. - async fn probation(&mut self) -> Result { - tracing::debug!("Probation"); - let _sem = self.gate.limit(1).map_err(|_| ())?; - tokio::select! { - rsp = self.rsps.recv() => rsp.ok_or(()), - _ = self.gate.lost() => Err(()), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use linkerd_app_core::{ - proxy::http::{self, classify::BroadcastClassification, BoxBody}, - svc::{self, ServiceExt}, - Error, - }; - use tokio::{task::yield_now, time}; - use tokio_test::{assert_pending, task}; - - #[tokio::test(flavor = "current_thread", start_paused = true)] - async fn transitions() { - let _trace = linkerd_tracing::test::trace_init(); - - let (mut params, gate, rsps) = gate::Params::channel(1); - let send = |res: Result| { - params - .responses - .try_send(classify::Class::Http(res)) - .unwrap() - }; - - let backoff = ExponentialBackoff::try_new( - time::Duration::from_secs(1), - time::Duration::from_secs(100), - // Don't jitter backoffs to ensure tests are deterministic. - 0.0, - ) - .expect("backoff params are valid"); - let breaker = ConsecutiveFailures::new(2, backoff, gate, rsps); - let mut task = task::spawn(breaker.run()); - - // Start open and failing. - send(Err(http::StatusCode::BAD_GATEWAY)); - assert_pending!(task.poll()); - assert!(params.gate.is_open()); - - send(Err(http::StatusCode::BAD_GATEWAY)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // After two failures, the breaker is closed. - time::sleep(time::Duration::from_millis(500)).await; - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // It remains closed until the init ejection backoff elapses. Then it's - // limited to a single request. - time::sleep(time::Duration::from_millis(500)).await; - assert_pending!(task.poll()); - - // hold the permit to prevent the breaker from opening - match params.gate.state() { - gate::State::Open => panic!("still open"), - gate::State::Shut => panic!("still shut"), - gate::State::Limited(sem) => { - assert_eq!(sem.available_permits(), 1); - params - .gate - .opened_for_test() - .await - .expect("permit should be acquired") - // The `Gate` service would forget this permit when called, so - // we must do the same here explicitly. - .forget(); - assert_eq!(sem.available_permits(), 0); - } - }; - - // The first request in probation fails, so the breaker remains closed. - send(Err(http::StatusCode::BAD_GATEWAY)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // The breaker goes into closed for 2s now... - time::sleep(time::Duration::from_secs(1)).await; - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // If some straggling responses are observed while the breaker is - // closed, they are ignored. - send(Ok(http::StatusCode::OK)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - // After that timeout elapses, we go into probation again. - time::sleep(time::Duration::from_secs(1)).await; - assert_pending!(task.poll()); - - match params.gate.state() { - gate::State::Open => panic!("still open"), - gate::State::Shut => panic!("still shut"), - gate::State::Limited(sem) => { - assert_eq!(sem.available_permits(), 1); - params - .gate - .opened_for_test() - .await - .expect("permit should be acquired") - // The `Gate` service would forget this permit when called, so - // we must do the same here explicitly. - .forget(); - assert_eq!(sem.available_permits(), 0); - } - } - - // And then a single success takes us back into the open state! - send(Ok(http::StatusCode::OK)); - assert_pending!(task.poll()); - assert!(params.gate.is_open()); - } - - #[tokio::test(flavor = "current_thread", start_paused = true)] - async fn closed_state_terminates_on_channel_close() { - let _trace = linkerd_tracing::test::trace_init(); - - let (params, gate, rsps) = gate::Params::channel(1); - let send = |res: Result| { - params - .responses - .try_send(classify::Class::Http(res)) - .unwrap() - }; - - let backoff = ExponentialBackoff::try_new( - time::Duration::from_secs(1), - time::Duration::from_secs(100), - 0.0, - ) - .expect("backoff params are valid"); - - // max_failures=1: a single failure trips the breaker into closed state. - let breaker = ConsecutiveFailures::new(1, backoff, gate, rsps); - let mut task = task::spawn(breaker.run()); - - assert_pending!(task.poll()); - assert!(params.gate.is_open()); - - // Trip the breaker into the closed state. - send(Err(http::StatusCode::INTERNAL_SERVER_ERROR)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut(), "should be in closed state"); - - // Drop the response sender while in the closed state drain loop. - // The breaker must terminate rather than busy-spinning on None from - // the closed channel. - drop(params.responses); - assert!(task.poll().is_ready()); - } - - /// This test uses `tokio::spawn` (real task) instead of - /// `tokio_test::task::spawn` because the Gate + BroadcastClassification - /// service stack requires an actual task context for `oneshot()` to drive - /// response classification through the mpsc channel. - #[tokio::test(flavor = "current_thread", start_paused = true)] - async fn closed_state_terminates_when_service_drops_but_gate_rx_survives() { - let _trace = linkerd_tracing::test::trace_init(); - - let (params, gate, rsps) = gate::Params::channel(1); - // Keep an extra gate receiver alive to model a future stack change that - // retains gate state after the endpoint service itself is dropped. - let leaked_gate = params.gate.clone(); - - let backoff = ExponentialBackoff::try_new( - time::Duration::from_secs(1), - time::Duration::from_secs(100), - 0.0, - ) - .expect("backoff params are valid"); - - let breaker = ConsecutiveFailures::new(1, backoff, gate, rsps); - let breaker = tokio::spawn(breaker.run()); - - let svc = svc::Gate::new( - params.gate, - BroadcastClassification::::new( - params.responses, - svc::mk(|_: http::Request| async move { - Ok::<_, Error>( - http::Response::builder() - .status(http::StatusCode::INTERNAL_SERVER_ERROR) - .body(BoxBody::default()) - .unwrap(), - ) - }), - ), - ); - - let req = http::Request::builder() - .extension(classify::Response::default()) - .body(BoxBody::default()) - .unwrap(); - let rsp = svc.oneshot(req).await.expect("service must succeed"); - drop(rsp); - - for _ in 0..16 { - if leaked_gate.is_shut() { - break; - } - yield_now().await; - } - assert!( - leaked_gate.is_shut(), - "breaker should enter the closed state" - ); - - for _ in 0..16 { - if breaker.is_finished() { - break; - } - yield_now().await; - } - - assert!( - breaker.is_finished(), - "breaker should terminate when the response channel closes, \ - even if a gate::Rx clone survives" - ); - breaker.await.expect("breaker task must not panic"); - } -} diff --git a/linkerd/app/outbound/src/http/breaker/integration_tests.rs b/linkerd/app/outbound/src/http/breaker/integration_tests.rs new file mode 100644 index 0000000000..fe30cf1b8c --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/integration_tests.rs @@ -0,0 +1,814 @@ +//! Integration tests for the unified circuit breaker. +//! +//! Tests cover specific breaker behaviors: SR-only trip, gRPC classification, +//! Retry-After hint extension, and backwards-compatibility regression checks. + +use super::*; + +use linkerd_app_core::{exp_backoff::ExponentialBackoff, svc::ExtractParam}; +use linkerd_proxy_client_policy::DEFAULT_RETRY_AFTER_MAX_DURATION; +use std::time::Duration; +use tokio::time; +use tokio_test::{assert_pending, task}; +use tonic; + +// Sends a probe-success classification directly to the breaker's mpsc, +// bypassing Gate/MockService. The caller should first check is_limited() +// to confirm the breaker is in probation (not draining in backoff). +fn send_probe_success(params: &gate::Params) { + params + .responses + .try_send(classify::Class::Http(Ok(http::StatusCode::OK))) + .expect("try_send failed -- bump gate channel capacity"); +} + +fn make_backoff() -> ExponentialBackoff { + ExponentialBackoff::try_new( + Duration::from_secs(1), + Duration::from_secs(100), + 0.0, // no jitter for deterministic tests + ) + .expect("backoff params are valid") +} + +fn send_class(params: &gate::Params, class: classify::Class) { + params.responses.try_send(class).unwrap(); +} + +fn send_ok(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Ok(status))); +} + +fn send_err(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Err(status))); +} + +fn send_grpc(params: &gate::Params, code: tonic::Code) { + let class = if matches!( + code, + tonic::Code::Unknown + | tonic::Code::DeadlineExceeded + | tonic::Code::PermissionDenied + | tonic::Code::Internal + | tonic::Code::Unavailable + | tonic::Code::DataLoss + ) { + classify::Class::Grpc(Err(code)) + } else { + classify::Class::Grpc(Ok(code)) + }; + send_class(params, class); +} + +fn default_test_config( + gate_tx: gate::Tx, + rsps: tokio::sync::mpsc::Receiver, +) -> UnifiedBreakerConfig { + UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.8, + decay: Duration::from_secs(10), + backoff: make_backoff(), + min_requests: 1, + gate: gate_tx, + rsps, + retry_after_store: RetryAfterStore::new(), + grpc_retry_pushback_store: GrpcRetryPushbackStore::new(), + max_duration: DEFAULT_RETRY_AFTER_MAX_DURATION, + } +} + +// Verifies that the breaker trips via success rate alone when `max_failures=0` +// disables the consecutive-failure policy. The `should_trip` function skips the +// consecutive check when max_failures is zero, so only EWMA degradation can trip. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn sr_only_trip_with_consecutive_disabled() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + + tokio::spawn( + UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, // consecutive failures disabled + threshold: 0.5, // trip when SR drops below 50% + min_requests: 1, // cold-start passes after 1 request + ..default_test_config(gate_tx, rsps) + }) + .run(), + ); + + // Advance past t=0 so the EWMA accepts add() calls (skips when ts <= timestamp). + time::sleep(Duration::from_millis(1)).await; + assert!(params.gate.is_open(), "gate should start open"); + + // One success to satisfy min_requests and seed the EWMA. + send_ok(¶ms, http::StatusCode::OK); + time::advance(Duration::from_secs(1)).await; + + // 429s degrade the EWMA without touching consecutive failures (max_failures=0 + // means the counter is never updated). With decay=10s and 1s spacing, + // we need about 7 samples to drop below 0.5. + for _ in 0..8 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(Duration::from_secs(1)).await; + } + + assert!( + params.gate.is_shut(), + "breaker should trip via SR when consecutive failures are disabled", + ); + + // Wait for backoff (1s base + safety margin). With tokio::spawn, the breaker + // runs on the runtime and enters probation when the backoff timer fires. + time::advance(Duration::from_secs(2)).await; + + for _ in 0..5 { + if params.gate.is_limited() { + break; + } + time::advance(Duration::from_millis(100)).await; + } + assert!( + params.gate.is_limited(), + "gate should enter probation after backoff", + ); + + // Probe success via the channel (same pattern as send_probe_success). + send_probe_success(¶ms); + time::advance(Duration::from_millis(100)).await; + + assert!( + params.gate.is_open(), + "gate should reopen after probe success" + ); +} + +// Exercises gRPC response classification through the breaker: +// - Grpc(Err(_)) is a configured failure (increments consecutive counter AND degrades EWMA) +// - Grpc(Ok(ResourceExhausted)) is a rate failure (degrades EWMA) but does +// NOT increment consecutive failures +// - Grpc(Ok(Ok)) is a success (resets consecutive counter) +// +// The negative assertion on consecutive-failure count after ResourceExhausted is +// the key distinction tested here. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn grpc_classification_through_breaker() { + let _trace = linkerd_tracing::test::trace_init(); + + // Part A: ResourceExhausted must NOT trip via consecutive failures + + let (params_a, gate_tx_a, rsps_a) = gate::Params::channel(1); + + let breaker_a = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 1, // trip after just 1 consecutive configured failure + threshold: 0.0, // SR disabled (threshold 0.0 never trips) + min_requests: usize::MAX, + ..default_test_config(gate_tx_a, rsps_a) + }); + let mut task_a = task::spawn(breaker_a.run()); + + assert_pending!(task_a.poll()); + + // Send 5 ResourceExhausted responses. Because ResourceExhausted is classified + // as Grpc(Ok(ResourceExhausted)), it is not a configured failure. The + // consecutive counter stays at 0 and cannot trip even with max_failures=1. + for _ in 0..5 { + send_grpc(¶ms_a, tonic::Code::ResourceExhausted); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_a.poll()); + } + + assert!( + params_a.gate.is_open(), + "ResourceExhausted must not trip consecutive failures (counter should stay at 0)", + ); + + // Part B: gRPC configured failures trip consecutive failures + + let (params_b, gate_tx_b, rsps_b) = gate::Params::channel(1); + + let breaker_b = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + threshold: 0.0, // SR disabled + min_requests: usize::MAX, + ..default_test_config(gate_tx_b, rsps_b) + }); + let mut task_b = task::spawn(breaker_b.run()); + + assert_pending!(task_b.poll()); + + // Unavailable is Grpc(Err(Unavailable)), counting as failure + send_grpc(¶ms_b, tonic::Code::Unavailable); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_b.poll()); + + send_grpc(¶ms_b, tonic::Code::Unavailable); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_b.poll()); + + assert!( + params_b.gate.is_shut(), + "Grpc(Err(Unavailable)) should trip consecutive failures at max_failures=2", + ); + + // Part C: Ok resets the consecutive counter + + let (params_c, gate_tx_c, rsps_c) = gate::Params::channel(1); + + let breaker_c = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + threshold: 0.0, + min_requests: usize::MAX, + ..default_test_config(gate_tx_c, rsps_c) + }); + let mut task_c = task::spawn(breaker_c.run()); + + assert_pending!(task_c.poll()); + + // 1 failure, then Ok resets, then 1 failure again. Should not trip. + send_grpc(¶ms_c, tonic::Code::Internal); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_c.poll()); + + send_grpc(¶ms_c, tonic::Code::Ok); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_c.poll()); + + send_grpc(¶ms_c, tonic::Code::Internal); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_c.poll()); + + assert!( + params_c.gate.is_open(), + "Grpc(Ok(Ok)) should reset consecutive counter, preventing trip at max_failures=2", + ); + + // Part D: ResourceExhausted degrades SR EWMA + + let (params_d, gate_tx_d, rsps_d) = gate::Params::channel(1); + + let breaker_d = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // consecutive won't trip + threshold: 0.5, + min_requests: 1, + ..default_test_config(gate_tx_d, rsps_d) + }); + let mut task_d = task::spawn(breaker_d.run()); + + assert_pending!(task_d.poll()); + + // Seed with one success. + send_grpc(¶ms_d, tonic::Code::Ok); + time::advance(Duration::from_secs(1)).await; + assert_pending!(task_d.poll()); + + // ResourceExhausted pushes EWMA below threshold. With decay=10s and 1s + // spacing, alpha ~ 0.095 per sample. Need ~7 zeros to drop below 0.5. + for _ in 0..8 { + send_grpc(¶ms_d, tonic::Code::ResourceExhausted); + time::advance(Duration::from_secs(1)).await; + assert_pending!(task_d.poll()); + } + + assert!( + !params_d.gate.is_open(), + "ResourceExhausted should degrade SR EWMA and trip on low success rate", + ); +} + +// Check that a Retry-After hint recorded in the store extends the breaker's +// backoff duration beyond the normal exponential backoff. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn retry_after_hint_extends_backoff_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let ra_store = RetryAfterStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a 5s Retry-After hint before the trip. The hint must be present + // in the store when the breaker enters closed state and calls + // take_combined_hint() on its first backoff iteration. In production the + // classifier records the hint from the Retry-After header before the + // breaker loop processes the tripping response. + ra_store.record(Duration::from_secs(5)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Advance 4s (hint minus 1s). The normal 1s exponential backoff would have + // expired, but the 5s hint keeps the gate shut. This is the important + // check. Without the hint, probation would arrive at ~1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s (hint is 5s, backoff is 1s)", + ); + + // Advance past the 5s hint boundary. The hint elapses and probation begins. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "gate should enter probation after the hint duration expires", + ); +} + +// Check that a gRPC pushback hint recorded in the store extends the breaker's +// backoff duration. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn grpc_pushback_hint_extends_backoff_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a 5s gRPC pushback hint before the trip. The hint must be + // present in the store when the breaker enters closed state and calls + // take_combined_hint() on its first backoff iteration. + grpc_store.record(Duration::from_secs(5)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Advance 4s (hint minus 1s). The normal 1s exponential backoff would + // have expired, but the 5s gRPC pushback hint keeps the gate shut. This is + // the important check. Without the hint, probation would arrive at + // ~1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s (gRPC hint is 5s, backoff is 1s)", + ); + + // Advance past the 5s hint boundary. The hint elapses and probation begins. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "gate should enter probation after the gRPC hint duration expires", + ); +} + +// Check that when both HTTP Retry-After and gRPC pushback hints are +// present, the breaker uses the maximum of the two durations for backoff +// extension. With http=3s and grpc=7s, the gate stays shut until 7s. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn combined_http_and_grpc_hints_max_wins_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record both hints before the trip. take_combined_hint() returns + // max(http, grpc) = max(3s, 7s) = 7s. + ra_store.record(Duration::from_secs(3)); + grpc_store.record(Duration::from_secs(7)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // At 3s the HTTP hint has expired, but the gRPC 7s hint keeps the gate + // shut because take_combined_hint returned max(3s, 7s) = 7s. + time::sleep(Duration::from_secs(3)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 3s (gRPC hint is 7s)", + ); + + // At 6s total we're still within the 7s gRPC hint window + time::sleep(Duration::from_secs(3)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 6s (gRPC hint is 7s)", + ); + + // At 7s total the gRPC hint expires and the gate enters probation + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation after max(3s, 7s) = 7s hint expires", + ); +} + +// When both stores hold hints longer than the configured maximum, the breaker +// clamps each hint to that maximum before taking the larger of the two. With +// both hints above the cap, the backoff floor is the cap itself, so the gate +// enters probation when the cap elapses rather than when the raw hints would. +// This pins take_combined_hint's clamp-then-max behavior end-to-end. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn combined_hints_clamp_to_max_duration_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Both hints exceed the cap. Each is clamped to max_duration, so the + // combined floor is max(cap, cap) = cap, not the raw 600s. + let over_cap = DEFAULT_RETRY_AFTER_MAX_DURATION + Duration::from_secs(300); + ra_store.record(over_cap); + grpc_store.record(over_cap); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Just short of the cap the gate is still shut. The clamped floor has not + // elapsed yet. + time::sleep(DEFAULT_RETRY_AFTER_MAX_DURATION - Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut until the clamped max_duration elapses", + ); + + // Past the cap the gate enters probation. If either raw 600s hint had + // survived unclamped, the gate would still be shut here. + time::sleep(Duration::from_secs(2)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation once max_duration elapses, proving both hints were clamped", + ); +} + +// In consecutive-only mode (success_rate=None), a 429 response during probation +// is evaluated using `class.is_success()` (the old code behavior), not the +// stricter 429-as-failure check used in dual mode. This preserves backwards +// compatibility for people that do not enable success rate tracking. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn consecutive_only_429_does_not_change_probe_behavior() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + // Consecutive-only mode: success_rate disabled via threshold=0.0 and + // min_requests=usize::MAX (the sentinel values used when the annotation + // has no success_rate configuration). + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + threshold: 0.0, + min_requests: usize::MAX, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip via 2 consecutive 5xx. + for _ in 0..2 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 2x 5xx"); + + // Wait for backoff to enter probation. + time::sleep(Duration::from_secs(2)).await; + assert_pending!(task.poll()); + + for _ in 0..5 { + if params.gate.is_limited() { + break; + } + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!( + params.gate.is_limited(), + "gate should be in probation (Limited)", + ); + + // Send a 429 as the probe response. In consecutive-only mode 429 is + // evaluated by class.is_success() which treats 429 as success (it is not + // a server error). This is the old consecutive-failures breaker behavior. + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + // The breaker should reopen because the consecutive-only probe check + // delegates to class.is_success(), and 429 is_success() == true. + assert!( + params.gate.is_open(), + "consecutive-only mode: 429 during probation should be treated as success \ + (delegates to class.is_success()), preserving old breaker semantics", + ); +} + +// The breaker reads only the stores it holds. A hint recorded into a different +// store never reaches it, so the backoff stays on the base schedule. The gate +// set builds a separate store pair per endpoint, so one breaker cannot read a +// hint meant for another. +// +// The test models this directly. A hint goes into an unrelated store while the +// breaker holds the empty stores from default_test_config. The breaker trips +// and reopens on the base 1s backoff, proving the recorded hint was never read. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn hint_in_unheld_store_is_ignored() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + // The breaker keeps the empty stores from default_test_config. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a long hint into a store the breaker does not hold. With the + // classifier separated from the breaker's stores, this hint cannot + // change the backoff. + let unread_store = RetryAfterStore::new(); + unread_store.record(Duration::from_secs(10)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Advance just past the 1s base backoff. If the 10s hint had reached the + // breaker, the gate would still be shut. Because it did not, probation + // arrives on the base interval. + time::sleep(Duration::from_millis(1050)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "gate should enter probation on the base 1s backoff when no hint reaches the breaker", + ); + + // The unrelated store still holds its hint. The breaker never took it. + // take() adjusts the returned duration for elapsed time, so the exact value + // is not pinned here. What matters is that the slot is still occupied. + assert!( + unread_store.take(Duration::from_secs(60)).is_some(), + "the hint must remain in the unread store, untouched by the breaker", + ); +} + +// When failure_accrual is None (disabled), the ExtractParam impl does +// not spawn a breaker task. The gate stays open permanently with no +// state transitions. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn failure_accrual_none_produces_no_breaker_task() { + let _trace = linkerd_tracing::test::trace_init(); + + let params_struct = Params { + accrual: None, + channel_capacity: 1, + retry_after_store: RetryAfterStore::new(), + grpc_retry_pushback_store: GrpcRetryPushbackStore::new(), + max_duration: DEFAULT_RETRY_AFTER_MAX_DURATION, + }; + + let gate_params: gate::Params = params_struct.extract_param(&()); + + // The gate should start open. + assert!( + gate_params.gate.is_open(), + "gate should be open when failure_accrual is None", + ); + + // The response channel receiver was dropped because no breaker task was + // spawned. try_send() returns an error because nobody is listening. + // The expected behavior is the channel sender exists in the gate params + // (for BroadcastClassification to use), but sends are silently dropped. + let send_result = gate_params.responses.try_send(classify::Class::Http(Err( + http::StatusCode::INTERNAL_SERVER_ERROR, + ))); + assert!( + send_result.is_err(), + "try_send should fail because no breaker task consumes the receiver", + ); + + // Give the runtime a chance to process any spawned tasks. + time::sleep(Duration::from_millis(100)).await; + + // The gate must still be open. No breaker task was spawned to shut it. + assert!( + gate_params.gate.is_open(), + "gate should remain permanently open when failure_accrual is None \ + (no breaker task spawned to close it)", + ); +} + +// Drives a real 429 with a Retry-After header through the classifier and on +// into the breaker's backoff, end to end. The classifier records into the exact +// RetryAfterStore the breaker drains, so a mismatch between the two store +// instances would show up here as a base-interval backoff instead of the +// extended one. The header value, not a pre-seeded store entry, sets the floor. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn retry_after_header_extends_backoff_end_to_end() { + use super::retry_after::RetryAfterClassify; + use linkerd_http_classify::ClassifyResponse; + use linkerd_proxy_client_policy::http::StatusRanges; + + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + // The breaker drains the very stores the classifier writes to. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Build an actual 429 with `Retry-After: 5` and run it through the + // classifier bound to the breaker's stores. The classifier parses the + // header and records 5s into ra_store. + let classifier = RetryAfterClassify::new( + classify::Response::Http(StatusRanges::default()), + ra_store.clone(), + grpc_store.clone(), + ); + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("5"), + ); + let _eos = classifier.start(&rsp); + + // Trip via 3 consecutive 5xx. The hint is already in the store when the + // breaker enters its backoff and calls take_combined_hint(). + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // At 4s the base 1s backoff has long passed, but the 5s header hint (less + // the ~300ms consumed while tripping) keeps the gate shut. This is the + // important check. Without the header reaching the breaker's store, + // probation would have arrived at ~1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s because the Retry-After header extended the backoff", + ); + + // Past the hint window the gate enters probation. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation once the header-derived hint elapses", + ); +} + +// The gRPC counterpart. A real 200 OK whose RESOURCE_EXHAUSTED trailer holds +// `grpc-retry-pushback-ms` runs through the classifier's trailer handling into +// the breaker's gRPC store. As with the HTTP case, the classifier and breaker +// share one store instance, so this catches a wiring split that the +// store-injecting tests cannot. The trailer pushback, not a seeded value, drives +// the backoff. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn grpc_pushback_trailer_extends_backoff_end_to_end() { + use super::retry_after::RetryAfterClassify; + use linkerd_http_classify::{ClassifyEos, ClassifyResponse}; + use linkerd_proxy_client_policy::grpc::Codes; + + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // A 200 OK with no gRPC status header opens a streaming gRPC response. The + // RESOURCE_EXHAUSTED status and pushback arrive in the trailers, which the + // classifier parses into the breaker's gRPC store. + let classifier = RetryAfterClassify::new( + classify::Response::Grpc(Codes::default()), + ra_store.clone(), + grpc_store.clone(), + ); + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::OK; + rsp.headers_mut().insert( + http::header::CONTENT_TYPE, + http::HeaderValue::from_static("application/grpc"), + ); + let eos = classifier.start(&rsp); + + let mut trailers = http::HeaderMap::new(); + trailers.insert("grpc-status", http::HeaderValue::from_static("8")); + trailers.insert( + "grpc-retry-pushback-ms", + http::HeaderValue::from_static("5000"), + ); + let class = eos.eos(Some(&trailers)); + assert_eq!( + class, + classify::Class::Grpc(Ok(tonic::Code::ResourceExhausted)), + "trailer classification should yield RESOURCE_EXHAUSTED", + ); + + // Trip via 3 consecutive 5xx. The 5s pushback is in the gRPC store before + // the breaker reaches its backoff. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // At 4s the gate is still shut because the 5s trailer pushback (less the + // ~300ms tripping window) extended the backoff beyond the base 1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s because the gRPC pushback trailer extended the backoff", + ); + + // Past the pushback window the gate enters probation. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation once the trailer-derived pushback elapses", + ); +} diff --git a/linkerd/app/outbound/src/http/breaker/retry_after.rs b/linkerd/app/outbound/src/http/breaker/retry_after.rs new file mode 100644 index 0000000000..39d8ca3f78 --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/retry_after.rs @@ -0,0 +1,875 @@ +//! Retry-After and grpc-retry-pushback-ms parsing for rate limiting responses. +//! +//! Parses backoff hints from HTTP 429/503 and gRPC RESOURCE_EXHAUSTED responses +//! into [`RetryAfterStore`] and [`GrpcRetryPushbackStore`]. Both use a +//! max-value-wins strategy (longest hint kept). The circuit breaker queries +//! both via `UnifiedBreaker::take_combined_hint()` and uses the maximum. +//! +//! Parsing is delegated to [`linkerd_http_classify::retry_after`]. +//! +//! ```text +//! HTTP 429/503 Response +//! | +//! v +//! RetryAfterClassify::start() +//! | +//! +-> parse_retry_after() -> RetryAfterStore::record() +//! | +//! +-> gated_parse_grpc_retry_pushback() (from headers, for unary gRPC) +//! | +//! v +//! GrpcRetryPushbackStore::record() +//! +//! gRPC RESOURCE_EXHAUSTED (streaming) +//! | +//! v +//! GrpcRetryPushbackClassifyEos::eos(trailers) +//! | +//! v +//! gated_parse_grpc_retry_pushback() -> GrpcRetryPushbackStore::record() +//! +//! Circuit Breaker Backoff +//! | +//! v +//! UnifiedBreaker::take_combined_hint() +//! | +//! +-> RetryAfterStore::take() +//! +-> GrpcRetryPushbackStore::take() +//! | +//! v +//! max(http_hint, grpc_hint) +//! ``` + +use http::HeaderMap; +use linkerd_app_core::classify::{self, grpc_code}; +use linkerd_http_classify::{ClassifyEos, ClassifyResponse}; +use parking_lot::Mutex; +use std::sync::Arc; +use std::time::Duration; +use tokio::time::Instant; +use tonic::Code; + +/// Shared store for duration hints, timestamped to detect staleness. +#[derive(Clone, Debug)] +pub struct DurationHintStore { + name: &'static str, + inner: Arc>>, +} + +impl DurationHintStore { + /// Create a new empty store with the given name for logging. + pub fn new(name: &'static str) -> Self { + Self { + name, + inner: Arc::new(Mutex::new(None)), + } + } + + /// Record a duration hint (max value wins, only updates if longer). + pub fn record(&self, duration: Duration) { + let mut guard = self.inner.lock(); + let should_update = match *guard { + None => true, + Some((_, existing)) => duration > existing, + }; + + if should_update { + *guard = Some((Instant::now(), duration)); + tracing::debug!( + ?duration, + hint_type = %self.name, + "Recorded hint (max value wins)", + ); + } else { + tracing::debug!( + ?duration, + existing = ?guard.as_ref().map(|(_, d)| d), + hint_type = %self.name, + "Ignoring smaller hint", + ); + } + } + + /// Take the stored hint if it exists and was recorded recently. + /// + /// The hint is consumed (set to None) to prevent reusing the same hint + /// for multiple backoff cycles. The `max_age` parameter controls how + /// old a hint can be before it's considered stale. + /// + /// Stale hints are discarded without being consumed: we inspect the + /// timestamp first, then only `take()` if the hint is recent. This + /// prevents a race where a stale hint is consumed (clearing the store) + /// when a new hint arrives. + pub fn take(&self, max_age: Duration) -> Option { + let mut guard = self.inner.lock(); + if let Some(&(recorded_at, duration)) = guard.as_ref() { + // Monotonic clock: saturating_duration_since returns zero on backwards drift + let elapsed = Instant::now().saturating_duration_since(recorded_at); + if elapsed <= max_age { + // Elapsed time consumed the entire hint. + if elapsed >= duration { + *guard = None; + tracing::debug!( + ?duration, + ?elapsed, + hint_type = %self.name, + "Hint fully elapsed, discarding", + ); + return None; + } + + // Subtract elapsed time to obtain remaining wait. + let adjusted = duration.saturating_sub(elapsed); + + *guard = None; + tracing::debug!( + ?duration, + ?elapsed, + ?adjusted, + hint_type = %self.name, + "Using hint (adjusted for elapsed time)", + ); + return Some(adjusted); + } + tracing::debug!( + ?duration, + ?elapsed, + hint_type = %self.name, + "Hint too old, discarding", + ); + + // Stale. Discard explicitly without consuming for use + *guard = None; + } + None + } +} + +/// A shared store for Retry-After hints. +/// +/// This is used to pass Retry-After hints from the response path to the circuit +/// breaker. Wraps [`DurationHintStore`] with the "Retry-After" name for logging. +#[derive(Clone, Debug)] +pub struct RetryAfterStore(DurationHintStore); + +impl Default for RetryAfterStore { + fn default() -> Self { + Self(DurationHintStore::new("Retry-After")) + } +} + +impl RetryAfterStore { + /// Create a new empty store. + pub fn new() -> Self { + Self::default() + } + + /// Record a Retry-After hint from a 429 response. + pub fn record(&self, duration: Duration) { + self.0.record(duration) + } + + /// Take the stored hint if it exists and was recorded recently. + pub fn take(&self, max_age: Duration) -> Option { + self.0.take(max_age) + } +} + +/// A shared store for gRPC retry pushback hints. +/// +/// This is used to pass `grpc-retry-pushback-ms` hints from the response path +/// to the circuit breaker. Wraps [`DurationHintStore`] with the +/// "grpc-retry-pushback-ms" name for logging. +#[derive(Clone, Debug)] +pub struct GrpcRetryPushbackStore(DurationHintStore); + +impl Default for GrpcRetryPushbackStore { + fn default() -> Self { + Self(DurationHintStore::new("grpc-retry-pushback-ms")) + } +} + +impl GrpcRetryPushbackStore { + /// Create a new empty store. + pub fn new() -> Self { + Self::default() + } + + /// Record a grpc-retry-pushback-ms hint from a RESOURCE_EXHAUSTED response. + pub fn record(&self, duration: Duration) { + self.0.record(duration) + } + + /// Take the stored hint if it exists and was recorded recently. + pub fn take(&self, max_age: Duration) -> Option { + self.0.take(max_age) + } +} + +/// Parse grpc-retry-pushback-ms from headers or trailers if code is RESOURCE_EXHAUSTED. +/// +/// Delegates to the shared parser in [`linkerd_http_classify::retry_after`], +/// but only when `grpc_code` is RESOURCE_EXHAUSTED (code 8). +/// +/// `max` caps the parsed duration (typically `max_duration` from config). +/// +/// Returns `None` for non-RESOURCE_EXHAUSTED responses or any parse failure. +fn gated_parse_grpc_retry_pushback( + grpc_code: Option, + headers_or_trailers: &HeaderMap, + max: Duration, +) -> Option { + if grpc_code != Some(Code::ResourceExhausted) { + return None; + } + linkerd_http_classify::retry_after::parse_grpc_retry_pushback(headers_or_trailers, max) +} + +/// A ClassifyEos wrapper that records grpc-retry-pushback-ms hints from trailers. +/// +/// This wraps any [`ClassifyEos`] implementation and intercepts the `eos()` +/// call to parse `grpc-retry-pushback-ms` trailers from RESOURCE_EXHAUSTED +/// responses. The parsed duration is recorded in a [`GrpcRetryPushbackStore`] +/// for use by the circuit breaker's backoff logic. +/// +/// The wrapper delegates all classification logic to the inner classifier unchanged. +#[derive(Clone, Debug)] +pub struct GrpcRetryPushbackClassifyEos { + inner: E, + store: GrpcRetryPushbackStore, + max_duration: Duration, +} + +impl GrpcRetryPushbackClassifyEos { + /// Create a new wrapper around the given ClassifyEos. + pub fn new(inner: E, store: GrpcRetryPushbackStore, max_duration: Duration) -> Self { + Self { + inner, + store, + max_duration, + } + } +} + +impl ClassifyEos for GrpcRetryPushbackClassifyEos +where + E: ClassifyEos, +{ + type Class = classify::Class; + + fn eos(self, trailers: Option<&HeaderMap>) -> Self::Class { + // Get classification from inner first + let class = self.inner.eos(trailers); + + // If we have trailers, check for grpc-retry-pushback-ms + if let Some(trls) = trailers { + // Extract gRPC code from the classification result + let grpc_code = match &class { + classify::Class::Grpc(Ok(code)) | classify::Class::Grpc(Err(code)) => Some(*code), + _ => None, + }; + + if let Some(duration) = + gated_parse_grpc_retry_pushback(grpc_code, trls, self.max_duration) + { + self.store.record(duration); + } + } + + class + } + + fn error(self, error: &linkerd_app_core::Error) -> Self::Class { + // Delegate error classification unchanged + self.inner.error(error) + } +} + +// === RetryAfterClassify === + +/// A classifier wrapper that records retry hints from both HTTP and gRPC responses. +/// +/// This wraps any [`ClassifyResponse`] implementation and intercepts the `start()` +/// call to: +/// 1. Parse `Retry-After` headers from HTTP 429 responses +/// 2. Parse `grpc-retry-pushback-ms` from gRPC headers (for unary failures) +/// 3. Wrap the inner `ClassifyEos` with [`GrpcRetryPushbackClassifyEos`] to +/// also parse gRPC trailers +/// +/// The parsed durations are recorded in their respective stores for use by the +/// circuit breaker's backoff logic. +#[derive(Clone, Debug)] +pub struct RetryAfterClassify { + inner: C, + http_store: RetryAfterStore, + grpc_store: GrpcRetryPushbackStore, + max_duration: Duration, +} + +impl RetryAfterClassify { + /// Test-only. Production code uses [`Self::new_with_max`]. + /// + /// Create a new wrapper around the given classifier. + #[cfg(test)] + pub fn new(inner: C, http_store: RetryAfterStore, grpc_store: GrpcRetryPushbackStore) -> Self { + Self { + inner, + http_store, + grpc_store, + max_duration: Duration::from_secs(300), + } + } + + /// Create a new wrapper with a custom max Retry-After cap. + pub fn new_with_max( + inner: C, + http_store: RetryAfterStore, + grpc_store: GrpcRetryPushbackStore, + max_duration: Duration, + ) -> Self { + Self { + inner, + http_store, + grpc_store, + max_duration, + } + } +} + +impl ClassifyResponse for RetryAfterClassify +where + C: ClassifyResponse, +{ + type Class = classify::Class; + type ClassifyEos = GrpcRetryPushbackClassifyEos; + + fn start(self, rsp: &http::Response) -> Self::ClassifyEos { + // Record only a genuine HTTP Retry-After header into the HTTP store. + // gRPC pushback is recorded on its own into its store below, so the + // two hint sources stay distinct and the "Retry-After" store never holds + // a value that did not come from a Retry-After header. + let max = self.max_duration; + if let Some(duration) = + linkerd_http_classify::retry_after::parse_retry_after(rsp.status(), rsp.headers(), max) + { + self.http_store.record(duration); + } + + // gRPC headers (grpc-status, grpc-retry-pushback-ms) are only meaningful + // when HTTP status is 200 OK. Non-200 responses are never gRPC. + if rsp.status() == http::StatusCode::OK { + let grpc_code = grpc_code(rsp.headers()); + if let Some(duration) = gated_parse_grpc_retry_pushback(grpc_code, rsp.headers(), max) { + self.grpc_store.record(duration); + } + } + + // Wrap the inner ClassifyEos with GrpcRetryPushbackClassifyEos to + // also parse gRPC trailers when the body ends. + let inner_eos = self.inner.start(rsp); + GrpcRetryPushbackClassifyEos::new(inner_eos, self.grpc_store, max) + } + + fn error(self, error: &linkerd_app_core::Error) -> Self::Class { + // Delegate error classification unchanged. + self.inner.error(error) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use http::HeaderValue; + use linkerd_proxy_client_policy::http::StatusRanges; + + /// A simplified local model of max-value-wins combination, used to keep the + /// store-level unit tests self-contained. Unlike the breaker's + /// `UnifiedBreaker::take_combined_hint`, it does not clamp either input to a + /// maximum duration. The clamp is pinned end-to-end against the real code in + /// the breaker integration tests. + fn combine_hints(http: Option, grpc: Option) -> Option { + match (http, grpc) { + (Some(h), Some(g)) => Some(h.max(g)), + (h @ Some(_), None) => h, + (None, g @ Some(_)) => g, + (None, None) => None, + } + } + + // === RetryAfterStore tests === + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_max_value_wins() { + let store = RetryAfterStore::new(); + + // Record a 10-second hint + store.record(Duration::from_secs(10)); + + // Try to record a smaller hint (5 seconds) - should be ignored + store.record(Duration::from_secs(5)); + + // The stored value should still be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_larger_value_updates() { + let store = RetryAfterStore::new(); + + // Record a 5-second hint + store.record(Duration::from_secs(5)); + + // Record a larger hint (10 seconds) - should update + store.record(Duration::from_secs(10)); + + // The stored value should be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_empty_accepts_any() { + let store = RetryAfterStore::new(); + + // Empty store should accept any value + store.record(Duration::from_secs(5)); + + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(5))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_take_clears() { + let store = RetryAfterStore::new(); + + store.record(Duration::from_secs(10)); + + // First take should succeed + let hint1 = store.take(Duration::from_secs(1)); + assert_eq!(hint1, Some(Duration::from_secs(10))); + + // Second take should return None (consumed) + let hint2 = store.take(Duration::from_secs(1)); + assert_eq!(hint2, None); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_after_take_accepts_new() { + let store = RetryAfterStore::new(); + + // Record and take + store.record(Duration::from_secs(10)); + let _ = store.take(Duration::from_secs(1)); + + // Now any new value should be accepted (store is empty) + store.record(Duration::from_secs(5)); + + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(5))); + } + + #[test] + fn parse_grpc_pushback_positive() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("5000")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, Some(Duration::from_millis(5000))); + } + + #[test] + fn parse_grpc_pushback_zero() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("0")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, Some(Duration::ZERO)); + } + + #[test] + fn parse_grpc_pushback_negative_ignored() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("-1")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + // Negative values mean "do not retry" - we ignore them + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_non_resource_exhausted_ignored() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("5000")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::Ok), &headers, max); + assert_eq!(result, None); + + let result = gated_parse_grpc_retry_pushback(Some(Code::Internal), &headers, max); + assert_eq!(result, None); + + let result = gated_parse_grpc_retry_pushback(Some(Code::Unavailable), &headers, max); + assert_eq!(result, None); + + // None (no code) + let result = gated_parse_grpc_retry_pushback(None, &headers, max); + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_caps_at_max() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + // 999999999 ms should be capped at max (300s) + headers.insert( + "grpc-retry-pushback-ms", + HeaderValue::from_static("999999999"), + ); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, Some(max)); + } + + #[test] + fn parse_grpc_pushback_invalid_format() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("abc")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_missing_header() { + let max = Duration::from_secs(300); + let headers = HeaderMap::new(); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_empty_value() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, None); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_store_max_value_wins() { + let store = GrpcRetryPushbackStore::new(); + + // Record a 10-second hint + store.record(Duration::from_secs(10)); + + // Try to record a smaller hint (5 seconds) - should be ignored + store.record(Duration::from_secs(5)); + + // The stored value should still be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_store_larger_value_updates() { + let store = GrpcRetryPushbackStore::new(); + + // Record a 5-second hint + store.record(Duration::from_secs(5)); + + // Record a larger hint (10 seconds) - should update + store.record(Duration::from_secs(10)); + + // The stored value should be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_store_take_clears() { + let store = GrpcRetryPushbackStore::new(); + + store.record(Duration::from_secs(10)); + + // First take should succeed + let hint1 = store.take(Duration::from_secs(1)); + assert_eq!(hint1, Some(Duration::from_secs(10))); + + // Second take should return None (consumed) + let hint2 = store.take(Duration::from_secs(1)); + assert_eq!(hint2, None); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_http_and_grpc_hints_max_wins() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record HTTP hint of 10 seconds + http_store.record(Duration::from_secs(10)); + // Record gRPC hint of 30 seconds (larger - should win) + grpc_store.record(Duration::from_secs(30)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + // Verify both hints are retrieved + assert_eq!(http_hint, Some(Duration::from_secs(10))); + assert_eq!(grpc_hint, Some(Duration::from_secs(30))); + + // Verify max-value-wins logic (as implemented in UnifiedBreaker::take_combined_hint) + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(30))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_hints_http_wins_when_larger() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record HTTP hint of 60 seconds (larger) + http_store.record(Duration::from_secs(60)); + // Record gRPC hint of 15 seconds + grpc_store.record(Duration::from_secs(15)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(60))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_hints_only_http() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record only HTTP hint + http_store.record(Duration::from_secs(20)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(20))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_hints_only_grpc() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record only gRPC hint + grpc_store.record(Duration::from_secs(25)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(25))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn stale_hint_discarded() { + let store = DurationHintStore::new("test"); + store.record(Duration::from_secs(5)); + + // Advance time past the max_age + tokio::time::advance(Duration::from_secs(60)).await; + + // Stale hint should be discarded, not returned + let result = store.take(Duration::from_secs(30)); + assert!(result.is_none(), "Stale hint should be discarded"); + + // Store should be empty now (stale hint was cleared) + let result2 = store.take(Duration::from_secs(30)); + assert!( + result2.is_none(), + "Store should be empty after stale discard" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn take_adjusts_for_elapsed_time() { + let store = DurationHintStore::new("test"); + + // Record a 10-second hint + store.record(Duration::from_secs(10)); + + // Advance 3 seconds. The hint is partly elapsed. + tokio::time::advance(Duration::from_secs(3)).await; + + // take() should return 10s - 3s = 7s (elapsed < duration) + let result = store.take(Duration::from_secs(60)); + assert_eq!(result, Some(Duration::from_secs(7))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn exactly_elapsed_hint_returns_none() { + let store = DurationHintStore::new("test"); + + // Record a 5-second hint + store.record(Duration::from_secs(5)); + + // Advance exactly 5 seconds, so elapsed equals duration. + tokio::time::advance(Duration::from_secs(5)).await; + + // take() should return None (elapsed >= duration) + let result = store.take(Duration::from_secs(60)); + assert_eq!(result, None); + + // Store should be empty (hint consumed, not just ignored) + assert_eq!(store.take(Duration::from_secs(60)), None); + + // Prove the slot was cleared: a new (smaller) hint is immediately visible. + // If take() failed to clear the slot, record(3s) would be rejected by + // max-value-wins (3 < 5), and this assertion would fail. + store.record(Duration::from_secs(3)); + assert_eq!( + store.take(Duration::from_secs(60)), + Some(Duration::from_secs(3)) + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn overshot_hint_returns_none() { + let store = DurationHintStore::new("test"); + + // Record a 2-second hint + store.record(Duration::from_secs(2)); + + // Advance 3 seconds, so elapsed passes duration but stays within max_age. + tokio::time::advance(Duration::from_secs(3)).await; + + // take() should return None (elapsed > duration) + let result = store.take(Duration::from_secs(60)); + assert_eq!(result, None); + + // Store should be empty (hint consumed, not just ignored) + assert_eq!(store.take(Duration::from_secs(60)), None); + + // Prove the slot was cleared: a new (smaller) hint is immediately visible. + // If take() failed to clear the slot, record(1s) would be rejected by + // max-value-wins (1 < 2), and this assertion would fail. + store.record(Duration::from_secs(1)); + assert_eq!( + store.take(Duration::from_secs(60)), + Some(Duration::from_secs(1)) + ); + } + + #[test] + fn combined_hints_neither_present() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, None); + } + + // Verify that grpc-status is not parsed on non-200 responses. + // + // An HTTP 429 with a spurious `grpc-status: 8` header should record + // the Retry-After hint in the HTTP store but leave the gRPC store empty. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_status_skipped_on_non_200() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = classify::Response::Http(StatusRanges::default()); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store.clone()); + + // HTTP 429 with Retry-After AND a spurious grpc-status: 8 header. + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("30"), + ); + rsp.headers_mut() + .insert("grpc-status", http::HeaderValue::from_static("8")); + rsp.headers_mut().insert( + "grpc-retry-pushback-ms", + http::HeaderValue::from_static("5000"), + ); + + let _eos = wrapped.start(&rsp); + + // HTTP store should have the Retry-After hint + let http_hint = http_store.take(Duration::from_secs(10)); + assert_eq!(http_hint, Some(Duration::from_secs(30))); + + // gRPC store should be empty + let grpc_hint = grpc_store.take(Duration::from_secs(10)); + assert_eq!( + grpc_hint, None, + "grpc-status should not be parsed on HTTP 429" + ); + } + + // Verify that 200 OK gRPC pushback goes only into the gRPC store. + // + // A 200 OK with `grpc-status: 8` (RESOURCE_EXHAUSTED) and + // `grpc-retry-pushback-ms: 5000` should record grpc_store = Some(5s) and + // leave http_store empty. There is no HTTP Retry-After header, so the + // "Retry-After" store must not pick up the gRPC pushback. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_pushback_recorded_only_in_grpc_store() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = classify::Response::Http(StatusRanges::default()); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store.clone()); + + // HTTP 200 OK with gRPC RESOURCE_EXHAUSTED + pushback header. + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::OK; + rsp.headers_mut().insert( + http::header::CONTENT_TYPE, + http::HeaderValue::from_static("application/grpc"), + ); + rsp.headers_mut() + .insert("grpc-status", http::HeaderValue::from_static("8")); + rsp.headers_mut().insert( + "grpc-retry-pushback-ms", + http::HeaderValue::from_static("5000"), + ); + + let _eos = wrapped.start(&rsp); + + // gRPC store should have the pushback hint (the check let it through). + let grpc_hint = grpc_store.take(Duration::from_secs(10)); + assert_eq!( + grpc_hint, + Some(Duration::from_millis(5000)), + "200 OK with grpc-status: 8 should record gRPC pushback" + ); + + // HTTP store stays empty. No Retry-After header was present, and the + // gRPC pushback belongs only to the gRPC store. + let http_hint = http_store.take(Duration::from_secs(10)); + assert_eq!( + http_hint, None, + "gRPC pushback must not leak into the Retry-After store" + ); + } +} diff --git a/linkerd/app/outbound/src/http/breaker/unified.rs b/linkerd/app/outbound/src/http/breaker/unified.rs new file mode 100644 index 0000000000..cdc978c713 --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/unified.rs @@ -0,0 +1,1884 @@ +//! Unified circuit breaker combining consecutive failures and success rate policies. +//! +//! This module implements a single circuit breaker that implements two policies: +//! - Consecutive failures: Trips after N consecutive 5xx responses +//! - Success rate (EWMA): Trips when success rate drops below threshold (treats 429 as failure) +//! +//! ## Trip Conditions +//! +//! Either condition can trip the circuit: +//! +//! * Consecutive failures >= `max_failures` (0 = disabled, no cold-start protection) +//! * EWMA success rate < `threshold` AND request_count >= `min_requests` (cold-start protected) +//! +//! Consecutive failures are checked before success rate, so a response that crosses +//! both thresholds at once is charged to [`TripReason::ConsecutiveFailures`]. The +//! reason is for observability only. The circuit trips on either condition either way. +//! +//! ## Cold-Start Protection +//! +//! The EWMA success rate policy has three layers of cold-start protection: +//! +//! 1. Optimistic initialization: EWMA starts at 1.0 (100% success). This prevents +//! immediate tripping on first failures. +//! +//! 2. Minimum request threshold: The circuit cannot trip on low success rate until +//! `min_requests` have been observed. This ensures statistical significance. +//! +//! 3. Idle restart: After a long idle period (3x the decay window), the request +//! counter resets, re-requiring `min_requests` new observations before +//! the success rate policy can trip. This prevents a single post-idle response +//! from dominating the EWMA and tripping the circuit. +//! +//! Cold-start protection does NOT apply to consecutive failures policy. If an endpoint +//! returns `max_failures` consecutive 5xx errors, it will trip immediately. This is +//! intentional because repeated failures are a strong signal regardless of sample size. +//! +//! ## Recovery Conditions +//! +//! During probation, the probe success check is mode-aware: +//! +//! - In dual mode (success rate active), a probe must be non-5xx AND non-429 for +//! HTTP, or Ok for gRPC, to succeed. A 429 during probation is treated as failure +//! because it indicates the endpoint is still rate limiting. +//! +//! - In consecutive-only mode (success rate disabled), the probe delegates to +//! `class.is_success()` to preserve the behavior of the consecutive-failures +//! breaker it replaces. This ensures 429 responses are evaluated by the default +//! classifier rather than being unconditionally treated as failure. +//! +//! ## Retry-After Integration +//! +//! When a 429 response includes a `Retry-After` header, the duration is stored in +//! a [`RetryAfterStore`] and used as a floor for the first backoff after trip. +//! +//! Similarly, when a gRPC RESOURCE_EXHAUSTED response includes a `grpc-retry-pushback-ms` +//! header or trailer, the duration is stored in a [`GrpcRetryPushbackStore`] and used +//! as a floor for the first backoff. If both HTTP and gRPC hints exist, the maximum +//! is used. +//! +//! ## gRPC RESOURCE_EXHAUSTED Handling +//! +//! gRPC's RESOURCE_EXHAUSTED (code 8) is treated equivalently to HTTP 429: +//! - Counts as failure for success rate EWMA +//! - Won't count for consecutive failures (not a hard failure) +//! - During probation only gRPC Ok (code 0) is considered success + +use super::{ + retry_after::{GrpcRetryPushbackStore, RetryAfterStore}, + TripReason, +}; +use futures::stream::StreamExt; +use linkerd_app_core::proxy::http::classify::gate; +use linkerd_app_core::{classify, exp_backoff, exp_backoff::ExponentialBackoff}; +use linkerd_ewma::{Ewma, MIN_DECAY}; +use tokio::sync::mpsc; +use tokio::time::{Duration, Instant}; +use tonic::Code; + +/// Idle multiplier for cold-start restart. When the gap since the last response +/// exceeds this many decay windows, the request counter resets so that +/// `min_requests` new samples are required before the success rate policy can +/// trip. At 3x, alpha ~= 0.95: a single sample after the idle gap would account +/// for ~95% of the EWMA, so one error after a quiet period could trip the +/// circuit on its own. Re-arming cold-start protection prevents that. +/// +// TODO: expose this as configuration once the mechanism has seen real traffic. +const IDLE_COLD_START_FACTOR: u32 = 3; + +/// Unified circuit breaker combining consecutive failures and success rate policies. +/// +/// This policy tracks both consecutive failures (for 5xx responses) and EWMA success +/// rate (treating both 5xx and 429 as failures). Either condition can trip the circuit. +/// Cold-start protection only applies to success rate, not consecutive failures. +pub struct UnifiedBreaker { + // === Configuration === + /// Maximum consecutive failures (5xx only) before trip. + max_failures: usize, + /// Success rate threshold (0.0-1.0) - trip when EWMA drops below this. + threshold: f64, + /// EWMA decay window for success rate tracking. + decay: Duration, + /// Exponential backoff configuration for recovery. + backoff: ExponentialBackoff, + /// Minimum requests before circuit can trip (cold-start protection). + min_requests: usize, + + // === Gate Control === + /// Gate channel to control endpoint readiness (direct - no coordination wrapper). + gate: gate::Tx, + /// Channel receiving response classifications. + rsps: mpsc::Receiver, + + // === Retry-After Integration === + /// Shared store for HTTP Retry-After hints from 429 responses. + retry_after_store: RetryAfterStore, + /// Shared store for gRPC retry pushback hints from RESOURCE_EXHAUSTED responses. + grpc_retry_pushback_store: GrpcRetryPushbackStore, + /// Maximum Retry-After duration the proxy will honor (clamping cap). + max_duration: Duration, + + /// Lets tests see the reason charged to each trip without affecting behavior. + #[cfg(test)] + trip_observer: Option>, +} + +/// Internal state tracked during the open phase. +struct OpenState { + /// Current consecutive failure count (5xx only). + consecutive_failures: usize, + /// EWMA tracker for success rate (5xx and 429 count as failure). + ewma: Ewma, + /// Requests since last cold-start reset (for cold-start protection). + /// Resets on recovery and after idle periods exceeding the cold-start + /// threshold. + request_count: usize, + /// Timestamp of the most recent response, used to detect idle gaps. + /// Tracked apart from the EWMA. The average's timestamp freezes while the + /// circuit is tripped and probing, so reading idle from it would miss quiet + /// periods that span a trip-recover cycle. + last_seen: Instant, +} + +impl OpenState { + /// Create new open state with optimistic EWMA initialization. + /// + /// The EWMA starts at 1.0 (100% success rate) rather than 0.0 to implement + /// "optimistic cold-start": new or recovered endpoints are assumed healthy + /// until proven otherwise. Combined with `min_requests` cold-start protection, + /// this prevents: + /// - Immediate tripping on first failure (EWMA would drop below threshold) + /// - False positives during low-traffic periods where single failures have + /// outsized impact on the EWMA + /// + /// The EWMA will naturally decay toward actual observed success rate as + /// requests are processed. After `min_requests` observations, the EWMA + /// represents a statistically meaningful estimate. + fn new(decay: Duration, now: Instant) -> Self { + if decay < MIN_DECAY { + tracing::warn!( + ?decay, + min = ?MIN_DECAY, + "success-rate decay below minimum, will be clamped by EWMA constructor" + ); + } + Self { + consecutive_failures: 0, + ewma: Ewma::new_with_value(decay, now, 1.0), + request_count: 0, + last_seen: now, + } + } + + /// Return the state to its initial values after recovery. + fn reset(&mut self, decay: Duration, now: Instant) { + *self = Self::new(decay, now); + } +} + +/// Configuration for [`UnifiedBreaker::new`]. +#[derive(Debug)] +pub(crate) struct UnifiedBreakerConfig { + pub(crate) max_failures: usize, + pub(crate) threshold: f64, + pub(crate) decay: Duration, + pub(crate) backoff: ExponentialBackoff, + pub(crate) min_requests: usize, + pub(crate) gate: gate::Tx, + pub(crate) rsps: mpsc::Receiver, + pub(crate) retry_after_store: RetryAfterStore, + pub(crate) grpc_retry_pushback_store: GrpcRetryPushbackStore, + pub(crate) max_duration: Duration, +} + +impl UnifiedBreaker { + /// Create a new unified circuit breaker. + pub fn new(config: UnifiedBreakerConfig) -> Self { + let UnifiedBreakerConfig { + max_failures, + threshold, + decay, + backoff, + min_requests, + gate, + rsps, + retry_after_store, + grpc_retry_pushback_store, + max_duration, + } = config; + Self { + max_failures, + threshold, + decay, + backoff, + min_requests, + gate, + rsps, + retry_after_store, + grpc_retry_pushback_store, + max_duration, + #[cfg(test)] + trip_observer: None, + } + } + + /// Attach a channel that receives the reason for each trip. + /// + /// Tests use this to check which condition the breaker charged a trip to. + #[cfg(test)] + fn with_trip_observer(mut self, tx: mpsc::UnboundedSender) -> Self { + self.trip_observer = Some(tx); + self + } + + /// Run the circuit breaker state machine. + /// + /// The state machine cycles through: + /// - Open: accept requests, track failures until trip condition met + /// - Closed: reject requests, wait for backoff + /// - Probation: allow single probe request + /// - If probe succeeds -> Open, if probe fails -> Closed + pub async fn run(mut self) { + let mut state = OpenState::new(self.decay, Instant::now()); + + loop { + // Open state: track failures until trip condition + let trip_reason = match self.open(&mut state).await { + Ok(reason) => reason, + Err(()) => return, // Gate lost + }; + + tracing::info!(?trip_reason, "Unified circuit breaker tripped"); + + #[cfg(test)] + if let Some(tx) = self.trip_observer.as_ref() { + let _ = tx.send(trip_reason); + } + + // Closed state: wait for backoff, then probe + match self.closed(trip_reason).await { + Ok(()) => { + // Probe succeeded, reset state for reopening + state.reset(self.decay, Instant::now()); + tracing::info!("Unified circuit breaker reopened"); + } + Err(()) => return, // Gate lost + } + } + } + + /// Open state: track both policies until either trips. + /// + /// Returns the reason for trip, or `Err(())` if gate is lost. + async fn open(&mut self, state: &mut OpenState) -> Result { + tracing::debug!("Open"); + + self.gate.open().map_err(|_| ())?; + + // Saturate rather than multiply directly. A very large decay set by an + // operator could push `decay * IDLE_COLD_START_FACTOR` past + // `Duration::MAX` and panic. Computing it once also keeps the multiply + // out of the loop. + let idle_reset = self.decay.saturating_mul(IDLE_COLD_START_FACTOR); + + loop { + let class = tokio::select! { + rsp = self.rsps.recv() => rsp.ok_or(())?, + _ = self.gate.lost() => return Err(()), + }; + + let now = Instant::now(); + + // Reset cold-start protection after idle periods where the + // EWMA has decayed enough that one sample would dominate. Idle is + // measured against the last response rather than the EWMA, whose + // timestamp stops across a trip-recover cycle. + if now.saturating_duration_since(state.last_seen) > idle_reset { + tracing::debug!( + previous_count = state.request_count, + "Re-engaging cold-start protection after idle" + ); + state.request_count = 0; + } + + state.request_count = state.request_count.saturating_add(1); + state.last_seen = now; + + let is_configured_failure = !class.is_success(); + let degrades_success_rate = match &class { + classify::Class::Http(Ok(status)) => *status == http::StatusCode::TOO_MANY_REQUESTS, + classify::Class::Http(Err(_)) => true, + classify::Class::Grpc(Ok(code)) => *code == Code::ResourceExhausted, + classify::Class::Grpc(Err(_)) => true, + classify::Class::Error(_) => true, + }; + + // Update consecutive configured failures (skip when policy disabled) + if self.max_failures > 0 { + if is_configured_failure { + state.consecutive_failures = state.consecutive_failures.saturating_add(1); + } else { + state.consecutive_failures = 0; + } + } + + // Update EWMA (configured failures, typically 5xx, and 429 are failures). + // `Ewma::add` drops samples whose timestamp matches the stored one, so + // responses that share a single instant combine into the first. The request + // counter still increments per response and can lead the EWMA by one. + let value = if degrades_success_rate { 0.0 } else { 1.0 }; + state.ewma.add(value, now); + let rate = state.ewma.get(); + + tracing::trace!( + ?class, + consecutive_failures = state.consecutive_failures, + success_rate = %rate, + request_count = state.request_count, + min_requests = self.min_requests, + "Response" + ); + + // Either condition can trip. Consecutive failures has no cold-start, success + // rate does. The reason reflects whichever condition `should_trip` checks first. + if let Some(trip_reason) = self.should_trip(state) { + return Ok(trip_reason); + } + } + } + + /// Check if the circuit should trip based on current state. + /// + /// Returns the trip reason if a trip condition is met, `None` otherwise. + /// + /// Note that cold-start protection only applies to success rate, not consecutive failures. + /// + /// Consecutive failures come first. When a single response crosses both the + /// consecutive-failure and success-rate thresholds, the reported reason is + /// [`TripReason::ConsecutiveFailures`] because that condition is checked first. The + /// difference is for observability only, since either condition trips the circuit. + fn should_trip(&self, state: &OpenState) -> Option { + // max_failures=0 disables the consecutive failure policy entirely. + if self.max_failures == 0 { + // Skip consecutive failures check. Only success rate can trip. + } else if state.consecutive_failures >= self.max_failures { + // Condition 1: Consecutive failures threshold exceeded (no cold-start protection) + // This can trip immediately after max_failures consecutive 5xx errors. + return Some(TripReason::ConsecutiveFailures); + } + + // Condition 2: Success rate dropped below threshold (with cold-start protection) + // Requires min_requests for accurate EWMA estimation before tripping. + if state.request_count >= self.min_requests && state.ewma.get() < self.threshold { + return Some(TripReason::LowSuccessRate); + } + + None + } + + /// Closed state: backoff with optional Retry-After extension. + /// + /// Waits for backoff to expire, then enters probation. If the probe + /// succeeds, returns `Ok(())`. If it fails, resumes the backoff loop. + async fn closed(&mut self, trip_reason: TripReason) -> Result<(), ()> { + let mut backoff = self.backoff.stream(); + let mut first_iteration = true; + + loop { + // Shut the gate + tracing::debug!(backoff = ?backoff.duration(), ?trip_reason, "Shut"); + self.gate.shut().map_err(|_| ())?; + + // Wait for backoff, optionally extended by Retry-After/pushback hint on first iteration + let retry_after_hint = if first_iteration { + first_iteration = false; + self.take_combined_hint() + } else { + None + }; + + self.wait_for_backoff(&mut backoff, retry_after_hint) + .await?; + + // Enter probation and wait for probe response + let class = self.probation().await?; + tracing::trace!(?class, "Probe response"); + + // Check probe result (mode-aware: consecutive-only preserves old semantics) + if self.is_probe_success(&class) { + return Ok(()); + } + // Probe failed, continue backoff loop + } + } + + /// Take combined HTTP Retry-After and gRPC pushback hints, returning the maximum. + /// + /// A hint is honored as long as it is no older than the configured maximum + /// hint duration. The store already subtracts elapsed time, so the remaining + /// wait reflects how much of the hint is still in the future. If both HTTP + /// and gRPC hints exist, the maximum duration is returned. + fn take_combined_hint(&self) -> Option { + let http_hint = self.retry_after_store.take(self.max_duration); + let grpc_hint = self.grpc_retry_pushback_store.take(self.max_duration); + + // Cap each hint individually so per-source tracing shows which one was clamped. + let http_hint = http_hint.map(|h| { + if h > self.max_duration { + tracing::warn!( + hint = ?h, + max = ?self.max_duration, + "HTTP Retry-After hint exceeds maximum, clamping" + ); + self.max_duration + } else { + h + } + }); + let grpc_hint = grpc_hint.map(|g| { + if g > self.max_duration { + tracing::warn!( + hint = ?g, + max = ?self.max_duration, + "gRPC pushback hint exceeds maximum, clamping" + ); + self.max_duration + } else { + g + } + }); + + match (http_hint, grpc_hint) { + (Some(h), Some(g)) => { + let combined = h.max(g); + tracing::debug!( + http_hint = ?h, + grpc_hint = ?g, + combined = ?combined, + "Combined HTTP and gRPC hints (max-value-wins)" + ); + Some(combined) + } + (Some(h), None) => { + tracing::debug!(hint = ?h, "Using HTTP Retry-After hint"); + Some(h) + } + (None, Some(g)) => { + tracing::debug!(hint = ?g, "Using gRPC pushback hint"); + Some(g) + } + (None, None) => None, + } + } + + /// Check if a probe response indicates successful recovery. + /// + /// This check is mode-aware to preserve backwards compatibility: + /// + /// In consecutive-only mode (`min_requests == usize::MAX`), the probe + /// delegates to `class.is_success()`. This matches the behavior of the + /// previous consecutive-failures breaker, where 429 responses were + /// evaluated by the default classifier rather than being unconditionally + /// treated as failure. + /// + /// In dual mode (success rate active), the check is stricter: HTTP probes + /// must be non-5xx AND non-429, and gRPC probes must be Ok. This prevents + /// reopening the circuit when the endpoint is still rate limiting. + fn is_probe_success(&self, class: &classify::Class) -> bool { + if self.min_requests == usize::MAX { + // Consecutive-only mode: preserve old is_success() semantics. + // The existing consecutive-failures breaker used class.is_success() + // which lets the default classifier decide whether 429 is success + // or failure. We must not change this behavior. + class.is_success() + } else { + // Dual mode (success_rate active): stricter check. + // 429 and RESOURCE_EXHAUSTED count as probe failure because they + // indicate the endpoint is rate limiting, which the success-rate + // EWMA should track. + match class { + classify::Class::Http(Ok(status)) => { + !status.is_server_error() && *status != http::StatusCode::TOO_MANY_REQUESTS + } + classify::Class::Http(Err(_)) => false, // 5xx failure + classify::Class::Grpc(Ok(code)) => *code == Code::Ok, + classify::Class::Grpc(Err(_)) => false, + _ => class.is_success(), + } + } + } + + /// Wait for the backoff duration to expire, optionally extended by Retry-After hint. + /// + /// If `retry_after_hint` is provided, waits for _both_ the hint duration _and_ the backoff + /// to complete, ensuring we wait at least `max(hint, backoff_duration)`. + async fn wait_for_backoff( + &mut self, + backoff: &mut exp_backoff::ExponentialBackoffStream, + retry_after_hint: Option, + ) -> Result<(), ()> { + if let Some(hint) = retry_after_hint { + debug_assert!( + hint <= self.max_duration, + "hint {hint:?} exceeds max_duration {:?} -- take_combined_hint should have clamped", + self.max_duration + ); + tracing::info!(?hint, "Using Retry-After hint as backoff floor"); + + let hint_sleep = tokio::time::sleep(hint); + tokio::pin!(hint_sleep); + + let mut backoff_done = false; + let mut hint_done = false; + + // Wait for both to complete + loop { + tokio::select! { + _ = backoff.next(), if !backoff_done => { + backoff_done = true; + if hint_done { break; } + } + _ = &mut hint_sleep, if !hint_done => { + hint_done = true; + if backoff_done { break; } + } + // Drain responses while the breaker is shut, but stop when + // the channel closes. recv() on a closed channel returns + // None at once, so looping on it would spin this task. + rsp = self.rsps.recv() => { rsp.ok_or(())?; continue } + _ = self.gate.lost() => return Err(()), + } + } + } else { + // No hint, use normal backoff + loop { + tokio::select! { + _ = backoff.next() => break, + // Drain responses while the breaker is shut, but stop when + // the channel closes. recv() on a closed channel returns + // None at once, so looping on it would spin this task. + rsp = self.rsps.recv() => { rsp.ok_or(())?; continue } + _ = self.gate.lost() => return Err(()), + } + } + } + + Ok(()) + } + + /// Probation state: allow single probe request. + /// + /// Uses `gate.limit(1)` to allow exactly one request through. + async fn probation(&mut self) -> Result { + tracing::debug!("Probation"); + + let _sem = self.gate.limit(1).map_err(|_| ())?; + + tokio::select! { + rsp = self.rsps.recv() => rsp.ok_or(()), + _ = self.gate.lost() => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use linkerd_proxy_client_policy::DEFAULT_RETRY_AFTER_MAX_DURATION; + use tokio::time; + use tokio_test::{assert_pending, task}; + + fn make_backoff() -> ExponentialBackoff { + ExponentialBackoff::try_new( + time::Duration::from_secs(1), + time::Duration::from_secs(100), + 0.0, // No jitter for deterministic tests + ) + .expect("backoff params are valid") + } + + fn send_class(params: &gate::Params, class: classify::Class) { + params.responses.try_send(class).unwrap() + } + + fn send_ok(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Ok(status))); + } + + fn send_err(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Err(status))); + } + + fn default_test_config( + gate_tx: gate::Tx, + rsps: mpsc::Receiver, + ) -> UnifiedBreakerConfig { + UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.8, + decay: time::Duration::from_secs(10), + backoff: make_backoff(), + min_requests: 1, + gate: gate_tx, + rsps, + retry_after_store: RetryAfterStore::new(), + grpc_retry_pushback_store: GrpcRetryPushbackStore::new(), + max_duration: DEFAULT_RETRY_AFTER_MAX_DURATION, + } + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn starts_open() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_consecutive_failures() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // Send 3 consecutive 5xx (should trip) + for i in 1..=3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + if i < 3 { + assert!( + params.gate.is_open(), + "Should still be open after {} failures", + i + ); + } + } + + assert!(params.gate.is_shut(), "Should trip after 3 consecutive 5xx"); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_resets_consecutive_count() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send 2 failures + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // Success resets count + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + // 2 more failures - still need 3 consecutive + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_open(), + "Should still be open - only 2 consecutive after reset" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_low_success_rate() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures so CF doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send 1 success + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Send 429s to drop success rate below 80% + // With EWMA, after ~3-4 429s the rate drops below 0.8 + for _ in 0..4 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!(!params.gate.is_open(), "Should trip on low success rate"); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_rate_counts_5xx() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures so CF doesn't trip first + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // 5xx should count as failure for success rate + for _ in 0..5 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_shut(), + "5xx should count as failure for success rate" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn cold_start_protects_success_rate() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures + min_requests: 5, // min_requests = 5 + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send 4 429s - should NOT trip (only 4 requests, need 5) + for _ in 0..4 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "Cold-start should protect success rate" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_success_reopens() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Enter probation + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1); + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // Probe succeeds + send_ok(¶ms, http::StatusCode::OK); + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_failure_on_429() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff and enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + match params.gate.state() { + gate::State::Limited(_) => { + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // Probe "fails" with 429 - strict AND recovery (dual mode, min_requests=1) + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "429 during probation should fail (strict AND)" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn retry_after_hint_extends_backoff() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let retry_after_store = RetryAfterStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: retry_after_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record Retry-After hint of 5s (longer than 1s backoff) + retry_after_store.record(time::Duration::from_secs(5)); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // After 1s (normal backoff would complete), still shut due to hint + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_shut()); + + // After 3s more (4s total), still shut + time::sleep(time::Duration::from_secs(3)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_shut()); + + // After 1s more (5s total), enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_limited()); + } + + // Verify that consecutive-only mode (min_requests == usize::MAX) uses + // class.is_success() for probe checks, preserving the old breaker behavior + // where 429 is evaluated by the default classifier. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_only_429_probe_preserves_old_semantics() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + // Consecutive-only mode: min_requests = usize::MAX disables EWMA tripping + // and triggers the class.is_success() path in is_probe_success(). + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + min_requests: usize::MAX, + threshold: 0.0, // threshold=0.0 also prevents SR trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker via consecutive failures + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff and enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + match params.gate.state() { + gate::State::Limited(_) => { + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // In consecutive-only mode, 429 probe uses class.is_success(). + // The default HTTP classifier treats 429 as success (only 500-599 are + // failures by default). So the probe should succeed and reopen the gate. + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "In consecutive-only mode, 429 during probation should succeed \ + (delegates to class.is_success() which treats 429 as success)" + ); + } + + fn send_grpc(params: &gate::Params, code: tonic::Code) { + let class = if matches!( + code, + tonic::Code::Unknown + | tonic::Code::DeadlineExceeded + | tonic::Code::PermissionDenied + | tonic::Code::Internal + | tonic::Code::Unavailable + | tonic::Code::DataLoss + ) { + classify::Class::Grpc(Err(code)) + } else { + classify::Class::Grpc(Ok(code)) + }; + send_class(params, class); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_failures_ignores_grpc_resource_exhausted() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + threshold: 0.1, // very low threshold so SR doesn't trip + min_requests: 100, // high min_requests so SR doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // RESOURCE_EXHAUSTED should not count as consecutive failures + for _ in 0..5 { + send_grpc(¶ms, tonic::Code::ResourceExhausted); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "gRPC RESOURCE_EXHAUSTED should not trigger consecutive failures trip" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_low_success_rate_grpc_resource_exhausted() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // high max_failures so CF doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send one success + send_grpc(¶ms, tonic::Code::Ok); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Send RESOURCE_EXHAUSTED to drop success rate below 80% + for _ in 0..4 { + send_grpc(¶ms, tonic::Code::ResourceExhausted); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + !params.gate.is_open(), + "Should trip on low success rate from gRPC RESOURCE_EXHAUSTED" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_low_success_rate_grpc_server_error() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + send_grpc(¶ms, tonic::Code::Ok); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + for _ in 0..4 { + send_grpc(¶ms, tonic::Code::Internal); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + !params.gate.is_open(), + "Should trip on low success rate from gRPC Internal errors" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_success_on_grpc_ok() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Enter probation + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1); + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // Probe succeeds with gRPC Ok + send_grpc(¶ms, tonic::Code::Ok); + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "gRPC Ok during probation should succeed" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_failures_ignores_429() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + threshold: 0.1, // Very low threshold so SR doesn't trip + min_requests: 100, // High min_requests so SR doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // 429s should not count as consecutive failures for the CF policy + for _ in 0..5 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "429s should not trigger consecutive failures trip" + ); + } + + // Verify threshold=0.0 disables success-rate tripping + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn threshold_zero_disables_success_rate() { + let _trace = linkerd_tracing::test::trace_init(); + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures so CF won't trip + threshold: 0.0, // threshold=0.0 (SR disabled) + min_requests: 1, // Cold-start passes after 1 request + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // 1 success to pass cold-start (min_requests=1) + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Many 429s to drop success rate toward 0%. + for _ in 0..10 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "threshold=0.0 should disable success-rate tripping" + ); + } + + // Verify Duration::MAX hint is clamped to DEFAULT_RETRY_AFTER_MAX_DURATION + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn duration_max_hint_is_clamped() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let retry_after_store = RetryAfterStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: retry_after_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a Duration::MAX hint BEFORE tripping the breaker. + retry_after_store.record(Duration::MAX); + + // Trip the breaker with 3 consecutive 5xx errors. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "Should trip after 3 consecutive 5xx"); + + // After DEFAULT_RETRY_AFTER_MAX_DURATION (300s), the breaker should enter probation. + time::sleep(DEFAULT_RETRY_AFTER_MAX_DURATION).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "Duration::MAX hint should be clamped to DEFAULT_RETRY_AFTER_MAX_DURATION; \ + gate should enter probation after 300s, not sleep forever" + ); + } + + // Verify that `closed()` creates a new backoff stream each time the breaker + // trips, so backoff escalation is not kept across trip-recover-trip cycles. + // After recovery the next trip should start at the base backoff (1s), not an + // escalated value (2s). + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn backoff_resets_to_base_on_retrip() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // Trip #1 + for _ in 0..2 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "Should trip after 2 consecutive 5xx"); + + // Wait for first backoff (base = 1s) and enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1); + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state after first backoff"), + } + + // Probe succeeds, breaker reopens + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Should reopen after successful probe" + ); + + // Trip #2 + for _ in 0..2 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!( + params.gate.is_shut(), + "Should trip again after 2 consecutive 5xx" + ); + + // Advance 1s + small buffer. If the backoff had escalated to 2s (iteration 1), + // the gate would still be shut here. With a new stream the base is 1s. + time::sleep(time::Duration::from_millis(1050)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "Second trip should use base backoff (1s), not escalated (2s); \ + closed() creates a new backoff stream each time" + ); + } + + // When both consecutive failures (max_failures=0) and success rate + // (threshold=0.0, min_requests=usize::MAX) are disabled, no combination of + // errors can trip the circuit. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn both_policies_disabled_never_trips() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, + threshold: 0.0, + min_requests: usize::MAX, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Advance 1ms past t=0 so EWMA accepts samples + time::advance(time::Duration::from_millis(1)).await; + + for _ in 0..10 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Breaker must stay open when both policies are disabled (tripped via 5xx)" + ); + } + + for _ in 0..10 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Breaker must stay open when both policies are disabled (tripped via 429)" + ); + } + } + + // After a long idle period (many multiples of the decay window), cold-start + // protection re-engages so that one post-idle sample cannot trip the circuit. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn idle_reengages_cold_start_with_success() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, + threshold: 0.5, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + time::advance(time::Duration::from_millis(1)).await; + + for _ in 0..3 { + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "One failure among successes should not trip (EWMA still above 0.5)" + ); + + // Long idle: 200s = 20x the decay window, well above the 3x + // cold-start re-engagement threshold. + time::advance(time::Duration::from_secs(200)).await; + + // Cold-start re-engaged: request_count reset to 0. A post-idle + // success brings it to 1, well below min_requests=3. + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle success: cold-start active (1 < min_requests) and EWMA near 1.0" + ); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle failure: cold-start still active (2 < min_requests)" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn idle_reengages_cold_start_with_failure() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, + threshold: 0.5, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + time::advance(time::Duration::from_millis(1)).await; + + for _ in 0..3 { + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "One failure among successes should not trip (EWMA still above 0.5)" + ); + + // Long idle: 200s = 20x the decay window + time::advance(time::Duration::from_secs(200)).await; + + // Cold-start re-engaged: request_count reset to 0. Failures after + // idle cannot trip until min_requests new observations accumulate. + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle failure 1/3: cold-start active (1 < min_requests)" + ); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle failure 2/3: cold-start active (2 < min_requests)" + ); + + // Third failure reaches min_requests=3 with EWMA near 0.0 + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "Post-idle failure 3/3: cold-start satisfied, EWMA near 0.0 < threshold 0.5" + ); + } + + // A closed response channel must end the breaker task even while it is + // parked in the backoff wait. recv() on a closed channel yields None right + // away, so the drain step has to treat None as teardown rather than looping + // on it. The gate receiver is kept alive here, so gate.lost() never fires + // and the task ends only through the closed-channel check. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn wait_for_backoff_terminates_when_channel_closes() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + // max_failures=1 trips the breaker on the first 5xx, dropping it into + // the closed state and the backoff wait. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 1, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + send_err(¶ms, http::StatusCode::INTERNAL_SERVER_ERROR); + assert_pending!(task.poll()); + assert!(params.gate.is_shut(), "single 5xx should trip the breaker"); + + // Close the channel while the breaker waits out the backoff. The task + // must finish rather than spin on None. + drop(params.responses); + assert!( + task.poll().is_ready(), + "breaker must terminate when the response channel closes during backoff" + ); + } + + // Same teardown as above, but a second gate receiver outlives the endpoint + // service so gate.lost() can never resolve. Without the closed-channel + // check the backoff wait spins forever on None. With it the task exits. A + // real task is needed so the Gate plus BroadcastClassification stack can + // drive a classification through the channel. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn wait_for_backoff_terminates_with_leaked_gate() { + use linkerd_app_core::{ + proxy::http::{classify::BroadcastClassification, BoxBody}, + svc::{self, ServiceExt}, + Error, + }; + use tokio::task::yield_now; + + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + // Model a stack that retains gate state after the endpoint service is + // dropped by holding a second gate receiver. + let leaked_gate = params.gate.clone(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 1, + ..default_test_config(gate_tx, rsps) + }); + let breaker = tokio::spawn(breaker.run()); + + let svc = svc::Gate::new( + params.gate, + BroadcastClassification::::new( + params.responses, + svc::mk(|_: http::Request| async move { + Ok::<_, Error>( + http::Response::builder() + .status(http::StatusCode::INTERNAL_SERVER_ERROR) + .body(BoxBody::default()) + .unwrap(), + ) + }), + ), + ); + + let req = http::Request::builder() + .extension(classify::Response::default()) + .body(BoxBody::default()) + .unwrap(); + let rsp = svc.oneshot(req).await.expect("service must succeed"); + // Dropping the response emits a final classification, then drops the + // last sender so the channel closes. + drop(rsp); + + for _ in 0..16 { + if leaked_gate.is_shut() { + break; + } + yield_now().await; + } + assert!( + leaked_gate.is_shut(), + "breaker should enter the closed state" + ); + + for _ in 0..16 { + if breaker.is_finished() { + break; + } + yield_now().await; + } + assert!( + breaker.is_finished(), + "breaker must terminate when the response channel closes, \ + even while a second gate receiver survives" + ); + breaker.await.expect("breaker task must not panic"); + } + + // Probation limits the gate to a single in-flight probe. The breaker calls + // `gate.limit(1)`, so the semaphore offers exactly one permit. Acquiring it + // models the one admitted probe. A second concurrent request finds no permit + // left and would block until the gate state changes. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_admits_single_probe() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip via 3 consecutive 5xx. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait out the base backoff so the breaker enters probation. + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // The gate exposes a one-permit semaphore. Draining that permit leaves + // none for a would-be second probe, which is the single-probe bound. + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1, "probation admits one probe"); + let permit = sem.try_acquire().expect("the single probe acquires"); + assert_eq!( + sem.available_permits(), + 0, + "a second concurrent probe finds no permit", + ); + assert!( + sem.try_acquire().is_err(), + "the semaphore admits only one probe at a time", + ); + drop(permit); + } + other => panic!("expected Limited state, got {other:?}"), + } + } + + // After a success-rate trip, an optimistic reopen plus steady healthy + // traffic raises the EWMA above the threshold. A single failure right + // after recovery loses its weight as successes add up across the decay + // window, so the circuit stays open rather than tripping again on stale memory. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_rate_recovers_after_decay() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + // Consecutive failures disabled so only success rate drives the trip + // and the later recovery. decay=10s keeps the arithmetic readable. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, + threshold: 0.5, + min_requests: 1, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Seed one success so cold-start clears and the EWMA sits at 1.0. + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // 429s lower the rate below 0.5 and trip the circuit. With 1s spacing + // the EWMA crosses 0.5 around the seventh sample. Stop at the trip so + // the backoff timer is not advanced while the rate is still dropping. + for _ in 0..8 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + if params.gate.is_shut() { + break; + } + } + assert!(params.gate.is_shut(), "low success rate should trip"); + + // Wait out the base backoff and let the probe succeed. + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + match params.gate.state() { + gate::State::Limited(_) => { + params.gate.opened_for_test().await.unwrap().forget(); + } + other => panic!("expected Limited state, got {other:?}"), + } + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "successful probe reopens the circuit" + ); + + // One failure after recovery dips the newly reset EWMA but stays above + // 0.5 (one zero against an optimistic 1.0). + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "a single failure after recovery should not re-trip", + ); + + // Steady successes across two decay windows pull the rate back toward + // 1.0 while the old failure decays out. The circuit holds open throughout. + for _ in 0..20 { + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "recovered success rate should keep the circuit open", + ); + } + } + + // With both policies enabled, a stream of 429s lowers the success rate + // without ever incrementing the consecutive counter (429 is not a hard + // failure). The circuit trips on low success rate while consecutive failures + // stay at zero, and the charged reason reflects that. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn dual_mode_success_rate_trips_before_consecutive() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + // Both mechanisms active: a five-failure consecutive budget and an 80% + // success-rate floor with a three-request cold-start. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 5, + threshold: 0.8, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Seed one success so the EWMA starts at 1.0 and the cold-start counter + // begins climbing. + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // 429s push the rate under 0.8. Consecutive failures never move because + // 429 is treated as success by the consecutive policy. With 1s spacing + // the EWMA reads ~0.74 after the third 429, below 0.8, and min_requests + // is met, so the trip happens there. Break at that response to check while + // the gate is shut. A further 429 would advance time past the base + // backoff, reopening the gate for a probe and hiding the shut state. + let mut shut = false; + for _ in 0..4 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + if params.gate.is_shut() { + shut = true; + break; + } + } + + assert!( + shut, + "success rate should trip while consecutive failures stay at zero", + ); + // The reason proves the consecutive policy did not trip. `should_trip` + // checks consecutive failures first, so a `LowSuccessRate` verdict means + // the consecutive counter never reached max_failures. + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::LowSuccessRate), + "the trip is attributed to low success rate, not consecutive failures", + ); + } + + // A consecutive-failure trip in a single-policy configuration reports + // `ConsecutiveFailures`. Success rate is disabled (threshold 0.0), so the + // only path to a trip is the consecutive counter reaching max_failures. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_trip_reports_consecutive_reason() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.0, // success rate disabled + min_requests: usize::MAX, + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + + assert!(params.gate.is_shut(), "three 5xx trip consecutive failures"); + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::ConsecutiveFailures), + "a consecutive-failure trip reports ConsecutiveFailures", + ); + } + + // A success-rate trip in a single-policy configuration reports + // `LowSuccessRate`. Consecutive failures are disabled (max_failures 0), so + // only EWMA degradation can trip. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_rate_trip_reports_low_success_rate_reason() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, // consecutive failures disabled + threshold: 0.5, + min_requests: 1, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + for _ in 0..8 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + if params.gate.is_shut() { + break; + } + } + + assert!(params.gate.is_shut(), "low success rate trips the circuit"); + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::LowSuccessRate), + "a success-rate trip reports LowSuccessRate", + ); + } + + // When one response crosses both thresholds at once, consecutive failures + // win the charge because `should_trip` checks them first. The config is tuned + // so the third 5xx reaches max_failures, satisfies min_requests, and drops + // the EWMA below the threshold on the same response. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn simultaneous_threshold_cross_reports_consecutive_reason() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.8, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // 1s spacing decays the EWMA fast. After three 5xx it reads ~0.74, below + // 0.8, exactly as the consecutive counter reaches 3 and min_requests is + // met. Both conditions hold on the third response. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!(params.gate.is_shut(), "the third 5xx trips the circuit"); + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::ConsecutiveFailures), + "a simultaneous cross is attributed to consecutive failures", + ); + } +} diff --git a/linkerd/app/outbound/src/http/breaker/wrap_classify.rs b/linkerd/app/outbound/src/http/breaker/wrap_classify.rs new file mode 100644 index 0000000000..478a1bac88 --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/wrap_classify.rs @@ -0,0 +1,432 @@ +//! Per-endpoint gate wiring for the circuit breaker. +//! +//! Each balancer endpoint sits behind a [`svc::Gate`] whose readiness a +//! [`UnifiedBreaker`](super::unified::UnifiedBreaker) controls. The gate is paired with a +//! response classifier that broadcasts each classification over the breaker's channel. +//! +//! When failure accrual is set up for the target, the classifier is wrapped with +//! [`RetryAfterClassify`] so that `Retry-After` and `grpc-retry-pushback-ms` hints are recorded +//! into stores the breaker reads while backing off. The stores are built here, per endpoint, so a +//! hint seen on one endpoint extends only that endpoint's backoff. +//! +//! When accrual is absent, the endpoint uses the stock +//! [`BroadcastClassification`] with no stores and no hint parsing. This matches an +//! endpoint that has no circuit breaking. + +use super::retry_after::{GrpcRetryPushbackStore, RetryAfterClassify, RetryAfterStore}; +use super::Params; +use linkerd_app_core::{ + classify, + proxy::http::{self, classify::gate}, + svc::{self, ExtractParam}, + Error, +}; +use linkerd_http_classify::BroadcastClassification; +use linkerd_proxy_client_policy::{FailureAccrual, RetryAfterConfig}; +use std::{ + task::{Context, Poll}, + time::Duration, +}; + +/// The classifier the breaker reads when accrual is set up. +type WrappedClassify = RetryAfterClassify; + +/// Parameters for creating the Retry-After aware gate set. +#[derive(Clone, Debug)] +pub struct RetryAfterGateParams { + pub channel_capacity: usize, + /// Maximum Retry-After duration to honor. Capped to this value. + pub max_duration: Duration, +} + +/// Reads the per-endpoint failure accrual and Retry-After configuration, then builds the +/// [`NewRetryAfterGate`] that creates each endpoint's gated service. +#[derive(Clone, Debug)] +pub struct NewRetryAfterGateSet { + inner: N, + params: RetryAfterGateParams, +} + +/// Builds each endpoint's [`svc::Gate`], branching on whether failure accrual is set up. +#[derive(Clone, Debug)] +pub struct NewRetryAfterGate { + inner: N, + accrual: Option, + channel_capacity: usize, + max_duration: Duration, +} + +/// Trait for targets that provide failure accrual configuration. +pub trait HasFailureAccrual { + fn failure_accrual(&self) -> Option; +} + +/// Whether an accrual policy can never trip, which makes it the same as having no circuit breaking. +/// +/// Consecutive tracking does nothing when `max_failures` is zero. Success-rate tracking does +/// nothing when it is absent or its threshold is at or below zero, since the EWMA rate stays at or +/// above any such threshold. A policy where both hold could only ever spawn an unused breaker, so +/// it is treated as having no breaker. +fn is_effectively_disabled(accrual: &FailureAccrual) -> bool { + accrual.consecutive.max_failures == 0 + && accrual.success_rate.is_none_or(|sr| sr.threshold <= 0.0) +} + +/// Wraps the request's `classify::Response` with [`RetryAfterClassify`] so the inner broadcaster +/// records hints into the breaker's stores. +/// +/// The wrapped classifier is inserted into the request extensions under its own type. The inner +/// [`BroadcastClassification`] reads it from there. The original `classify::Response` extension +/// stays in place for other consumers such as metrics. +#[derive(Clone, Debug)] +pub struct InsertRetryAfterClassify { + inner: S, + http_store: RetryAfterStore, + grpc_store: GrpcRetryPushbackStore, + max_duration: Duration, +} + +// === impl RetryAfterGateParams === + +impl RetryAfterGateParams { + pub fn new(channel_capacity: usize, max_duration: Duration) -> Self { + Self { + channel_capacity, + max_duration, + } + } +} + +// === impl NewRetryAfterGateSet === + +impl NewRetryAfterGateSet { + /// Create a layer that puts a breaker-controlled gate in front of each endpoint. + pub fn layer( + params: RetryAfterGateParams, + ) -> impl svc::layer::Layer + Clone { + svc::layer::mk(move |inner| Self { + inner, + params: params.clone(), + }) + } +} + +impl svc::NewService for NewRetryAfterGateSet +where + T: HasFailureAccrual + svc::Param> + Clone, + N: svc::NewService + Clone, +{ + type Service = NewRetryAfterGate; + + fn new_service(&self, target: T) -> Self::Service { + // Per-target Retry-After cap wins over the stack default. + let max_duration = svc::Param::>::param(&target) + .map(|ra| ra.max_duration) + .unwrap_or(self.params.max_duration); + + // An accrual policy that can never trip costs the same as none at all. Treat it as having + // no breaker so no stores or breaker task get allocated for it. + let accrual = target + .failure_accrual() + .filter(|a| !is_effectively_disabled(a)); + let inner = self.inner.new_service(target); + + NewRetryAfterGate { + inner, + accrual, + channel_capacity: self.params.channel_capacity, + max_duration, + } + } +} + +// === impl NewRetryAfterGate === + +impl svc::NewService for NewRetryAfterGate +where + N: svc::NewService, + S: svc::Service< + http::Request, + Response = http::Response, + Error = Error, + > + Send + + 'static, + S::Future: Send + 'static, +{ + type Service = svc::Gate>; + + fn new_service(&self, target: T) -> Self::Service { + match self.accrual { + None => { + // No breaker for this endpoint. Build the stock classifier with a gate that never + // shuts and no hint stores. + let (params, _gate_tx, _rsps) = + gate::Params::::channel(self.channel_capacity); + tracing::trace!("No failure accrual policy enabled."); + let inner = self.inner.new_service(target); + let broadcast = + BroadcastClassification::::new(params.responses, inner); + svc::Gate::new( + params.gate, + svc::BoxHttp::new(http::BoxResponse::new(broadcast)), + ) + } + Some(ref accrual) => { + // Build the stores this endpoint's breaker reads, spawn the breaker, then wrap the + // classifier so 429 and RESOURCE_EXHAUSTED hints go into those stores. + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let breaker_params = Params { + accrual: Some(accrual.clone()), + channel_capacity: self.channel_capacity, + retry_after_store: http_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + max_duration: self.max_duration, + }; + let params: gate::Params = + , T>>::extract_param( + &breaker_params, + &target, + ); + let inner = self.inner.new_service(target); + let broadcast = + BroadcastClassification::::new(params.responses, inner); + let wrapped = InsertRetryAfterClassify { + inner: broadcast, + http_store, + grpc_store, + max_duration: self.max_duration, + }; + svc::Gate::new( + params.gate, + svc::BoxHttp::new(http::BoxResponse::new(wrapped)), + ) + } + } + } +} + +// === impl InsertRetryAfterClassify === + +impl svc::Service> for InsertRetryAfterClassify +where + S: svc::Service, Response = http::Response, Error = Error>, +{ + type Response = S::Response; + type Error = Error; + type Future = S::Future; + + #[inline] + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, mut req: http::Request) -> Self::Future { + if let Some(classify) = req.extensions().get::().cloned() { + let wrapped = RetryAfterClassify::new_with_max( + classify, + self.http_store.clone(), + self.grpc_store.clone(), + self.max_duration, + ); + req.extensions_mut().insert(wrapped); + } + self.inner.call(req) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use linkerd_app_core::{exp_backoff::ExponentialBackoff, svc::NewService}; + use linkerd_http_classify::ClassifyResponse; + use linkerd_proxy_client_policy::http::StatusRanges; + use linkerd_proxy_client_policy::{ConsecutiveFailures, SuccessRateConfig}; + use std::time::Duration; + + // `super::*` re-exports `http` (`linkerd_app_core::proxy::http`). Its `Response`, `StatusCode`, + // `HeaderValue`, and `header` are used to build the test responses below. + + // Helper to create a default HTTP classifier for testing + fn test_classifier() -> classify::Response { + classify::Response::Http(StatusRanges::default()) + } + + #[derive(Clone)] + struct MockTarget(Option); + + impl HasFailureAccrual for MockTarget { + fn failure_accrual(&self) -> Option { + self.0.clone() + } + } + + impl svc::Param> for MockTarget { + fn param(&self) -> Option { + None + } + } + + fn mk_backoff() -> ExponentialBackoff { + ExponentialBackoff::try_new(Duration::from_secs(1), Duration::from_secs(100), 0.0) + .expect("backoff params are valid") + } + + // Read a target through the gate-set layer and report the accrual the per-endpoint gate + // would see. `None` means the endpoint has no breaker and no added cost. + fn resolved_accrual(accrual: Option) -> Option { + let set = NewRetryAfterGateSet { + inner: |_: MockTarget| (), + params: RetryAfterGateParams::new(1, Duration::from_secs(300)), + }; + set.new_service(MockTarget(accrual)).accrual + } + + // An accrual policy that can never trip is treated the same as a truly absent policy, so it + // spawns no breaker and allocates no hint stores. This pins the cost of a config that is + // present but can never trip to the cost of having no circuit breaking at all. + #[test] + fn effectively_disabled_accrual_produces_no_breaker_task() { + let disabled = FailureAccrual { + consecutive: ConsecutiveFailures { + max_failures: 0, + backoff: mk_backoff(), + }, + success_rate: Some(SuccessRateConfig { + threshold: 0.0, + decay: Duration::from_secs(10), + min_requests: 1, + }), + }; + + assert!( + is_effectively_disabled(&disabled), + "max_failures 0 with a zero success-rate threshold can never trip", + ); + assert_eq!( + resolved_accrual(Some(disabled)), + None, + "an inert accrual policy must resolve to the disabled path, same as None", + ); + + // A genuinely live policy keeps its enabled resolution. + let live = FailureAccrual { + consecutive: ConsecutiveFailures { + max_failures: 3, + backoff: mk_backoff(), + }, + success_rate: None, + }; + assert!(!is_effectively_disabled(&live)); + assert!(resolved_accrual(Some(live)).is_some()); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn wrapped_classifier_records_retry_after() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store); + + // Create a 429 response with Retry-After header + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("30"), + ); + + // start() should parse and record the hint + let _eos = wrapped.start(&rsp); + + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!(hint, Some(Duration::from_secs(30))); + } + + #[test] + fn wrapped_classifier_ignores_non_429() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store); + + // Create a 200 OK response with Retry-After header (shouldn't be recorded) + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::OK; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("30"), + ); + + // start() should NOT record the hint + let _eos = wrapped.start(&rsp); + + // Verify the store is empty + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!(hint, None); + } + + // Check that `RetryAfterConfig.max_duration` caps hints through `new_with_max()`. + // + // A 429 response with a 600s Retry-After header must be capped to 120s in the store + // when the classifier was built with `max_duration = 120s`. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn config_max_duration_caps_hint_at_recording() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let custom_cap = Duration::from_secs(120); + + let wrapped = + RetryAfterClassify::new_with_max(inner, http_store.clone(), grpc_store, custom_cap); + + // Create a 429 response with Retry-After: 600 (exceeds the 120s cap). + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("600"), + ); + + // start() parses the header and records via rate_limit_hint(max_duration). + let _eos = wrapped.start(&rsp); + + // The store must contain 120s instead of 600s + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!( + hint, + Some(custom_cap), + "600s Retry-After should be capped to 120s by max_duration at recording site" + ); + } + + // Check that a hint within the cap is recorded at its actual value (no truncation). + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn config_max_duration_passes_through_uncapped_hint() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let custom_cap = Duration::from_secs(120); + + let wrapped = + RetryAfterClassify::new_with_max(inner, http_store.clone(), grpc_store, custom_cap); + + // Create a 429 response with Retry-After: 60 (below the 120s cap). + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("60"), + ); + + let _eos = wrapped.start(&rsp); + + // The store must contain the actual 60s value, not the cap. + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!( + hint, + Some(Duration::from_secs(60)), + "60s Retry-After should pass through unchanged when below 120s cap" + ); + } +} diff --git a/linkerd/app/outbound/src/http/concrete.rs b/linkerd/app/outbound/src/http/concrete.rs index 5cd77563f4..fef90d752e 100644 --- a/linkerd/app/outbound/src/http/concrete.rs +++ b/linkerd/app/outbound/src/http/concrete.rs @@ -20,7 +20,7 @@ use linkerd_app_core::{ transport::{self, addrs::*}, Error, Infallible, NameAddr, Result, }; -use linkerd_proxy_client_policy::FailureAccrual; +use linkerd_proxy_client_policy::{FailureAccrual, RetryAfterConfig}; use std::{fmt::Debug, net::SocketAddr, sync::Arc}; use tracing::info_span; @@ -76,7 +76,8 @@ impl Outbound { T: svc::Param, T: svc::Param, T: svc::Param, - T: svc::Param, + T: svc::Param>, + T: svc::Param>, T: Clone + Debug + Send + Sync + 'static, // Endpoint resolution. R: Resolve, diff --git a/linkerd/app/outbound/src/http/concrete/balance.rs b/linkerd/app/outbound/src/http/concrete/balance.rs index 71631223ff..98eabd5696 100644 --- a/linkerd/app/outbound/src/http/concrete/balance.rs +++ b/linkerd/app/outbound/src/http/concrete/balance.rs @@ -5,7 +5,6 @@ use crate::{ stack_labels, BackendRef, ParentRef, }; use linkerd_app_core::{ - classify, config::{ConnectConfig, QueueConfig}, proxy::{ api_resolve::{ConcreteAddr, Metadata}, @@ -15,7 +14,9 @@ use linkerd_app_core::{ transport::addrs::*, Error, NameAddr, }; -use linkerd_proxy_client_policy::FailureAccrual; +use linkerd_proxy_client_policy::{ + FailureAccrual, RetryAfterConfig, DEFAULT_RETRY_AFTER_MAX_DURATION, +}; use std::{fmt::Debug, net::SocketAddr}; use tracing::info_span; @@ -71,12 +72,25 @@ impl> svc::Param for Balance { } } +impl>> breaker::HasFailureAccrual for Balance { + fn failure_accrual(&self) -> Option { + self.parent.param() + } +} + +impl>> svc::Param> for Balance { + fn param(&self) -> Option { + self.parent.param() + } +} + impl Balance where // Parent target. T: svc::Param, T: svc::Param, - T: svc::Param, + T: svc::Param>, + T: svc::Param>, T: Clone + Debug + Send + Sync + 'static, { pub(super) fn layer( @@ -137,15 +151,12 @@ where }) .push_on_service(svc::MapErr::layer_boxed()) .lift_new_with_target() - .push( - http::NewClassifyGateSet::::layer_via({ - let channel_capacity = http_queue.capacity; - move |target: &Self| breaker::Params { - accrual: target.parent.param(), - channel_capacity, - } - }), - ) + .push(breaker::NewRetryAfterGateSet::layer( + breaker::RetryAfterGateParams::new( + http_queue.capacity, + DEFAULT_RETRY_AFTER_MAX_DURATION, + ), + )) .push_on_service(svc::OnServiceLayer::new( stack_metrics.layer(stack_labels("http", "endpoint")), )) diff --git a/linkerd/app/outbound/src/http/logical.rs b/linkerd/app/outbound/src/http/logical.rs index cd4093e28c..794f34d377 100644 --- a/linkerd/app/outbound/src/http/logical.rs +++ b/linkerd/app/outbound/src/http/logical.rs @@ -22,7 +22,7 @@ mod tests; pub struct LogicalAddr(pub Addr); /// Configures the flavor of HTTP routing. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum Routes { /// Policy routes. Policy(policy::Params), @@ -35,14 +35,44 @@ pub enum Routes { Endpoint(Remote, Arc), } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug)] pub struct Concrete { target: concrete::Dispatch, authority: Option, parent: T, parent_ref: ParentRef, backend_ref: BackendRef, - failure_accrual: policy::FailureAccrual, + failure_accrual: Option, + retry_after: Option, +} + +impl PartialEq for Concrete { + fn eq(&self, other: &Self) -> bool { + self.target == other.target + && self.authority == other.authority + && self.parent == other.parent + && self.parent_ref == other.parent_ref + && self.backend_ref == other.backend_ref + // failure_accrual and retry_after are left out on purpose. + // They configure the breaker but do not determine backend + // identity, and the backend cache is rebuilt whenever a policy + // update changes them, so they need not take part in the key. + } +} + +impl Eq for Concrete {} + +impl Hash for Concrete { + fn hash(&self, state: &mut H) { + self.target.hash(state); + self.authority.hash(state); + self.parent.hash(state); + self.parent_ref.hash(state); + self.backend_ref.hash(state); + // failure_accrual and retry_after are left out on purpose, + // matching the equality impl. They do not determine identity, + // and SuccessRateConfig contains an f64 that does not impl Hash. + } } #[derive(Debug, thiserror::Error)] @@ -57,7 +87,7 @@ pub struct LogicalError { source: Error, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] enum RouterParams { Policy(policy::Policy), @@ -154,7 +184,8 @@ where target: concrete::Dispatch::Forward(remote, meta), authority: None, parent, - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, }) } Self::Profile(profile) => { @@ -251,9 +282,15 @@ impl svc::Param for Concrete { } } -impl svc::Param for Concrete { - fn param(&self) -> policy::FailureAccrual { - self.failure_accrual +impl svc::Param> for Concrete { + fn param(&self) -> Option { + self.failure_accrual.clone() + } +} + +impl svc::Param> for Concrete { + fn param(&self) -> Option { + self.retry_after } } @@ -267,3 +304,108 @@ impl From for http::HeaderPair { ) } } + +#[cfg(test)] +mod identity_tests { + use super::*; + use crate::http::concrete; + use linkerd_app_core::{exp_backoff::ExponentialBackoff, NameAddr}; + use linkerd_proxy_client_policy::Meta; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + time::Duration, + }; + + fn hash_of(value: &T) -> u64 { + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + hasher.finish() + } + + // Builds a `Concrete<()>` whose five identity fields are fixed, leaving + // the breaker configuration to the caller. The identity fields decide + // which backend the cache key selects. + fn base( + failure_accrual: Option, + retry_after: Option, + ) -> Concrete<()> { + let addr: NameAddr = "example.com:1234".parse().unwrap(); + let meta = Meta::new_default("test"); + Concrete { + target: concrete::Dispatch::Balance(addr.clone(), super::profile::DEFAULT_EWMA), + authority: Some(addr.as_http_authority()), + parent: (), + parent_ref: ParentRef(meta.clone()), + backend_ref: BackendRef(meta), + failure_accrual, + retry_after, + } + } + + fn some_accrual() -> policy::FailureAccrual { + policy::FailureAccrual { + consecutive: policy::ConsecutiveFailures { + max_failures: 3, + backoff: ExponentialBackoff::new_unchecked( + Duration::from_secs(1), + Duration::from_secs(60), + 0.0, + ), + }, + success_rate: None, + } + } + + // The concrete target keeps breaker configuration out of its cache key on + // purpose, so two concretes that differ only in `failure_accrual` must + // compare equal and hash equal. Otherwise a config-only change would rebuild + // the backend cache. + #[test] + fn failure_accrual_does_not_affect_identity() { + let without = base(None, None); + let with = base(Some(some_accrual()), None); + + assert_eq!(without, with); + assert_eq!(hash_of(&without), hash_of(&with)); + } + + // `retry_after` is also kept out of the cache key, so toggling it + // must not change a concrete's identity. + #[test] + fn retry_after_does_not_affect_identity() { + let without = base(None, None); + let with = base( + None, + Some(policy::RetryAfterConfig { + max_duration: Duration::from_secs(30), + }), + ); + + assert_eq!(without, with); + assert_eq!(hash_of(&without), hash_of(&with)); + } + + // Both breaker fields are kept out together. A concrete that holds full + // breaker configuration can be swapped with one that holds none. + #[test] + fn breaker_config_does_not_affect_identity() { + let without = base(None, None); + let with = base( + Some(some_accrual()), + Some(policy::RetryAfterConfig { + max_duration: Duration::from_secs(30), + }), + ); + + assert_eq!(without, with); + assert_eq!(hash_of(&without), hash_of(&with)); + + // Check that an identity field still tells concretes apart, so + // equality is not true for an empty reason. + let other_addr: NameAddr = "other.example.com:1234".parse().unwrap(); + let mut different = base(None, None); + different.target = concrete::Dispatch::Balance(other_addr, super::profile::DEFAULT_EWMA); + assert_ne!(without, different); + } +} diff --git a/linkerd/app/outbound/src/http/logical/policy.rs b/linkerd/app/outbound/src/http/logical/policy.rs index 029ab42f34..2d32a99017 100644 --- a/linkerd/app/outbound/src/http/logical/policy.rs +++ b/linkerd/app/outbound/src/http/logical/policy.rs @@ -11,17 +11,19 @@ pub use self::{ route::{errors, GrpcRouteMetrics, HttpRouteMetrics}, router::{GrpcParams, HttpParams}, }; -pub use linkerd_proxy_client_policy::{ClientPolicy, FailureAccrual}; +pub use linkerd_proxy_client_policy::{ + ClientPolicy, ConsecutiveFailures, FailureAccrual, RetryAfterConfig, +}; /// HTTP or gRPC policy route parameters. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum Params { Http(router::HttpParams), Grpc(router::GrpcParams), } /// A stack module configured by `Params` and some `T`-typed parent target. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub(super) enum Policy { Http(router::Http), Grpc(router::Grpc), diff --git a/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs b/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs index c1a30f2511..28f8be1d8c 100644 --- a/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs +++ b/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs @@ -449,7 +449,8 @@ fn mock_http_route_backend_metrics( Default::default(), ), authority: None, - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, parent: (), parent_ref: parent_ref.clone(), backend_ref: backend_ref.clone(), @@ -505,7 +506,8 @@ fn mock_grpc_route_backend_metrics( Default::default(), ), authority: None, - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, parent: (), parent_ref: parent_ref.clone(), backend_ref: backend_ref.clone(), diff --git a/linkerd/app/outbound/src/http/logical/policy/router.rs b/linkerd/app/outbound/src/http/logical/policy/router.rs index 1b5f2fa045..42c55d5467 100644 --- a/linkerd/app/outbound/src/http/logical/policy/router.rs +++ b/linkerd/app/outbound/src/http/logical/policy/router.rs @@ -12,13 +12,14 @@ use linkerd_http_route as http_route; use linkerd_proxy_client_policy as policy; use std::{fmt::Debug, hash::Hash, sync::Arc}; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub struct Params { pub addr: Addr, pub meta: ParentRef, pub routes: Arc<[http_route::Route>]>, pub backends: Arc<[policy::Backend]>, - pub failure_accrual: policy::FailureAccrual, + pub failure_accrual: Option, + pub retry_after: Option, } pub type HttpParams = @@ -143,6 +144,7 @@ where routes, backends, failure_accrual, + retry_after, } = rts; let mk_concrete = { @@ -162,7 +164,8 @@ where parent: parent.clone(), backend_ref, parent_ref: parent_ref.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), + retry_after, } } }; diff --git a/linkerd/app/outbound/src/http/logical/policy/tests.rs b/linkerd/app/outbound/src/http/logical/policy/tests.rs index ccecc61008..a1ae9c041a 100644 --- a/linkerd/app/outbound/src/http/logical/policy/tests.rs +++ b/linkerd/app/outbound/src/http/logical/policy/tests.rs @@ -120,7 +120,8 @@ async fn header_based_route() { backends: std::iter::once(default_backend.clone()) .chain(Some(special_backend.clone())) .collect(), - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, }); let metrics = HttpRouteMetrics::default(); @@ -231,7 +232,8 @@ async fn http_filter_request_headers() { }], }]), backends: std::iter::once(backend).collect(), - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, } }); diff --git a/linkerd/app/outbound/src/http/logical/profile.rs b/linkerd/app/outbound/src/http/logical/profile.rs index 447c47426d..e6022372f7 100644 --- a/linkerd/app/outbound/src/http/logical/profile.rs +++ b/linkerd/app/outbound/src/http/logical/profile.rs @@ -117,7 +117,8 @@ where target: concrete::Dispatch::Balance(addr.clone(), DEFAULT_EWMA), authority: Some(addr.as_http_authority()), parent: parent.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, }; let backends = std::iter::once(concrete.clone()).collect(); let distribution = Distribution::first_available(std::iter::once(concrete)); @@ -133,7 +134,8 @@ where target: concrete::Dispatch::Balance(t.addr.clone(), DEFAULT_EWMA), authority: Some(t.addr.as_http_authority()), parent: parent.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, }) .collect(); let distribution = Distribution::random_available(targets.iter().cloned().map( @@ -146,7 +148,8 @@ where authority: Some(addr.as_http_authority()), target: concrete::Dispatch::Balance(addr, DEFAULT_EWMA), parent: parent.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + retry_after: None, }; (concrete, weight) }, diff --git a/linkerd/app/outbound/src/http/logical/tests.rs b/linkerd/app/outbound/src/http/logical/tests.rs index d1980a01b3..dfbf4e3a99 100644 --- a/linkerd/app/outbound/src/http/logical/tests.rs +++ b/linkerd/app/outbound/src/http/logical/tests.rs @@ -127,6 +127,18 @@ async fn mk_rsp(status: StatusCode, body: impl ToString) -> Result { .unwrap()) } +async fn mk_rsp_with_retry_after( + status: StatusCode, + retry_after_secs: u64, + body: impl ToString, +) -> Result { + Ok(http::Response::builder() + .status(status) + .header(http::header::RETRY_AFTER, retry_after_secs.to_string()) + .body(http::BoxBody::new(body.to_string())) + .unwrap()) +} + async fn mk_grpc_rsp(code: tonic::Code) -> Result { Ok(http::Response::builder() .version(::http::Version::HTTP_2) @@ -252,7 +264,8 @@ fn mock_http(params: client_policy::http::RouteParams) -> (svc::BoxCloneHttp, Ha meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend]), routes: Arc::new([route]), - failure_accrual: client_policy::FailureAccrual::None, + failure_accrual: None, + retry_after: None, })) } @@ -265,7 +278,8 @@ fn mock_grpc(params: client_policy::grpc::RouteParams) -> (svc::BoxCloneHttp, Ha meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend]), routes: Arc::new([route]), - failure_accrual: client_policy::FailureAccrual::None, + failure_accrual: None, + retry_after: None, })) } diff --git a/linkerd/app/outbound/src/http/logical/tests/basic.rs b/linkerd/app/outbound/src/http/logical/tests/basic.rs index be2986b7a4..b388ebe3dd 100644 --- a/linkerd/app/outbound/src/http/logical/tests/basic.rs +++ b/linkerd/app/outbound/src/http/logical/tests/basic.rs @@ -33,7 +33,8 @@ async fn routes() { meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), - failure_accrual: client_policy::FailureAccrual::None, + failure_accrual: None, + retry_after: None, }))); let target = Target { num: 1, diff --git a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs index 5c8bf979c4..c87ec7f774 100644 --- a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs +++ b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs @@ -50,10 +50,14 @@ async fn consecutive_failures_accrue() { meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), - failure_accrual: client_policy::FailureAccrual::ConsecutiveFailures { - max_failures: 3, - backoff, - }, + failure_accrual: Some(client_policy::FailureAccrual { + consecutive: client_policy::ConsecutiveFailures { + max_failures: 3, + backoff, + }, + success_rate: None, + }), + retry_after: None, }))); let target = Target { num: 1, @@ -212,10 +216,14 @@ async fn balancer_doesnt_select_tripped_breakers() { meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), - failure_accrual: client_policy::FailureAccrual::ConsecutiveFailures { - max_failures: 3, - backoff, - }, + failure_accrual: Some(client_policy::FailureAccrual { + consecutive: client_policy::ConsecutiveFailures { + max_failures: 3, + backoff, + }, + success_rate: None, + }), + retry_after: None, }))); let target = Target { num: 1, @@ -262,3 +270,119 @@ async fn balancer_doesnt_select_tripped_breakers() { assert_rsp(rsp, StatusCode::OK, "endpoint 1").await; } } + +// A Retry-After hint returned by one endpoint must not extend the backoff of +// another endpoint behind the same balancer. Each endpoint runs an independent +// breaker, so the duration hint it honors has to come from its own responses. +// +// Endpoint A returns `Retry-After: 30` while healthy. Endpoint B then trips on +// its own 5xx failures. When B's breaker computes its backoff it must use the +// base delay, not A's 30s hint. The endpoints are driven one at a time so the +// balancer's endpoint selection cannot hide which breaker honors the hint. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn retry_after_hint_does_not_bleed_across_endpoints() { + let _trace = trace::test::with_default_filter(format!( + "{},linkerd_app_outbound=trace,linkerd_stack=trace", + trace::test::DEFAULT_LOG + )); + + let addr_a = SocketAddr::new([192, 0, 2, 41].into(), PORT); + let addr_b = SocketAddr::new([192, 0, 2, 42].into(), PORT); + let dest: NameAddr = format!("{AUTHORITY}:{PORT}") + .parse::() + .expect("dest addr is valid"); + let (svc_a, mut handle_a) = tower_test::mock::pair(); + let (svc_b, mut handle_b) = tower_test::mock::pair(); + let connect = HttpConnect::default() + .service(addr_a, svc_a) + .service(addr_b, svc_b); + let resolve = support::resolver(); + let mut dest_tx = resolve.endpoint_tx(dest.clone()); + let (rt, _shutdown) = runtime(); + let cfg = default_config(); + let stack = Outbound::new(cfg.clone(), rt, &mut Default::default()) + .with_stack(svc::ArcNewService::new(connect)) + .push_http_cached(resolve) + .into_inner(); + + let backend = default_backend(&dest); + // Keep the probe delay above the failfast timeout so the gate shuts and + // the queue reaches failfast before the breaker probes again. + let min_backoff = cfg.http_request_queue.failfast_timeout + Duration::from_secs(1); + let backoff = ExponentialBackoff::try_new(min_backoff, min_backoff * 6, 0.0).unwrap(); + let mut backoffs = backoff.stream(); + let (_route_tx, routes) = + watch::channel(Routes::Policy(policy::Params::Http(policy::HttpParams { + addr: dest.into(), + meta: ParentRef(client_policy::Meta::new_default("parent")), + backends: Arc::new([backend.clone()]), + routes: Arc::new([default_route(backend)]), + failure_accrual: Some(client_policy::FailureAccrual { + consecutive: client_policy::ConsecutiveFailures { + max_failures: 3, + backoff, + }, + success_rate: None, + }), + retry_after: None, + }))); + let target = Target { + num: 1, + version: http::Variant::H2, + routes, + }; + let svc = stack.new_service(target); + + // Resolve endpoint A alone and let it answer a 429 with a 30s Retry-After. + // A 429 is not a consecutive failure, so A stays open. The + // hint is recorded for whichever breaker reads A's responses. + dest_tx.add([(addr_a, Default::default())]).unwrap(); + task::yield_now().await; + info!("Recording a Retry-After hint from endpoint A"); + handle_a.allow(1); + let rsp = send_req(svc.clone(), http_get()); + serve( + &mut handle_a, + mk_rsp_with_retry_after(StatusCode::TOO_MANY_REQUESTS, 30, "endpoint a"), + ) + .await; + assert_rsp(rsp, StatusCode::TOO_MANY_REQUESTS, "endpoint a").await; + + // Drop A and resolve endpoint B in its place. The balancer now routes + // solely to B, so B's breaker is the one exercised below. + dest_tx.remove([addr_a]).unwrap(); + dest_tx.add([(addr_b, Default::default())]).unwrap(); + task::yield_now().await; + + info!("Tripping endpoint B on its own failures"); + for i in 1..=3 { + info!("Sending bad request {i}/3 to endpoint B"); + handle_b.allow(1); + let rsp = send_req(svc.clone(), http_get()); + serve( + &mut handle_b, + mk_rsp(StatusCode::INTERNAL_SERVER_ERROR, "endpoint b"), + ) + .await; + assert_rsp(rsp, StatusCode::INTERNAL_SERVER_ERROR, "endpoint b").await; + } + + // B is now shut. Advance exactly one base backoff interval. Without the + // hint, B reopens here and admits a probe. If A's 30s hint had passed to + // B's store, B would still be shut for far longer and the probe below + // would fail in failfast. + info!("Waiting out the base backoff"); + backoffs.next().await; + task::yield_now().await; + + info!("Probing endpoint B after the base backoff"); + handle_b.allow(1); + let rsp = send_req(svc.clone(), http_get()); + tokio::time::timeout( + Duration::from_secs(10), + serve(&mut handle_b, mk_rsp(StatusCode::OK, "endpoint b")), + ) + .await + .expect("probe must reach endpoint B once the base backoff elapses"); + assert_rsp(rsp, StatusCode::OK, "endpoint b").await; +} diff --git a/linkerd/app/outbound/src/ingress.rs b/linkerd/app/outbound/src/ingress.rs index 0014384341..c0895594f3 100644 --- a/linkerd/app/outbound/src/ingress.rs +++ b/linkerd/app/outbound/src/ingress.rs @@ -542,9 +542,17 @@ fn policy_routes( ref http2, .. } => { - let (routes, failure_accrual) = match version { - http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual), - http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual), + let (routes, failure_accrual, retry_after) = match version { + http::Variant::Http1 => ( + http1.routes.clone(), + http1.failure_accrual.clone(), + http1.retry_after, + ), + http::Variant::H2 => ( + http2.routes.clone(), + http2.failure_accrual.clone(), + http2.retry_after, + ), }; Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -553,6 +561,7 @@ fn policy_routes( backends: policy.backends.clone(), routes, failure_accrual, + retry_after, }, ))) } @@ -561,38 +570,47 @@ fn policy_routes( // target? probably should make an error route instead? policy::Protocol::Http1(policy::http::Http1 { ref routes, - failure_accrual, + ref failure_accrual, + retry_after, + .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { addr, meta, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), + retry_after, }, ))), policy::Protocol::Http2(policy::http::Http2 { ref routes, - failure_accrual, + ref failure_accrual, + retry_after, + .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { addr, meta, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), + retry_after, }, ))), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, - failure_accrual, + ref failure_accrual, + retry_after, + .. }) => Some(http::Routes::Policy(http::policy::Params::Grpc( http::policy::GrpcParams { addr, meta, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), + retry_after, }, ))), _ => None, diff --git a/linkerd/app/outbound/src/sidecar.rs b/linkerd/app/outbound/src/sidecar.rs index d04d5c6543..29492de2b5 100644 --- a/linkerd/app/outbound/src/sidecar.rs +++ b/linkerd/app/outbound/src/sidecar.rs @@ -231,26 +231,40 @@ impl HttpSidecar { // protocol changes but remains HTTP-ish, we propagate those // changes. If the protocol flips to an opaque protocol, we ignore // the protocol update. - let (routes, failure_accrual) = match policy.protocol { + let (routes, failure_accrual, retry_after) = match policy.protocol { policy::Protocol::Detect { ref http1, ref http2, .. } => match version { - http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual), - http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual), + http::Variant::Http1 => ( + http1.routes.clone(), + http1.failure_accrual.clone(), + http1.retry_after, + ), + http::Variant::H2 => ( + http2.routes.clone(), + http2.failure_accrual.clone(), + http2.retry_after, + ), }, policy::Protocol::Http1(policy::http::Http1 { ref routes, - failure_accrual, - }) => (routes.clone(), failure_accrual), + ref failure_accrual, + retry_after, + .. + }) => (routes.clone(), failure_accrual.clone(), retry_after), policy::Protocol::Http2(policy::http::Http2 { ref routes, - failure_accrual, - }) => (routes.clone(), failure_accrual), + ref failure_accrual, + retry_after, + .. + }) => (routes.clone(), failure_accrual.clone(), retry_after), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, - failure_accrual, + ref failure_accrual, + retry_after, + .. }) => { return Some(http::Routes::Policy(http::policy::Params::Grpc( http::policy::GrpcParams { @@ -258,7 +272,8 @@ impl HttpSidecar { meta: parent_ref, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), + retry_after, }, ))) } @@ -277,6 +292,7 @@ impl HttpSidecar { routes, backends: policy.backends.clone(), failure_accrual, + retry_after, }, ))) } diff --git a/linkerd/app/test/src/resolver/client_policy.rs b/linkerd/app/test/src/resolver/client_policy.rs index f57d3f9ced..d5487d529c 100644 --- a/linkerd/app/test/src/resolver/client_policy.rs +++ b/linkerd/app/test/src/resolver/client_policy.rs @@ -85,11 +85,15 @@ impl ClientPolicies { timeout: Duration::from_secs(10), http1: http::Http1 { routes: http_routes.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, http2: http::Http2 { routes: http_routes, - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, opaque: opaq::Opaque { routes: Some(opaq::Route { diff --git a/linkerd/http/classify/src/channel.rs b/linkerd/http/classify/src/channel.rs index 48f83ff7f7..b3f24d3460 100644 --- a/linkerd/http/classify/src/channel.rs +++ b/linkerd/http/classify/src/channel.rs @@ -36,12 +36,23 @@ pub struct ResponseBody { state: Option>, } -#[derive(Debug)] +/// State for tracking a response classification. struct State { + /// The classifier for this response. classify: C, + /// Channel to send classification results. tx: mpsc::Sender, } +impl Debug for State { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("State") + .field("classify", &self.classify) + .field("tx", &self.tx) + .finish() + } +} + // === impl BroadcastClassification === impl BroadcastClassification { @@ -125,7 +136,9 @@ where Err(e) => { if let Some(State { classify, tx }) = this.state.take() { let class = classify.error(&e); - let _ = tx.try_send(class); + if tx.try_send(class).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } Poll::Ready(Err(e)) } @@ -152,7 +165,9 @@ where None => { // Classify the stream if it has reached a `None`. if let Some(State { classify, tx }) = this.state.take() { - let _ = tx.try_send(classify.eos(None)); + if tx.try_send(classify.eos(None)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } Poll::Ready(None) } @@ -160,7 +175,9 @@ where // Classify the stream if this is a trailers frame. if let trls @ Some(_) = data.trailers_ref() { if let Some(State { classify, tx }) = this.state.take() { - let _ = tx.try_send(classify.eos(trls)); + if tx.try_send(classify.eos(trls)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } } Poll::Ready(Some(Ok(data))) @@ -168,7 +185,9 @@ where Some(Err(e)) => { // Classify the stream if an error has been encountered. if let Some(State { classify, tx }) = this.state.take() { - let _ = tx.try_send(classify.error(&e)); + if tx.try_send(classify.error(&e)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } Poll::Ready(Some(Err(e))) } @@ -192,7 +211,9 @@ impl PinnedDrop for ResponseBody { tracing::debug!("dropping ResponseBody"); if let Some(State { classify, tx }) = self.project().state.take() { tracing::debug!("sending EOS to classify"); - let _ = tx.try_send(classify.eos(None)); + if tx.try_send(classify.eos(None)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } } } diff --git a/linkerd/proxy/client-policy/src/grpc.rs b/linkerd/proxy/client-policy/src/grpc.rs index ef159749b9..e77517d6d1 100644 --- a/linkerd/proxy/client-policy/src/grpc.rs +++ b/linkerd/proxy/client-policy/src/grpc.rs @@ -1,4 +1,4 @@ -use crate::FailureAccrual; +use crate::{FailureAccrual, LoadBiasConfig, RetryAfterConfig}; use linkerd_exp_backoff::ExponentialBackoff; use linkerd_http_route::{grpc, http}; use std::{sync::Arc, time}; @@ -18,13 +18,15 @@ pub struct RouteParams { } // TODO HTTP2 settings -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq)] pub struct Grpc { pub routes: Arc<[Route]>, /// Configures how endpoints accrue observed failures. // TODO(ver) Move this to backends and scope to endpoints. - pub failure_accrual: FailureAccrual, + pub failure_accrual: Option, + pub load_bias: Option, + pub retry_after: Option, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -65,7 +67,9 @@ impl Default for Grpc { fn default() -> Self { Self { routes: Arc::new([]), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, } } } @@ -152,6 +156,9 @@ pub mod proto { #[error("{0}")] Timeout(#[from] crate::http::proto::InvalidTimeouts), + + #[error("invalid duration for {0}: {1}")] + InvalidDuration(&'static str, #[source] prost_types::DurationError), } #[derive(Debug, thiserror::Error)] @@ -196,7 +203,20 @@ pub mod proto { .collect::, _>>()?; Ok(Self { routes, - failure_accrual: proto.failure_accrual.try_into()?, + failure_accrual: proto + .failure_accrual + .map(FailureAccrual::try_from) + .transpose()?, + load_bias: proto + .load_bias + .map(crate::proto::try_load_bias_config) + .transpose() + .map_err(|e| InvalidGrpcRoute::InvalidDuration("load_bias", e))?, + retry_after: proto + .retry_after + .map(crate::proto::try_retry_after_config) + .transpose() + .map_err(|e| InvalidGrpcRoute::InvalidDuration("retry_after", e))?, }) } @@ -400,3 +420,133 @@ pub mod proto { } } } + +#[cfg(all(test, feature = "proto"))] +mod tests { + use super::*; + use crate::{LoadBiasConfig, RetryAfterConfig}; + + #[test] + fn grpc_default_has_none_load_bias_and_retry_after() { + let g = Grpc::default(); + assert_eq!( + g.load_bias, None, + "Grpc::default() must produce None for load_bias" + ); + assert_eq!( + g.retry_after, None, + "Grpc::default() must produce None for retry_after" + ); + } + + #[test] + fn grpc_try_from_parses_load_bias_and_retry_after_from_proto() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Grpc { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: 4, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 9, + nanos: 0, + }), + }), + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 90, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Grpc::try_from(overrides, proto).unwrap(); + + assert_eq!( + result.load_bias, + Some(LoadBiasConfig { + enabled: true, + penalty: std::time::Duration::from_secs(4), + penalty_decay: std::time::Duration::from_secs(9), + }) + ); + assert_eq!( + result.retry_after, + Some(RetryAfterConfig { + max_duration: std::time::Duration::from_secs(90), + }) + ); + } + + #[test] + fn grpc_try_from_rejects_invalid_load_bias_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Grpc { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + penalty_decay: None, + }), + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Grpc::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid load_bias duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("load_bias"), + "error should mention 'load_bias', got: {msg}" + ); + } + + #[test] + fn grpc_try_from_rejects_invalid_retry_after_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Grpc { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Grpc::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid retry_after duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("retry_after"), + "error should mention 'retry_after', got: {msg}" + ); + } +} diff --git a/linkerd/proxy/client-policy/src/http.rs b/linkerd/proxy/client-policy/src/http.rs index 98a78ea450..fa209d9861 100644 --- a/linkerd/proxy/client-policy/src/http.rs +++ b/linkerd/proxy/client-policy/src/http.rs @@ -1,4 +1,4 @@ -use crate::FailureAccrual; +use crate::{FailureAccrual, LoadBiasConfig, RetryAfterConfig}; use linkerd_exp_backoff::ExponentialBackoff; use linkerd_http_route::http; use std::{ops::RangeInclusive, sync::Arc, time}; @@ -18,21 +18,25 @@ pub struct RouteParams { } // TODO: keepalive settings, etc. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq)] pub struct Http1 { pub routes: Arc<[Route]>, /// Configures how endpoints accrue observed failures. - pub failure_accrual: FailureAccrual, + pub failure_accrual: Option, + pub load_bias: Option, + pub retry_after: Option, } // TODO: window sizes, etc -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq)] pub struct Http2 { pub routes: Arc<[Route]>, /// Configures how endpoints accrue observed failures. - pub failure_accrual: FailureAccrual, + pub failure_accrual: Option, + pub load_bias: Option, + pub retry_after: Option, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -84,7 +88,9 @@ impl Default for Http1 { fn default() -> Self { Self { routes: Arc::new([]), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, } } } @@ -95,7 +101,9 @@ impl Default for Http2 { fn default() -> Self { Self { routes: Arc::new([]), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, } } } @@ -167,6 +175,9 @@ pub mod proto { #[error("{0}")] Retry(#[from] InvalidRetry), + + #[error("invalid duration for {0}: {1}")] + InvalidDuration(&'static str, #[source] prost_types::DurationError), } #[derive(Debug, thiserror::Error)] @@ -229,7 +240,20 @@ pub mod proto { .collect::, _>>()?; Ok(Self { routes, - failure_accrual: proto.failure_accrual.try_into()?, + failure_accrual: proto + .failure_accrual + .map(FailureAccrual::try_from) + .transpose()?, + load_bias: proto + .load_bias + .map(crate::proto::try_load_bias_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("load_bias", e))?, + retry_after: proto + .retry_after + .map(crate::proto::try_retry_after_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("retry_after", e))?, }) } } @@ -246,7 +270,20 @@ pub mod proto { .collect::, _>>()?; Ok(Self { routes, - failure_accrual: proto.failure_accrual.try_into()?, + failure_accrual: proto + .failure_accrual + .map(FailureAccrual::try_from) + .transpose()?, + load_bias: proto + .load_bias + .map(crate::proto::try_load_bias_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("load_bias", e))?, + retry_after: proto + .retry_after + .map(crate::proto::try_retry_after_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("retry_after", e))?, }) } } @@ -479,3 +516,292 @@ pub mod proto { } } } + +#[cfg(all(test, feature = "proto"))] +mod tests { + use super::*; + use crate::{LoadBiasConfig, RetryAfterConfig}; + + #[test] + fn http1_default_has_none_load_bias_and_retry_after() { + let h1 = Http1::default(); + assert_eq!( + h1.load_bias, None, + "Http1::default() must produce None for load_bias" + ); + assert_eq!( + h1.retry_after, None, + "Http1::default() must produce None for retry_after" + ); + } + + #[test] + fn http2_default_has_none_load_bias_and_retry_after() { + let h2 = Http2::default(); + assert_eq!( + h2.load_bias, None, + "Http2::default() must produce None for load_bias" + ); + assert_eq!( + h2.retry_after, None, + "Http2::default() must produce None for retry_after" + ); + } + + #[test] + fn http1_try_from_parses_load_bias_and_retry_after_from_proto() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: 3, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 8, + nanos: 0, + }), + }), + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 60, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto).unwrap(); + + assert_eq!( + result.load_bias, + Some(LoadBiasConfig { + enabled: true, + penalty: std::time::Duration::from_secs(3), + penalty_decay: std::time::Duration::from_secs(8), + }) + ); + assert_eq!( + result.retry_after, + Some(RetryAfterConfig { + max_duration: std::time::Duration::from_secs(60), + }) + ); + } + + #[test] + fn http2_try_from_parses_load_bias_and_retry_after_from_proto() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: false, + penalty: Some(prost_types::Duration { + seconds: 2, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 12, + nanos: 0, + }), + }), + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 45, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto).unwrap(); + + assert_eq!( + result.load_bias, + Some(LoadBiasConfig { + enabled: false, + penalty: std::time::Duration::from_secs(2), + penalty_decay: std::time::Duration::from_secs(12), + }) + ); + assert_eq!( + result.retry_after, + Some(RetryAfterConfig { + max_duration: std::time::Duration::from_secs(45), + }) + ); + } + + #[test] + fn http1_try_from_produces_none_when_load_bias_and_retry_after_absent() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto).unwrap(); + + assert_eq!(result.load_bias, None); + assert_eq!(result.retry_after, None); + } + + #[test] + fn http1_try_from_rejects_invalid_load_bias_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + penalty_decay: None, + }), + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid load_bias duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("load_bias"), + "error should mention 'load_bias', got: {msg}" + ); + } + + #[test] + fn http1_try_from_rejects_invalid_retry_after_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -10, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid retry_after duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("retry_after"), + "error should mention 'retry_after', got: {msg}" + ); + } + + #[test] + fn http2_try_from_rejects_invalid_load_bias_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + }), + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid load_bias penalty_decay duration must produce an error" + ); + } + + #[test] + fn http2_try_from_produces_none_when_load_bias_and_retry_after_absent() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto).unwrap(); + + assert_eq!(result.load_bias, None); + assert_eq!(result.retry_after, None); + } + + #[test] + fn http2_try_from_rejects_invalid_retry_after_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -10, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid retry_after duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("retry_after"), + "error should mention 'retry_after', got: {msg}" + ); + } +} diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index 4a51b3a1d3..52e7516a1d 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -12,7 +12,7 @@ pub mod tls; pub use linkerd_http_route as route; pub use linkerd_proxy_api_resolve::Metadata as EndpointMetadata; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub struct ClientPolicy { pub parent: Arc, pub protocol: Protocol, @@ -25,7 +25,8 @@ pub struct ClientPolicyOverrides { } // TODO additional server configs (e.g. concurrency limits, window sizes, etc) -#[derive(Clone, Debug, Eq, Hash, PartialEq)] +#[derive(Clone, Debug, PartialEq)] +#[allow(clippy::large_enum_variant)] pub enum Protocol { Detect { timeout: time::Duration, @@ -122,19 +123,70 @@ pub struct PeakEwma { pub default_rtt: time::Duration, } +/// Failure-accrual circuit breaking configuration for an endpoint. +/// +/// When present the proxy tracks endpoint health and trips the breaker +/// based on the configured modes. `consecutive` is always present, +/// `success_rate` is present only when explicitly configured via an +/// annotation. +/// +/// `Eq` and `Hash` cannot be derived because `SuccessRateConfig` +/// contains an `f64` threshold field. +#[derive(Clone, Debug, PartialEq)] +pub struct FailureAccrual { + pub consecutive: ConsecutiveFailures, + pub success_rate: Option, +} + +/// Consecutive-failure tracking parameters. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum FailureAccrual { - /// Endpoints do not become unavailable due to observed failures. - None, - /// Endpoints are marked as unavailable when `max_failures` consecutive - /// failures are observed. - ConsecutiveFailures { - /// The number of consecutive failures after which an endpoint becomes - /// unavailable. - max_failures: usize, - /// Backoff for probing the endpoint when it is in a failed state. - backoff: linkerd_exp_backoff::ExponentialBackoff, - }, +pub struct ConsecutiveFailures { + /// The number of consecutive failures after which an endpoint becomes + /// unavailable. + pub max_failures: usize, + /// Backoff for probing the endpoint when it is in a failed state. + pub backoff: linkerd_exp_backoff::ExponentialBackoff, +} + +/// Success-rate-based circuit breaking configuration. +/// +/// The circuit trips when the EWMA success rate drops below `threshold` +/// after at least `min_requests` have been observed. `decay` controls +/// the EWMA window. +/// +/// `Eq` and `Hash` cannot be derived because `threshold` is `f64` and +/// IEEE 754 defines `NaN != NaN`, which is `!Eq` (and `!Hash`). +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct SuccessRateConfig { + pub threshold: f64, + pub decay: time::Duration, + pub min_requests: u32, +} + +/// Load-biasing configuration for rate-aware load balancing. +/// +/// When `enabled`, the balancer adds `penalty` to the load estimate of a +/// rate-limited endpoint so the power-of-two-choices selection prefers +/// other endpoints. `penalty_decay` is the window over which that penalty +/// fades. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct LoadBiasConfig { + pub enabled: bool, + pub penalty: time::Duration, + pub penalty_decay: time::Duration, +} + +/// Default maximum Retry-After duration when no configuration is provided. +pub const DEFAULT_RETRY_AFTER_MAX_DURATION: time::Duration = time::Duration::from_secs(300); + +/// Configuration for honoring rate-limit hints. +/// +/// `max_duration` caps the delay the proxy will take from a Retry-After +/// header or a gRPC pushback trailer. Longer hints are clamped to this +/// value. When absent, [`DEFAULT_RETRY_AFTER_MAX_DURATION`] applies. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct RetryAfterConfig { + pub max_duration: time::Duration, } // === impl ClientPolicy === @@ -171,11 +223,15 @@ impl ClientPolicy { timeout, http1: http::Http1 { routes: HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, http2: http::Http2 { routes: HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, opaque: opaq::Opaque { @@ -211,11 +267,15 @@ impl ClientPolicy { timeout, http1: http::Http1 { routes: NO_HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, http2: http::Http2 { routes: NO_HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, + load_bias: None, + retry_after: None, }, opaque: opaq::Opaque { routes: None }, }, @@ -313,12 +373,6 @@ impl fmt::Display for Meta { // === impl FailureAccrual === -impl Default for FailureAccrual { - fn default() -> Self { - Self::None - } -} - #[cfg(feature = "proto")] pub mod proto { use super::*; @@ -423,6 +477,8 @@ pub mod proto { Backoff(#[from] InvalidBackoff), #[error("missing {0}")] Missing(&'static str), + #[error("invalid value: {0}")] + InvalidValue(&'static str), } #[derive(Debug, thiserror::Error)] @@ -655,7 +711,9 @@ pub mod proto { }; let dispatcher = match backend.kind { - Some(backend::Kind::Balancer(BalanceP2c { discovery, load })) => { + Some(backend::Kind::Balancer(BalanceP2c { + discovery, load, .. + })) => { let discovery = discovery .ok_or(InvalidBackend::Missing("balancer discovery"))? .try_into()?; @@ -718,31 +776,78 @@ pub mod proto { } } + /// Lower bound on the success-rate decay window. + /// + /// A decay shorter than this is below the moving average's usable + /// resolution and would be silently clamped later, leaving the breaker + /// tracking a window the operator did not ask for. The value matches the + /// moving average's own floor (`linkerd_ewma::MIN_DECAY`), kept here as a + /// literal to avoid a dependency on another crate for one constant. + const MIN_SUCCESS_RATE_DECAY: time::Duration = time::Duration::from_millis(1); + + /// Upper bound on the success-rate cold-start request floor. + /// + /// A floor above this counts as misconfiguration. Cold-start would never + /// end, so the policy could never trip. The ceiling is a safety limit, not + /// a protocol constraint, and may be raised if a workload genuinely needs a + /// larger warm-up sample. + const MAX_SUCCESS_RATE_MIN_REQUESTS: u32 = 1_000_000; + impl TryFrom for FailureAccrual { type Error = InvalidFailureAccrual; fn try_from(accrual: outbound::FailureAccrual) -> Result { - use outbound::failure_accrual::{self, ConsecutiveFailures}; - let kind = accrual.kind.ok_or(InvalidFailureAccrual::Missing("kind"))?; - match kind { - failure_accrual::Kind::ConsecutiveFailures(ConsecutiveFailures { - max_failures, - backoff, - }) => Ok(FailureAccrual::ConsecutiveFailures { + use outbound::failure_accrual::ConsecutiveFailures as ProtoConsecutiveFailures; + let ProtoConsecutiveFailures { + max_failures, + backoff, + } = accrual + .consecutive_failures + .ok_or(InvalidFailureAccrual::Missing("consecutive_failures"))?; + let backoff = + backoff + .map(try_backoff) + .transpose()? + .ok_or(InvalidFailureAccrual::Missing( + "consecutive failures backoff", + ))?; + let success_rate = accrual + .success_rate + .map(|sr| -> Result { + if !(0.0..=1.0).contains(&sr.threshold) { + return Err(InvalidFailureAccrual::InvalidValue( + "success rate threshold must be between 0.0 and 1.0", + )); + } + let decay = sr + .decay + .map(time::Duration::try_from) + .transpose() + .map_err(|e| InvalidFailureAccrual::Backoff(InvalidBackoff::Duration(e)))? + .unwrap_or(time::Duration::from_secs(10)); + if decay < MIN_SUCCESS_RATE_DECAY { + return Err(InvalidFailureAccrual::InvalidValue( + "success rate decay is below the moving-average minimum", + )); + } + if sr.min_requests > MAX_SUCCESS_RATE_MIN_REQUESTS { + return Err(InvalidFailureAccrual::InvalidValue( + "success rate min_requests exceeds the supported maximum", + )); + } + Ok(SuccessRateConfig { + threshold: sr.threshold, + decay, + min_requests: sr.min_requests, + }) + }) + .transpose()?; + Ok(FailureAccrual { + consecutive: ConsecutiveFailures { max_failures: max_failures as usize, - backoff: backoff.map(try_backoff).transpose()?.ok_or( - InvalidFailureAccrual::Missing("consecutive failures backoff"), - )?, - }), - } - } - } - - impl TryFrom> for FailureAccrual { - type Error = InvalidFailureAccrual; - fn try_from(accrual: Option) -> Result { - accrual - .map(Self::try_from) - .unwrap_or(Ok(FailureAccrual::None)) + backoff, + }, + success_rate, + }) } } @@ -761,7 +866,441 @@ pub mod proto { .map(time::Duration::try_from) .transpose()? .ok_or(InvalidBackoff::Missing("max_backoff"))?; + // `jitter_ratio` is defined as `float` in the proto API. Use f64 here + // to match `linkerd_exp_backoff::ExponentialBackoff::jitter: f64`. linkerd_exp_backoff::ExponentialBackoff::try_new(min, max, jitter_ratio as f64) .map_err(Into::into) } + + pub(crate) fn try_load_bias_config( + proto: outbound::LoadBiasConfig, + ) -> Result { + Ok(LoadBiasConfig { + enabled: proto.enabled, + penalty: proto + .penalty + .map(time::Duration::try_from) + .transpose()? + .unwrap_or(time::Duration::from_secs(5)), + penalty_decay: proto + .penalty_decay + .map(time::Duration::try_from) + .transpose()? + .unwrap_or(time::Duration::from_secs(10)), + }) + } + + pub(crate) fn try_retry_after_config( + proto: outbound::RetryAfterConfig, + ) -> Result { + Ok(RetryAfterConfig { + max_duration: proto + .max_duration + .map(time::Duration::try_from) + .transpose()? + .unwrap_or(DEFAULT_RETRY_AFTER_MAX_DURATION), + }) + } +} + +#[cfg(all(test, feature = "proto"))] +mod tests { + use super::*; + use linkerd2_proxy_api::outbound; + use proto::InvalidFailureAccrual; + + #[test] + fn default_retry_after_max_duration_is_300_seconds() { + assert_eq!( + DEFAULT_RETRY_AFTER_MAX_DURATION, + time::Duration::from_secs(300), + ); + } + + #[test] + fn try_load_bias_config_parses_valid_input_with_all_fields() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: 7, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 15, + nanos: 0, + }), + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!( + result, + LoadBiasConfig { + enabled: true, + penalty: time::Duration::from_secs(7), + penalty_decay: time::Duration::from_secs(15), + } + ); + } + + #[test] + fn try_load_bias_config_uses_default_5s_penalty_when_missing() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: Some(prost_types::Duration { + seconds: 20, + nanos: 0, + }), + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!(result.penalty, time::Duration::from_secs(5)); + } + + #[test] + fn try_load_bias_config_uses_default_10s_decay_when_missing() { + let proto = outbound::LoadBiasConfig { + enabled: false, + penalty: Some(prost_types::Duration { + seconds: 3, + nanos: 0, + }), + penalty_decay: None, + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!(result.penalty_decay, time::Duration::from_secs(10)); + } + + #[test] + fn try_load_bias_config_uses_both_defaults_when_all_durations_missing() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: None, + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!(result.penalty, time::Duration::from_secs(5)); + assert_eq!(result.penalty_decay, time::Duration::from_secs(10)); + } + + #[test] + fn try_load_bias_config_rejects_invalid_penalty_duration() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + penalty_decay: None, + }; + let result = proto::try_load_bias_config(proto); + assert!(result.is_err(), "negative penalty must be rejected"); + } + + #[test] + fn try_load_bias_config_rejects_invalid_penalty_decay_duration() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: Some(prost_types::Duration { + seconds: -5, + nanos: 0, + }), + }; + let result = proto::try_load_bias_config(proto); + assert!(result.is_err(), "negative penalty_decay must be rejected"); + } + + #[test] + fn try_retry_after_config_parses_valid_input() { + let proto = outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 120, + nanos: 0, + }), + }; + let result = proto::try_retry_after_config(proto).unwrap(); + assert_eq!( + result, + RetryAfterConfig { + max_duration: time::Duration::from_secs(120), + } + ); + } + + #[test] + fn try_retry_after_config_defaults_to_300s_when_max_duration_missing() { + let proto = outbound::RetryAfterConfig { max_duration: None }; + let result = proto::try_retry_after_config(proto).unwrap(); + assert_eq!( + result.max_duration, + time::Duration::from_secs(300), + "missing max_duration should default to DEFAULT_RETRY_AFTER_MAX_DURATION (300s)" + ); + } + + #[test] + fn try_retry_after_config_rejects_invalid_duration() { + let proto = outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + }; + let result = proto::try_retry_after_config(proto); + assert!(result.is_err(), "negative max_duration must be rejected"); + } + + #[test] + fn failure_accrual_try_from_returns_error_when_consecutive_failures_missing() { + let accrual = outbound::FailureAccrual { + consecutive_failures: None, + success_rate: None, + }; + let result = FailureAccrual::try_from(accrual); + assert!( + result.is_err(), + "missing consecutive_failures must return an error" + ); + let err = result.unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("consecutive_failures"), + "error message should mention 'consecutive_failures', got: {msg}" + ); + } + + #[test] + fn failure_accrual_try_from_parses_valid_consecutive_failures() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(outbound::failure_accrual::ConsecutiveFailures { + max_failures: 5, + backoff: Some(outbound::ExponentialBackoff { + min_backoff: Some(prost_types::Duration { + seconds: 1, + nanos: 0, + }), + max_backoff: Some(prost_types::Duration { + seconds: 60, + nanos: 0, + }), + jitter_ratio: 0.5, + }), + }), + success_rate: None, + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + assert_eq!(result.consecutive.max_failures, 5); + assert_eq!(result.success_rate, None); + } + + #[test] + fn failure_accrual_try_from_errors_when_backoff_missing() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(outbound::failure_accrual::ConsecutiveFailures { + max_failures: 3, + backoff: None, + }), + success_rate: None, + }; + let result = FailureAccrual::try_from(accrual); + assert!( + result.is_err(), + "missing backoff in consecutive_failures must be an error" + ); + } + + fn valid_consecutive_failures() -> outbound::failure_accrual::ConsecutiveFailures { + outbound::failure_accrual::ConsecutiveFailures { + max_failures: 5, + backoff: Some(outbound::ExponentialBackoff { + min_backoff: Some(prost_types::Duration { + seconds: 1, + nanos: 0, + }), + max_backoff: Some(prost_types::Duration { + seconds: 60, + nanos: 0, + }), + jitter_ratio: 0.5, + }), + } + } + + #[test] + fn failure_accrual_try_from_rejects_subminimum_decay() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + // 500us sits below the moving average's usable minimum of 1ms. + decay: Some(prost_types::Duration { + seconds: 0, + nanos: 500_000, + }), + min_requests: 100, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a decay below the moving-average minimum must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_excessive_min_requests() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + decay: Some(prost_types::Duration { + seconds: 10, + nanos: 0, + }), + // A floor this high means cold-start never ends. + min_requests: 2_000_000, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a min_requests above the safety ceiling must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_parses_success_rate() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.5, + decay: Some(prost_types::Duration { + seconds: 10, + nanos: 0, + }), + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + let sr = result + .success_rate + .expect("a configured success rate must survive conversion"); + assert_eq!(sr.threshold, 0.5); + assert_eq!(sr.decay, time::Duration::from_secs(10)); + assert_eq!(sr.min_requests, 20); + } + + #[test] + fn failure_accrual_try_from_defaults_success_rate_decay_when_absent() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.5, + // An unset decay falls back to the conversion's 10s default. + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + let sr = result.success_rate.expect("success rate must be present"); + assert_eq!(sr.decay, time::Duration::from_secs(10)); + } + + #[test] + fn failure_accrual_try_from_rejects_threshold_above_one() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 1.5, + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a threshold above 1.0 must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_threshold_below_zero() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: -0.1, + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a threshold below 0.0 must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_nan_threshold() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: f64::NAN, + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + // NaN compares false against both range bounds, so it falls outside + // the accepted interval and is rejected. + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a NaN threshold must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_negative_success_rate_decay() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + decay: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + // A negative prost duration fails the `time::Duration` conversion. It + // surfaces as a backoff duration error, not a value error. + assert!( + matches!( + result, + Err(InvalidFailureAccrual::Backoff( + proto::InvalidBackoff::Duration(_) + )) + ), + "a negative decay must surface a duration error, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_accepts_one_millisecond_decay_boundary() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + // Exactly the 1ms floor. The lower bound is inclusive. + decay: Some(prost_types::Duration { + seconds: 0, + nanos: 1_000_000, + }), + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + let sr = result.success_rate.expect("success rate must be present"); + assert_eq!(sr.decay, time::Duration::from_millis(1)); + } }