From 3084e89f73e71fc51aeb8eff2c4a632b1c39d76d Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 16:59:51 +0200 Subject: [PATCH 01/18] build: bump linkerd2-proxy-api to 0.19.0 Update the linkerd2-proxy-api dependency to version 0.19.0, which contains the restructured FailureAccrual type with a direct consecutive_failures field, the new LoadBiasConfig and RetryAfterConfig message types, and the ejection field on BalanceP2c required by config conversions. The circuit breaker work depends on these. Signed-off-by: Alejandro Martinez Ruiz --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- linkerd/app/integration/src/policy.rs | 1 + linkerd/proxy/client-policy/src/lib.rs | 30 ++++++++++++++------------ 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 743cfe1206..935bddf994 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2693,9 +2693,9 @@ dependencies = [ [[package]] name = "linkerd2-proxy-api" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9e3b341ca4992feaf43a4d2bdbfe2081aa3e2b9a503753544ce55242af6342" +checksum = "e0cd682795e8f91ea36e2131a2f7021da136c74f6d3cd3d9eabbf7842511042d" dependencies = [ "h2", "http", diff --git a/Cargo.toml b/Cargo.toml index bc2dfc2e95..3993cafc1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -137,7 +137,7 @@ default-features = false features = ["tokio", "tracing"] [workspace.dependencies.linkerd2-proxy-api] -version = "0.18.0" +version = "0.19.0" [workspace.dependencies.rand] version = "0.9" diff --git a/linkerd/app/integration/src/policy.rs b/linkerd/app/integration/src/policy.rs index 6b2faa6d76..b3a31cb5ca 100644 --- a/linkerd/app/integration/src/policy.rs +++ b/linkerd/app/integration/src/policy.rs @@ -205,6 +205,7 @@ pub fn backend(dst: impl ToString) -> outbound::Backend { default_rtt: Some(Duration::from_millis(30).try_into().unwrap()), decay: Some(Duration::from_secs(10).try_into().unwrap()), })), + ejection: None, })), } } diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index 4a51b3a1d3..cf18e6366b 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -655,7 +655,9 @@ pub mod proto { }; let dispatcher = match backend.kind { - Some(backend::Kind::Balancer(BalanceP2c { discovery, load })) => { + Some(backend::Kind::Balancer(BalanceP2c { + discovery, load, .. + })) => { let discovery = discovery .ok_or(InvalidBackend::Missing("balancer discovery"))? .try_into()?; @@ -721,19 +723,19 @@ pub mod proto { impl TryFrom for FailureAccrual { type Error = InvalidFailureAccrual; fn try_from(accrual: outbound::FailureAccrual) -> Result { - use outbound::failure_accrual::{self, ConsecutiveFailures}; - let kind = accrual.kind.ok_or(InvalidFailureAccrual::Missing("kind"))?; - match kind { - failure_accrual::Kind::ConsecutiveFailures(ConsecutiveFailures { - max_failures, - backoff, - }) => Ok(FailureAccrual::ConsecutiveFailures { - max_failures: max_failures as usize, - backoff: backoff.map(try_backoff).transpose()?.ok_or( - InvalidFailureAccrual::Missing("consecutive failures backoff"), - )?, - }), - } + use outbound::failure_accrual::ConsecutiveFailures; + let ConsecutiveFailures { + max_failures, + backoff, + } = accrual + .consecutive_failures + .ok_or(InvalidFailureAccrual::Missing("consecutive_failures"))?; + Ok(FailureAccrual::ConsecutiveFailures { + max_failures: max_failures as usize, + backoff: backoff.map(try_backoff).transpose()?.ok_or( + InvalidFailureAccrual::Missing("consecutive failures backoff"), + )?, + }) } } From 326ba73923b9bec55b255353700f4764f394c102 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 17:00:12 +0200 Subject: [PATCH 02/18] feat(client-policy): add config types for load biasing, retry-after, and success rate Introduce SuccessRateConfig, LoadBiasConfig, and RetryAfterConfig as typed domain models that bridge proto messages to proxy internals. This captures the configuration surface for each respective feature. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/proxy/client-policy/src/lib.rs | 64 ++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index cf18e6366b..0b08529448 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -137,6 +137,36 @@ pub enum FailureAccrual { }, } +/// Success-rate-based circuit breaking configuration. +/// +/// The circuit trips when the EWMA success rate drops below `threshold` +/// after at least `min_requests` have been observed. `decay` controls +/// the EWMA window. +/// +/// `Eq` and `Hash` cannot be derived because `threshold` is `f64` and +/// IEEE 754 defines `NaN != NaN`, which is `!Eq` (and `!Hash`). +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct SuccessRateConfig { + pub threshold: f64, + pub decay: time::Duration, + pub min_requests: u32, +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct LoadBiasConfig { + pub enabled: bool, + pub penalty: time::Duration, + pub penalty_decay: time::Duration, +} + +/// Default maximum Retry-After duration when no configuration is provided. +pub const DEFAULT_RETRY_AFTER_MAX_DURATION: time::Duration = time::Duration::from_secs(300); + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct RetryAfterConfig { + pub max_duration: time::Duration, +} + // === impl ClientPolicy === impl ClientPolicy { @@ -423,6 +453,8 @@ pub mod proto { Backoff(#[from] InvalidBackoff), #[error("missing {0}")] Missing(&'static str), + #[error("invalid value: {0}")] + InvalidValue(&'static str), } #[derive(Debug, thiserror::Error)] @@ -763,7 +795,39 @@ pub mod proto { .map(time::Duration::try_from) .transpose()? .ok_or(InvalidBackoff::Missing("max_backoff"))?; + // `jitter_ratio` is defined as `float` in the proto API. Use f64 here + // to match `linkerd_exp_backoff::ExponentialBackoff::jitter: f64`. linkerd_exp_backoff::ExponentialBackoff::try_new(min, max, jitter_ratio as f64) .map_err(Into::into) } + + pub(crate) fn try_load_bias_config( + proto: outbound::LoadBiasConfig, + ) -> Result { + Ok(LoadBiasConfig { + enabled: proto.enabled, + penalty: proto + .penalty + .map(time::Duration::try_from) + .transpose()? + .unwrap_or(time::Duration::from_secs(5)), + penalty_decay: proto + .penalty_decay + .map(time::Duration::try_from) + .transpose()? + .unwrap_or(time::Duration::from_secs(10)), + }) + } + + pub(crate) fn try_retry_after_config( + proto: outbound::RetryAfterConfig, + ) -> Result { + Ok(RetryAfterConfig { + max_duration: proto + .max_duration + .map(time::Duration::try_from) + .transpose()? + .unwrap_or(DEFAULT_RETRY_AFTER_MAX_DURATION), + }) + } } From 9c59e4230db0566044c5db1f8c421c4d67639b54 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 17:00:31 +0200 Subject: [PATCH 03/18] feat(client-policy): add load_bias and retry_after fields to protocol variants Plug LoadBiasConfig and RetryAfterConfig in the Http1, Http2, and Grpc protocol variants as Option fields defaulting to None. Consumer sites that destructure these variants (sidecar.rs, ingress.rs) gain `..` to skip the new fields until later work uses them in the balance layer. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/outbound/src/discover.rs | 4 +++ linkerd/app/outbound/src/ingress.rs | 3 ++ linkerd/app/outbound/src/sidecar.rs | 3 ++ .../app/test/src/resolver/client_policy.rs | 4 +++ linkerd/proxy/client-policy/src/grpc.rs | 19 ++++++++++- linkerd/proxy/client-policy/src/http.rs | 33 ++++++++++++++++++- linkerd/proxy/client-policy/src/lib.rs | 8 +++++ 7 files changed, 72 insertions(+), 2 deletions(-) diff --git a/linkerd/app/outbound/src/discover.rs b/linkerd/app/outbound/src/discover.rs index 54693d5f5c..f956fd5ffb 100644 --- a/linkerd/app/outbound/src/discover.rs +++ b/linkerd/app/outbound/src/discover.rs @@ -278,10 +278,14 @@ fn policy_for_backend( http1: policy::http::Http1 { routes: routes.clone(), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, http2: policy::http::Http2 { routes, failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, opaque, }; diff --git a/linkerd/app/outbound/src/ingress.rs b/linkerd/app/outbound/src/ingress.rs index 0014384341..3f154314de 100644 --- a/linkerd/app/outbound/src/ingress.rs +++ b/linkerd/app/outbound/src/ingress.rs @@ -562,6 +562,7 @@ fn policy_routes( policy::Protocol::Http1(policy::http::Http1 { ref routes, failure_accrual, + .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { addr, @@ -574,6 +575,7 @@ fn policy_routes( policy::Protocol::Http2(policy::http::Http2 { ref routes, failure_accrual, + .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { addr, @@ -586,6 +588,7 @@ fn policy_routes( policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, failure_accrual, + .. }) => Some(http::Routes::Policy(http::policy::Params::Grpc( http::policy::GrpcParams { addr, diff --git a/linkerd/app/outbound/src/sidecar.rs b/linkerd/app/outbound/src/sidecar.rs index d04d5c6543..2bb605b0ac 100644 --- a/linkerd/app/outbound/src/sidecar.rs +++ b/linkerd/app/outbound/src/sidecar.rs @@ -243,14 +243,17 @@ impl HttpSidecar { policy::Protocol::Http1(policy::http::Http1 { ref routes, failure_accrual, + .. }) => (routes.clone(), failure_accrual), policy::Protocol::Http2(policy::http::Http2 { ref routes, failure_accrual, + .. }) => (routes.clone(), failure_accrual), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, failure_accrual, + .. }) => { return Some(http::Routes::Policy(http::policy::Params::Grpc( http::policy::GrpcParams { diff --git a/linkerd/app/test/src/resolver/client_policy.rs b/linkerd/app/test/src/resolver/client_policy.rs index f57d3f9ced..044346c1fa 100644 --- a/linkerd/app/test/src/resolver/client_policy.rs +++ b/linkerd/app/test/src/resolver/client_policy.rs @@ -86,10 +86,14 @@ impl ClientPolicies { http1: http::Http1 { routes: http_routes.clone(), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, http2: http::Http2 { routes: http_routes, failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, opaque: opaq::Opaque { routes: Some(opaq::Route { diff --git a/linkerd/proxy/client-policy/src/grpc.rs b/linkerd/proxy/client-policy/src/grpc.rs index ef159749b9..3f78fd8c68 100644 --- a/linkerd/proxy/client-policy/src/grpc.rs +++ b/linkerd/proxy/client-policy/src/grpc.rs @@ -1,4 +1,4 @@ -use crate::FailureAccrual; +use crate::{FailureAccrual, LoadBiasConfig, RetryAfterConfig}; use linkerd_exp_backoff::ExponentialBackoff; use linkerd_http_route::{grpc, http}; use std::{sync::Arc, time}; @@ -25,6 +25,8 @@ pub struct Grpc { /// Configures how endpoints accrue observed failures. // TODO(ver) Move this to backends and scope to endpoints. pub failure_accrual: FailureAccrual, + pub load_bias: Option, + pub retry_after: Option, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -66,6 +68,8 @@ impl Default for Grpc { Self { routes: Arc::new([]), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, } } } @@ -152,6 +156,9 @@ pub mod proto { #[error("{0}")] Timeout(#[from] crate::http::proto::InvalidTimeouts), + + #[error("invalid duration for {0}: {1}")] + InvalidDuration(&'static str, #[source] prost_types::DurationError), } #[derive(Debug, thiserror::Error)] @@ -197,6 +204,16 @@ pub mod proto { Ok(Self { routes, failure_accrual: proto.failure_accrual.try_into()?, + load_bias: proto + .load_bias + .map(crate::proto::try_load_bias_config) + .transpose() + .map_err(|e| InvalidGrpcRoute::InvalidDuration("load_bias", e))?, + retry_after: proto + .retry_after + .map(crate::proto::try_retry_after_config) + .transpose() + .map_err(|e| InvalidGrpcRoute::InvalidDuration("retry_after", e))?, }) } diff --git a/linkerd/proxy/client-policy/src/http.rs b/linkerd/proxy/client-policy/src/http.rs index 98a78ea450..077832ff5b 100644 --- a/linkerd/proxy/client-policy/src/http.rs +++ b/linkerd/proxy/client-policy/src/http.rs @@ -1,4 +1,4 @@ -use crate::FailureAccrual; +use crate::{FailureAccrual, LoadBiasConfig, RetryAfterConfig}; use linkerd_exp_backoff::ExponentialBackoff; use linkerd_http_route::http; use std::{ops::RangeInclusive, sync::Arc, time}; @@ -24,6 +24,8 @@ pub struct Http1 { /// Configures how endpoints accrue observed failures. pub failure_accrual: FailureAccrual, + pub load_bias: Option, + pub retry_after: Option, } // TODO: window sizes, etc @@ -33,6 +35,8 @@ pub struct Http2 { /// Configures how endpoints accrue observed failures. pub failure_accrual: FailureAccrual, + pub load_bias: Option, + pub retry_after: Option, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -85,6 +89,8 @@ impl Default for Http1 { Self { routes: Arc::new([]), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, } } } @@ -96,6 +102,8 @@ impl Default for Http2 { Self { routes: Arc::new([]), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, } } } @@ -167,6 +175,9 @@ pub mod proto { #[error("{0}")] Retry(#[from] InvalidRetry), + + #[error("invalid duration for {0}: {1}")] + InvalidDuration(&'static str, #[source] prost_types::DurationError), } #[derive(Debug, thiserror::Error)] @@ -230,6 +241,16 @@ pub mod proto { Ok(Self { routes, failure_accrual: proto.failure_accrual.try_into()?, + load_bias: proto + .load_bias + .map(crate::proto::try_load_bias_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("load_bias", e))?, + retry_after: proto + .retry_after + .map(crate::proto::try_retry_after_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("retry_after", e))?, }) } } @@ -247,6 +268,16 @@ pub mod proto { Ok(Self { routes, failure_accrual: proto.failure_accrual.try_into()?, + load_bias: proto + .load_bias + .map(crate::proto::try_load_bias_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("load_bias", e))?, + retry_after: proto + .retry_after + .map(crate::proto::try_retry_after_config) + .transpose() + .map_err(|e| InvalidHttpRoute::InvalidDuration("retry_after", e))?, }) } } diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index 0b08529448..90c4d33bd2 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -202,10 +202,14 @@ impl ClientPolicy { http1: http::Http1 { routes: HTTP_ROUTES.clone(), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, http2: http::Http2 { routes: HTTP_ROUTES.clone(), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, opaque: opaq::Opaque { @@ -242,10 +246,14 @@ impl ClientPolicy { http1: http::Http1 { routes: NO_HTTP_ROUTES.clone(), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, http2: http::Http2 { routes: NO_HTTP_ROUTES.clone(), failure_accrual: Default::default(), + load_bias: None, + retry_after: None, }, opaque: opaq::Opaque { routes: None }, }, From 58cf9da66521d53ae31e6453f7659efcdf2859af Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 16:44:17 +0200 Subject: [PATCH 04/18] test(client-policy): add unit tests for config types and proto conversions Signed-off-by: Alejandro Martinez Ruiz --- linkerd/proxy/client-policy/src/grpc.rs | 130 +++++++++++ linkerd/proxy/client-policy/src/http.rs | 289 ++++++++++++++++++++++++ linkerd/proxy/client-policy/src/lib.rs | 219 ++++++++++++++++++ 3 files changed, 638 insertions(+) diff --git a/linkerd/proxy/client-policy/src/grpc.rs b/linkerd/proxy/client-policy/src/grpc.rs index 3f78fd8c68..233ee18e2e 100644 --- a/linkerd/proxy/client-policy/src/grpc.rs +++ b/linkerd/proxy/client-policy/src/grpc.rs @@ -417,3 +417,133 @@ pub mod proto { } } } + +#[cfg(all(test, feature = "proto"))] +mod tests { + use super::*; + use crate::{LoadBiasConfig, RetryAfterConfig}; + + #[test] + fn grpc_default_has_none_load_bias_and_retry_after() { + let g = Grpc::default(); + assert_eq!( + g.load_bias, None, + "Grpc::default() must produce None for load_bias" + ); + assert_eq!( + g.retry_after, None, + "Grpc::default() must produce None for retry_after" + ); + } + + #[test] + fn grpc_try_from_parses_load_bias_and_retry_after_from_proto() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Grpc { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: 4, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 9, + nanos: 0, + }), + }), + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 90, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Grpc::try_from(overrides, proto).unwrap(); + + assert_eq!( + result.load_bias, + Some(LoadBiasConfig { + enabled: true, + penalty: std::time::Duration::from_secs(4), + penalty_decay: std::time::Duration::from_secs(9), + }) + ); + assert_eq!( + result.retry_after, + Some(RetryAfterConfig { + max_duration: std::time::Duration::from_secs(90), + }) + ); + } + + #[test] + fn grpc_try_from_rejects_invalid_load_bias_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Grpc { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + penalty_decay: None, + }), + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Grpc::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid load_bias duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("load_bias"), + "error should mention 'load_bias', got: {msg}" + ); + } + + #[test] + fn grpc_try_from_rejects_invalid_retry_after_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Grpc { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Grpc::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid retry_after duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("retry_after"), + "error should mention 'retry_after', got: {msg}" + ); + } +} diff --git a/linkerd/proxy/client-policy/src/http.rs b/linkerd/proxy/client-policy/src/http.rs index 077832ff5b..b7f4a11f47 100644 --- a/linkerd/proxy/client-policy/src/http.rs +++ b/linkerd/proxy/client-policy/src/http.rs @@ -510,3 +510,292 @@ pub mod proto { } } } + +#[cfg(all(test, feature = "proto"))] +mod tests { + use super::*; + use crate::{LoadBiasConfig, RetryAfterConfig}; + + #[test] + fn http1_default_has_none_load_bias_and_retry_after() { + let h1 = Http1::default(); + assert_eq!( + h1.load_bias, None, + "Http1::default() must produce None for load_bias" + ); + assert_eq!( + h1.retry_after, None, + "Http1::default() must produce None for retry_after" + ); + } + + #[test] + fn http2_default_has_none_load_bias_and_retry_after() { + let h2 = Http2::default(); + assert_eq!( + h2.load_bias, None, + "Http2::default() must produce None for load_bias" + ); + assert_eq!( + h2.retry_after, None, + "Http2::default() must produce None for retry_after" + ); + } + + #[test] + fn http1_try_from_parses_load_bias_and_retry_after_from_proto() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: 3, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 8, + nanos: 0, + }), + }), + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 60, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto).unwrap(); + + assert_eq!( + result.load_bias, + Some(LoadBiasConfig { + enabled: true, + penalty: std::time::Duration::from_secs(3), + penalty_decay: std::time::Duration::from_secs(8), + }) + ); + assert_eq!( + result.retry_after, + Some(RetryAfterConfig { + max_duration: std::time::Duration::from_secs(60), + }) + ); + } + + #[test] + fn http2_try_from_parses_load_bias_and_retry_after_from_proto() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: false, + penalty: Some(prost_types::Duration { + seconds: 2, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 12, + nanos: 0, + }), + }), + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 45, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto).unwrap(); + + assert_eq!( + result.load_bias, + Some(LoadBiasConfig { + enabled: false, + penalty: std::time::Duration::from_secs(2), + penalty_decay: std::time::Duration::from_secs(12), + }) + ); + assert_eq!( + result.retry_after, + Some(RetryAfterConfig { + max_duration: std::time::Duration::from_secs(45), + }) + ); + } + + #[test] + fn http1_try_from_produces_none_when_load_bias_and_retry_after_absent() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto).unwrap(); + + assert_eq!(result.load_bias, None); + assert_eq!(result.retry_after, None); + } + + #[test] + fn http1_try_from_rejects_invalid_load_bias_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + penalty_decay: None, + }), + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid load_bias duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("load_bias"), + "error should mention 'load_bias', got: {msg}" + ); + } + + #[test] + fn http1_try_from_rejects_invalid_retry_after_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http1 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -10, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http1::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid retry_after duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("retry_after"), + "error should mention 'retry_after', got: {msg}" + ); + } + + #[test] + fn http2_try_from_rejects_invalid_load_bias_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: Some(outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + }), + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid load_bias penalty_decay duration must produce an error" + ); + } + + #[test] + fn http2_try_from_produces_none_when_load_bias_and_retry_after_absent() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: None, + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto).unwrap(); + + assert_eq!(result.load_bias, None); + assert_eq!(result.retry_after, None); + } + + #[test] + fn http2_try_from_rejects_invalid_retry_after_duration() { + use linkerd2_proxy_api::outbound; + + let proto = outbound::proxy_protocol::Http2 { + routes: vec![], + failure_accrual: None, + load_bias: None, + retry_after: Some(outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -10, + nanos: 0, + }), + }), + }; + + let overrides = crate::ClientPolicyOverrides { + export_hostname_labels: false, + }; + let result = Http2::try_from(overrides, proto); + assert!( + result.is_err(), + "invalid retry_after duration must produce an error" + ); + let msg = format!("{}", result.unwrap_err()); + assert!( + msg.contains("retry_after"), + "error should mention 'retry_after', got: {msg}" + ); + } +} diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index 90c4d33bd2..d088a7f44a 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -839,3 +839,222 @@ pub mod proto { }) } } + +#[cfg(all(test, feature = "proto"))] +mod tests { + use super::*; + use linkerd2_proxy_api::outbound; + + #[test] + fn default_retry_after_max_duration_is_300_seconds() { + assert_eq!( + DEFAULT_RETRY_AFTER_MAX_DURATION, + time::Duration::from_secs(300), + ); + } + + #[test] + fn try_load_bias_config_parses_valid_input_with_all_fields() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: 7, + nanos: 0, + }), + penalty_decay: Some(prost_types::Duration { + seconds: 15, + nanos: 0, + }), + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!( + result, + LoadBiasConfig { + enabled: true, + penalty: time::Duration::from_secs(7), + penalty_decay: time::Duration::from_secs(15), + } + ); + } + + #[test] + fn try_load_bias_config_uses_default_5s_penalty_when_missing() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: Some(prost_types::Duration { + seconds: 20, + nanos: 0, + }), + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!(result.penalty, time::Duration::from_secs(5)); + } + + #[test] + fn try_load_bias_config_uses_default_10s_decay_when_missing() { + let proto = outbound::LoadBiasConfig { + enabled: false, + penalty: Some(prost_types::Duration { + seconds: 3, + nanos: 0, + }), + penalty_decay: None, + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!(result.penalty_decay, time::Duration::from_secs(10)); + } + + #[test] + fn try_load_bias_config_uses_both_defaults_when_all_durations_missing() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: None, + }; + let result = proto::try_load_bias_config(proto).unwrap(); + assert_eq!(result.penalty, time::Duration::from_secs(5)); + assert_eq!(result.penalty_decay, time::Duration::from_secs(10)); + } + + #[test] + fn try_load_bias_config_rejects_invalid_penalty_duration() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + penalty_decay: None, + }; + let result = proto::try_load_bias_config(proto); + assert!(result.is_err(), "negative penalty must be rejected"); + } + + #[test] + fn try_load_bias_config_rejects_invalid_penalty_decay_duration() { + let proto = outbound::LoadBiasConfig { + enabled: true, + penalty: None, + penalty_decay: Some(prost_types::Duration { + seconds: -5, + nanos: 0, + }), + }; + let result = proto::try_load_bias_config(proto); + assert!(result.is_err(), "negative penalty_decay must be rejected"); + } + + #[test] + fn try_retry_after_config_parses_valid_input() { + let proto = outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: 120, + nanos: 0, + }), + }; + let result = proto::try_retry_after_config(proto).unwrap(); + assert_eq!( + result, + RetryAfterConfig { + max_duration: time::Duration::from_secs(120), + } + ); + } + + #[test] + fn try_retry_after_config_defaults_to_300s_when_max_duration_missing() { + let proto = outbound::RetryAfterConfig { max_duration: None }; + let result = proto::try_retry_after_config(proto).unwrap(); + assert_eq!( + result.max_duration, + time::Duration::from_secs(300), + "missing max_duration should default to DEFAULT_RETRY_AFTER_MAX_DURATION (300s)" + ); + } + + #[test] + fn try_retry_after_config_rejects_invalid_duration() { + let proto = outbound::RetryAfterConfig { + max_duration: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + }; + let result = proto::try_retry_after_config(proto); + assert!(result.is_err(), "negative max_duration must be rejected"); + } + + #[test] + fn failure_accrual_try_from_returns_error_when_consecutive_failures_missing() { + let accrual = outbound::FailureAccrual { + consecutive_failures: None, + success_rate: None, + }; + let result = FailureAccrual::try_from(accrual); + assert!( + result.is_err(), + "missing consecutive_failures must return an error" + ); + let err = result.unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("consecutive_failures"), + "error message should mention 'consecutive_failures', got: {msg}" + ); + } + + #[test] + fn failure_accrual_try_from_returns_none_for_option_none() { + let result = FailureAccrual::try_from(None); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), FailureAccrual::None); + } + + #[test] + fn failure_accrual_try_from_parses_valid_consecutive_failures() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(outbound::failure_accrual::ConsecutiveFailures { + max_failures: 5, + backoff: Some(outbound::ExponentialBackoff { + min_backoff: Some(prost_types::Duration { + seconds: 1, + nanos: 0, + }), + max_backoff: Some(prost_types::Duration { + seconds: 60, + nanos: 0, + }), + jitter_ratio: 0.5, + }), + }), + success_rate: None, + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + match result { + FailureAccrual::ConsecutiveFailures { + max_failures, + backoff: _, + } => { + assert_eq!(max_failures, 5); + } + other => panic!("expected ConsecutiveFailures, got {:?}", other), + } + } + + #[test] + fn failure_accrual_try_from_errors_when_backoff_missing() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(outbound::failure_accrual::ConsecutiveFailures { + max_failures: 3, + backoff: None, + }), + success_rate: None, + }; + let result = FailureAccrual::try_from(accrual); + assert!( + result.is_err(), + "missing backoff in consecutive_failures must be an error" + ); + } +} From 8675c7f4f6904a3816ee4d4857d92db5fa2157e3 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Fri, 8 May 2026 10:17:45 +0200 Subject: [PATCH 05/18] refactor(outbound): hand-write PartialEq, Eq, and Hash on Concrete Concrete moves from derived PartialEq, Eq, and Hash to manual implementations. All three exclude failure_accrual, which cannot implement Eq or Hash because SuccessRateConfig holds f64 fields. Identity over the remaining fields stays the same. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/outbound/src/http/logical.rs | 28 +++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/linkerd/app/outbound/src/http/logical.rs b/linkerd/app/outbound/src/http/logical.rs index cd4093e28c..dfba20df11 100644 --- a/linkerd/app/outbound/src/http/logical.rs +++ b/linkerd/app/outbound/src/http/logical.rs @@ -35,7 +35,7 @@ pub enum Routes { Endpoint(Remote, Arc), } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug)] pub struct Concrete { target: concrete::Dispatch, authority: Option, @@ -45,6 +45,32 @@ pub struct Concrete { failure_accrual: policy::FailureAccrual, } +impl PartialEq for Concrete { + fn eq(&self, other: &Self) -> bool { + self.target == other.target + && self.authority == other.authority + && self.parent == other.parent + && self.parent_ref == other.parent_ref + && self.backend_ref == other.backend_ref + // failure_accrual intentionally excluded (does not + // determine identity) + } +} + +impl Eq for Concrete {} + +impl Hash for Concrete { + fn hash(&self, state: &mut H) { + self.target.hash(state); + self.authority.hash(state); + self.parent.hash(state); + self.parent_ref.hash(state); + self.backend_ref.hash(state); + // failure_accrual intentionally excluded (SuccessRateConfig + // contains f64 which does not impl Hash). + } +} + #[derive(Debug, thiserror::Error)] #[error("no route")] pub struct NoRoute; From 8e3af34dc843c488ff0fdd5d85fa5a6fd4c7ffbb Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Fri, 8 May 2026 10:22:20 +0200 Subject: [PATCH 06/18] feat(classify): log dropped classifications on full channel When the broadcast channel is full, a classification result was discarded without any trace. Each send site on the response future and body now reports the drop at debug level, so backpressure on the classification channel becomes observable instead of silent. State also gains a hand-written Debug implementation in place of the derived one, so only the classifier needs to be printable and the class type no longer has to be. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/http/classify/src/channel.rs | 33 +++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/linkerd/http/classify/src/channel.rs b/linkerd/http/classify/src/channel.rs index 48f83ff7f7..b3f24d3460 100644 --- a/linkerd/http/classify/src/channel.rs +++ b/linkerd/http/classify/src/channel.rs @@ -36,12 +36,23 @@ pub struct ResponseBody { state: Option>, } -#[derive(Debug)] +/// State for tracking a response classification. struct State { + /// The classifier for this response. classify: C, + /// Channel to send classification results. tx: mpsc::Sender, } +impl Debug for State { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("State") + .field("classify", &self.classify) + .field("tx", &self.tx) + .finish() + } +} + // === impl BroadcastClassification === impl BroadcastClassification { @@ -125,7 +136,9 @@ where Err(e) => { if let Some(State { classify, tx }) = this.state.take() { let class = classify.error(&e); - let _ = tx.try_send(class); + if tx.try_send(class).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } Poll::Ready(Err(e)) } @@ -152,7 +165,9 @@ where None => { // Classify the stream if it has reached a `None`. if let Some(State { classify, tx }) = this.state.take() { - let _ = tx.try_send(classify.eos(None)); + if tx.try_send(classify.eos(None)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } Poll::Ready(None) } @@ -160,7 +175,9 @@ where // Classify the stream if this is a trailers frame. if let trls @ Some(_) = data.trailers_ref() { if let Some(State { classify, tx }) = this.state.take() { - let _ = tx.try_send(classify.eos(trls)); + if tx.try_send(classify.eos(trls)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } } Poll::Ready(Some(Ok(data))) @@ -168,7 +185,9 @@ where Some(Err(e)) => { // Classify the stream if an error has been encountered. if let Some(State { classify, tx }) = this.state.take() { - let _ = tx.try_send(classify.error(&e)); + if tx.try_send(classify.error(&e)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } Poll::Ready(Some(Err(e))) } @@ -192,7 +211,9 @@ impl PinnedDrop for ResponseBody { tracing::debug!("dropping ResponseBody"); if let Some(State { classify, tx }) = self.project().state.take() { tracing::debug!("sending EOS to classify"); - let _ = tx.try_send(classify.eos(None)); + if tx.try_send(classify.eos(None)).is_err() { + tracing::debug!("Classification dropped due to channel backpressure"); + } } } } From e832a385332b0854eb694c8bd7eedee38b5a2a1c Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:46:51 +0200 Subject: [PATCH 07/18] refactor(client-policy): make FailureAccrual a struct FailureAccrual changes from an enum with None and ConsecutiveFailures variants into a struct holding a consecutive field plus an optional success_rate field, and consumers now hold an Option in place of the former None variant. With success_rate present, the proto conversion reads that field, which it dropped before, and rejects degenerate settings. A threshold outside the zero-to-one range, a decay below the moving-average floor, or a cold-start request count above a safety ceiling all return an error rather than a breaker that can never trip or tracks an unusable window. The struct cannot derive Eq or Hash because SuccessRateConfig keeps an f64 threshold, so those impls are dropped from ClientPolicy, Protocol, Http1, Http2, and Grpc. The migration itself keeps the same runtime behavior. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/outbound/src/discover.rs | 4 +- linkerd/app/outbound/src/http/breaker.rs | 13 +- linkerd/app/outbound/src/http/concrete.rs | 2 +- .../app/outbound/src/http/concrete/balance.rs | 2 +- linkerd/app/outbound/src/http/logical.rs | 14 +- .../app/outbound/src/http/logical/policy.rs | 6 +- .../policy/route/backend/metrics/tests.rs | 4 +- .../src/http/logical/policy/router.rs | 6 +- .../outbound/src/http/logical/policy/tests.rs | 4 +- .../app/outbound/src/http/logical/profile.rs | 6 +- .../app/outbound/src/http/logical/tests.rs | 4 +- .../outbound/src/http/logical/tests/basic.rs | 2 +- .../src/http/logical/tests/failure_accrual.rs | 22 +- linkerd/app/outbound/src/ingress.rs | 16 +- linkerd/app/outbound/src/sidecar.rs | 16 +- .../app/test/src/resolver/client_policy.rs | 4 +- linkerd/proxy/client-policy/src/grpc.rs | 11 +- linkerd/proxy/client-policy/src/http.rs | 22 +- linkerd/proxy/client-policy/src/lib.rs | 222 +++++++++++++----- 19 files changed, 252 insertions(+), 128 deletions(-) diff --git a/linkerd/app/outbound/src/discover.rs b/linkerd/app/outbound/src/discover.rs index f956fd5ffb..e8cfffdf43 100644 --- a/linkerd/app/outbound/src/discover.rs +++ b/linkerd/app/outbound/src/discover.rs @@ -277,13 +277,13 @@ fn policy_for_backend( timeout, http1: policy::http::Http1 { routes: routes.clone(), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, http2: policy::http::Http2 { routes, - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index 302f5f7f2d..f8177757f8 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -7,9 +7,9 @@ mod consecutive_failures; use self::consecutive_failures::ConsecutiveFailures; /// Params configuring a circuit breaker stack. -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] pub(crate) struct Params { - pub(crate) accrual: FailureAccrual, + pub(crate) accrual: Option, pub(crate) channel_capacity: usize, } @@ -20,16 +20,15 @@ impl svc::ExtractParam, T> for Params { let (prms, gate, rsps) = gate::Params::channel(self.channel_capacity); match self.accrual { - FailureAccrual::None => { + None => { // No failure accrual for this target; construct a gate // that will never close. tracing::trace!("No failure accrual policy enabled."); prms } - FailureAccrual::ConsecutiveFailures { - max_failures, - backoff, - } => { + Some(ref accrual) => { + let max_failures = accrual.consecutive.max_failures; + let backoff = accrual.consecutive.backoff; tracing::trace!( max_failures, backoff = ?backoff, diff --git a/linkerd/app/outbound/src/http/concrete.rs b/linkerd/app/outbound/src/http/concrete.rs index 5cd77563f4..99afba72f6 100644 --- a/linkerd/app/outbound/src/http/concrete.rs +++ b/linkerd/app/outbound/src/http/concrete.rs @@ -76,7 +76,7 @@ impl Outbound { T: svc::Param, T: svc::Param, T: svc::Param, - T: svc::Param, + T: svc::Param>, T: Clone + Debug + Send + Sync + 'static, // Endpoint resolution. R: Resolve, diff --git a/linkerd/app/outbound/src/http/concrete/balance.rs b/linkerd/app/outbound/src/http/concrete/balance.rs index 71631223ff..5a7463bbb5 100644 --- a/linkerd/app/outbound/src/http/concrete/balance.rs +++ b/linkerd/app/outbound/src/http/concrete/balance.rs @@ -76,7 +76,7 @@ where // Parent target. T: svc::Param, T: svc::Param, - T: svc::Param, + T: svc::Param>, T: Clone + Debug + Send + Sync + 'static, { pub(super) fn layer( diff --git a/linkerd/app/outbound/src/http/logical.rs b/linkerd/app/outbound/src/http/logical.rs index dfba20df11..64fec21ce3 100644 --- a/linkerd/app/outbound/src/http/logical.rs +++ b/linkerd/app/outbound/src/http/logical.rs @@ -22,7 +22,7 @@ mod tests; pub struct LogicalAddr(pub Addr); /// Configures the flavor of HTTP routing. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum Routes { /// Policy routes. Policy(policy::Params), @@ -42,7 +42,7 @@ pub struct Concrete { parent: T, parent_ref: ParentRef, backend_ref: BackendRef, - failure_accrual: policy::FailureAccrual, + failure_accrual: Option, } impl PartialEq for Concrete { @@ -83,7 +83,7 @@ pub struct LogicalError { source: Error, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] enum RouterParams { Policy(policy::Policy), @@ -180,7 +180,7 @@ where target: concrete::Dispatch::Forward(remote, meta), authority: None, parent, - failure_accrual: Default::default(), + failure_accrual: None, }) } Self::Profile(profile) => { @@ -277,9 +277,9 @@ impl svc::Param for Concrete { } } -impl svc::Param for Concrete { - fn param(&self) -> policy::FailureAccrual { - self.failure_accrual +impl svc::Param> for Concrete { + fn param(&self) -> Option { + self.failure_accrual.clone() } } diff --git a/linkerd/app/outbound/src/http/logical/policy.rs b/linkerd/app/outbound/src/http/logical/policy.rs index 029ab42f34..83be9fd34a 100644 --- a/linkerd/app/outbound/src/http/logical/policy.rs +++ b/linkerd/app/outbound/src/http/logical/policy.rs @@ -11,17 +11,17 @@ pub use self::{ route::{errors, GrpcRouteMetrics, HttpRouteMetrics}, router::{GrpcParams, HttpParams}, }; -pub use linkerd_proxy_client_policy::{ClientPolicy, FailureAccrual}; +pub use linkerd_proxy_client_policy::{ClientPolicy, ConsecutiveFailures, FailureAccrual}; /// HTTP or gRPC policy route parameters. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum Params { Http(router::HttpParams), Grpc(router::GrpcParams), } /// A stack module configured by `Params` and some `T`-typed parent target. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub(super) enum Policy { Http(router::Http), Grpc(router::Grpc), diff --git a/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs b/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs index c1a30f2511..e4d9693d47 100644 --- a/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs +++ b/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs @@ -449,7 +449,7 @@ fn mock_http_route_backend_metrics( Default::default(), ), authority: None, - failure_accrual: Default::default(), + failure_accrual: None, parent: (), parent_ref: parent_ref.clone(), backend_ref: backend_ref.clone(), @@ -505,7 +505,7 @@ fn mock_grpc_route_backend_metrics( Default::default(), ), authority: None, - failure_accrual: Default::default(), + failure_accrual: None, parent: (), parent_ref: parent_ref.clone(), backend_ref: backend_ref.clone(), diff --git a/linkerd/app/outbound/src/http/logical/policy/router.rs b/linkerd/app/outbound/src/http/logical/policy/router.rs index 1b5f2fa045..a96cbcbce8 100644 --- a/linkerd/app/outbound/src/http/logical/policy/router.rs +++ b/linkerd/app/outbound/src/http/logical/policy/router.rs @@ -12,13 +12,13 @@ use linkerd_http_route as http_route; use linkerd_proxy_client_policy as policy; use std::{fmt::Debug, hash::Hash, sync::Arc}; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub struct Params { pub addr: Addr, pub meta: ParentRef, pub routes: Arc<[http_route::Route>]>, pub backends: Arc<[policy::Backend]>, - pub failure_accrual: policy::FailureAccrual, + pub failure_accrual: Option, } pub type HttpParams = @@ -162,7 +162,7 @@ where parent: parent.clone(), backend_ref, parent_ref: parent_ref.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), } } }; diff --git a/linkerd/app/outbound/src/http/logical/policy/tests.rs b/linkerd/app/outbound/src/http/logical/policy/tests.rs index ccecc61008..50ed49ad04 100644 --- a/linkerd/app/outbound/src/http/logical/policy/tests.rs +++ b/linkerd/app/outbound/src/http/logical/policy/tests.rs @@ -120,7 +120,7 @@ async fn header_based_route() { backends: std::iter::once(default_backend.clone()) .chain(Some(special_backend.clone())) .collect(), - failure_accrual: Default::default(), + failure_accrual: None, }); let metrics = HttpRouteMetrics::default(); @@ -231,7 +231,7 @@ async fn http_filter_request_headers() { }], }]), backends: std::iter::once(backend).collect(), - failure_accrual: Default::default(), + failure_accrual: None, } }); diff --git a/linkerd/app/outbound/src/http/logical/profile.rs b/linkerd/app/outbound/src/http/logical/profile.rs index 447c47426d..3eab8e4eae 100644 --- a/linkerd/app/outbound/src/http/logical/profile.rs +++ b/linkerd/app/outbound/src/http/logical/profile.rs @@ -117,7 +117,7 @@ where target: concrete::Dispatch::Balance(addr.clone(), DEFAULT_EWMA), authority: Some(addr.as_http_authority()), parent: parent.clone(), - failure_accrual: Default::default(), + failure_accrual: None, }; let backends = std::iter::once(concrete.clone()).collect(); let distribution = Distribution::first_available(std::iter::once(concrete)); @@ -133,7 +133,7 @@ where target: concrete::Dispatch::Balance(t.addr.clone(), DEFAULT_EWMA), authority: Some(t.addr.as_http_authority()), parent: parent.clone(), - failure_accrual: Default::default(), + failure_accrual: None, }) .collect(); let distribution = Distribution::random_available(targets.iter().cloned().map( @@ -146,7 +146,7 @@ where authority: Some(addr.as_http_authority()), target: concrete::Dispatch::Balance(addr, DEFAULT_EWMA), parent: parent.clone(), - failure_accrual: Default::default(), + failure_accrual: None, }; (concrete, weight) }, diff --git a/linkerd/app/outbound/src/http/logical/tests.rs b/linkerd/app/outbound/src/http/logical/tests.rs index d1980a01b3..643ca73a82 100644 --- a/linkerd/app/outbound/src/http/logical/tests.rs +++ b/linkerd/app/outbound/src/http/logical/tests.rs @@ -252,7 +252,7 @@ fn mock_http(params: client_policy::http::RouteParams) -> (svc::BoxCloneHttp, Ha meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend]), routes: Arc::new([route]), - failure_accrual: client_policy::FailureAccrual::None, + failure_accrual: None, })) } @@ -265,7 +265,7 @@ fn mock_grpc(params: client_policy::grpc::RouteParams) -> (svc::BoxCloneHttp, Ha meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend]), routes: Arc::new([route]), - failure_accrual: client_policy::FailureAccrual::None, + failure_accrual: None, })) } diff --git a/linkerd/app/outbound/src/http/logical/tests/basic.rs b/linkerd/app/outbound/src/http/logical/tests/basic.rs index be2986b7a4..b72e9005fe 100644 --- a/linkerd/app/outbound/src/http/logical/tests/basic.rs +++ b/linkerd/app/outbound/src/http/logical/tests/basic.rs @@ -33,7 +33,7 @@ async fn routes() { meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), - failure_accrual: client_policy::FailureAccrual::None, + failure_accrual: None, }))); let target = Target { num: 1, diff --git a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs index 5c8bf979c4..2103bee040 100644 --- a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs +++ b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs @@ -50,10 +50,13 @@ async fn consecutive_failures_accrue() { meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), - failure_accrual: client_policy::FailureAccrual::ConsecutiveFailures { - max_failures: 3, - backoff, - }, + failure_accrual: Some(client_policy::FailureAccrual { + consecutive: client_policy::ConsecutiveFailures { + max_failures: 3, + backoff, + }, + success_rate: None, + }), }))); let target = Target { num: 1, @@ -212,10 +215,13 @@ async fn balancer_doesnt_select_tripped_breakers() { meta: ParentRef(client_policy::Meta::new_default("parent")), backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), - failure_accrual: client_policy::FailureAccrual::ConsecutiveFailures { - max_failures: 3, - backoff, - }, + failure_accrual: Some(client_policy::FailureAccrual { + consecutive: client_policy::ConsecutiveFailures { + max_failures: 3, + backoff, + }, + success_rate: None, + }), }))); let target = Target { num: 1, diff --git a/linkerd/app/outbound/src/ingress.rs b/linkerd/app/outbound/src/ingress.rs index 3f154314de..de36ab4f5a 100644 --- a/linkerd/app/outbound/src/ingress.rs +++ b/linkerd/app/outbound/src/ingress.rs @@ -543,8 +543,8 @@ fn policy_routes( .. } => { let (routes, failure_accrual) = match version { - http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual), - http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual), + http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual.clone()), + http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual.clone()), }; Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -561,7 +561,7 @@ fn policy_routes( // target? probably should make an error route instead? policy::Protocol::Http1(policy::http::Http1 { ref routes, - failure_accrual, + ref failure_accrual, .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -569,12 +569,12 @@ fn policy_routes( meta, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), }, ))), policy::Protocol::Http2(policy::http::Http2 { ref routes, - failure_accrual, + ref failure_accrual, .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -582,12 +582,12 @@ fn policy_routes( meta, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), }, ))), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, - failure_accrual, + ref failure_accrual, .. }) => Some(http::Routes::Policy(http::policy::Params::Grpc( http::policy::GrpcParams { @@ -595,7 +595,7 @@ fn policy_routes( meta, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), }, ))), _ => None, diff --git a/linkerd/app/outbound/src/sidecar.rs b/linkerd/app/outbound/src/sidecar.rs index 2bb605b0ac..c8a39caaf9 100644 --- a/linkerd/app/outbound/src/sidecar.rs +++ b/linkerd/app/outbound/src/sidecar.rs @@ -237,22 +237,22 @@ impl HttpSidecar { ref http2, .. } => match version { - http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual), - http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual), + http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual.clone()), + http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual.clone()), }, policy::Protocol::Http1(policy::http::Http1 { ref routes, - failure_accrual, + ref failure_accrual, .. - }) => (routes.clone(), failure_accrual), + }) => (routes.clone(), failure_accrual.clone()), policy::Protocol::Http2(policy::http::Http2 { ref routes, - failure_accrual, + ref failure_accrual, .. - }) => (routes.clone(), failure_accrual), + }) => (routes.clone(), failure_accrual.clone()), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, - failure_accrual, + ref failure_accrual, .. }) => { return Some(http::Routes::Policy(http::policy::Params::Grpc( @@ -261,7 +261,7 @@ impl HttpSidecar { meta: parent_ref, backends: policy.backends.clone(), routes: routes.clone(), - failure_accrual, + failure_accrual: failure_accrual.clone(), }, ))) } diff --git a/linkerd/app/test/src/resolver/client_policy.rs b/linkerd/app/test/src/resolver/client_policy.rs index 044346c1fa..d5487d529c 100644 --- a/linkerd/app/test/src/resolver/client_policy.rs +++ b/linkerd/app/test/src/resolver/client_policy.rs @@ -85,13 +85,13 @@ impl ClientPolicies { timeout: Duration::from_secs(10), http1: http::Http1 { routes: http_routes.clone(), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, http2: http::Http2 { routes: http_routes, - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, diff --git a/linkerd/proxy/client-policy/src/grpc.rs b/linkerd/proxy/client-policy/src/grpc.rs index 233ee18e2e..e77517d6d1 100644 --- a/linkerd/proxy/client-policy/src/grpc.rs +++ b/linkerd/proxy/client-policy/src/grpc.rs @@ -18,13 +18,13 @@ pub struct RouteParams { } // TODO HTTP2 settings -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq)] pub struct Grpc { pub routes: Arc<[Route]>, /// Configures how endpoints accrue observed failures. // TODO(ver) Move this to backends and scope to endpoints. - pub failure_accrual: FailureAccrual, + pub failure_accrual: Option, pub load_bias: Option, pub retry_after: Option, } @@ -67,7 +67,7 @@ impl Default for Grpc { fn default() -> Self { Self { routes: Arc::new([]), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, } @@ -203,7 +203,10 @@ pub mod proto { .collect::, _>>()?; Ok(Self { routes, - failure_accrual: proto.failure_accrual.try_into()?, + failure_accrual: proto + .failure_accrual + .map(FailureAccrual::try_from) + .transpose()?, load_bias: proto .load_bias .map(crate::proto::try_load_bias_config) diff --git a/linkerd/proxy/client-policy/src/http.rs b/linkerd/proxy/client-policy/src/http.rs index b7f4a11f47..fa209d9861 100644 --- a/linkerd/proxy/client-policy/src/http.rs +++ b/linkerd/proxy/client-policy/src/http.rs @@ -18,23 +18,23 @@ pub struct RouteParams { } // TODO: keepalive settings, etc. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq)] pub struct Http1 { pub routes: Arc<[Route]>, /// Configures how endpoints accrue observed failures. - pub failure_accrual: FailureAccrual, + pub failure_accrual: Option, pub load_bias: Option, pub retry_after: Option, } // TODO: window sizes, etc -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq)] pub struct Http2 { pub routes: Arc<[Route]>, /// Configures how endpoints accrue observed failures. - pub failure_accrual: FailureAccrual, + pub failure_accrual: Option, pub load_bias: Option, pub retry_after: Option, } @@ -88,7 +88,7 @@ impl Default for Http1 { fn default() -> Self { Self { routes: Arc::new([]), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, } @@ -101,7 +101,7 @@ impl Default for Http2 { fn default() -> Self { Self { routes: Arc::new([]), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, } @@ -240,7 +240,10 @@ pub mod proto { .collect::, _>>()?; Ok(Self { routes, - failure_accrual: proto.failure_accrual.try_into()?, + failure_accrual: proto + .failure_accrual + .map(FailureAccrual::try_from) + .transpose()?, load_bias: proto .load_bias .map(crate::proto::try_load_bias_config) @@ -267,7 +270,10 @@ pub mod proto { .collect::, _>>()?; Ok(Self { routes, - failure_accrual: proto.failure_accrual.try_into()?, + failure_accrual: proto + .failure_accrual + .map(FailureAccrual::try_from) + .transpose()?, load_bias: proto .load_bias .map(crate::proto::try_load_bias_config) diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index d088a7f44a..7ddd3821d4 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -12,7 +12,7 @@ pub mod tls; pub use linkerd_http_route as route; pub use linkerd_proxy_api_resolve::Metadata as EndpointMetadata; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub struct ClientPolicy { pub parent: Arc, pub protocol: Protocol, @@ -25,7 +25,8 @@ pub struct ClientPolicyOverrides { } // TODO additional server configs (e.g. concurrency limits, window sizes, etc) -#[derive(Clone, Debug, Eq, Hash, PartialEq)] +#[derive(Clone, Debug, PartialEq)] +#[allow(clippy::large_enum_variant)] pub enum Protocol { Detect { timeout: time::Duration, @@ -122,19 +123,29 @@ pub struct PeakEwma { pub default_rtt: time::Duration, } +/// Failure-accrual circuit breaking configuration for an endpoint. +/// +/// When present the proxy tracks endpoint health and trips the breaker +/// based on the configured modes. `consecutive` is always present, +/// `success_rate` is present only when explicitly configured via an +/// annotation. +/// +/// `Eq` and `Hash` cannot be derived because `SuccessRateConfig` +/// contains an `f64` threshold field. +#[derive(Clone, Debug, PartialEq)] +pub struct FailureAccrual { + pub consecutive: ConsecutiveFailures, + pub success_rate: Option, +} + +/// Consecutive-failure tracking parameters. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum FailureAccrual { - /// Endpoints do not become unavailable due to observed failures. - None, - /// Endpoints are marked as unavailable when `max_failures` consecutive - /// failures are observed. - ConsecutiveFailures { - /// The number of consecutive failures after which an endpoint becomes - /// unavailable. - max_failures: usize, - /// Backoff for probing the endpoint when it is in a failed state. - backoff: linkerd_exp_backoff::ExponentialBackoff, - }, +pub struct ConsecutiveFailures { + /// The number of consecutive failures after which an endpoint becomes + /// unavailable. + pub max_failures: usize, + /// Backoff for probing the endpoint when it is in a failed state. + pub backoff: linkerd_exp_backoff::ExponentialBackoff, } /// Success-rate-based circuit breaking configuration. @@ -152,6 +163,12 @@ pub struct SuccessRateConfig { pub min_requests: u32, } +/// Load-biasing configuration for rate-aware load balancing. +/// +/// When `enabled`, the balancer adds `penalty` to the load estimate of a +/// rate-limited endpoint so the power-of-two-choices selection prefers +/// other endpoints. `penalty_decay` is the window over which that penalty +/// fades. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct LoadBiasConfig { pub enabled: bool, @@ -162,6 +179,11 @@ pub struct LoadBiasConfig { /// Default maximum Retry-After duration when no configuration is provided. pub const DEFAULT_RETRY_AFTER_MAX_DURATION: time::Duration = time::Duration::from_secs(300); +/// Configuration for honoring rate-limit hints. +/// +/// `max_duration` caps the delay the proxy will take from a Retry-After +/// header or a gRPC pushback trailer. Longer hints are clamped to this +/// value. When absent, [`DEFAULT_RETRY_AFTER_MAX_DURATION`] applies. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct RetryAfterConfig { pub max_duration: time::Duration, @@ -201,13 +223,13 @@ impl ClientPolicy { timeout, http1: http::Http1 { routes: HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, http2: http::Http2 { routes: HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, @@ -245,13 +267,13 @@ impl ClientPolicy { timeout, http1: http::Http1 { routes: NO_HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, http2: http::Http2 { routes: NO_HTTP_ROUTES.clone(), - failure_accrual: Default::default(), + failure_accrual: None, load_bias: None, retry_after: None, }, @@ -351,12 +373,6 @@ impl fmt::Display for Meta { // === impl FailureAccrual === -impl Default for FailureAccrual { - fn default() -> Self { - Self::None - } -} - #[cfg(feature = "proto")] pub mod proto { use super::*; @@ -760,34 +776,81 @@ pub mod proto { } } + /// Lower bound on the success-rate decay window. + /// + /// A decay shorter than this is below the moving average's usable + /// resolution and would be silently clamped later, leaving the breaker + /// tracking a window the operator did not ask for. The value matches the + /// moving average's own floor (`linkerd_ewma::MIN_DECAY`), kept here as a + /// literal to avoid a dependency on another crate for one constant. + const MIN_SUCCESS_RATE_DECAY: time::Duration = time::Duration::from_millis(1); + + /// Upper bound on the success-rate cold-start request floor. + /// + /// A floor above this counts as misconfiguration. Cold-start would never + /// end, so the policy could never trip. The ceiling is a safety limit, not + /// a protocol constraint, and may be raised if a workload genuinely needs a + /// larger warm-up sample. + const MAX_SUCCESS_RATE_MIN_REQUESTS: u32 = 1_000_000; + impl TryFrom for FailureAccrual { type Error = InvalidFailureAccrual; fn try_from(accrual: outbound::FailureAccrual) -> Result { - use outbound::failure_accrual::ConsecutiveFailures; - let ConsecutiveFailures { + use outbound::failure_accrual::ConsecutiveFailures as ProtoConsecutiveFailures; + let ProtoConsecutiveFailures { max_failures, backoff, } = accrual .consecutive_failures .ok_or(InvalidFailureAccrual::Missing("consecutive_failures"))?; - Ok(FailureAccrual::ConsecutiveFailures { - max_failures: max_failures as usize, - backoff: backoff.map(try_backoff).transpose()?.ok_or( - InvalidFailureAccrual::Missing("consecutive failures backoff"), - )?, + let backoff = + backoff + .map(try_backoff) + .transpose()? + .ok_or(InvalidFailureAccrual::Missing( + "consecutive failures backoff", + ))?; + let success_rate = accrual + .success_rate + .map(|sr| -> Result { + if !(0.0..=1.0).contains(&sr.threshold) { + return Err(InvalidFailureAccrual::InvalidValue( + "success rate threshold must be between 0.0 and 1.0", + )); + } + let decay = sr + .decay + .map(time::Duration::try_from) + .transpose() + .map_err(|e| InvalidFailureAccrual::Backoff(InvalidBackoff::Duration(e)))? + .unwrap_or(time::Duration::from_secs(10)); + if decay < MIN_SUCCESS_RATE_DECAY { + return Err(InvalidFailureAccrual::InvalidValue( + "success rate decay is below the moving-average minimum", + )); + } + if sr.min_requests > MAX_SUCCESS_RATE_MIN_REQUESTS { + return Err(InvalidFailureAccrual::InvalidValue( + "success rate min_requests exceeds the supported maximum", + )); + } + Ok(SuccessRateConfig { + threshold: sr.threshold, + decay, + min_requests: sr.min_requests, + }) + }) + .transpose()?; + Ok(FailureAccrual { + consecutive: ConsecutiveFailures { + max_failures: max_failures as usize, + backoff, + }, + success_rate, }) } } - impl TryFrom> for FailureAccrual { - type Error = InvalidFailureAccrual; - fn try_from(accrual: Option) -> Result { - accrual - .map(Self::try_from) - .unwrap_or(Ok(FailureAccrual::None)) - } - } - pub(crate) fn try_backoff( outbound::ExponentialBackoff { min_backoff, @@ -844,6 +907,7 @@ pub mod proto { mod tests { use super::*; use linkerd2_proxy_api::outbound; + use proto::InvalidFailureAccrual; #[test] fn default_retry_after_max_duration_is_300_seconds() { @@ -1004,13 +1068,6 @@ mod tests { ); } - #[test] - fn failure_accrual_try_from_returns_none_for_option_none() { - let result = FailureAccrual::try_from(None); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), FailureAccrual::None); - } - #[test] fn failure_accrual_try_from_parses_valid_consecutive_failures() { let accrual = outbound::FailureAccrual { @@ -1031,15 +1088,8 @@ mod tests { success_rate: None, }; let result = FailureAccrual::try_from(accrual).unwrap(); - match result { - FailureAccrual::ConsecutiveFailures { - max_failures, - backoff: _, - } => { - assert_eq!(max_failures, 5); - } - other => panic!("expected ConsecutiveFailures, got {:?}", other), - } + assert_eq!(result.consecutive.max_failures, 5); + assert_eq!(result.success_rate, None); } #[test] @@ -1057,4 +1107,64 @@ mod tests { "missing backoff in consecutive_failures must be an error" ); } + + fn valid_consecutive_failures() -> outbound::failure_accrual::ConsecutiveFailures { + outbound::failure_accrual::ConsecutiveFailures { + max_failures: 5, + backoff: Some(outbound::ExponentialBackoff { + min_backoff: Some(prost_types::Duration { + seconds: 1, + nanos: 0, + }), + max_backoff: Some(prost_types::Duration { + seconds: 60, + nanos: 0, + }), + jitter_ratio: 0.5, + }), + } + } + + #[test] + fn failure_accrual_try_from_rejects_subminimum_decay() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + // 500us sits below the moving average's usable minimum of 1ms. + decay: Some(prost_types::Duration { + seconds: 0, + nanos: 500_000, + }), + min_requests: 100, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a decay below the moving-average minimum must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_excessive_min_requests() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + decay: Some(prost_types::Duration { + seconds: 10, + nanos: 0, + }), + // A floor this high means cold-start never ends. + min_requests: 2_000_000, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a min_requests above the safety ceiling must be rejected, got: {result:?}" + ); + } + } From 992a06f2e80898650b0026022533d5c870df16ac Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Fri, 29 May 2026 16:33:05 +0200 Subject: [PATCH 08/18] test(client-policy): cover success-rate proto conversion These tests pin the success-rate side of the failure-accrual proto conversion. They cover the accept case, the 10s default applied when decay is unset, threshold rejection outside the [0.0, 1.0] range including NaN, and the inclusive 1ms decay floor. A negative decay is also covered. It fails the duration conversion and surfaces as a backoff duration error rather than a value error. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/proxy/client-policy/src/lib.rs | 136 +++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/linkerd/proxy/client-policy/src/lib.rs b/linkerd/proxy/client-policy/src/lib.rs index 7ddd3821d4..52e7516a1d 100644 --- a/linkerd/proxy/client-policy/src/lib.rs +++ b/linkerd/proxy/client-policy/src/lib.rs @@ -1167,4 +1167,140 @@ mod tests { ); } + #[test] + fn failure_accrual_try_from_parses_success_rate() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.5, + decay: Some(prost_types::Duration { + seconds: 10, + nanos: 0, + }), + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + let sr = result + .success_rate + .expect("a configured success rate must survive conversion"); + assert_eq!(sr.threshold, 0.5); + assert_eq!(sr.decay, time::Duration::from_secs(10)); + assert_eq!(sr.min_requests, 20); + } + + #[test] + fn failure_accrual_try_from_defaults_success_rate_decay_when_absent() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.5, + // An unset decay falls back to the conversion's 10s default. + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + let sr = result.success_rate.expect("success rate must be present"); + assert_eq!(sr.decay, time::Duration::from_secs(10)); + } + + #[test] + fn failure_accrual_try_from_rejects_threshold_above_one() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 1.5, + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a threshold above 1.0 must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_threshold_below_zero() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: -0.1, + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a threshold below 0.0 must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_nan_threshold() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: f64::NAN, + decay: None, + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + // NaN compares false against both range bounds, so it falls outside + // the accepted interval and is rejected. + assert!( + matches!(result, Err(InvalidFailureAccrual::InvalidValue(_))), + "a NaN threshold must be rejected, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_rejects_negative_success_rate_decay() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + decay: Some(prost_types::Duration { + seconds: -1, + nanos: 0, + }), + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual); + // A negative prost duration fails the `time::Duration` conversion. It + // surfaces as a backoff duration error, not a value error. + assert!( + matches!( + result, + Err(InvalidFailureAccrual::Backoff( + proto::InvalidBackoff::Duration(_) + )) + ), + "a negative decay must surface a duration error, got: {result:?}" + ); + } + + #[test] + fn failure_accrual_try_from_accepts_one_millisecond_decay_boundary() { + let accrual = outbound::FailureAccrual { + consecutive_failures: Some(valid_consecutive_failures()), + success_rate: Some(outbound::failure_accrual::SuccessRate { + threshold: 0.9, + // Exactly the 1ms floor. The lower bound is inclusive. + decay: Some(prost_types::Duration { + seconds: 0, + nanos: 1_000_000, + }), + min_requests: 20, + }), + }; + let result = FailureAccrual::try_from(accrual).unwrap(); + let sr = result.success_rate.expect("success rate must be present"); + assert_eq!(sr.decay, time::Duration::from_millis(1)); + } } From 24c67350f0d6343544c5b1ab77556f450abbb0ac Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:47:05 +0200 Subject: [PATCH 09/18] feat(outbound): add duration hint stores and retry-after classification Rate-limited responses include backoff hints the circuit breaker can reuse. A new retry_after.rs adds DurationHintStore plus the RetryAfterStore and GrpcRetryPushbackStore wrappers around it, and the RetryAfterClassify and GrpcRetryPushbackClassifyEos classifiers that read Retry-After and grpc-retry-pushback-ms values off the response and record them for the breaker backoff. Each store keeps the longest hint seen and timestamps it so old values turn stale. The two hint sources stay separate. A Retry-After header records only into the HTTP store, and gRPC pushback records only into the gRPC store, so the Retry-After store never holds a value that did not come from a Retry-After header. The classify::grpc_code helper becomes pub so the classifier can read gRPC status codes from response headers. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/core/src/classify.rs | 2 +- linkerd/app/outbound/Cargo.toml | 2 + linkerd/app/outbound/src/http/breaker.rs | 1 + .../outbound/src/http/breaker/retry_after.rs | 875 ++++++++++++++++++ 4 files changed, 879 insertions(+), 1 deletion(-) create mode 100644 linkerd/app/outbound/src/http/breaker/retry_after.rs diff --git a/linkerd/app/core/src/classify.rs b/linkerd/app/core/src/classify.rs index 68e3d74a0e..249b120b35 100644 --- a/linkerd/app/core/src/classify.rs +++ b/linkerd/app/core/src/classify.rs @@ -202,7 +202,7 @@ impl classify::ClassifyEos for Eos { } } -fn grpc_code(hdrs: &http::HeaderMap) -> Option { +pub fn grpc_code(hdrs: &http::HeaderMap) -> Option { hdrs.get("grpc-status") .and_then(|v| v.to_str().ok()) .and_then(|s| s.parse::().ok()) diff --git a/linkerd/app/outbound/Cargo.toml b/linkerd/app/outbound/Cargo.toml index 08b7739971..599adf70df 100644 --- a/linkerd/app/outbound/Cargo.toml +++ b/linkerd/app/outbound/Cargo.toml @@ -35,6 +35,8 @@ tower = { workspace = true, features = ["util"] } tracing = { workspace = true } linkerd-app-core = { path = "../core" } +linkerd-ewma = { path = "../../ewma" } +linkerd-load-biaser = { path = "../../load-biaser" } linkerd-app-test = { path = "../test", optional = true } linkerd-distribute = { path = "../../distribute" } linkerd-http-classify = { path = "../../http/classify" } diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index f8177757f8..87069ab81b 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -3,6 +3,7 @@ use linkerd_proxy_client_policy::FailureAccrual; use tracing::{trace_span, Instrument}; mod consecutive_failures; +pub mod retry_after; use self::consecutive_failures::ConsecutiveFailures; diff --git a/linkerd/app/outbound/src/http/breaker/retry_after.rs b/linkerd/app/outbound/src/http/breaker/retry_after.rs new file mode 100644 index 0000000000..39d8ca3f78 --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/retry_after.rs @@ -0,0 +1,875 @@ +//! Retry-After and grpc-retry-pushback-ms parsing for rate limiting responses. +//! +//! Parses backoff hints from HTTP 429/503 and gRPC RESOURCE_EXHAUSTED responses +//! into [`RetryAfterStore`] and [`GrpcRetryPushbackStore`]. Both use a +//! max-value-wins strategy (longest hint kept). The circuit breaker queries +//! both via `UnifiedBreaker::take_combined_hint()` and uses the maximum. +//! +//! Parsing is delegated to [`linkerd_http_classify::retry_after`]. +//! +//! ```text +//! HTTP 429/503 Response +//! | +//! v +//! RetryAfterClassify::start() +//! | +//! +-> parse_retry_after() -> RetryAfterStore::record() +//! | +//! +-> gated_parse_grpc_retry_pushback() (from headers, for unary gRPC) +//! | +//! v +//! GrpcRetryPushbackStore::record() +//! +//! gRPC RESOURCE_EXHAUSTED (streaming) +//! | +//! v +//! GrpcRetryPushbackClassifyEos::eos(trailers) +//! | +//! v +//! gated_parse_grpc_retry_pushback() -> GrpcRetryPushbackStore::record() +//! +//! Circuit Breaker Backoff +//! | +//! v +//! UnifiedBreaker::take_combined_hint() +//! | +//! +-> RetryAfterStore::take() +//! +-> GrpcRetryPushbackStore::take() +//! | +//! v +//! max(http_hint, grpc_hint) +//! ``` + +use http::HeaderMap; +use linkerd_app_core::classify::{self, grpc_code}; +use linkerd_http_classify::{ClassifyEos, ClassifyResponse}; +use parking_lot::Mutex; +use std::sync::Arc; +use std::time::Duration; +use tokio::time::Instant; +use tonic::Code; + +/// Shared store for duration hints, timestamped to detect staleness. +#[derive(Clone, Debug)] +pub struct DurationHintStore { + name: &'static str, + inner: Arc>>, +} + +impl DurationHintStore { + /// Create a new empty store with the given name for logging. + pub fn new(name: &'static str) -> Self { + Self { + name, + inner: Arc::new(Mutex::new(None)), + } + } + + /// Record a duration hint (max value wins, only updates if longer). + pub fn record(&self, duration: Duration) { + let mut guard = self.inner.lock(); + let should_update = match *guard { + None => true, + Some((_, existing)) => duration > existing, + }; + + if should_update { + *guard = Some((Instant::now(), duration)); + tracing::debug!( + ?duration, + hint_type = %self.name, + "Recorded hint (max value wins)", + ); + } else { + tracing::debug!( + ?duration, + existing = ?guard.as_ref().map(|(_, d)| d), + hint_type = %self.name, + "Ignoring smaller hint", + ); + } + } + + /// Take the stored hint if it exists and was recorded recently. + /// + /// The hint is consumed (set to None) to prevent reusing the same hint + /// for multiple backoff cycles. The `max_age` parameter controls how + /// old a hint can be before it's considered stale. + /// + /// Stale hints are discarded without being consumed: we inspect the + /// timestamp first, then only `take()` if the hint is recent. This + /// prevents a race where a stale hint is consumed (clearing the store) + /// when a new hint arrives. + pub fn take(&self, max_age: Duration) -> Option { + let mut guard = self.inner.lock(); + if let Some(&(recorded_at, duration)) = guard.as_ref() { + // Monotonic clock: saturating_duration_since returns zero on backwards drift + let elapsed = Instant::now().saturating_duration_since(recorded_at); + if elapsed <= max_age { + // Elapsed time consumed the entire hint. + if elapsed >= duration { + *guard = None; + tracing::debug!( + ?duration, + ?elapsed, + hint_type = %self.name, + "Hint fully elapsed, discarding", + ); + return None; + } + + // Subtract elapsed time to obtain remaining wait. + let adjusted = duration.saturating_sub(elapsed); + + *guard = None; + tracing::debug!( + ?duration, + ?elapsed, + ?adjusted, + hint_type = %self.name, + "Using hint (adjusted for elapsed time)", + ); + return Some(adjusted); + } + tracing::debug!( + ?duration, + ?elapsed, + hint_type = %self.name, + "Hint too old, discarding", + ); + + // Stale. Discard explicitly without consuming for use + *guard = None; + } + None + } +} + +/// A shared store for Retry-After hints. +/// +/// This is used to pass Retry-After hints from the response path to the circuit +/// breaker. Wraps [`DurationHintStore`] with the "Retry-After" name for logging. +#[derive(Clone, Debug)] +pub struct RetryAfterStore(DurationHintStore); + +impl Default for RetryAfterStore { + fn default() -> Self { + Self(DurationHintStore::new("Retry-After")) + } +} + +impl RetryAfterStore { + /// Create a new empty store. + pub fn new() -> Self { + Self::default() + } + + /// Record a Retry-After hint from a 429 response. + pub fn record(&self, duration: Duration) { + self.0.record(duration) + } + + /// Take the stored hint if it exists and was recorded recently. + pub fn take(&self, max_age: Duration) -> Option { + self.0.take(max_age) + } +} + +/// A shared store for gRPC retry pushback hints. +/// +/// This is used to pass `grpc-retry-pushback-ms` hints from the response path +/// to the circuit breaker. Wraps [`DurationHintStore`] with the +/// "grpc-retry-pushback-ms" name for logging. +#[derive(Clone, Debug)] +pub struct GrpcRetryPushbackStore(DurationHintStore); + +impl Default for GrpcRetryPushbackStore { + fn default() -> Self { + Self(DurationHintStore::new("grpc-retry-pushback-ms")) + } +} + +impl GrpcRetryPushbackStore { + /// Create a new empty store. + pub fn new() -> Self { + Self::default() + } + + /// Record a grpc-retry-pushback-ms hint from a RESOURCE_EXHAUSTED response. + pub fn record(&self, duration: Duration) { + self.0.record(duration) + } + + /// Take the stored hint if it exists and was recorded recently. + pub fn take(&self, max_age: Duration) -> Option { + self.0.take(max_age) + } +} + +/// Parse grpc-retry-pushback-ms from headers or trailers if code is RESOURCE_EXHAUSTED. +/// +/// Delegates to the shared parser in [`linkerd_http_classify::retry_after`], +/// but only when `grpc_code` is RESOURCE_EXHAUSTED (code 8). +/// +/// `max` caps the parsed duration (typically `max_duration` from config). +/// +/// Returns `None` for non-RESOURCE_EXHAUSTED responses or any parse failure. +fn gated_parse_grpc_retry_pushback( + grpc_code: Option, + headers_or_trailers: &HeaderMap, + max: Duration, +) -> Option { + if grpc_code != Some(Code::ResourceExhausted) { + return None; + } + linkerd_http_classify::retry_after::parse_grpc_retry_pushback(headers_or_trailers, max) +} + +/// A ClassifyEos wrapper that records grpc-retry-pushback-ms hints from trailers. +/// +/// This wraps any [`ClassifyEos`] implementation and intercepts the `eos()` +/// call to parse `grpc-retry-pushback-ms` trailers from RESOURCE_EXHAUSTED +/// responses. The parsed duration is recorded in a [`GrpcRetryPushbackStore`] +/// for use by the circuit breaker's backoff logic. +/// +/// The wrapper delegates all classification logic to the inner classifier unchanged. +#[derive(Clone, Debug)] +pub struct GrpcRetryPushbackClassifyEos { + inner: E, + store: GrpcRetryPushbackStore, + max_duration: Duration, +} + +impl GrpcRetryPushbackClassifyEos { + /// Create a new wrapper around the given ClassifyEos. + pub fn new(inner: E, store: GrpcRetryPushbackStore, max_duration: Duration) -> Self { + Self { + inner, + store, + max_duration, + } + } +} + +impl ClassifyEos for GrpcRetryPushbackClassifyEos +where + E: ClassifyEos, +{ + type Class = classify::Class; + + fn eos(self, trailers: Option<&HeaderMap>) -> Self::Class { + // Get classification from inner first + let class = self.inner.eos(trailers); + + // If we have trailers, check for grpc-retry-pushback-ms + if let Some(trls) = trailers { + // Extract gRPC code from the classification result + let grpc_code = match &class { + classify::Class::Grpc(Ok(code)) | classify::Class::Grpc(Err(code)) => Some(*code), + _ => None, + }; + + if let Some(duration) = + gated_parse_grpc_retry_pushback(grpc_code, trls, self.max_duration) + { + self.store.record(duration); + } + } + + class + } + + fn error(self, error: &linkerd_app_core::Error) -> Self::Class { + // Delegate error classification unchanged + self.inner.error(error) + } +} + +// === RetryAfterClassify === + +/// A classifier wrapper that records retry hints from both HTTP and gRPC responses. +/// +/// This wraps any [`ClassifyResponse`] implementation and intercepts the `start()` +/// call to: +/// 1. Parse `Retry-After` headers from HTTP 429 responses +/// 2. Parse `grpc-retry-pushback-ms` from gRPC headers (for unary failures) +/// 3. Wrap the inner `ClassifyEos` with [`GrpcRetryPushbackClassifyEos`] to +/// also parse gRPC trailers +/// +/// The parsed durations are recorded in their respective stores for use by the +/// circuit breaker's backoff logic. +#[derive(Clone, Debug)] +pub struct RetryAfterClassify { + inner: C, + http_store: RetryAfterStore, + grpc_store: GrpcRetryPushbackStore, + max_duration: Duration, +} + +impl RetryAfterClassify { + /// Test-only. Production code uses [`Self::new_with_max`]. + /// + /// Create a new wrapper around the given classifier. + #[cfg(test)] + pub fn new(inner: C, http_store: RetryAfterStore, grpc_store: GrpcRetryPushbackStore) -> Self { + Self { + inner, + http_store, + grpc_store, + max_duration: Duration::from_secs(300), + } + } + + /// Create a new wrapper with a custom max Retry-After cap. + pub fn new_with_max( + inner: C, + http_store: RetryAfterStore, + grpc_store: GrpcRetryPushbackStore, + max_duration: Duration, + ) -> Self { + Self { + inner, + http_store, + grpc_store, + max_duration, + } + } +} + +impl ClassifyResponse for RetryAfterClassify +where + C: ClassifyResponse, +{ + type Class = classify::Class; + type ClassifyEos = GrpcRetryPushbackClassifyEos; + + fn start(self, rsp: &http::Response) -> Self::ClassifyEos { + // Record only a genuine HTTP Retry-After header into the HTTP store. + // gRPC pushback is recorded on its own into its store below, so the + // two hint sources stay distinct and the "Retry-After" store never holds + // a value that did not come from a Retry-After header. + let max = self.max_duration; + if let Some(duration) = + linkerd_http_classify::retry_after::parse_retry_after(rsp.status(), rsp.headers(), max) + { + self.http_store.record(duration); + } + + // gRPC headers (grpc-status, grpc-retry-pushback-ms) are only meaningful + // when HTTP status is 200 OK. Non-200 responses are never gRPC. + if rsp.status() == http::StatusCode::OK { + let grpc_code = grpc_code(rsp.headers()); + if let Some(duration) = gated_parse_grpc_retry_pushback(grpc_code, rsp.headers(), max) { + self.grpc_store.record(duration); + } + } + + // Wrap the inner ClassifyEos with GrpcRetryPushbackClassifyEos to + // also parse gRPC trailers when the body ends. + let inner_eos = self.inner.start(rsp); + GrpcRetryPushbackClassifyEos::new(inner_eos, self.grpc_store, max) + } + + fn error(self, error: &linkerd_app_core::Error) -> Self::Class { + // Delegate error classification unchanged. + self.inner.error(error) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use http::HeaderValue; + use linkerd_proxy_client_policy::http::StatusRanges; + + /// A simplified local model of max-value-wins combination, used to keep the + /// store-level unit tests self-contained. Unlike the breaker's + /// `UnifiedBreaker::take_combined_hint`, it does not clamp either input to a + /// maximum duration. The clamp is pinned end-to-end against the real code in + /// the breaker integration tests. + fn combine_hints(http: Option, grpc: Option) -> Option { + match (http, grpc) { + (Some(h), Some(g)) => Some(h.max(g)), + (h @ Some(_), None) => h, + (None, g @ Some(_)) => g, + (None, None) => None, + } + } + + // === RetryAfterStore tests === + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_max_value_wins() { + let store = RetryAfterStore::new(); + + // Record a 10-second hint + store.record(Duration::from_secs(10)); + + // Try to record a smaller hint (5 seconds) - should be ignored + store.record(Duration::from_secs(5)); + + // The stored value should still be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_larger_value_updates() { + let store = RetryAfterStore::new(); + + // Record a 5-second hint + store.record(Duration::from_secs(5)); + + // Record a larger hint (10 seconds) - should update + store.record(Duration::from_secs(10)); + + // The stored value should be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_empty_accepts_any() { + let store = RetryAfterStore::new(); + + // Empty store should accept any value + store.record(Duration::from_secs(5)); + + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(5))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_take_clears() { + let store = RetryAfterStore::new(); + + store.record(Duration::from_secs(10)); + + // First take should succeed + let hint1 = store.take(Duration::from_secs(1)); + assert_eq!(hint1, Some(Duration::from_secs(10))); + + // Second take should return None (consumed) + let hint2 = store.take(Duration::from_secs(1)); + assert_eq!(hint2, None); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn store_after_take_accepts_new() { + let store = RetryAfterStore::new(); + + // Record and take + store.record(Duration::from_secs(10)); + let _ = store.take(Duration::from_secs(1)); + + // Now any new value should be accepted (store is empty) + store.record(Duration::from_secs(5)); + + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(5))); + } + + #[test] + fn parse_grpc_pushback_positive() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("5000")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, Some(Duration::from_millis(5000))); + } + + #[test] + fn parse_grpc_pushback_zero() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("0")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, Some(Duration::ZERO)); + } + + #[test] + fn parse_grpc_pushback_negative_ignored() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("-1")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + // Negative values mean "do not retry" - we ignore them + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_non_resource_exhausted_ignored() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("5000")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::Ok), &headers, max); + assert_eq!(result, None); + + let result = gated_parse_grpc_retry_pushback(Some(Code::Internal), &headers, max); + assert_eq!(result, None); + + let result = gated_parse_grpc_retry_pushback(Some(Code::Unavailable), &headers, max); + assert_eq!(result, None); + + // None (no code) + let result = gated_parse_grpc_retry_pushback(None, &headers, max); + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_caps_at_max() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + // 999999999 ms should be capped at max (300s) + headers.insert( + "grpc-retry-pushback-ms", + HeaderValue::from_static("999999999"), + ); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, Some(max)); + } + + #[test] + fn parse_grpc_pushback_invalid_format() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("abc")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_missing_header() { + let max = Duration::from_secs(300); + let headers = HeaderMap::new(); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, None); + } + + #[test] + fn parse_grpc_pushback_empty_value() { + let max = Duration::from_secs(300); + let mut headers = HeaderMap::new(); + headers.insert("grpc-retry-pushback-ms", HeaderValue::from_static("")); + + let result = gated_parse_grpc_retry_pushback(Some(Code::ResourceExhausted), &headers, max); + assert_eq!(result, None); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_store_max_value_wins() { + let store = GrpcRetryPushbackStore::new(); + + // Record a 10-second hint + store.record(Duration::from_secs(10)); + + // Try to record a smaller hint (5 seconds) - should be ignored + store.record(Duration::from_secs(5)); + + // The stored value should still be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_store_larger_value_updates() { + let store = GrpcRetryPushbackStore::new(); + + // Record a 5-second hint + store.record(Duration::from_secs(5)); + + // Record a larger hint (10 seconds) - should update + store.record(Duration::from_secs(10)); + + // The stored value should be 10 seconds + let hint = store.take(Duration::from_secs(1)); + assert_eq!(hint, Some(Duration::from_secs(10))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_store_take_clears() { + let store = GrpcRetryPushbackStore::new(); + + store.record(Duration::from_secs(10)); + + // First take should succeed + let hint1 = store.take(Duration::from_secs(1)); + assert_eq!(hint1, Some(Duration::from_secs(10))); + + // Second take should return None (consumed) + let hint2 = store.take(Duration::from_secs(1)); + assert_eq!(hint2, None); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_http_and_grpc_hints_max_wins() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record HTTP hint of 10 seconds + http_store.record(Duration::from_secs(10)); + // Record gRPC hint of 30 seconds (larger - should win) + grpc_store.record(Duration::from_secs(30)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + // Verify both hints are retrieved + assert_eq!(http_hint, Some(Duration::from_secs(10))); + assert_eq!(grpc_hint, Some(Duration::from_secs(30))); + + // Verify max-value-wins logic (as implemented in UnifiedBreaker::take_combined_hint) + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(30))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_hints_http_wins_when_larger() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record HTTP hint of 60 seconds (larger) + http_store.record(Duration::from_secs(60)); + // Record gRPC hint of 15 seconds + grpc_store.record(Duration::from_secs(15)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(60))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_hints_only_http() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record only HTTP hint + http_store.record(Duration::from_secs(20)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(20))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn combined_hints_only_grpc() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + // Record only gRPC hint + grpc_store.record(Duration::from_secs(25)); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, Some(Duration::from_secs(25))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn stale_hint_discarded() { + let store = DurationHintStore::new("test"); + store.record(Duration::from_secs(5)); + + // Advance time past the max_age + tokio::time::advance(Duration::from_secs(60)).await; + + // Stale hint should be discarded, not returned + let result = store.take(Duration::from_secs(30)); + assert!(result.is_none(), "Stale hint should be discarded"); + + // Store should be empty now (stale hint was cleared) + let result2 = store.take(Duration::from_secs(30)); + assert!( + result2.is_none(), + "Store should be empty after stale discard" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn take_adjusts_for_elapsed_time() { + let store = DurationHintStore::new("test"); + + // Record a 10-second hint + store.record(Duration::from_secs(10)); + + // Advance 3 seconds. The hint is partly elapsed. + tokio::time::advance(Duration::from_secs(3)).await; + + // take() should return 10s - 3s = 7s (elapsed < duration) + let result = store.take(Duration::from_secs(60)); + assert_eq!(result, Some(Duration::from_secs(7))); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn exactly_elapsed_hint_returns_none() { + let store = DurationHintStore::new("test"); + + // Record a 5-second hint + store.record(Duration::from_secs(5)); + + // Advance exactly 5 seconds, so elapsed equals duration. + tokio::time::advance(Duration::from_secs(5)).await; + + // take() should return None (elapsed >= duration) + let result = store.take(Duration::from_secs(60)); + assert_eq!(result, None); + + // Store should be empty (hint consumed, not just ignored) + assert_eq!(store.take(Duration::from_secs(60)), None); + + // Prove the slot was cleared: a new (smaller) hint is immediately visible. + // If take() failed to clear the slot, record(3s) would be rejected by + // max-value-wins (3 < 5), and this assertion would fail. + store.record(Duration::from_secs(3)); + assert_eq!( + store.take(Duration::from_secs(60)), + Some(Duration::from_secs(3)) + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn overshot_hint_returns_none() { + let store = DurationHintStore::new("test"); + + // Record a 2-second hint + store.record(Duration::from_secs(2)); + + // Advance 3 seconds, so elapsed passes duration but stays within max_age. + tokio::time::advance(Duration::from_secs(3)).await; + + // take() should return None (elapsed > duration) + let result = store.take(Duration::from_secs(60)); + assert_eq!(result, None); + + // Store should be empty (hint consumed, not just ignored) + assert_eq!(store.take(Duration::from_secs(60)), None); + + // Prove the slot was cleared: a new (smaller) hint is immediately visible. + // If take() failed to clear the slot, record(1s) would be rejected by + // max-value-wins (1 < 2), and this assertion would fail. + store.record(Duration::from_secs(1)); + assert_eq!( + store.take(Duration::from_secs(60)), + Some(Duration::from_secs(1)) + ); + } + + #[test] + fn combined_hints_neither_present() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let max_age = Duration::from_secs(60); + + let http_hint = http_store.take(max_age); + let grpc_hint = grpc_store.take(max_age); + + let effective = combine_hints(http_hint, grpc_hint); + + assert_eq!(effective, None); + } + + // Verify that grpc-status is not parsed on non-200 responses. + // + // An HTTP 429 with a spurious `grpc-status: 8` header should record + // the Retry-After hint in the HTTP store but leave the gRPC store empty. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_status_skipped_on_non_200() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = classify::Response::Http(StatusRanges::default()); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store.clone()); + + // HTTP 429 with Retry-After AND a spurious grpc-status: 8 header. + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("30"), + ); + rsp.headers_mut() + .insert("grpc-status", http::HeaderValue::from_static("8")); + rsp.headers_mut().insert( + "grpc-retry-pushback-ms", + http::HeaderValue::from_static("5000"), + ); + + let _eos = wrapped.start(&rsp); + + // HTTP store should have the Retry-After hint + let http_hint = http_store.take(Duration::from_secs(10)); + assert_eq!(http_hint, Some(Duration::from_secs(30))); + + // gRPC store should be empty + let grpc_hint = grpc_store.take(Duration::from_secs(10)); + assert_eq!( + grpc_hint, None, + "grpc-status should not be parsed on HTTP 429" + ); + } + + // Verify that 200 OK gRPC pushback goes only into the gRPC store. + // + // A 200 OK with `grpc-status: 8` (RESOURCE_EXHAUSTED) and + // `grpc-retry-pushback-ms: 5000` should record grpc_store = Some(5s) and + // leave http_store empty. There is no HTTP Retry-After header, so the + // "Retry-After" store must not pick up the gRPC pushback. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn grpc_pushback_recorded_only_in_grpc_store() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = classify::Response::Http(StatusRanges::default()); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store.clone()); + + // HTTP 200 OK with gRPC RESOURCE_EXHAUSTED + pushback header. + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::OK; + rsp.headers_mut().insert( + http::header::CONTENT_TYPE, + http::HeaderValue::from_static("application/grpc"), + ); + rsp.headers_mut() + .insert("grpc-status", http::HeaderValue::from_static("8")); + rsp.headers_mut().insert( + "grpc-retry-pushback-ms", + http::HeaderValue::from_static("5000"), + ); + + let _eos = wrapped.start(&rsp); + + // gRPC store should have the pushback hint (the check let it through). + let grpc_hint = grpc_store.take(Duration::from_secs(10)); + assert_eq!( + grpc_hint, + Some(Duration::from_millis(5000)), + "200 OK with grpc-status: 8 should record gRPC pushback" + ); + + // HTTP store stays empty. No Retry-After header was present, and the + // gRPC pushback belongs only to the gRPC store. + let http_hint = http_store.take(Duration::from_secs(10)); + assert_eq!( + http_hint, None, + "gRPC pushback must not leak into the Retry-After store" + ); + } +} From cfcda2f7a8ab9fc1b09110d285e39b86b7322da1 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:47:30 +0200 Subject: [PATCH 10/18] feat(outbound): add unified breaker three-state machine A unified breaker replaces the single consecutive-failures policy with one that watches two signals at once: a run of consecutive 5xx responses, and an EWMA success rate that also counts 429 and gRPC RESOURCE_EXHAUSTED as failures. Either signal can trip the circuit on its own. The breaker moves through three states, open while it accepts traffic and tracks both signals, closed while it backs off, and probation while it admits one probe to decide whether to reopen. Cold-start protection covers only the success rate. The EWMA starts at 1.0 and cannot trip until min_requests responses arrive, and a long idle gap resets that counter so one late response does not dominate the average. The consecutive policy keeps no such grace and trips as soon as the run reaches its limit. The probe check stays mode-aware to keep the old breaker's behavior. With the success rate off (min_requests at usize::MAX) it defers to the default classifier through class.is_success(), so 429 is judged as before. With the success rate on it asks for a clean response, treating 429 and RESOURCE_EXHAUSTED as a still-limiting endpoint. A stored Retry-After or gRPC pushback hint sets a floor for the first backoff, and the larger of the two wins when both are present. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/outbound/src/http/breaker.rs | 16 + .../app/outbound/src/http/breaker/unified.rs | 1651 +++++++++++++++++ 2 files changed, 1667 insertions(+) create mode 100644 linkerd/app/outbound/src/http/breaker/unified.rs diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index 87069ab81b..e08149537e 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -1,11 +1,27 @@ use linkerd_app_core::{classify, proxy::http::classify::gate, svc}; use linkerd_proxy_client_policy::FailureAccrual; + +use tokio::time::Duration; use tracing::{trace_span, Instrument}; mod consecutive_failures; pub mod retry_after; +mod unified; use self::consecutive_failures::ConsecutiveFailures; +use self::unified::{UnifiedBreaker, UnifiedBreakerConfig}; + +/// Reason why the circuit breaker tripped. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TripReason { + /// Tripped due to N consecutive 5xx failures. + ConsecutiveFailures, + /// Tripped due to EWMA success rate dropping below threshold. + LowSuccessRate, +} + +/// Default EWMA decay window (used when success_rate is configured but has no explicit decay). +const DEFAULT_SUCCESS_RATE_DECAY: Duration = Duration::from_secs(10); /// Params configuring a circuit breaker stack. #[derive(Clone, Debug)] diff --git a/linkerd/app/outbound/src/http/breaker/unified.rs b/linkerd/app/outbound/src/http/breaker/unified.rs new file mode 100644 index 0000000000..a395147900 --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/unified.rs @@ -0,0 +1,1651 @@ +//! Unified circuit breaker combining consecutive failures and success rate policies. +//! +//! This module implements a single circuit breaker that implements two policies: +//! - Consecutive failures: Trips after N consecutive 5xx responses +//! - Success rate (EWMA): Trips when success rate drops below threshold (treats 429 as failure) +//! +//! ## Trip Conditions +//! +//! Either condition can trip the circuit: +//! +//! * Consecutive failures >= `max_failures` (0 = disabled, no cold-start protection) +//! * EWMA success rate < `threshold` AND request_count >= `min_requests` (cold-start protected) +//! +//! Consecutive failures are checked before success rate, so a response that crosses +//! both thresholds at once is charged to [`TripReason::ConsecutiveFailures`]. The +//! reason is for observability only. The circuit trips on either condition either way. +//! +//! ## Cold-Start Protection +//! +//! The EWMA success rate policy has three layers of cold-start protection: +//! +//! 1. Optimistic initialization: EWMA starts at 1.0 (100% success). This prevents +//! immediate tripping on first failures. +//! +//! 2. Minimum request threshold: The circuit cannot trip on low success rate until +//! `min_requests` have been observed. This ensures statistical significance. +//! +//! 3. Idle restart: After a long idle period (3x the decay window), the request +//! counter resets, re-requiring `min_requests` new observations before +//! the success rate policy can trip. This prevents a single post-idle response +//! from dominating the EWMA and tripping the circuit. +//! +//! Cold-start protection does NOT apply to consecutive failures policy. If an endpoint +//! returns `max_failures` consecutive 5xx errors, it will trip immediately. This is +//! intentional because repeated failures are a strong signal regardless of sample size. +//! +//! ## Recovery Conditions +//! +//! During probation, the probe success check is mode-aware: +//! +//! - In dual mode (success rate active), a probe must be non-5xx AND non-429 for +//! HTTP, or Ok for gRPC, to succeed. A 429 during probation is treated as failure +//! because it indicates the endpoint is still rate limiting. +//! +//! - In consecutive-only mode (success rate disabled), the probe delegates to +//! `class.is_success()` to preserve the behavior of the consecutive-failures +//! breaker it replaces. This ensures 429 responses are evaluated by the default +//! classifier rather than being unconditionally treated as failure. +//! +//! ## Retry-After Integration +//! +//! When a 429 response includes a `Retry-After` header, the duration is stored in +//! a [`RetryAfterStore`] and used as a floor for the first backoff after trip. +//! +//! Similarly, when a gRPC RESOURCE_EXHAUSTED response includes a `grpc-retry-pushback-ms` +//! header or trailer, the duration is stored in a [`GrpcRetryPushbackStore`] and used +//! as a floor for the first backoff. If both HTTP and gRPC hints exist, the maximum +//! is used. +//! +//! ## gRPC RESOURCE_EXHAUSTED Handling +//! +//! gRPC's RESOURCE_EXHAUSTED (code 8) is treated equivalently to HTTP 429: +//! - Counts as failure for success rate EWMA +//! - Won't count for consecutive failures (not a hard failure) +//! - During probation only gRPC Ok (code 0) is considered success + +use super::{ + retry_after::{GrpcRetryPushbackStore, RetryAfterStore}, + TripReason, +}; +use futures::stream::StreamExt; +use linkerd_app_core::proxy::http::classify::gate; +use linkerd_app_core::{classify, exp_backoff, exp_backoff::ExponentialBackoff}; +use linkerd_ewma::{Ewma, MIN_DECAY}; +use tokio::sync::mpsc; +use tokio::time::{Duration, Instant}; +use tonic::Code; + +/// Idle multiplier for cold-start restart. When the gap since the last response +/// exceeds this many decay windows, the request counter resets so that +/// `min_requests` new samples are required before the success rate policy can +/// trip. At 3x, alpha ~= 0.95: a single sample after the idle gap would account +/// for ~95% of the EWMA, so one error after a quiet period could trip the +/// circuit on its own. Re-arming cold-start protection prevents that. +/// +// TODO: expose this as configuration once the mechanism has seen real traffic. +const IDLE_COLD_START_FACTOR: u32 = 3; + +/// Unified circuit breaker combining consecutive failures and success rate policies. +/// +/// This policy tracks both consecutive failures (for 5xx responses) and EWMA success +/// rate (treating both 5xx and 429 as failures). Either condition can trip the circuit. +/// Cold-start protection only applies to success rate, not consecutive failures. +pub struct UnifiedBreaker { + // === Configuration === + /// Maximum consecutive failures (5xx only) before trip. + max_failures: usize, + /// Success rate threshold (0.0-1.0) - trip when EWMA drops below this. + threshold: f64, + /// EWMA decay window for success rate tracking. + decay: Duration, + /// Exponential backoff configuration for recovery. + backoff: ExponentialBackoff, + /// Minimum requests before circuit can trip (cold-start protection). + min_requests: usize, + + // === Gate Control === + /// Gate channel to control endpoint readiness (direct - no coordination wrapper). + gate: gate::Tx, + /// Channel receiving response classifications. + rsps: mpsc::Receiver, + + // === Retry-After Integration === + /// Shared store for HTTP Retry-After hints from 429 responses. + retry_after_store: RetryAfterStore, + /// Shared store for gRPC retry pushback hints from RESOURCE_EXHAUSTED responses. + grpc_retry_pushback_store: GrpcRetryPushbackStore, + /// Maximum Retry-After duration the proxy will honor (clamping cap). + max_duration: Duration, + + /// Lets tests see the reason charged to each trip without affecting behavior. + #[cfg(test)] + trip_observer: Option>, +} + +/// Internal state tracked during the open phase. +struct OpenState { + /// Current consecutive failure count (5xx only). + consecutive_failures: usize, + /// EWMA tracker for success rate (5xx and 429 count as failure). + ewma: Ewma, + /// Requests since last cold-start reset (for cold-start protection). + /// Resets on recovery and after idle periods exceeding the cold-start + /// threshold. + request_count: usize, + /// Timestamp of the most recent response, used to detect idle gaps. + /// Tracked apart from the EWMA. The average's timestamp freezes while the + /// circuit is tripped and probing, so reading idle from it would miss quiet + /// periods that span a trip-recover cycle. + last_seen: Instant, +} + +impl OpenState { + /// Create new open state with optimistic EWMA initialization. + /// + /// The EWMA starts at 1.0 (100% success rate) rather than 0.0 to implement + /// "optimistic cold-start": new or recovered endpoints are assumed healthy + /// until proven otherwise. Combined with `min_requests` cold-start protection, + /// this prevents: + /// - Immediate tripping on first failure (EWMA would drop below threshold) + /// - False positives during low-traffic periods where single failures have + /// outsized impact on the EWMA + /// + /// The EWMA will naturally decay toward actual observed success rate as + /// requests are processed. After `min_requests` observations, the EWMA + /// represents a statistically meaningful estimate. + fn new(decay: Duration, now: Instant) -> Self { + if decay < MIN_DECAY { + tracing::warn!( + ?decay, + min = ?MIN_DECAY, + "success-rate decay below minimum, will be clamped by EWMA constructor" + ); + } + Self { + consecutive_failures: 0, + ewma: Ewma::new_with_value(decay, now, 1.0), + request_count: 0, + last_seen: now, + } + } + + /// Return the state to its initial values after recovery. + fn reset(&mut self, decay: Duration, now: Instant) { + *self = Self::new(decay, now); + } +} + +/// Configuration for [`UnifiedBreaker::new`]. +#[derive(Debug)] +pub(crate) struct UnifiedBreakerConfig { + pub(crate) max_failures: usize, + pub(crate) threshold: f64, + pub(crate) decay: Duration, + pub(crate) backoff: ExponentialBackoff, + pub(crate) min_requests: usize, + pub(crate) gate: gate::Tx, + pub(crate) rsps: mpsc::Receiver, + pub(crate) retry_after_store: RetryAfterStore, + pub(crate) grpc_retry_pushback_store: GrpcRetryPushbackStore, + pub(crate) max_duration: Duration, +} + +impl UnifiedBreaker { + /// Create a new unified circuit breaker. + pub fn new(config: UnifiedBreakerConfig) -> Self { + let UnifiedBreakerConfig { + max_failures, + threshold, + decay, + backoff, + min_requests, + gate, + rsps, + retry_after_store, + grpc_retry_pushback_store, + max_duration, + } = config; + Self { + max_failures, + threshold, + decay, + backoff, + min_requests, + gate, + rsps, + retry_after_store, + grpc_retry_pushback_store, + max_duration, + #[cfg(test)] + trip_observer: None, + } + } + + /// Attach a channel that receives the reason for each trip. + /// + /// Tests use this to check which condition the breaker charged a trip to. + #[cfg(test)] + fn with_trip_observer(mut self, tx: mpsc::UnboundedSender) -> Self { + self.trip_observer = Some(tx); + self + } + + /// Run the circuit breaker state machine. + /// + /// The state machine cycles through: + /// - Open: accept requests, track failures until trip condition met + /// - Closed: reject requests, wait for backoff + /// - Probation: allow single probe request + /// - If probe succeeds -> Open, if probe fails -> Closed + pub async fn run(mut self) { + let mut state = OpenState::new(self.decay, Instant::now()); + + loop { + // Open state: track failures until trip condition + let trip_reason = match self.open(&mut state).await { + Ok(reason) => reason, + Err(()) => return, // Gate lost + }; + + tracing::info!(?trip_reason, "Unified circuit breaker tripped"); + + #[cfg(test)] + if let Some(tx) = self.trip_observer.as_ref() { + let _ = tx.send(trip_reason); + } + + // Closed state: wait for backoff, then probe + match self.closed(trip_reason).await { + Ok(()) => { + // Probe succeeded, reset state for reopening + state.reset(self.decay, Instant::now()); + tracing::info!("Unified circuit breaker reopened"); + } + Err(()) => return, // Gate lost + } + } + } + + /// Open state: track both policies until either trips. + /// + /// Returns the reason for trip, or `Err(())` if gate is lost. + async fn open(&mut self, state: &mut OpenState) -> Result { + tracing::debug!("Open"); + + self.gate.open().map_err(|_| ())?; + + // Saturate rather than multiply directly. A very large decay set by an + // operator could push `decay * IDLE_COLD_START_FACTOR` past + // `Duration::MAX` and panic. Computing it once also keeps the multiply + // out of the loop. + let idle_reset = self.decay.saturating_mul(IDLE_COLD_START_FACTOR); + + loop { + let class = tokio::select! { + rsp = self.rsps.recv() => rsp.ok_or(())?, + _ = self.gate.lost() => return Err(()), + }; + + let now = Instant::now(); + + // Reset cold-start protection after idle periods where the + // EWMA has decayed enough that one sample would dominate. Idle is + // measured against the last response rather than the EWMA, whose + // timestamp stops across a trip-recover cycle. + if now.saturating_duration_since(state.last_seen) > idle_reset { + tracing::debug!( + previous_count = state.request_count, + "Re-engaging cold-start protection after idle" + ); + state.request_count = 0; + } + + state.request_count = state.request_count.saturating_add(1); + state.last_seen = now; + + let is_configured_failure = !class.is_success(); + let degrades_success_rate = match &class { + classify::Class::Http(Ok(status)) => *status == http::StatusCode::TOO_MANY_REQUESTS, + classify::Class::Http(Err(_)) => true, + classify::Class::Grpc(Ok(code)) => *code == Code::ResourceExhausted, + classify::Class::Grpc(Err(_)) => true, + classify::Class::Error(_) => true, + }; + + // Update consecutive configured failures (skip when policy disabled) + if self.max_failures > 0 { + if is_configured_failure { + state.consecutive_failures = state.consecutive_failures.saturating_add(1); + } else { + state.consecutive_failures = 0; + } + } + + // Update EWMA (configured failures, typically 5xx, and 429 are failures). + // `Ewma::add` drops samples whose timestamp matches the stored one, so + // responses that share a single instant combine into the first. The request + // counter still increments per response and can lead the EWMA by one. + let value = if degrades_success_rate { 0.0 } else { 1.0 }; + state.ewma.add(value, now); + let rate = state.ewma.get(); + + tracing::trace!( + ?class, + consecutive_failures = state.consecutive_failures, + success_rate = %rate, + request_count = state.request_count, + min_requests = self.min_requests, + "Response" + ); + + // Either condition can trip. Consecutive failures has no cold-start, success + // rate does. The reason reflects whichever condition `should_trip` checks first. + if let Some(trip_reason) = self.should_trip(state) { + return Ok(trip_reason); + } + } + } + + /// Check if the circuit should trip based on current state. + /// + /// Returns the trip reason if a trip condition is met, `None` otherwise. + /// + /// Note that cold-start protection only applies to success rate, not consecutive failures. + /// + /// Consecutive failures come first. When a single response crosses both the + /// consecutive-failure and success-rate thresholds, the reported reason is + /// [`TripReason::ConsecutiveFailures`] because that condition is checked first. The + /// difference is for observability only, since either condition trips the circuit. + fn should_trip(&self, state: &OpenState) -> Option { + // max_failures=0 disables the consecutive failure policy entirely. + if self.max_failures == 0 { + // Skip consecutive failures check. Only success rate can trip. + } else if state.consecutive_failures >= self.max_failures { + // Condition 1: Consecutive failures threshold exceeded (no cold-start protection) + // This can trip immediately after max_failures consecutive 5xx errors. + return Some(TripReason::ConsecutiveFailures); + } + + // Condition 2: Success rate dropped below threshold (with cold-start protection) + // Requires min_requests for accurate EWMA estimation before tripping. + if state.request_count >= self.min_requests && state.ewma.get() < self.threshold { + return Some(TripReason::LowSuccessRate); + } + + None + } + + /// Closed state: backoff with optional Retry-After extension. + /// + /// Waits for backoff to expire, then enters probation. If the probe + /// succeeds, returns `Ok(())`. If it fails, resumes the backoff loop. + async fn closed(&mut self, trip_reason: TripReason) -> Result<(), ()> { + let mut backoff = self.backoff.stream(); + let mut first_iteration = true; + + loop { + // Shut the gate + tracing::debug!(backoff = ?backoff.duration(), ?trip_reason, "Shut"); + self.gate.shut().map_err(|_| ())?; + + // Wait for backoff, optionally extended by Retry-After/pushback hint on first iteration + let retry_after_hint = if first_iteration { + first_iteration = false; + self.take_combined_hint() + } else { + None + }; + + self.wait_for_backoff(&mut backoff, retry_after_hint) + .await?; + + // Enter probation and wait for probe response + let class = self.probation().await?; + tracing::trace!(?class, "Probe response"); + + // Check probe result (mode-aware: consecutive-only preserves old semantics) + if self.is_probe_success(&class) { + return Ok(()); + } + // Probe failed, continue backoff loop + } + } + + /// Take combined HTTP Retry-After and gRPC pushback hints, returning the maximum. + /// + /// A hint is honored as long as it is no older than the configured maximum + /// hint duration. The store already subtracts elapsed time, so the remaining + /// wait reflects how much of the hint is still in the future. If both HTTP + /// and gRPC hints exist, the maximum duration is returned. + fn take_combined_hint(&self) -> Option { + let http_hint = self.retry_after_store.take(self.max_duration); + let grpc_hint = self.grpc_retry_pushback_store.take(self.max_duration); + + // Cap each hint individually so per-source tracing shows which one was clamped. + let http_hint = http_hint.map(|h| { + if h > self.max_duration { + tracing::warn!( + hint = ?h, + max = ?self.max_duration, + "HTTP Retry-After hint exceeds maximum, clamping" + ); + self.max_duration + } else { + h + } + }); + let grpc_hint = grpc_hint.map(|g| { + if g > self.max_duration { + tracing::warn!( + hint = ?g, + max = ?self.max_duration, + "gRPC pushback hint exceeds maximum, clamping" + ); + self.max_duration + } else { + g + } + }); + + match (http_hint, grpc_hint) { + (Some(h), Some(g)) => { + let combined = h.max(g); + tracing::debug!( + http_hint = ?h, + grpc_hint = ?g, + combined = ?combined, + "Combined HTTP and gRPC hints (max-value-wins)" + ); + Some(combined) + } + (Some(h), None) => { + tracing::debug!(hint = ?h, "Using HTTP Retry-After hint"); + Some(h) + } + (None, Some(g)) => { + tracing::debug!(hint = ?g, "Using gRPC pushback hint"); + Some(g) + } + (None, None) => None, + } + } + + /// Check if a probe response indicates successful recovery. + /// + /// This check is mode-aware to preserve backwards compatibility: + /// + /// In consecutive-only mode (`min_requests == usize::MAX`), the probe + /// delegates to `class.is_success()`. This matches the behavior of the + /// previous consecutive-failures breaker, where 429 responses were + /// evaluated by the default classifier rather than being unconditionally + /// treated as failure. + /// + /// In dual mode (success rate active), the check is stricter: HTTP probes + /// must be non-5xx AND non-429, and gRPC probes must be Ok. This prevents + /// reopening the circuit when the endpoint is still rate limiting. + fn is_probe_success(&self, class: &classify::Class) -> bool { + if self.min_requests == usize::MAX { + // Consecutive-only mode: preserve old is_success() semantics. + // The existing consecutive-failures breaker used class.is_success() + // which lets the default classifier decide whether 429 is success + // or failure. We must not change this behavior. + class.is_success() + } else { + // Dual mode (success_rate active): stricter check. + // 429 and RESOURCE_EXHAUSTED count as probe failure because they + // indicate the endpoint is rate limiting, which the success-rate + // EWMA should track. + match class { + classify::Class::Http(Ok(status)) => { + !status.is_server_error() && *status != http::StatusCode::TOO_MANY_REQUESTS + } + classify::Class::Http(Err(_)) => false, // 5xx failure + classify::Class::Grpc(Ok(code)) => *code == Code::Ok, + classify::Class::Grpc(Err(_)) => false, + _ => class.is_success(), + } + } + } + + /// Wait for the backoff duration to expire, optionally extended by Retry-After hint. + /// + /// If `retry_after_hint` is provided, waits for _both_ the hint duration _and_ the backoff + /// to complete, ensuring we wait at least `max(hint, backoff_duration)`. + async fn wait_for_backoff( + &mut self, + backoff: &mut exp_backoff::ExponentialBackoffStream, + retry_after_hint: Option, + ) -> Result<(), ()> { + if let Some(hint) = retry_after_hint { + debug_assert!( + hint <= self.max_duration, + "hint {hint:?} exceeds max_duration {:?} -- take_combined_hint should have clamped", + self.max_duration + ); + tracing::info!(?hint, "Using Retry-After hint as backoff floor"); + + let hint_sleep = tokio::time::sleep(hint); + tokio::pin!(hint_sleep); + + let mut backoff_done = false; + let mut hint_done = false; + + // Wait for both to complete + loop { + tokio::select! { + _ = backoff.next(), if !backoff_done => { + backoff_done = true; + if hint_done { break; } + } + _ = &mut hint_sleep, if !hint_done => { + hint_done = true; + if backoff_done { break; } + } + // Drain responses while the breaker is shut, but stop when + // the channel closes. recv() on a closed channel returns + // None at once, so looping on it would spin this task. + rsp = self.rsps.recv() => { rsp.ok_or(())?; continue } + _ = self.gate.lost() => return Err(()), + } + } + } else { + // No hint, use normal backoff + loop { + tokio::select! { + _ = backoff.next() => break, + // Drain responses while the breaker is shut, but stop when + // the channel closes. recv() on a closed channel returns + // None at once, so looping on it would spin this task. + rsp = self.rsps.recv() => { rsp.ok_or(())?; continue } + _ = self.gate.lost() => return Err(()), + } + } + } + + Ok(()) + } + + /// Probation state: allow single probe request. + /// + /// Uses `gate.limit(1)` to allow exactly one request through. + async fn probation(&mut self) -> Result { + tracing::debug!("Probation"); + + let _sem = self.gate.limit(1).map_err(|_| ())?; + + tokio::select! { + rsp = self.rsps.recv() => rsp.ok_or(()), + _ = self.gate.lost() => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use linkerd_proxy_client_policy::DEFAULT_RETRY_AFTER_MAX_DURATION; + use tokio::time; + use tokio_test::{assert_pending, task}; + + fn make_backoff() -> ExponentialBackoff { + ExponentialBackoff::try_new( + time::Duration::from_secs(1), + time::Duration::from_secs(100), + 0.0, // No jitter for deterministic tests + ) + .expect("backoff params are valid") + } + + fn send_class(params: &gate::Params, class: classify::Class) { + params.responses.try_send(class).unwrap() + } + + fn send_ok(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Ok(status))); + } + + fn send_err(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Err(status))); + } + + fn default_test_config( + gate_tx: gate::Tx, + rsps: mpsc::Receiver, + ) -> UnifiedBreakerConfig { + UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.8, + decay: time::Duration::from_secs(10), + backoff: make_backoff(), + min_requests: 1, + gate: gate_tx, + rsps, + retry_after_store: RetryAfterStore::new(), + grpc_retry_pushback_store: GrpcRetryPushbackStore::new(), + max_duration: DEFAULT_RETRY_AFTER_MAX_DURATION, + } + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn starts_open() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_consecutive_failures() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // Send 3 consecutive 5xx (should trip) + for i in 1..=3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + if i < 3 { + assert!( + params.gate.is_open(), + "Should still be open after {} failures", + i + ); + } + } + + assert!(params.gate.is_shut(), "Should trip after 3 consecutive 5xx"); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_resets_consecutive_count() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send 2 failures + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // Success resets count + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + // 2 more failures - still need 3 consecutive + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_open(), + "Should still be open - only 2 consecutive after reset" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_low_success_rate() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures so CF doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send 1 success + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Send 429s to drop success rate below 80% + // With EWMA, after ~3-4 429s the rate drops below 0.8 + for _ in 0..4 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!(!params.gate.is_open(), "Should trip on low success rate"); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_rate_counts_5xx() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures so CF doesn't trip first + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // 5xx should count as failure for success rate + for _ in 0..5 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_shut(), + "5xx should count as failure for success rate" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn cold_start_protects_success_rate() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures + min_requests: 5, // min_requests = 5 + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send 4 429s - should NOT trip (only 4 requests, need 5) + for _ in 0..4 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "Cold-start should protect success rate" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_success_reopens() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Enter probation + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1); + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // Probe succeeds + send_ok(¶ms, http::StatusCode::OK); + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_failure_on_429() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff and enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + match params.gate.state() { + gate::State::Limited(_) => { + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // Probe "fails" with 429 - strict AND recovery (dual mode, min_requests=1) + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "429 during probation should fail (strict AND)" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn retry_after_hint_extends_backoff() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let retry_after_store = RetryAfterStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: retry_after_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record Retry-After hint of 5s (longer than 1s backoff) + retry_after_store.record(time::Duration::from_secs(5)); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // After 1s (normal backoff would complete), still shut due to hint + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_shut()); + + // After 3s more (4s total), still shut + time::sleep(time::Duration::from_secs(3)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_shut()); + + // After 1s more (5s total), enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!(params.gate.is_limited()); + } + + // Verify that consecutive-only mode (min_requests == usize::MAX) uses + // class.is_success() for probe checks, preserving the old breaker behavior + // where 429 is evaluated by the default classifier. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_only_429_probe_preserves_old_semantics() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + // Consecutive-only mode: min_requests = usize::MAX disables EWMA tripping + // and triggers the class.is_success() path in is_probe_success(). + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + min_requests: usize::MAX, + threshold: 0.0, // threshold=0.0 also prevents SR trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker via consecutive failures + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff and enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + match params.gate.state() { + gate::State::Limited(_) => { + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // In consecutive-only mode, 429 probe uses class.is_success(). + // The default HTTP classifier treats 429 as success (only 500-599 are + // failures by default). So the probe should succeed and reopen the gate. + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "In consecutive-only mode, 429 during probation should succeed \ + (delegates to class.is_success() which treats 429 as success)" + ); + } + + fn send_grpc(params: &gate::Params, code: tonic::Code) { + let class = if matches!( + code, + tonic::Code::Unknown + | tonic::Code::DeadlineExceeded + | tonic::Code::PermissionDenied + | tonic::Code::Internal + | tonic::Code::Unavailable + | tonic::Code::DataLoss + ) { + classify::Class::Grpc(Err(code)) + } else { + classify::Class::Grpc(Ok(code)) + }; + send_class(params, class); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_failures_ignores_grpc_resource_exhausted() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + threshold: 0.1, // very low threshold so SR doesn't trip + min_requests: 100, // high min_requests so SR doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // RESOURCE_EXHAUSTED should not count as consecutive failures + for _ in 0..5 { + send_grpc(¶ms, tonic::Code::ResourceExhausted); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "gRPC RESOURCE_EXHAUSTED should not trigger consecutive failures trip" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_low_success_rate_grpc_resource_exhausted() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // high max_failures so CF doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Send one success + send_grpc(¶ms, tonic::Code::Ok); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Send RESOURCE_EXHAUSTED to drop success rate below 80% + for _ in 0..4 { + send_grpc(¶ms, tonic::Code::ResourceExhausted); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + !params.gate.is_open(), + "Should trip on low success rate from gRPC RESOURCE_EXHAUSTED" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn trips_on_low_success_rate_grpc_server_error() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + send_grpc(¶ms, tonic::Code::Ok); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + for _ in 0..4 { + send_grpc(¶ms, tonic::Code::Internal); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + !params.gate.is_open(), + "Should trip on low success rate from gRPC Internal errors" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_success_on_grpc_ok() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip the breaker + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait for backoff + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Enter probation + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1); + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state"), + } + + // Probe succeeds with gRPC Ok + send_grpc(¶ms, tonic::Code::Ok); + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "gRPC Ok during probation should succeed" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_failures_ignores_429() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + threshold: 0.1, // Very low threshold so SR doesn't trip + min_requests: 100, // High min_requests so SR doesn't trip + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // 429s should not count as consecutive failures for the CF policy + for _ in 0..5 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "429s should not trigger consecutive failures trip" + ); + } + + // Verify threshold=0.0 disables success-rate tripping + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn threshold_zero_disables_success_rate() { + let _trace = linkerd_tracing::test::trace_init(); + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // High max_failures so CF won't trip + threshold: 0.0, // threshold=0.0 (SR disabled) + min_requests: 1, // Cold-start passes after 1 request + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // 1 success to pass cold-start (min_requests=1) + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // Many 429s to drop success rate toward 0%. + for _ in 0..10 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!( + params.gate.is_open(), + "threshold=0.0 should disable success-rate tripping" + ); + } + + // Verify Duration::MAX hint is clamped to DEFAULT_RETRY_AFTER_MAX_DURATION + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn duration_max_hint_is_clamped() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let retry_after_store = RetryAfterStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: retry_after_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a Duration::MAX hint BEFORE tripping the breaker. + retry_after_store.record(Duration::MAX); + + // Trip the breaker with 3 consecutive 5xx errors. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "Should trip after 3 consecutive 5xx"); + + // After DEFAULT_RETRY_AFTER_MAX_DURATION (300s), the breaker should enter probation. + time::sleep(DEFAULT_RETRY_AFTER_MAX_DURATION).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "Duration::MAX hint should be clamped to DEFAULT_RETRY_AFTER_MAX_DURATION; \ + gate should enter probation after 300s, not sleep forever" + ); + } + + // A closed response channel must end the breaker task even while it is + // parked in the backoff wait. recv() on a closed channel yields None right + // away, so the drain step has to treat None as teardown rather than looping + // on it. The gate receiver is kept alive here, so gate.lost() never fires + // and the task ends only through the closed-channel check. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn wait_for_backoff_terminates_when_channel_closes() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + // max_failures=1 trips the breaker on the first 5xx, dropping it into + // the closed state and the backoff wait. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 1, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + send_err(¶ms, http::StatusCode::INTERNAL_SERVER_ERROR); + assert_pending!(task.poll()); + assert!(params.gate.is_shut(), "single 5xx should trip the breaker"); + + // Close the channel while the breaker waits out the backoff. The task + // must finish rather than spin on None. + drop(params.responses); + assert!( + task.poll().is_ready(), + "breaker must terminate when the response channel closes during backoff" + ); + } + + // Same teardown as above, but a second gate receiver outlives the endpoint + // service so gate.lost() can never resolve. Without the closed-channel + // check the backoff wait spins forever on None. With it the task exits. A + // real task is needed so the Gate plus BroadcastClassification stack can + // drive a classification through the channel. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn wait_for_backoff_terminates_with_leaked_gate() { + use linkerd_app_core::{ + proxy::http::{classify::BroadcastClassification, BoxBody}, + svc::{self, ServiceExt}, + Error, + }; + use tokio::task::yield_now; + + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + // Model a stack that retains gate state after the endpoint service is + // dropped by holding a second gate receiver. + let leaked_gate = params.gate.clone(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 1, + ..default_test_config(gate_tx, rsps) + }); + let breaker = tokio::spawn(breaker.run()); + + let svc = svc::Gate::new( + params.gate, + BroadcastClassification::::new( + params.responses, + svc::mk(|_: http::Request| async move { + Ok::<_, Error>( + http::Response::builder() + .status(http::StatusCode::INTERNAL_SERVER_ERROR) + .body(BoxBody::default()) + .unwrap(), + ) + }), + ), + ); + + let req = http::Request::builder() + .extension(classify::Response::default()) + .body(BoxBody::default()) + .unwrap(); + let rsp = svc.oneshot(req).await.expect("service must succeed"); + // Dropping the response emits a final classification, then drops the + // last sender so the channel closes. + drop(rsp); + + for _ in 0..16 { + if leaked_gate.is_shut() { + break; + } + yield_now().await; + } + assert!( + leaked_gate.is_shut(), + "breaker should enter the closed state" + ); + + for _ in 0..16 { + if breaker.is_finished() { + break; + } + yield_now().await; + } + assert!( + breaker.is_finished(), + "breaker must terminate when the response channel closes, \ + even while a second gate receiver survives" + ); + breaker.await.expect("breaker task must not panic"); + } + + // Probation limits the gate to a single in-flight probe. The breaker calls + // `gate.limit(1)`, so the semaphore offers exactly one permit. Acquiring it + // models the one admitted probe. A second concurrent request finds no permit + // left and would block until the gate state changes. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn probation_admits_single_probe() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip via 3 consecutive 5xx. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut()); + + // Wait out the base backoff so the breaker enters probation. + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // The gate exposes a one-permit semaphore. Draining that permit leaves + // none for a would-be second probe, which is the single-probe bound. + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1, "probation admits one probe"); + let permit = sem.try_acquire().expect("the single probe acquires"); + assert_eq!( + sem.available_permits(), + 0, + "a second concurrent probe finds no permit", + ); + assert!( + sem.try_acquire().is_err(), + "the semaphore admits only one probe at a time", + ); + drop(permit); + } + other => panic!("expected Limited state, got {other:?}"), + } + } + + // After a success-rate trip, an optimistic reopen plus steady healthy + // traffic raises the EWMA above the threshold. A single failure right + // after recovery loses its weight as successes add up across the decay + // window, so the circuit stays open rather than tripping again on stale memory. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_rate_recovers_after_decay() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + // Consecutive failures disabled so only success rate drives the trip + // and the later recovery. decay=10s keeps the arithmetic readable. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, + threshold: 0.5, + min_requests: 1, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Seed one success so cold-start clears and the EWMA sits at 1.0. + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // 429s lower the rate below 0.5 and trip the circuit. With 1s spacing + // the EWMA crosses 0.5 around the seventh sample. Stop at the trip so + // the backoff timer is not advanced while the rate is still dropping. + for _ in 0..8 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + if params.gate.is_shut() { + break; + } + } + assert!(params.gate.is_shut(), "low success rate should trip"); + + // Wait out the base backoff and let the probe succeed. + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + match params.gate.state() { + gate::State::Limited(_) => { + params.gate.opened_for_test().await.unwrap().forget(); + } + other => panic!("expected Limited state, got {other:?}"), + } + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "successful probe reopens the circuit" + ); + + // One failure after recovery dips the newly reset EWMA but stays above + // 0.5 (one zero against an optimistic 1.0). + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "a single failure after recovery should not re-trip", + ); + + // Steady successes across two decay windows pull the rate back toward + // 1.0 while the old failure decays out. The circuit holds open throughout. + for _ in 0..20 { + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "recovered success rate should keep the circuit open", + ); + } + } + + // With both policies enabled, a stream of 429s lowers the success rate + // without ever incrementing the consecutive counter (429 is not a hard + // failure). The circuit trips on low success rate while consecutive failures + // stay at zero, and the charged reason reflects that. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn dual_mode_success_rate_trips_before_consecutive() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + // Both mechanisms active: a five-failure consecutive budget and an 80% + // success-rate floor with a three-request cold-start. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 5, + threshold: 0.8, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Seed one success so the EWMA starts at 1.0 and the cold-start counter + // begins climbing. + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + // 429s push the rate under 0.8. Consecutive failures never move because + // 429 is treated as success by the consecutive policy. With 1s spacing + // the EWMA reads ~0.74 after the third 429, below 0.8, and min_requests + // is met, so the trip happens there. Break at that response to check while + // the gate is shut. A further 429 would advance time past the base + // backoff, reopening the gate for a probe and hiding the shut state. + let mut shut = false; + for _ in 0..4 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + if params.gate.is_shut() { + shut = true; + break; + } + } + + assert!( + shut, + "success rate should trip while consecutive failures stay at zero", + ); + // The reason proves the consecutive policy did not trip. `should_trip` + // checks consecutive failures first, so a `LowSuccessRate` verdict means + // the consecutive counter never reached max_failures. + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::LowSuccessRate), + "the trip is attributed to low success rate, not consecutive failures", + ); + } + + // A consecutive-failure trip in a single-policy configuration reports + // `ConsecutiveFailures`. Success rate is disabled (threshold 0.0), so the + // only path to a trip is the consecutive counter reaching max_failures. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn consecutive_trip_reports_consecutive_reason() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.0, // success rate disabled + min_requests: usize::MAX, + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + + assert!(params.gate.is_shut(), "three 5xx trip consecutive failures"); + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::ConsecutiveFailures), + "a consecutive-failure trip reports ConsecutiveFailures", + ); + } + + // A success-rate trip in a single-policy configuration reports + // `LowSuccessRate`. Consecutive failures are disabled (max_failures 0), so + // only EWMA degradation can trip. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn success_rate_trip_reports_low_success_rate_reason() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, // consecutive failures disabled + threshold: 0.5, + min_requests: 1, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + for _ in 0..8 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + if params.gate.is_shut() { + break; + } + } + + assert!(params.gate.is_shut(), "low success rate trips the circuit"); + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::LowSuccessRate), + "a success-rate trip reports LowSuccessRate", + ); + } + + // When one response crosses both thresholds at once, consecutive failures + // win the charge because `should_trip` checks them first. The config is tuned + // so the third 5xx reaches max_failures, satisfies min_requests, and drops + // the EWMA below the threshold on the same response. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn simultaneous_threshold_cross_reports_consecutive_reason() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let (trip_tx, mut trip_rx) = mpsc::unbounded_channel(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.8, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }) + .with_trip_observer(trip_tx); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // 1s spacing decays the EWMA fast. After three 5xx it reads ~0.74, below + // 0.8, exactly as the consecutive counter reaches 3 and min_requests is + // met. Both conditions hold on the third response. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + assert!(params.gate.is_shut(), "the third 5xx trips the circuit"); + assert_eq!( + trip_rx.try_recv(), + Ok(TripReason::ConsecutiveFailures), + "a simultaneous cross is attributed to consecutive failures", + ); + } +} From 7961afe308bff3de7ce8600ea21989fb830f97de Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Fri, 8 May 2026 10:46:53 +0200 Subject: [PATCH 11/18] feat(outbound): gate each endpoint on its own breaker Put a breaker-controlled gate in front of each balancer endpoint. When failure accrual is set for the target, the endpoint gets its own Retry-After and gRPC pushback stores, and its response classifier is wrapped so 429 and RESOURCE_EXHAUSTED hints land in those stores. The wrapping happens inline during call(), not through a separate layer, and the stores are built per endpoint so a hint from one endpoint does not extend the backoff of another. When accrual is absent, the endpoint uses the stock classifier behind a gate that never shuts, with no stores and no hint parsing, matching an endpoint that has no circuit breaking. An accrual policy that can never trip, where max_failures is zero and any success-rate threshold is at or below zero, resolves the same way, so it allocates no stores and spawns no breaker. The breaker Params struct holds the per-endpoint stores and the Retry-After cap, passing them on to the UnifiedBreaker. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/outbound/src/http/breaker.rs | 14 + .../src/http/breaker/wrap_classify.rs | 432 ++++++++++++++++++ .../app/outbound/src/http/concrete/balance.rs | 4 + .../app/outbound/src/http/logical/tests.rs | 12 + .../src/http/logical/tests/failure_accrual.rs | 115 +++++ 5 files changed, 577 insertions(+) create mode 100644 linkerd/app/outbound/src/http/breaker/wrap_classify.rs diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index e08149537e..6f5a8776a6 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -7,9 +7,12 @@ use tracing::{trace_span, Instrument}; mod consecutive_failures; pub mod retry_after; mod unified; +pub mod wrap_classify; use self::consecutive_failures::ConsecutiveFailures; +pub use self::retry_after::{GrpcRetryPushbackStore, RetryAfterStore}; use self::unified::{UnifiedBreaker, UnifiedBreakerConfig}; +pub use self::wrap_classify::{HasFailureAccrual, NewRetryAfterGateSet, RetryAfterGateParams}; /// Reason why the circuit breaker tripped. #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -24,10 +27,21 @@ pub enum TripReason { const DEFAULT_SUCCESS_RATE_DECAY: Duration = Duration::from_secs(10); /// Params configuring a circuit breaker stack. +/// +/// Retry-After stores move hints from response classification to the +/// circuit breaker backoff logic. A new pair is built for each endpoint and +/// shared between that endpoint's classifier and its [`UnifiedBreaker`], so a +/// hint seen on one endpoint never extends the backoff of another. #[derive(Clone, Debug)] pub(crate) struct Params { pub(crate) accrual: Option, pub(crate) channel_capacity: usize, + /// Shared store for HTTP Retry-After hints. + pub(crate) retry_after_store: RetryAfterStore, + /// Shared store for gRPC retry pushback hints. + pub(crate) grpc_retry_pushback_store: GrpcRetryPushbackStore, + /// Maximum Retry-After duration the proxy will honor (clamping cap). + pub(crate) max_duration: Duration, } impl svc::ExtractParam, T> for Params { diff --git a/linkerd/app/outbound/src/http/breaker/wrap_classify.rs b/linkerd/app/outbound/src/http/breaker/wrap_classify.rs new file mode 100644 index 0000000000..478a1bac88 --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/wrap_classify.rs @@ -0,0 +1,432 @@ +//! Per-endpoint gate wiring for the circuit breaker. +//! +//! Each balancer endpoint sits behind a [`svc::Gate`] whose readiness a +//! [`UnifiedBreaker`](super::unified::UnifiedBreaker) controls. The gate is paired with a +//! response classifier that broadcasts each classification over the breaker's channel. +//! +//! When failure accrual is set up for the target, the classifier is wrapped with +//! [`RetryAfterClassify`] so that `Retry-After` and `grpc-retry-pushback-ms` hints are recorded +//! into stores the breaker reads while backing off. The stores are built here, per endpoint, so a +//! hint seen on one endpoint extends only that endpoint's backoff. +//! +//! When accrual is absent, the endpoint uses the stock +//! [`BroadcastClassification`] with no stores and no hint parsing. This matches an +//! endpoint that has no circuit breaking. + +use super::retry_after::{GrpcRetryPushbackStore, RetryAfterClassify, RetryAfterStore}; +use super::Params; +use linkerd_app_core::{ + classify, + proxy::http::{self, classify::gate}, + svc::{self, ExtractParam}, + Error, +}; +use linkerd_http_classify::BroadcastClassification; +use linkerd_proxy_client_policy::{FailureAccrual, RetryAfterConfig}; +use std::{ + task::{Context, Poll}, + time::Duration, +}; + +/// The classifier the breaker reads when accrual is set up. +type WrappedClassify = RetryAfterClassify; + +/// Parameters for creating the Retry-After aware gate set. +#[derive(Clone, Debug)] +pub struct RetryAfterGateParams { + pub channel_capacity: usize, + /// Maximum Retry-After duration to honor. Capped to this value. + pub max_duration: Duration, +} + +/// Reads the per-endpoint failure accrual and Retry-After configuration, then builds the +/// [`NewRetryAfterGate`] that creates each endpoint's gated service. +#[derive(Clone, Debug)] +pub struct NewRetryAfterGateSet { + inner: N, + params: RetryAfterGateParams, +} + +/// Builds each endpoint's [`svc::Gate`], branching on whether failure accrual is set up. +#[derive(Clone, Debug)] +pub struct NewRetryAfterGate { + inner: N, + accrual: Option, + channel_capacity: usize, + max_duration: Duration, +} + +/// Trait for targets that provide failure accrual configuration. +pub trait HasFailureAccrual { + fn failure_accrual(&self) -> Option; +} + +/// Whether an accrual policy can never trip, which makes it the same as having no circuit breaking. +/// +/// Consecutive tracking does nothing when `max_failures` is zero. Success-rate tracking does +/// nothing when it is absent or its threshold is at or below zero, since the EWMA rate stays at or +/// above any such threshold. A policy where both hold could only ever spawn an unused breaker, so +/// it is treated as having no breaker. +fn is_effectively_disabled(accrual: &FailureAccrual) -> bool { + accrual.consecutive.max_failures == 0 + && accrual.success_rate.is_none_or(|sr| sr.threshold <= 0.0) +} + +/// Wraps the request's `classify::Response` with [`RetryAfterClassify`] so the inner broadcaster +/// records hints into the breaker's stores. +/// +/// The wrapped classifier is inserted into the request extensions under its own type. The inner +/// [`BroadcastClassification`] reads it from there. The original `classify::Response` extension +/// stays in place for other consumers such as metrics. +#[derive(Clone, Debug)] +pub struct InsertRetryAfterClassify { + inner: S, + http_store: RetryAfterStore, + grpc_store: GrpcRetryPushbackStore, + max_duration: Duration, +} + +// === impl RetryAfterGateParams === + +impl RetryAfterGateParams { + pub fn new(channel_capacity: usize, max_duration: Duration) -> Self { + Self { + channel_capacity, + max_duration, + } + } +} + +// === impl NewRetryAfterGateSet === + +impl NewRetryAfterGateSet { + /// Create a layer that puts a breaker-controlled gate in front of each endpoint. + pub fn layer( + params: RetryAfterGateParams, + ) -> impl svc::layer::Layer + Clone { + svc::layer::mk(move |inner| Self { + inner, + params: params.clone(), + }) + } +} + +impl svc::NewService for NewRetryAfterGateSet +where + T: HasFailureAccrual + svc::Param> + Clone, + N: svc::NewService + Clone, +{ + type Service = NewRetryAfterGate; + + fn new_service(&self, target: T) -> Self::Service { + // Per-target Retry-After cap wins over the stack default. + let max_duration = svc::Param::>::param(&target) + .map(|ra| ra.max_duration) + .unwrap_or(self.params.max_duration); + + // An accrual policy that can never trip costs the same as none at all. Treat it as having + // no breaker so no stores or breaker task get allocated for it. + let accrual = target + .failure_accrual() + .filter(|a| !is_effectively_disabled(a)); + let inner = self.inner.new_service(target); + + NewRetryAfterGate { + inner, + accrual, + channel_capacity: self.params.channel_capacity, + max_duration, + } + } +} + +// === impl NewRetryAfterGate === + +impl svc::NewService for NewRetryAfterGate +where + N: svc::NewService, + S: svc::Service< + http::Request, + Response = http::Response, + Error = Error, + > + Send + + 'static, + S::Future: Send + 'static, +{ + type Service = svc::Gate>; + + fn new_service(&self, target: T) -> Self::Service { + match self.accrual { + None => { + // No breaker for this endpoint. Build the stock classifier with a gate that never + // shuts and no hint stores. + let (params, _gate_tx, _rsps) = + gate::Params::::channel(self.channel_capacity); + tracing::trace!("No failure accrual policy enabled."); + let inner = self.inner.new_service(target); + let broadcast = + BroadcastClassification::::new(params.responses, inner); + svc::Gate::new( + params.gate, + svc::BoxHttp::new(http::BoxResponse::new(broadcast)), + ) + } + Some(ref accrual) => { + // Build the stores this endpoint's breaker reads, spawn the breaker, then wrap the + // classifier so 429 and RESOURCE_EXHAUSTED hints go into those stores. + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let breaker_params = Params { + accrual: Some(accrual.clone()), + channel_capacity: self.channel_capacity, + retry_after_store: http_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + max_duration: self.max_duration, + }; + let params: gate::Params = + , T>>::extract_param( + &breaker_params, + &target, + ); + let inner = self.inner.new_service(target); + let broadcast = + BroadcastClassification::::new(params.responses, inner); + let wrapped = InsertRetryAfterClassify { + inner: broadcast, + http_store, + grpc_store, + max_duration: self.max_duration, + }; + svc::Gate::new( + params.gate, + svc::BoxHttp::new(http::BoxResponse::new(wrapped)), + ) + } + } + } +} + +// === impl InsertRetryAfterClassify === + +impl svc::Service> for InsertRetryAfterClassify +where + S: svc::Service, Response = http::Response, Error = Error>, +{ + type Response = S::Response; + type Error = Error; + type Future = S::Future; + + #[inline] + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, mut req: http::Request) -> Self::Future { + if let Some(classify) = req.extensions().get::().cloned() { + let wrapped = RetryAfterClassify::new_with_max( + classify, + self.http_store.clone(), + self.grpc_store.clone(), + self.max_duration, + ); + req.extensions_mut().insert(wrapped); + } + self.inner.call(req) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use linkerd_app_core::{exp_backoff::ExponentialBackoff, svc::NewService}; + use linkerd_http_classify::ClassifyResponse; + use linkerd_proxy_client_policy::http::StatusRanges; + use linkerd_proxy_client_policy::{ConsecutiveFailures, SuccessRateConfig}; + use std::time::Duration; + + // `super::*` re-exports `http` (`linkerd_app_core::proxy::http`). Its `Response`, `StatusCode`, + // `HeaderValue`, and `header` are used to build the test responses below. + + // Helper to create a default HTTP classifier for testing + fn test_classifier() -> classify::Response { + classify::Response::Http(StatusRanges::default()) + } + + #[derive(Clone)] + struct MockTarget(Option); + + impl HasFailureAccrual for MockTarget { + fn failure_accrual(&self) -> Option { + self.0.clone() + } + } + + impl svc::Param> for MockTarget { + fn param(&self) -> Option { + None + } + } + + fn mk_backoff() -> ExponentialBackoff { + ExponentialBackoff::try_new(Duration::from_secs(1), Duration::from_secs(100), 0.0) + .expect("backoff params are valid") + } + + // Read a target through the gate-set layer and report the accrual the per-endpoint gate + // would see. `None` means the endpoint has no breaker and no added cost. + fn resolved_accrual(accrual: Option) -> Option { + let set = NewRetryAfterGateSet { + inner: |_: MockTarget| (), + params: RetryAfterGateParams::new(1, Duration::from_secs(300)), + }; + set.new_service(MockTarget(accrual)).accrual + } + + // An accrual policy that can never trip is treated the same as a truly absent policy, so it + // spawns no breaker and allocates no hint stores. This pins the cost of a config that is + // present but can never trip to the cost of having no circuit breaking at all. + #[test] + fn effectively_disabled_accrual_produces_no_breaker_task() { + let disabled = FailureAccrual { + consecutive: ConsecutiveFailures { + max_failures: 0, + backoff: mk_backoff(), + }, + success_rate: Some(SuccessRateConfig { + threshold: 0.0, + decay: Duration::from_secs(10), + min_requests: 1, + }), + }; + + assert!( + is_effectively_disabled(&disabled), + "max_failures 0 with a zero success-rate threshold can never trip", + ); + assert_eq!( + resolved_accrual(Some(disabled)), + None, + "an inert accrual policy must resolve to the disabled path, same as None", + ); + + // A genuinely live policy keeps its enabled resolution. + let live = FailureAccrual { + consecutive: ConsecutiveFailures { + max_failures: 3, + backoff: mk_backoff(), + }, + success_rate: None, + }; + assert!(!is_effectively_disabled(&live)); + assert!(resolved_accrual(Some(live)).is_some()); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn wrapped_classifier_records_retry_after() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store); + + // Create a 429 response with Retry-After header + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("30"), + ); + + // start() should parse and record the hint + let _eos = wrapped.start(&rsp); + + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!(hint, Some(Duration::from_secs(30))); + } + + #[test] + fn wrapped_classifier_ignores_non_429() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let wrapped = RetryAfterClassify::new(inner, http_store.clone(), grpc_store); + + // Create a 200 OK response with Retry-After header (shouldn't be recorded) + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::OK; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("30"), + ); + + // start() should NOT record the hint + let _eos = wrapped.start(&rsp); + + // Verify the store is empty + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!(hint, None); + } + + // Check that `RetryAfterConfig.max_duration` caps hints through `new_with_max()`. + // + // A 429 response with a 600s Retry-After header must be capped to 120s in the store + // when the classifier was built with `max_duration = 120s`. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn config_max_duration_caps_hint_at_recording() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let custom_cap = Duration::from_secs(120); + + let wrapped = + RetryAfterClassify::new_with_max(inner, http_store.clone(), grpc_store, custom_cap); + + // Create a 429 response with Retry-After: 600 (exceeds the 120s cap). + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("600"), + ); + + // start() parses the header and records via rate_limit_hint(max_duration). + let _eos = wrapped.start(&rsp); + + // The store must contain 120s instead of 600s + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!( + hint, + Some(custom_cap), + "600s Retry-After should be capped to 120s by max_duration at recording site" + ); + } + + // Check that a hint within the cap is recorded at its actual value (no truncation). + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn config_max_duration_passes_through_uncapped_hint() { + let http_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + let inner = test_classifier(); + let custom_cap = Duration::from_secs(120); + + let wrapped = + RetryAfterClassify::new_with_max(inner, http_store.clone(), grpc_store, custom_cap); + + // Create a 429 response with Retry-After: 60 (below the 120s cap). + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("60"), + ); + + let _eos = wrapped.start(&rsp); + + // The store must contain the actual 60s value, not the cap. + let hint = http_store.take(Duration::from_secs(10)); + assert_eq!( + hint, + Some(Duration::from_secs(60)), + "60s Retry-After should pass through unchanged when below 120s cap" + ); + } +} diff --git a/linkerd/app/outbound/src/http/concrete/balance.rs b/linkerd/app/outbound/src/http/concrete/balance.rs index 5a7463bbb5..7f7e054e1d 100644 --- a/linkerd/app/outbound/src/http/concrete/balance.rs +++ b/linkerd/app/outbound/src/http/concrete/balance.rs @@ -143,6 +143,10 @@ where move |target: &Self| breaker::Params { accrual: target.parent.param(), channel_capacity, + retry_after_store: breaker::RetryAfterStore::new(), + grpc_retry_pushback_store: breaker::GrpcRetryPushbackStore::new(), + max_duration: + linkerd_proxy_client_policy::DEFAULT_RETRY_AFTER_MAX_DURATION, } }), ) diff --git a/linkerd/app/outbound/src/http/logical/tests.rs b/linkerd/app/outbound/src/http/logical/tests.rs index 643ca73a82..ea3d5431ec 100644 --- a/linkerd/app/outbound/src/http/logical/tests.rs +++ b/linkerd/app/outbound/src/http/logical/tests.rs @@ -127,6 +127,18 @@ async fn mk_rsp(status: StatusCode, body: impl ToString) -> Result { .unwrap()) } +async fn mk_rsp_with_retry_after( + status: StatusCode, + retry_after_secs: u64, + body: impl ToString, +) -> Result { + Ok(http::Response::builder() + .status(status) + .header(http::header::RETRY_AFTER, retry_after_secs.to_string()) + .body(http::BoxBody::new(body.to_string())) + .unwrap()) +} + async fn mk_grpc_rsp(code: tonic::Code) -> Result { Ok(http::Response::builder() .version(::http::Version::HTTP_2) diff --git a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs index 2103bee040..d149f24528 100644 --- a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs +++ b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs @@ -268,3 +268,118 @@ async fn balancer_doesnt_select_tripped_breakers() { assert_rsp(rsp, StatusCode::OK, "endpoint 1").await; } } + +// A Retry-After hint returned by one endpoint must not extend the backoff of +// another endpoint behind the same balancer. Each endpoint runs an independent +// breaker, so the duration hint it honors has to come from its own responses. +// +// Endpoint A returns `Retry-After: 30` while healthy. Endpoint B then trips on +// its own 5xx failures. When B's breaker computes its backoff it must use the +// base delay, not A's 30s hint. The endpoints are driven one at a time so the +// balancer's endpoint selection cannot hide which breaker honors the hint. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn retry_after_hint_does_not_bleed_across_endpoints() { + let _trace = trace::test::with_default_filter(format!( + "{},linkerd_app_outbound=trace,linkerd_stack=trace", + trace::test::DEFAULT_LOG + )); + + let addr_a = SocketAddr::new([192, 0, 2, 41].into(), PORT); + let addr_b = SocketAddr::new([192, 0, 2, 42].into(), PORT); + let dest: NameAddr = format!("{AUTHORITY}:{PORT}") + .parse::() + .expect("dest addr is valid"); + let (svc_a, mut handle_a) = tower_test::mock::pair(); + let (svc_b, mut handle_b) = tower_test::mock::pair(); + let connect = HttpConnect::default() + .service(addr_a, svc_a) + .service(addr_b, svc_b); + let resolve = support::resolver(); + let mut dest_tx = resolve.endpoint_tx(dest.clone()); + let (rt, _shutdown) = runtime(); + let cfg = default_config(); + let stack = Outbound::new(cfg.clone(), rt, &mut Default::default()) + .with_stack(svc::ArcNewService::new(connect)) + .push_http_cached(resolve) + .into_inner(); + + let backend = default_backend(&dest); + // Keep the probe delay above the failfast timeout so the gate shuts and + // the queue reaches failfast before the breaker probes again. + let min_backoff = cfg.http_request_queue.failfast_timeout + Duration::from_secs(1); + let backoff = ExponentialBackoff::try_new(min_backoff, min_backoff * 6, 0.0).unwrap(); + let mut backoffs = backoff.stream(); + let (_route_tx, routes) = + watch::channel(Routes::Policy(policy::Params::Http(policy::HttpParams { + addr: dest.into(), + meta: ParentRef(client_policy::Meta::new_default("parent")), + backends: Arc::new([backend.clone()]), + routes: Arc::new([default_route(backend)]), + failure_accrual: Some(client_policy::FailureAccrual { + consecutive: client_policy::ConsecutiveFailures { + max_failures: 3, + backoff, + }, + success_rate: None, + }), + }))); + let target = Target { + num: 1, + version: http::Variant::H2, + routes, + }; + let svc = stack.new_service(target); + + // Resolve endpoint A alone and let it answer a 429 with a 30s Retry-After. + // A 429 is not a consecutive failure, so A stays open. The + // hint is recorded for whichever breaker reads A's responses. + dest_tx.add([(addr_a, Default::default())]).unwrap(); + task::yield_now().await; + info!("Recording a Retry-After hint from endpoint A"); + handle_a.allow(1); + let rsp = send_req(svc.clone(), http_get()); + serve( + &mut handle_a, + mk_rsp_with_retry_after(StatusCode::TOO_MANY_REQUESTS, 30, "endpoint a"), + ) + .await; + assert_rsp(rsp, StatusCode::TOO_MANY_REQUESTS, "endpoint a").await; + + // Drop A and resolve endpoint B in its place. The balancer now routes + // solely to B, so B's breaker is the one exercised below. + dest_tx.remove([addr_a]).unwrap(); + dest_tx.add([(addr_b, Default::default())]).unwrap(); + task::yield_now().await; + + info!("Tripping endpoint B on its own failures"); + for i in 1..=3 { + info!("Sending bad request {i}/3 to endpoint B"); + handle_b.allow(1); + let rsp = send_req(svc.clone(), http_get()); + serve( + &mut handle_b, + mk_rsp(StatusCode::INTERNAL_SERVER_ERROR, "endpoint b"), + ) + .await; + assert_rsp(rsp, StatusCode::INTERNAL_SERVER_ERROR, "endpoint b").await; + } + + // B is now shut. Advance exactly one base backoff interval. Without the + // hint, B reopens here and admits a probe. If A's 30s hint had passed to + // B's store, B would still be shut for far longer and the probe below + // would fail in failfast. + info!("Waiting out the base backoff"); + backoffs.next().await; + task::yield_now().await; + + info!("Probing endpoint B after the base backoff"); + handle_b.allow(1); + let rsp = send_req(svc.clone(), http_get()); + tokio::time::timeout( + Duration::from_secs(10), + serve(&mut handle_b, mk_rsp(StatusCode::OK, "endpoint b")), + ) + .await + .expect("probe must reach endpoint B once the base backoff elapses"); + assert_rsp(rsp, StatusCode::OK, "endpoint b").await; +} From 547490ef30f7804c1c23f522e96994cf70865ec6 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Fri, 8 May 2026 10:48:44 +0200 Subject: [PATCH 12/18] feat(outbound): rewrite breaker onto unified state machine The consecutive-failures breaker is replaced by a unified breaker that follows both consecutive failures and an EWMA success rate. The per-target builder fills a UnifiedBreakerConfig: with a success rate it uses the configured threshold, decay, and minimum request count. Without one, consecutive-only mode sets the threshold to 0.0 and the minimum request count to usize::MAX, so the success-rate policy never trips while probe semantics stay in place. With this in place the consecutive_failures module goes away, since the unified breaker covers its behavior. The balance layer drops NewClassifyGateSet for NewRetryAfterGateSet, which adds Retry-After extraction to each endpoint gate. A per-target RetryAfterConfig now reaches the breaker. The HTTP and gRPC policy protocols already hold this configuration, so the sidecar and ingress route builders pass it into the route params, the router moves it onto each Concrete, and the balancer reads it as a parameter to cap the honored Retry-After duration. The Concrete cache key still leaves out failure_accrual and retry_after, since they configure the breaker but do not decide which backend a key selects, and a config-only change must not rebuild the backend cache. Signed-off-by: Alejandro Martinez Ruiz --- linkerd/app/outbound/src/http/breaker.rs | 104 +++++- .../src/http/breaker/consecutive_failures.rs | 328 ------------------ linkerd/app/outbound/src/http/concrete.rs | 3 +- .../app/outbound/src/http/concrete/balance.rs | 37 +- linkerd/app/outbound/src/http/logical.rs | 124 ++++++- .../app/outbound/src/http/logical/policy.rs | 4 +- .../policy/route/backend/metrics/tests.rs | 2 + .../src/http/logical/policy/router.rs | 3 + .../outbound/src/http/logical/policy/tests.rs | 2 + .../app/outbound/src/http/logical/profile.rs | 3 + .../app/outbound/src/http/logical/tests.rs | 2 + .../outbound/src/http/logical/tests/basic.rs | 1 + .../src/http/logical/tests/failure_accrual.rs | 3 + linkerd/app/outbound/src/ingress.rs | 21 +- linkerd/app/outbound/src/sidecar.rs | 23 +- 15 files changed, 286 insertions(+), 374 deletions(-) delete mode 100644 linkerd/app/outbound/src/http/breaker/consecutive_failures.rs diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index 6f5a8776a6..a0e6f90f82 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -1,15 +1,46 @@ +//! Circuit breaker infrastructure for HTTP endpoints. +//! +//! This module implements a unified circuit breaker with two complementary failure +//! tracking mechanisms: consecutive failures and success rate (EWMA-based). +//! +//! The first trips the breaker after N consecutive failures (typically 5xx statuses). +//! It uses exponential backoff before allowing probe requests. +//! +//! The second trips when the success rate drops below a threshold. It treats both 5xx +//! and 429 as failures to enable rate limiting awareness. +//! +//! Both mechanisms are tracked by a single [`UnifiedBreaker`] policy, in which any +//! one of both conditions can trip the circuit. +//! +//! ## Recovery +//! +//! During probation, a probe request must be _non-5xx AND non-429_ to succeed. +//! This ensures the endpoint isn't still rate limiting before reopening. +//! +//! ## Retry-After Integration +//! +//! When a 429 response includes a `Retry-After` header, the duration is captured and used +//! to extend the circuit breaker's backoff period. +//! +//! ## Cold-start +//! +//! Cold-start protection applies to the success rate mechanism. +//! +//! ## Usage +//! +//! The breaker is integrated into the load balancer's endpoint stack via [`NewRetryAfterGateSet`], +//! which replaces the standard `NewClassifyGateSet` to add Retry-After extraction. + use linkerd_app_core::{classify, proxy::http::classify::gate, svc}; use linkerd_proxy_client_policy::FailureAccrual; use tokio::time::Duration; use tracing::{trace_span, Instrument}; -mod consecutive_failures; pub mod retry_after; mod unified; pub mod wrap_classify; -use self::consecutive_failures::ConsecutiveFailures; pub use self::retry_after::{GrpcRetryPushbackStore, RetryAfterStore}; use self::unified::{UnifiedBreaker, UnifiedBreakerConfig}; pub use self::wrap_classify::{HasFailureAccrual, NewRetryAfterGateSet, RetryAfterGateParams}; @@ -48,7 +79,7 @@ impl svc::ExtractParam, T> for Params { fn extract_param(&self, _: &T) -> gate::Params { // Create a channel so that we can receive response summaries and // control the gate. - let (prms, gate, rsps) = gate::Params::channel(self.channel_capacity); + let (prms, gate_tx, rsps) = gate::Params::channel(self.channel_capacity); match self.accrual { None => { @@ -58,24 +89,63 @@ impl svc::ExtractParam, T> for Params { prms } Some(ref accrual) => { - let max_failures = accrual.consecutive.max_failures; - let backoff = accrual.consecutive.backoff; - tracing::trace!( - max_failures, - backoff = ?backoff, - "Using consecutive-failures failure accrual policy.", - ); + // When success_rate is Some, use configured EWMA policy values. + // When None (consecutive-only mode), disable EWMA by using + // threshold 0.0 (rate >= 0 always, so never trips) and + // min_requests usize::MAX (cold-start never passes). + let (success_rate_threshold, success_rate_decay, min_request_threshold) = + if let Some(sr) = accrual.success_rate { + if accrual.consecutive.max_failures == 0 { + tracing::trace!( + success_rate_threshold = %sr.threshold, + min_requests = sr.min_requests, + backoff = ?accrual.consecutive.backoff, + "Using circuit breaker with consecutive failures disabled and success rate tracking.", + ); + } else { + tracing::trace!( + max_failures = accrual.consecutive.max_failures, + backoff = ?accrual.consecutive.backoff, + success_rate_threshold = %sr.threshold, + min_requests = sr.min_requests, + "Using unified circuit breaker with consecutive failures and success rate tracking.", + ); + } + (sr.threshold, sr.decay, sr.min_requests as usize) + } else { + if accrual.consecutive.max_failures == 0 { + tracing::trace!( + backoff = ?accrual.consecutive.backoff, + "Using circuit breaker with consecutive failures disabled.", + ); + } else { + tracing::trace!( + max_failures = accrual.consecutive.max_failures, + backoff = ?accrual.consecutive.backoff, + "Using circuit breaker with consecutive failures only.", + ); + } + (0.0_f64, DEFAULT_SUCCESS_RATE_DECAY, usize::MAX) + }; + + // Spawn the unified breaker that handles both policies + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: accrual.consecutive.max_failures, + threshold: success_rate_threshold, + decay: success_rate_decay, + backoff: accrual.consecutive.backoff, + min_requests: min_request_threshold, + gate: gate_tx, + rsps, + retry_after_store: self.retry_after_store.clone(), + grpc_retry_pushback_store: self.grpc_retry_pushback_store.clone(), + max_duration: self.max_duration, + }); - // 1. If the configured number of consecutive failures are encountered, - // shut the gate. - // 2. After an ejection timeout, open the gate so that 1 request can be processed. - // 3. If that request succeeds, open the gate. If it fails, increase the - // ejection timeout and repeat. - let breaker = ConsecutiveFailures::new(max_failures, backoff, gate, rsps); tokio::spawn( breaker .run() - .instrument(trace_span!("consecutive_failures").or_current()), + .instrument(trace_span!("unified_breaker").or_current()), ); prms diff --git a/linkerd/app/outbound/src/http/breaker/consecutive_failures.rs b/linkerd/app/outbound/src/http/breaker/consecutive_failures.rs deleted file mode 100644 index cc980ed405..0000000000 --- a/linkerd/app/outbound/src/http/breaker/consecutive_failures.rs +++ /dev/null @@ -1,328 +0,0 @@ -use futures::stream::StreamExt; -use linkerd_app_core::{classify, exp_backoff::ExponentialBackoff, proxy::http::classify::gate}; -use tokio::sync::mpsc; - -pub struct ConsecutiveFailures { - max_failures: usize, - backoff: ExponentialBackoff, - gate: gate::Tx, - rsps: mpsc::Receiver, -} - -impl ConsecutiveFailures { - pub fn new( - max_failures: usize, - backoff: ExponentialBackoff, - gate: gate::Tx, - rsps: mpsc::Receiver, - ) -> Self { - Self { - max_failures, - backoff, - gate, - rsps, - } - } - - pub(super) async fn run(mut self) { - loop { - if self.open().await.is_err() { - return; - } - - tracing::info!("Consecutive failure-accrual breaker closed"); - if self.closed().await.is_err() { - return; - } - - tracing::info!("Consecutive failure-accrual breaker reopened"); - } - } - - /// Keep the breaker open until `max_failures` consecutive failures are - /// observed. - async fn open(&mut self) -> Result<(), ()> { - tracing::debug!("Open"); - self.gate.open().map_err(|_| ())?; - let mut failures = 0; - loop { - let class = tokio::select! { - rsp = self.rsps.recv() => rsp.ok_or(())?, - _ = self.gate.lost() => return Err(()), - }; - - tracing::trace!(?class, %failures, "Response"); - if class.is_success() { - failures = 0; - } else { - failures += 1; - if failures == self.max_failures { - return Ok(()); - } - } - } - } - - /// Keep the breaker closed for at least the initial backoff, and then, - /// once the timeout expires, go into probation to admit a single request - /// before reverting to the open state or continuing in the shut state. - async fn closed(&mut self) -> Result<(), ()> { - let mut backoff = self.backoff.stream(); - loop { - // The breaker is shut now. Wait until we can open it again. - tracing::debug!(backoff = ?backoff.duration(), "Shut"); - self.gate.shut().map_err(|_| ())?; - - loop { - tokio::select! { - _ = backoff.next() => break, - // Ignore responses while the breaker is shut, but - // terminate if the channel is closed. recv() on a - // closed channel returns None immediately, which would - // otherwise busy-spin this loop. - rsp = self.rsps.recv() => { rsp.ok_or(())?; }, - _ = self.gate.lost() => return Err(()), - } - } - - let class = self.probation().await?; - tracing::trace!(?class, "Response"); - if class.is_success() { - // Open! - return Ok(()); - } - } - } - - /// Wait for a response to determine whether the breaker should be opened. - async fn probation(&mut self) -> Result { - tracing::debug!("Probation"); - let _sem = self.gate.limit(1).map_err(|_| ())?; - tokio::select! { - rsp = self.rsps.recv() => rsp.ok_or(()), - _ = self.gate.lost() => Err(()), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use linkerd_app_core::{ - proxy::http::{self, classify::BroadcastClassification, BoxBody}, - svc::{self, ServiceExt}, - Error, - }; - use tokio::{task::yield_now, time}; - use tokio_test::{assert_pending, task}; - - #[tokio::test(flavor = "current_thread", start_paused = true)] - async fn transitions() { - let _trace = linkerd_tracing::test::trace_init(); - - let (mut params, gate, rsps) = gate::Params::channel(1); - let send = |res: Result| { - params - .responses - .try_send(classify::Class::Http(res)) - .unwrap() - }; - - let backoff = ExponentialBackoff::try_new( - time::Duration::from_secs(1), - time::Duration::from_secs(100), - // Don't jitter backoffs to ensure tests are deterministic. - 0.0, - ) - .expect("backoff params are valid"); - let breaker = ConsecutiveFailures::new(2, backoff, gate, rsps); - let mut task = task::spawn(breaker.run()); - - // Start open and failing. - send(Err(http::StatusCode::BAD_GATEWAY)); - assert_pending!(task.poll()); - assert!(params.gate.is_open()); - - send(Err(http::StatusCode::BAD_GATEWAY)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // After two failures, the breaker is closed. - time::sleep(time::Duration::from_millis(500)).await; - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // It remains closed until the init ejection backoff elapses. Then it's - // limited to a single request. - time::sleep(time::Duration::from_millis(500)).await; - assert_pending!(task.poll()); - - // hold the permit to prevent the breaker from opening - match params.gate.state() { - gate::State::Open => panic!("still open"), - gate::State::Shut => panic!("still shut"), - gate::State::Limited(sem) => { - assert_eq!(sem.available_permits(), 1); - params - .gate - .opened_for_test() - .await - .expect("permit should be acquired") - // The `Gate` service would forget this permit when called, so - // we must do the same here explicitly. - .forget(); - assert_eq!(sem.available_permits(), 0); - } - }; - - // The first request in probation fails, so the breaker remains closed. - send(Err(http::StatusCode::BAD_GATEWAY)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // The breaker goes into closed for 2s now... - time::sleep(time::Duration::from_secs(1)).await; - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - - // If some straggling responses are observed while the breaker is - // closed, they are ignored. - send(Ok(http::StatusCode::OK)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut()); - // After that timeout elapses, we go into probation again. - time::sleep(time::Duration::from_secs(1)).await; - assert_pending!(task.poll()); - - match params.gate.state() { - gate::State::Open => panic!("still open"), - gate::State::Shut => panic!("still shut"), - gate::State::Limited(sem) => { - assert_eq!(sem.available_permits(), 1); - params - .gate - .opened_for_test() - .await - .expect("permit should be acquired") - // The `Gate` service would forget this permit when called, so - // we must do the same here explicitly. - .forget(); - assert_eq!(sem.available_permits(), 0); - } - } - - // And then a single success takes us back into the open state! - send(Ok(http::StatusCode::OK)); - assert_pending!(task.poll()); - assert!(params.gate.is_open()); - } - - #[tokio::test(flavor = "current_thread", start_paused = true)] - async fn closed_state_terminates_on_channel_close() { - let _trace = linkerd_tracing::test::trace_init(); - - let (params, gate, rsps) = gate::Params::channel(1); - let send = |res: Result| { - params - .responses - .try_send(classify::Class::Http(res)) - .unwrap() - }; - - let backoff = ExponentialBackoff::try_new( - time::Duration::from_secs(1), - time::Duration::from_secs(100), - 0.0, - ) - .expect("backoff params are valid"); - - // max_failures=1: a single failure trips the breaker into closed state. - let breaker = ConsecutiveFailures::new(1, backoff, gate, rsps); - let mut task = task::spawn(breaker.run()); - - assert_pending!(task.poll()); - assert!(params.gate.is_open()); - - // Trip the breaker into the closed state. - send(Err(http::StatusCode::INTERNAL_SERVER_ERROR)); - assert_pending!(task.poll()); - assert!(params.gate.is_shut(), "should be in closed state"); - - // Drop the response sender while in the closed state drain loop. - // The breaker must terminate rather than busy-spinning on None from - // the closed channel. - drop(params.responses); - assert!(task.poll().is_ready()); - } - - /// This test uses `tokio::spawn` (real task) instead of - /// `tokio_test::task::spawn` because the Gate + BroadcastClassification - /// service stack requires an actual task context for `oneshot()` to drive - /// response classification through the mpsc channel. - #[tokio::test(flavor = "current_thread", start_paused = true)] - async fn closed_state_terminates_when_service_drops_but_gate_rx_survives() { - let _trace = linkerd_tracing::test::trace_init(); - - let (params, gate, rsps) = gate::Params::channel(1); - // Keep an extra gate receiver alive to model a future stack change that - // retains gate state after the endpoint service itself is dropped. - let leaked_gate = params.gate.clone(); - - let backoff = ExponentialBackoff::try_new( - time::Duration::from_secs(1), - time::Duration::from_secs(100), - 0.0, - ) - .expect("backoff params are valid"); - - let breaker = ConsecutiveFailures::new(1, backoff, gate, rsps); - let breaker = tokio::spawn(breaker.run()); - - let svc = svc::Gate::new( - params.gate, - BroadcastClassification::::new( - params.responses, - svc::mk(|_: http::Request| async move { - Ok::<_, Error>( - http::Response::builder() - .status(http::StatusCode::INTERNAL_SERVER_ERROR) - .body(BoxBody::default()) - .unwrap(), - ) - }), - ), - ); - - let req = http::Request::builder() - .extension(classify::Response::default()) - .body(BoxBody::default()) - .unwrap(); - let rsp = svc.oneshot(req).await.expect("service must succeed"); - drop(rsp); - - for _ in 0..16 { - if leaked_gate.is_shut() { - break; - } - yield_now().await; - } - assert!( - leaked_gate.is_shut(), - "breaker should enter the closed state" - ); - - for _ in 0..16 { - if breaker.is_finished() { - break; - } - yield_now().await; - } - - assert!( - breaker.is_finished(), - "breaker should terminate when the response channel closes, \ - even if a gate::Rx clone survives" - ); - breaker.await.expect("breaker task must not panic"); - } -} diff --git a/linkerd/app/outbound/src/http/concrete.rs b/linkerd/app/outbound/src/http/concrete.rs index 99afba72f6..fef90d752e 100644 --- a/linkerd/app/outbound/src/http/concrete.rs +++ b/linkerd/app/outbound/src/http/concrete.rs @@ -20,7 +20,7 @@ use linkerd_app_core::{ transport::{self, addrs::*}, Error, Infallible, NameAddr, Result, }; -use linkerd_proxy_client_policy::FailureAccrual; +use linkerd_proxy_client_policy::{FailureAccrual, RetryAfterConfig}; use std::{fmt::Debug, net::SocketAddr, sync::Arc}; use tracing::info_span; @@ -77,6 +77,7 @@ impl Outbound { T: svc::Param, T: svc::Param, T: svc::Param>, + T: svc::Param>, T: Clone + Debug + Send + Sync + 'static, // Endpoint resolution. R: Resolve, diff --git a/linkerd/app/outbound/src/http/concrete/balance.rs b/linkerd/app/outbound/src/http/concrete/balance.rs index 7f7e054e1d..98eabd5696 100644 --- a/linkerd/app/outbound/src/http/concrete/balance.rs +++ b/linkerd/app/outbound/src/http/concrete/balance.rs @@ -5,7 +5,6 @@ use crate::{ stack_labels, BackendRef, ParentRef, }; use linkerd_app_core::{ - classify, config::{ConnectConfig, QueueConfig}, proxy::{ api_resolve::{ConcreteAddr, Metadata}, @@ -15,7 +14,9 @@ use linkerd_app_core::{ transport::addrs::*, Error, NameAddr, }; -use linkerd_proxy_client_policy::FailureAccrual; +use linkerd_proxy_client_policy::{ + FailureAccrual, RetryAfterConfig, DEFAULT_RETRY_AFTER_MAX_DURATION, +}; use std::{fmt::Debug, net::SocketAddr}; use tracing::info_span; @@ -71,12 +72,25 @@ impl> svc::Param for Balance { } } +impl>> breaker::HasFailureAccrual for Balance { + fn failure_accrual(&self) -> Option { + self.parent.param() + } +} + +impl>> svc::Param> for Balance { + fn param(&self) -> Option { + self.parent.param() + } +} + impl Balance where // Parent target. T: svc::Param, T: svc::Param, T: svc::Param>, + T: svc::Param>, T: Clone + Debug + Send + Sync + 'static, { pub(super) fn layer( @@ -137,19 +151,12 @@ where }) .push_on_service(svc::MapErr::layer_boxed()) .lift_new_with_target() - .push( - http::NewClassifyGateSet::::layer_via({ - let channel_capacity = http_queue.capacity; - move |target: &Self| breaker::Params { - accrual: target.parent.param(), - channel_capacity, - retry_after_store: breaker::RetryAfterStore::new(), - grpc_retry_pushback_store: breaker::GrpcRetryPushbackStore::new(), - max_duration: - linkerd_proxy_client_policy::DEFAULT_RETRY_AFTER_MAX_DURATION, - } - }), - ) + .push(breaker::NewRetryAfterGateSet::layer( + breaker::RetryAfterGateParams::new( + http_queue.capacity, + DEFAULT_RETRY_AFTER_MAX_DURATION, + ), + )) .push_on_service(svc::OnServiceLayer::new( stack_metrics.layer(stack_labels("http", "endpoint")), )) diff --git a/linkerd/app/outbound/src/http/logical.rs b/linkerd/app/outbound/src/http/logical.rs index 64fec21ce3..794f34d377 100644 --- a/linkerd/app/outbound/src/http/logical.rs +++ b/linkerd/app/outbound/src/http/logical.rs @@ -43,6 +43,7 @@ pub struct Concrete { parent_ref: ParentRef, backend_ref: BackendRef, failure_accrual: Option, + retry_after: Option, } impl PartialEq for Concrete { @@ -52,8 +53,10 @@ impl PartialEq for Concrete { && self.parent == other.parent && self.parent_ref == other.parent_ref && self.backend_ref == other.backend_ref - // failure_accrual intentionally excluded (does not - // determine identity) + // failure_accrual and retry_after are left out on purpose. + // They configure the breaker but do not determine backend + // identity, and the backend cache is rebuilt whenever a policy + // update changes them, so they need not take part in the key. } } @@ -66,8 +69,9 @@ impl Hash for Concrete { self.parent.hash(state); self.parent_ref.hash(state); self.backend_ref.hash(state); - // failure_accrual intentionally excluded (SuccessRateConfig - // contains f64 which does not impl Hash). + // failure_accrual and retry_after are left out on purpose, + // matching the equality impl. They do not determine identity, + // and SuccessRateConfig contains an f64 that does not impl Hash. } } @@ -181,6 +185,7 @@ where authority: None, parent, failure_accrual: None, + retry_after: None, }) } Self::Profile(profile) => { @@ -283,6 +288,12 @@ impl svc::Param> for Concrete { } } +impl svc::Param> for Concrete { + fn param(&self) -> Option { + self.retry_after + } +} + // === impl CanonicalDstHeader === impl From for http::HeaderPair { @@ -293,3 +304,108 @@ impl From for http::HeaderPair { ) } } + +#[cfg(test)] +mod identity_tests { + use super::*; + use crate::http::concrete; + use linkerd_app_core::{exp_backoff::ExponentialBackoff, NameAddr}; + use linkerd_proxy_client_policy::Meta; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + time::Duration, + }; + + fn hash_of(value: &T) -> u64 { + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + hasher.finish() + } + + // Builds a `Concrete<()>` whose five identity fields are fixed, leaving + // the breaker configuration to the caller. The identity fields decide + // which backend the cache key selects. + fn base( + failure_accrual: Option, + retry_after: Option, + ) -> Concrete<()> { + let addr: NameAddr = "example.com:1234".parse().unwrap(); + let meta = Meta::new_default("test"); + Concrete { + target: concrete::Dispatch::Balance(addr.clone(), super::profile::DEFAULT_EWMA), + authority: Some(addr.as_http_authority()), + parent: (), + parent_ref: ParentRef(meta.clone()), + backend_ref: BackendRef(meta), + failure_accrual, + retry_after, + } + } + + fn some_accrual() -> policy::FailureAccrual { + policy::FailureAccrual { + consecutive: policy::ConsecutiveFailures { + max_failures: 3, + backoff: ExponentialBackoff::new_unchecked( + Duration::from_secs(1), + Duration::from_secs(60), + 0.0, + ), + }, + success_rate: None, + } + } + + // The concrete target keeps breaker configuration out of its cache key on + // purpose, so two concretes that differ only in `failure_accrual` must + // compare equal and hash equal. Otherwise a config-only change would rebuild + // the backend cache. + #[test] + fn failure_accrual_does_not_affect_identity() { + let without = base(None, None); + let with = base(Some(some_accrual()), None); + + assert_eq!(without, with); + assert_eq!(hash_of(&without), hash_of(&with)); + } + + // `retry_after` is also kept out of the cache key, so toggling it + // must not change a concrete's identity. + #[test] + fn retry_after_does_not_affect_identity() { + let without = base(None, None); + let with = base( + None, + Some(policy::RetryAfterConfig { + max_duration: Duration::from_secs(30), + }), + ); + + assert_eq!(without, with); + assert_eq!(hash_of(&without), hash_of(&with)); + } + + // Both breaker fields are kept out together. A concrete that holds full + // breaker configuration can be swapped with one that holds none. + #[test] + fn breaker_config_does_not_affect_identity() { + let without = base(None, None); + let with = base( + Some(some_accrual()), + Some(policy::RetryAfterConfig { + max_duration: Duration::from_secs(30), + }), + ); + + assert_eq!(without, with); + assert_eq!(hash_of(&without), hash_of(&with)); + + // Check that an identity field still tells concretes apart, so + // equality is not true for an empty reason. + let other_addr: NameAddr = "other.example.com:1234".parse().unwrap(); + let mut different = base(None, None); + different.target = concrete::Dispatch::Balance(other_addr, super::profile::DEFAULT_EWMA); + assert_ne!(without, different); + } +} diff --git a/linkerd/app/outbound/src/http/logical/policy.rs b/linkerd/app/outbound/src/http/logical/policy.rs index 83be9fd34a..2d32a99017 100644 --- a/linkerd/app/outbound/src/http/logical/policy.rs +++ b/linkerd/app/outbound/src/http/logical/policy.rs @@ -11,7 +11,9 @@ pub use self::{ route::{errors, GrpcRouteMetrics, HttpRouteMetrics}, router::{GrpcParams, HttpParams}, }; -pub use linkerd_proxy_client_policy::{ClientPolicy, ConsecutiveFailures, FailureAccrual}; +pub use linkerd_proxy_client_policy::{ + ClientPolicy, ConsecutiveFailures, FailureAccrual, RetryAfterConfig, +}; /// HTTP or gRPC policy route parameters. #[derive(Clone, Debug, PartialEq)] diff --git a/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs b/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs index e4d9693d47..28f8be1d8c 100644 --- a/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs +++ b/linkerd/app/outbound/src/http/logical/policy/route/backend/metrics/tests.rs @@ -450,6 +450,7 @@ fn mock_http_route_backend_metrics( ), authority: None, failure_accrual: None, + retry_after: None, parent: (), parent_ref: parent_ref.clone(), backend_ref: backend_ref.clone(), @@ -506,6 +507,7 @@ fn mock_grpc_route_backend_metrics( ), authority: None, failure_accrual: None, + retry_after: None, parent: (), parent_ref: parent_ref.clone(), backend_ref: backend_ref.clone(), diff --git a/linkerd/app/outbound/src/http/logical/policy/router.rs b/linkerd/app/outbound/src/http/logical/policy/router.rs index a96cbcbce8..42c55d5467 100644 --- a/linkerd/app/outbound/src/http/logical/policy/router.rs +++ b/linkerd/app/outbound/src/http/logical/policy/router.rs @@ -19,6 +19,7 @@ pub struct Params { pub routes: Arc<[http_route::Route>]>, pub backends: Arc<[policy::Backend]>, pub failure_accrual: Option, + pub retry_after: Option, } pub type HttpParams = @@ -143,6 +144,7 @@ where routes, backends, failure_accrual, + retry_after, } = rts; let mk_concrete = { @@ -163,6 +165,7 @@ where backend_ref, parent_ref: parent_ref.clone(), failure_accrual: failure_accrual.clone(), + retry_after, } } }; diff --git a/linkerd/app/outbound/src/http/logical/policy/tests.rs b/linkerd/app/outbound/src/http/logical/policy/tests.rs index 50ed49ad04..a1ae9c041a 100644 --- a/linkerd/app/outbound/src/http/logical/policy/tests.rs +++ b/linkerd/app/outbound/src/http/logical/policy/tests.rs @@ -121,6 +121,7 @@ async fn header_based_route() { .chain(Some(special_backend.clone())) .collect(), failure_accrual: None, + retry_after: None, }); let metrics = HttpRouteMetrics::default(); @@ -232,6 +233,7 @@ async fn http_filter_request_headers() { }]), backends: std::iter::once(backend).collect(), failure_accrual: None, + retry_after: None, } }); diff --git a/linkerd/app/outbound/src/http/logical/profile.rs b/linkerd/app/outbound/src/http/logical/profile.rs index 3eab8e4eae..e6022372f7 100644 --- a/linkerd/app/outbound/src/http/logical/profile.rs +++ b/linkerd/app/outbound/src/http/logical/profile.rs @@ -118,6 +118,7 @@ where authority: Some(addr.as_http_authority()), parent: parent.clone(), failure_accrual: None, + retry_after: None, }; let backends = std::iter::once(concrete.clone()).collect(); let distribution = Distribution::first_available(std::iter::once(concrete)); @@ -134,6 +135,7 @@ where authority: Some(t.addr.as_http_authority()), parent: parent.clone(), failure_accrual: None, + retry_after: None, }) .collect(); let distribution = Distribution::random_available(targets.iter().cloned().map( @@ -147,6 +149,7 @@ where target: concrete::Dispatch::Balance(addr, DEFAULT_EWMA), parent: parent.clone(), failure_accrual: None, + retry_after: None, }; (concrete, weight) }, diff --git a/linkerd/app/outbound/src/http/logical/tests.rs b/linkerd/app/outbound/src/http/logical/tests.rs index ea3d5431ec..dfbf4e3a99 100644 --- a/linkerd/app/outbound/src/http/logical/tests.rs +++ b/linkerd/app/outbound/src/http/logical/tests.rs @@ -265,6 +265,7 @@ fn mock_http(params: client_policy::http::RouteParams) -> (svc::BoxCloneHttp, Ha backends: Arc::new([backend]), routes: Arc::new([route]), failure_accrual: None, + retry_after: None, })) } @@ -278,6 +279,7 @@ fn mock_grpc(params: client_policy::grpc::RouteParams) -> (svc::BoxCloneHttp, Ha backends: Arc::new([backend]), routes: Arc::new([route]), failure_accrual: None, + retry_after: None, })) } diff --git a/linkerd/app/outbound/src/http/logical/tests/basic.rs b/linkerd/app/outbound/src/http/logical/tests/basic.rs index b72e9005fe..b388ebe3dd 100644 --- a/linkerd/app/outbound/src/http/logical/tests/basic.rs +++ b/linkerd/app/outbound/src/http/logical/tests/basic.rs @@ -34,6 +34,7 @@ async fn routes() { backends: Arc::new([backend.clone()]), routes: Arc::new([default_route(backend)]), failure_accrual: None, + retry_after: None, }))); let target = Target { num: 1, diff --git a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs index d149f24528..c87ec7f774 100644 --- a/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs +++ b/linkerd/app/outbound/src/http/logical/tests/failure_accrual.rs @@ -57,6 +57,7 @@ async fn consecutive_failures_accrue() { }, success_rate: None, }), + retry_after: None, }))); let target = Target { num: 1, @@ -222,6 +223,7 @@ async fn balancer_doesnt_select_tripped_breakers() { }, success_rate: None, }), + retry_after: None, }))); let target = Target { num: 1, @@ -322,6 +324,7 @@ async fn retry_after_hint_does_not_bleed_across_endpoints() { }, success_rate: None, }), + retry_after: None, }))); let target = Target { num: 1, diff --git a/linkerd/app/outbound/src/ingress.rs b/linkerd/app/outbound/src/ingress.rs index de36ab4f5a..c0895594f3 100644 --- a/linkerd/app/outbound/src/ingress.rs +++ b/linkerd/app/outbound/src/ingress.rs @@ -542,9 +542,17 @@ fn policy_routes( ref http2, .. } => { - let (routes, failure_accrual) = match version { - http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual.clone()), - http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual.clone()), + let (routes, failure_accrual, retry_after) = match version { + http::Variant::Http1 => ( + http1.routes.clone(), + http1.failure_accrual.clone(), + http1.retry_after, + ), + http::Variant::H2 => ( + http2.routes.clone(), + http2.failure_accrual.clone(), + http2.retry_after, + ), }; Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -553,6 +561,7 @@ fn policy_routes( backends: policy.backends.clone(), routes, failure_accrual, + retry_after, }, ))) } @@ -562,6 +571,7 @@ fn policy_routes( policy::Protocol::Http1(policy::http::Http1 { ref routes, ref failure_accrual, + retry_after, .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -570,11 +580,13 @@ fn policy_routes( backends: policy.backends.clone(), routes: routes.clone(), failure_accrual: failure_accrual.clone(), + retry_after, }, ))), policy::Protocol::Http2(policy::http::Http2 { ref routes, ref failure_accrual, + retry_after, .. }) => Some(http::Routes::Policy(http::policy::Params::Http( http::policy::HttpParams { @@ -583,11 +595,13 @@ fn policy_routes( backends: policy.backends.clone(), routes: routes.clone(), failure_accrual: failure_accrual.clone(), + retry_after, }, ))), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, ref failure_accrual, + retry_after, .. }) => Some(http::Routes::Policy(http::policy::Params::Grpc( http::policy::GrpcParams { @@ -596,6 +610,7 @@ fn policy_routes( backends: policy.backends.clone(), routes: routes.clone(), failure_accrual: failure_accrual.clone(), + retry_after, }, ))), _ => None, diff --git a/linkerd/app/outbound/src/sidecar.rs b/linkerd/app/outbound/src/sidecar.rs index c8a39caaf9..29492de2b5 100644 --- a/linkerd/app/outbound/src/sidecar.rs +++ b/linkerd/app/outbound/src/sidecar.rs @@ -231,28 +231,39 @@ impl HttpSidecar { // protocol changes but remains HTTP-ish, we propagate those // changes. If the protocol flips to an opaque protocol, we ignore // the protocol update. - let (routes, failure_accrual) = match policy.protocol { + let (routes, failure_accrual, retry_after) = match policy.protocol { policy::Protocol::Detect { ref http1, ref http2, .. } => match version { - http::Variant::Http1 => (http1.routes.clone(), http1.failure_accrual.clone()), - http::Variant::H2 => (http2.routes.clone(), http2.failure_accrual.clone()), + http::Variant::Http1 => ( + http1.routes.clone(), + http1.failure_accrual.clone(), + http1.retry_after, + ), + http::Variant::H2 => ( + http2.routes.clone(), + http2.failure_accrual.clone(), + http2.retry_after, + ), }, policy::Protocol::Http1(policy::http::Http1 { ref routes, ref failure_accrual, + retry_after, .. - }) => (routes.clone(), failure_accrual.clone()), + }) => (routes.clone(), failure_accrual.clone(), retry_after), policy::Protocol::Http2(policy::http::Http2 { ref routes, ref failure_accrual, + retry_after, .. - }) => (routes.clone(), failure_accrual.clone()), + }) => (routes.clone(), failure_accrual.clone(), retry_after), policy::Protocol::Grpc(policy::grpc::Grpc { ref routes, ref failure_accrual, + retry_after, .. }) => { return Some(http::Routes::Policy(http::policy::Params::Grpc( @@ -262,6 +273,7 @@ impl HttpSidecar { backends: policy.backends.clone(), routes: routes.clone(), failure_accrual: failure_accrual.clone(), + retry_after, }, ))) } @@ -280,6 +292,7 @@ impl HttpSidecar { routes, backends: policy.backends.clone(), failure_accrual, + retry_after, }, ))) } From 0a9d701066e4bf688283ad17178512452e71e683 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:17:50 +0200 Subject: [PATCH 13/18] test(outbound): verify backoff resets to base after recovery Document that closed() creates a new backoff stream on each trip, so exponential escalation is not kept across trip-recover-trip cycles. A second trip starts at the base backoff. Signed-off-by: Alejandro Martinez Ruiz --- .../app/outbound/src/http/breaker/unified.rs | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/linkerd/app/outbound/src/http/breaker/unified.rs b/linkerd/app/outbound/src/http/breaker/unified.rs index a395147900..66a9778a09 100644 --- a/linkerd/app/outbound/src/http/breaker/unified.rs +++ b/linkerd/app/outbound/src/http/breaker/unified.rs @@ -1227,6 +1227,77 @@ mod tests { ); } + // Verify that `closed()` creates a new backoff stream each time the breaker + // trips, so backoff escalation is not kept across trip-recover-trip cycles. + // After recovery the next trip should start at the base backoff (1s), not an + // escalated value (2s). + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn backoff_resets_to_base_on_retrip() { + let _trace = linkerd_tracing::test::trace_init(); + + let (mut params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + assert!(params.gate.is_open()); + + // Trip #1 + for _ in 0..2 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "Should trip after 2 consecutive 5xx"); + + // Wait for first backoff (base = 1s) and enter probation + time::sleep(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + match params.gate.state() { + gate::State::Limited(sem) => { + assert_eq!(sem.available_permits(), 1); + params.gate.opened_for_test().await.unwrap().forget(); + } + _ => panic!("Expected Limited state after first backoff"), + } + + // Probe succeeds, breaker reopens + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Should reopen after successful probe" + ); + + // Trip #2 + for _ in 0..2 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!( + params.gate.is_shut(), + "Should trip again after 2 consecutive 5xx" + ); + + // Advance 1s + small buffer. If the backoff had escalated to 2s (iteration 1), + // the gate would still be shut here. With a new stream the base is 1s. + time::sleep(time::Duration::from_millis(1050)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "Second trip should use base backoff (1s), not escalated (2s); \ + closed() creates a new backoff stream each time" + ); + } + // A closed response channel must end the breaker task even while it is // parked in the backoff wait. recv() on a closed channel yields None right // away, so the drain step has to treat None as teardown rather than looping From efb1e9fe3677909f806c0054273a0a1733df1038 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:18:48 +0200 Subject: [PATCH 14/18] test(outbound): verify both policies disabled produces no trip With max_failures=0 and the success-rate policy off (threshold=0.0, min_requests=usize::MAX), the gate must stay open no matter how many errors arrive. The test floods 5xx then 429 responses and checks the gate never trips. Signed-off-by: Alejandro Martinez Ruiz --- .../app/outbound/src/http/breaker/unified.rs | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/linkerd/app/outbound/src/http/breaker/unified.rs b/linkerd/app/outbound/src/http/breaker/unified.rs index 66a9778a09..20636dbf05 100644 --- a/linkerd/app/outbound/src/http/breaker/unified.rs +++ b/linkerd/app/outbound/src/http/breaker/unified.rs @@ -1298,6 +1298,49 @@ mod tests { ); } + // When both consecutive failures (max_failures=0) and success rate + // (threshold=0.0, min_requests=usize::MAX) are disabled, no combination of + // errors can trip the circuit. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn both_policies_disabled_never_trips() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, + threshold: 0.0, + min_requests: usize::MAX, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Advance 1ms past t=0 so EWMA accepts samples + time::advance(time::Duration::from_millis(1)).await; + + for _ in 0..10 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Breaker must stay open when both policies are disabled (tripped via 5xx)" + ); + } + + for _ in 0..10 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Breaker must stay open when both policies are disabled (tripped via 429)" + ); + } + } + // A closed response channel must end the breaker task even while it is // parked in the backoff wait. recv() on a closed channel yields None right // away, so the drain step has to treat None as teardown rather than looping From 99af31e461bfe4e7bf391efb82bc87b490a7d1f4 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:47:53 +0200 Subject: [PATCH 15/18] test(outbound): cover breaker behavior end-to-end Add a test module for the unified circuit breaker. It checks the success-rate-only trip when consecutive failures are disabled, gRPC response classification, and that a Retry-After hint and a gRPC pushback trailer extend the backoff, driving real responses through the classifier into the stores the breaker reads. Two cases deal with backwards compatibility. In consecutive-only mode a 429 probe still counts as success, and a hint recorded in a store the breaker does not hold never reaches it. A further case confirms that a disabled failure-accrual config spawns no breaker task and leaves the gate open. Signed-off-by: Alejandro Martinez Ruiz --- Cargo.lock | 2 + linkerd/app/outbound/src/http/breaker.rs | 3 + .../src/http/breaker/integration_tests.rs | 704 ++++++++++++++++++ 3 files changed, 709 insertions(+) create mode 100644 linkerd/app/outbound/src/http/breaker/integration_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 935bddf994..e84810ede5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1557,6 +1557,7 @@ dependencies = [ "linkerd-app-core", "linkerd-app-test", "linkerd-distribute", + "linkerd-ewma", "linkerd-http-box", "linkerd-http-classify", "linkerd-http-prom", @@ -1564,6 +1565,7 @@ dependencies = [ "linkerd-http-route", "linkerd-identity", "linkerd-io", + "linkerd-load-biaser", "linkerd-meshtls", "linkerd-mock-http-body", "linkerd-opaq-route", diff --git a/linkerd/app/outbound/src/http/breaker.rs b/linkerd/app/outbound/src/http/breaker.rs index a0e6f90f82..9790fac923 100644 --- a/linkerd/app/outbound/src/http/breaker.rs +++ b/linkerd/app/outbound/src/http/breaker.rs @@ -41,6 +41,9 @@ pub mod retry_after; mod unified; pub mod wrap_classify; +#[cfg(test)] +mod integration_tests; + pub use self::retry_after::{GrpcRetryPushbackStore, RetryAfterStore}; use self::unified::{UnifiedBreaker, UnifiedBreakerConfig}; pub use self::wrap_classify::{HasFailureAccrual, NewRetryAfterGateSet, RetryAfterGateParams}; diff --git a/linkerd/app/outbound/src/http/breaker/integration_tests.rs b/linkerd/app/outbound/src/http/breaker/integration_tests.rs new file mode 100644 index 0000000000..a4fee32b8e --- /dev/null +++ b/linkerd/app/outbound/src/http/breaker/integration_tests.rs @@ -0,0 +1,704 @@ +//! Integration tests for the unified circuit breaker. +//! +//! Tests cover specific breaker behaviors: SR-only trip, gRPC classification, +//! Retry-After hint extension, and backwards-compatibility regression checks. + +use super::*; + +use linkerd_app_core::{exp_backoff::ExponentialBackoff, svc::ExtractParam}; +use linkerd_proxy_client_policy::DEFAULT_RETRY_AFTER_MAX_DURATION; +use std::time::Duration; +use tokio::time; +use tokio_test::{assert_pending, task}; +use tonic; + +// Sends a probe-success classification directly to the breaker's mpsc, +// bypassing Gate/MockService. The caller should first check is_limited() +// to confirm the breaker is in probation (not draining in backoff). +fn send_probe_success(params: &gate::Params) { + params + .responses + .try_send(classify::Class::Http(Ok(http::StatusCode::OK))) + .expect("try_send failed -- bump gate channel capacity"); +} + +fn make_backoff() -> ExponentialBackoff { + ExponentialBackoff::try_new( + Duration::from_secs(1), + Duration::from_secs(100), + 0.0, // no jitter for deterministic tests + ) + .expect("backoff params are valid") +} + +fn send_class(params: &gate::Params, class: classify::Class) { + params.responses.try_send(class).unwrap(); +} + +fn send_ok(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Ok(status))); +} + +fn send_err(params: &gate::Params, status: http::StatusCode) { + send_class(params, classify::Class::Http(Err(status))); +} + +fn send_grpc(params: &gate::Params, code: tonic::Code) { + let class = if matches!( + code, + tonic::Code::Unknown + | tonic::Code::DeadlineExceeded + | tonic::Code::PermissionDenied + | tonic::Code::Internal + | tonic::Code::Unavailable + | tonic::Code::DataLoss + ) { + classify::Class::Grpc(Err(code)) + } else { + classify::Class::Grpc(Ok(code)) + }; + send_class(params, class); +} + +fn default_test_config( + gate_tx: gate::Tx, + rsps: tokio::sync::mpsc::Receiver, +) -> UnifiedBreakerConfig { + UnifiedBreakerConfig { + max_failures: 3, + threshold: 0.8, + decay: Duration::from_secs(10), + backoff: make_backoff(), + min_requests: 1, + gate: gate_tx, + rsps, + retry_after_store: RetryAfterStore::new(), + grpc_retry_pushback_store: GrpcRetryPushbackStore::new(), + max_duration: DEFAULT_RETRY_AFTER_MAX_DURATION, + } +} + +// Verifies that the breaker trips via success rate alone when `max_failures=0` +// disables the consecutive-failure policy. The `should_trip` function skips the +// consecutive check when max_failures is zero, so only EWMA degradation can trip. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn sr_only_trip_with_consecutive_disabled() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + + tokio::spawn( + UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 0, // consecutive failures disabled + threshold: 0.5, // trip when SR drops below 50% + min_requests: 1, // cold-start passes after 1 request + ..default_test_config(gate_tx, rsps) + }) + .run(), + ); + + // Advance past t=0 so the EWMA accepts add() calls (skips when ts <= timestamp). + time::sleep(Duration::from_millis(1)).await; + assert!(params.gate.is_open(), "gate should start open"); + + // One success to satisfy min_requests and seed the EWMA. + send_ok(¶ms, http::StatusCode::OK); + time::advance(Duration::from_secs(1)).await; + + // 429s degrade the EWMA without touching consecutive failures (max_failures=0 + // means the counter is never updated). With decay=10s and 1s spacing, + // we need about 7 samples to drop below 0.5. + for _ in 0..8 { + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(Duration::from_secs(1)).await; + } + + assert!( + params.gate.is_shut(), + "breaker should trip via SR when consecutive failures are disabled", + ); + + // Wait for backoff (1s base + safety margin). With tokio::spawn, the breaker + // runs on the runtime and enters probation when the backoff timer fires. + time::advance(Duration::from_secs(2)).await; + + for _ in 0..5 { + if params.gate.is_limited() { + break; + } + time::advance(Duration::from_millis(100)).await; + } + assert!( + params.gate.is_limited(), + "gate should enter probation after backoff", + ); + + // Probe success via the channel (same pattern as send_probe_success). + send_probe_success(¶ms); + time::advance(Duration::from_millis(100)).await; + + assert!( + params.gate.is_open(), + "gate should reopen after probe success" + ); +} + +// Exercises gRPC response classification through the breaker: +// - Grpc(Err(_)) is a configured failure (increments consecutive counter AND degrades EWMA) +// - Grpc(Ok(ResourceExhausted)) is a rate failure (degrades EWMA) but does +// NOT increment consecutive failures +// - Grpc(Ok(Ok)) is a success (resets consecutive counter) +// +// The negative assertion on consecutive-failure count after ResourceExhausted is +// the key distinction tested here. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn grpc_classification_through_breaker() { + let _trace = linkerd_tracing::test::trace_init(); + + // Part A: ResourceExhausted must NOT trip via consecutive failures + + let (params_a, gate_tx_a, rsps_a) = gate::Params::channel(1); + + let breaker_a = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 1, // trip after just 1 consecutive configured failure + threshold: 0.0, // SR disabled (threshold 0.0 never trips) + min_requests: usize::MAX, + ..default_test_config(gate_tx_a, rsps_a) + }); + let mut task_a = task::spawn(breaker_a.run()); + + assert_pending!(task_a.poll()); + + // Send 5 ResourceExhausted responses. Because ResourceExhausted is classified + // as Grpc(Ok(ResourceExhausted)), it is not a configured failure. The + // consecutive counter stays at 0 and cannot trip even with max_failures=1. + for _ in 0..5 { + send_grpc(¶ms_a, tonic::Code::ResourceExhausted); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_a.poll()); + } + + assert!( + params_a.gate.is_open(), + "ResourceExhausted must not trip consecutive failures (counter should stay at 0)", + ); + + // Part B: gRPC configured failures trip consecutive failures + + let (params_b, gate_tx_b, rsps_b) = gate::Params::channel(1); + + let breaker_b = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + threshold: 0.0, // SR disabled + min_requests: usize::MAX, + ..default_test_config(gate_tx_b, rsps_b) + }); + let mut task_b = task::spawn(breaker_b.run()); + + assert_pending!(task_b.poll()); + + // Unavailable is Grpc(Err(Unavailable)), counting as failure + send_grpc(¶ms_b, tonic::Code::Unavailable); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_b.poll()); + + send_grpc(¶ms_b, tonic::Code::Unavailable); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_b.poll()); + + assert!( + params_b.gate.is_shut(), + "Grpc(Err(Unavailable)) should trip consecutive failures at max_failures=2", + ); + + // Part C: Ok resets the consecutive counter + + let (params_c, gate_tx_c, rsps_c) = gate::Params::channel(1); + + let breaker_c = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + threshold: 0.0, + min_requests: usize::MAX, + ..default_test_config(gate_tx_c, rsps_c) + }); + let mut task_c = task::spawn(breaker_c.run()); + + assert_pending!(task_c.poll()); + + // 1 failure, then Ok resets, then 1 failure again. Should not trip. + send_grpc(¶ms_c, tonic::Code::Internal); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_c.poll()); + + send_grpc(¶ms_c, tonic::Code::Ok); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_c.poll()); + + send_grpc(¶ms_c, tonic::Code::Internal); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task_c.poll()); + + assert!( + params_c.gate.is_open(), + "Grpc(Ok(Ok)) should reset consecutive counter, preventing trip at max_failures=2", + ); + + // Part D: ResourceExhausted degrades SR EWMA + + let (params_d, gate_tx_d, rsps_d) = gate::Params::channel(1); + + let breaker_d = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, // consecutive won't trip + threshold: 0.5, + min_requests: 1, + ..default_test_config(gate_tx_d, rsps_d) + }); + let mut task_d = task::spawn(breaker_d.run()); + + assert_pending!(task_d.poll()); + + // Seed with one success. + send_grpc(¶ms_d, tonic::Code::Ok); + time::advance(Duration::from_secs(1)).await; + assert_pending!(task_d.poll()); + + // ResourceExhausted pushes EWMA below threshold. With decay=10s and 1s + // spacing, alpha ~ 0.095 per sample. Need ~7 zeros to drop below 0.5. + for _ in 0..8 { + send_grpc(¶ms_d, tonic::Code::ResourceExhausted); + time::advance(Duration::from_secs(1)).await; + assert_pending!(task_d.poll()); + } + + assert!( + !params_d.gate.is_open(), + "ResourceExhausted should degrade SR EWMA and trip on low success rate", + ); +} + +// Check that a Retry-After hint recorded in the store extends the breaker's +// backoff duration beyond the normal exponential backoff. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn retry_after_hint_extends_backoff_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let ra_store = RetryAfterStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a 5s Retry-After hint before the trip. The hint must be present + // in the store when the breaker enters closed state and calls + // take_combined_hint() on its first backoff iteration. In production the + // classifier records the hint from the Retry-After header before the + // breaker loop processes the tripping response. + ra_store.record(Duration::from_secs(5)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Advance 4s (hint minus 1s). The normal 1s exponential backoff would have + // expired, but the 5s hint keeps the gate shut. This is the important + // check. Without the hint, probation would arrive at ~1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s (hint is 5s, backoff is 1s)", + ); + + // Advance past the 5s hint boundary. The hint elapses and probation begins. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "gate should enter probation after the hint duration expires", + ); +} + +// When both stores hold hints longer than the configured maximum, the breaker +// clamps each hint to that maximum before taking the larger of the two. With +// both hints above the cap, the backoff floor is the cap itself, so the gate +// enters probation when the cap elapses rather than when the raw hints would. +// This pins take_combined_hint's clamp-then-max behavior end-to-end. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn combined_hints_clamp_to_max_duration_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Both hints exceed the cap. Each is clamped to max_duration, so the + // combined floor is max(cap, cap) = cap, not the raw 600s. + let over_cap = DEFAULT_RETRY_AFTER_MAX_DURATION + Duration::from_secs(300); + ra_store.record(over_cap); + grpc_store.record(over_cap); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Just short of the cap the gate is still shut. The clamped floor has not + // elapsed yet. + time::sleep(DEFAULT_RETRY_AFTER_MAX_DURATION - Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut until the clamped max_duration elapses", + ); + + // Past the cap the gate enters probation. If either raw 600s hint had + // survived unclamped, the gate would still be shut here. + time::sleep(Duration::from_secs(2)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation once max_duration elapses, proving both hints were clamped", + ); +} + +// In consecutive-only mode (success_rate=None), a 429 response during probation +// is evaluated using `class.is_success()` (the old code behavior), not the +// stricter 429-as-failure check used in dual mode. This preserves backwards +// compatibility for people that do not enable success rate tracking. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn consecutive_only_429_does_not_change_probe_behavior() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + // Consecutive-only mode: success_rate disabled via threshold=0.0 and + // min_requests=usize::MAX (the sentinel values used when the annotation + // has no success_rate configuration). + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 2, + threshold: 0.0, + min_requests: usize::MAX, + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Trip via 2 consecutive 5xx. + for _ in 0..2 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 2x 5xx"); + + // Wait for backoff to enter probation. + time::sleep(Duration::from_secs(2)).await; + assert_pending!(task.poll()); + + for _ in 0..5 { + if params.gate.is_limited() { + break; + } + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!( + params.gate.is_limited(), + "gate should be in probation (Limited)", + ); + + // Send a 429 as the probe response. In consecutive-only mode 429 is + // evaluated by class.is_success() which treats 429 as success (it is not + // a server error). This is the old consecutive-failures breaker behavior. + send_ok(¶ms, http::StatusCode::TOO_MANY_REQUESTS); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + + // The breaker should reopen because the consecutive-only probe check + // delegates to class.is_success(), and 429 is_success() == true. + assert!( + params.gate.is_open(), + "consecutive-only mode: 429 during probation should be treated as success \ + (delegates to class.is_success()), preserving old breaker semantics", + ); +} + +// The breaker reads only the stores it holds. A hint recorded into a different +// store never reaches it, so the backoff stays on the base schedule. The gate +// set builds a separate store pair per endpoint, so one breaker cannot read a +// hint meant for another. +// +// The test models this directly. A hint goes into an unrelated store while the +// breaker holds the empty stores from default_test_config. The breaker trips +// and reopens on the base 1s backoff, proving the recorded hint was never read. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn hint_in_unheld_store_is_ignored() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + // The breaker keeps the empty stores from default_test_config. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a long hint into a store the breaker does not hold. With the + // classifier separated from the breaker's stores, this hint cannot + // change the backoff. + let unread_store = RetryAfterStore::new(); + unread_store.record(Duration::from_secs(10)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Advance just past the 1s base backoff. If the 10s hint had reached the + // breaker, the gate would still be shut. Because it did not, probation + // arrives on the base interval. + time::sleep(Duration::from_millis(1050)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "gate should enter probation on the base 1s backoff when no hint reaches the breaker", + ); + + // The unrelated store still holds its hint. The breaker never took it. + // take() adjusts the returned duration for elapsed time, so the exact value + // is not pinned here. What matters is that the slot is still occupied. + assert!( + unread_store.take(Duration::from_secs(60)).is_some(), + "the hint must remain in the unread store, untouched by the breaker", + ); +} + +// When failure_accrual is None (disabled), the ExtractParam impl does +// not spawn a breaker task. The gate stays open permanently with no +// state transitions. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn failure_accrual_none_produces_no_breaker_task() { + let _trace = linkerd_tracing::test::trace_init(); + + let params_struct = Params { + accrual: None, + channel_capacity: 1, + retry_after_store: RetryAfterStore::new(), + grpc_retry_pushback_store: GrpcRetryPushbackStore::new(), + max_duration: DEFAULT_RETRY_AFTER_MAX_DURATION, + }; + + let gate_params: gate::Params = params_struct.extract_param(&()); + + // The gate should start open. + assert!( + gate_params.gate.is_open(), + "gate should be open when failure_accrual is None", + ); + + // The response channel receiver was dropped because no breaker task was + // spawned. try_send() returns an error because nobody is listening. + // The expected behavior is the channel sender exists in the gate params + // (for BroadcastClassification to use), but sends are silently dropped. + let send_result = gate_params.responses.try_send(classify::Class::Http(Err( + http::StatusCode::INTERNAL_SERVER_ERROR, + ))); + assert!( + send_result.is_err(), + "try_send should fail because no breaker task consumes the receiver", + ); + + // Give the runtime a chance to process any spawned tasks. + time::sleep(Duration::from_millis(100)).await; + + // The gate must still be open. No breaker task was spawned to shut it. + assert!( + gate_params.gate.is_open(), + "gate should remain permanently open when failure_accrual is None \ + (no breaker task spawned to close it)", + ); +} + +// Drives a real 429 with a Retry-After header through the classifier and on +// into the breaker's backoff, end to end. The classifier records into the exact +// RetryAfterStore the breaker drains, so a mismatch between the two store +// instances would show up here as a base-interval backoff instead of the +// extended one. The header value, not a pre-seeded store entry, sets the floor. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn retry_after_header_extends_backoff_end_to_end() { + use super::retry_after::RetryAfterClassify; + use linkerd_http_classify::ClassifyResponse; + use linkerd_proxy_client_policy::http::StatusRanges; + + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + // The breaker drains the very stores the classifier writes to. + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Build an actual 429 with `Retry-After: 5` and run it through the + // classifier bound to the breaker's stores. The classifier parses the + // header and records 5s into ra_store. + let classifier = RetryAfterClassify::new( + classify::Response::Http(StatusRanges::default()), + ra_store.clone(), + grpc_store.clone(), + ); + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::TOO_MANY_REQUESTS; + rsp.headers_mut().insert( + http::header::RETRY_AFTER, + http::HeaderValue::from_static("5"), + ); + let _eos = classifier.start(&rsp); + + // Trip via 3 consecutive 5xx. The hint is already in the store when the + // breaker enters its backoff and calls take_combined_hint(). + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // At 4s the base 1s backoff has long passed, but the 5s header hint (less + // the ~300ms consumed while tripping) keeps the gate shut. This is the + // important check. Without the header reaching the breaker's store, + // probation would have arrived at ~1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s because the Retry-After header extended the backoff", + ); + + // Past the hint window the gate enters probation. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation once the header-derived hint elapses", + ); +} + +// The gRPC counterpart. A real 200 OK whose RESOURCE_EXHAUSTED trailer holds +// `grpc-retry-pushback-ms` runs through the classifier's trailer handling into +// the breaker's gRPC store. As with the HTTP case, the classifier and breaker +// share one store instance, so this catches a wiring split that the +// store-injecting tests cannot. The trailer pushback, not a seeded value, drives +// the backoff. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn grpc_pushback_trailer_extends_backoff_end_to_end() { + use super::retry_after::RetryAfterClassify; + use linkerd_http_classify::{ClassifyEos, ClassifyResponse}; + use linkerd_proxy_client_policy::grpc::Codes; + + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // A 200 OK with no gRPC status header opens a streaming gRPC response. The + // RESOURCE_EXHAUSTED status and pushback arrive in the trailers, which the + // classifier parses into the breaker's gRPC store. + let classifier = RetryAfterClassify::new( + classify::Response::Grpc(Codes::default()), + ra_store.clone(), + grpc_store.clone(), + ); + let mut rsp = http::Response::new(()); + *rsp.status_mut() = http::StatusCode::OK; + rsp.headers_mut().insert( + http::header::CONTENT_TYPE, + http::HeaderValue::from_static("application/grpc"), + ); + let eos = classifier.start(&rsp); + + let mut trailers = http::HeaderMap::new(); + trailers.insert("grpc-status", http::HeaderValue::from_static("8")); + trailers.insert( + "grpc-retry-pushback-ms", + http::HeaderValue::from_static("5000"), + ); + let class = eos.eos(Some(&trailers)); + assert_eq!( + class, + classify::Class::Grpc(Ok(tonic::Code::ResourceExhausted)), + "trailer classification should yield RESOURCE_EXHAUSTED", + ); + + // Trip via 3 consecutive 5xx. The 5s pushback is in the gRPC store before + // the breaker reaches its backoff. + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // At 4s the gate is still shut because the 5s trailer pushback (less the + // ~300ms tripping window) extended the backoff beyond the base 1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s because the gRPC pushback trailer extended the backoff", + ); + + // Past the pushback window the gate enters probation. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation once the trailer-derived pushback elapses", + ); +} From b272d354055dbef4a7e161df007b6d01bdd0098a Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:16:14 +0200 Subject: [PATCH 16/18] test(outbound): cover grpc hint extending backoff A gRPC pushback hint recorded in the store before the breaker trips must hold the gate shut past the normal exponential backoff. With a 5s hint and a 1s backoff, the gate stays shut at 4s and enters probation only after the hint elapses. Signed-off-by: Alejandro Martinez Ruiz --- .../src/http/breaker/integration_tests.rs | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/linkerd/app/outbound/src/http/breaker/integration_tests.rs b/linkerd/app/outbound/src/http/breaker/integration_tests.rs index a4fee32b8e..a886b4c89a 100644 --- a/linkerd/app/outbound/src/http/breaker/integration_tests.rs +++ b/linkerd/app/outbound/src/http/breaker/integration_tests.rs @@ -328,6 +328,57 @@ async fn retry_after_hint_extends_backoff_integration() { ); } +// Check that a gRPC pushback hint recorded in the store extends the breaker's +// backoff duration. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn grpc_pushback_hint_extends_backoff_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record a 5s gRPC pushback hint before the trip. The hint must be + // present in the store when the breaker enters closed state and calls + // take_combined_hint() on its first backoff iteration. + grpc_store.record(Duration::from_secs(5)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // Advance 4s (hint minus 1s). The normal 1s exponential backoff would + // have expired, but the 5s gRPC pushback hint keeps the gate shut. This is + // the important check. Without the hint, probation would arrive at + // ~1s. + time::sleep(Duration::from_secs(4)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 4s (gRPC hint is 5s, backoff is 1s)", + ); + + // Advance past the 5s hint boundary. The hint elapses and probation begins. + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + + assert!( + params.gate.is_limited(), + "gate should enter probation after the gRPC hint duration expires", + ); +} + // When both stores hold hints longer than the configured maximum, the breaker // clamps each hint to that maximum before taking the larger of the two. With // both hints above the cap, the backoff floor is the cap itself, so the gate From 072bbdcf2e799ab553751a8df9861d5edb638dba Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:16:50 +0200 Subject: [PATCH 17/18] test(outbound): cover combined HTTP and gRPC hint max-wins When both an HTTP Retry-After and a gRPC pushback hint are present, the breaker takes the larger of the two for backoff extension. The integration test records http=3s and grpc=7s, trips via three 5xx, and checks the gate stays shut past the 3s mark, still shut at 6s, then enters probation once the 7s window ends. Signed-off-by: Alejandro Martinez Ruiz --- .../src/http/breaker/integration_tests.rs | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/linkerd/app/outbound/src/http/breaker/integration_tests.rs b/linkerd/app/outbound/src/http/breaker/integration_tests.rs index a886b4c89a..fe30cf1b8c 100644 --- a/linkerd/app/outbound/src/http/breaker/integration_tests.rs +++ b/linkerd/app/outbound/src/http/breaker/integration_tests.rs @@ -379,6 +379,65 @@ async fn grpc_pushback_hint_extends_backoff_integration() { ); } +// Check that when both HTTP Retry-After and gRPC pushback hints are +// present, the breaker uses the maximum of the two durations for backoff +// extension. With http=3s and grpc=7s, the gate stays shut until 7s. +#[tokio::test(flavor = "current_thread", start_paused = true)] +async fn combined_http_and_grpc_hints_max_wins_integration() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(8); + let ra_store = RetryAfterStore::new(); + let grpc_store = GrpcRetryPushbackStore::new(); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + retry_after_store: ra_store.clone(), + grpc_retry_pushback_store: grpc_store.clone(), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + + // Record both hints before the trip. take_combined_hint() returns + // max(http, grpc) = max(3s, 7s) = 7s. + ra_store.record(Duration::from_secs(3)); + grpc_store.record(Duration::from_secs(7)); + + // Trip via 3 consecutive 5xx + for _ in 0..3 { + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(Duration::from_millis(100)).await; + assert_pending!(task.poll()); + } + assert!(params.gate.is_shut(), "breaker should trip after 3x 5xx"); + + // At 3s the HTTP hint has expired, but the gRPC 7s hint keeps the gate + // shut because take_combined_hint returned max(3s, 7s) = 7s. + time::sleep(Duration::from_secs(3)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 3s (gRPC hint is 7s)", + ); + + // At 6s total we're still within the 7s gRPC hint window + time::sleep(Duration::from_secs(3)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "gate should remain shut at 6s (gRPC hint is 7s)", + ); + + // At 7s total the gRPC hint expires and the gate enters probation + time::sleep(Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_limited(), + "gate should enter probation after max(3s, 7s) = 7s hint expires", + ); +} + // When both stores hold hints longer than the configured maximum, the breaker // clamps each hint to that maximum before taking the larger of the two. With // both hints above the cap, the backoff floor is the cap itself, so the gate From 202289583f88879bfa471b48cf4178508f878b16 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Ruiz Date: Thu, 28 May 2026 15:19:30 +0200 Subject: [PATCH 18/18] test(outbound): verify cold-start restart after idle After an idle gap of many decay windows the request counter resets, so the success-rate policy again needs min_requests samples before it can trip. Signed-off-by: Alejandro Martinez Ruiz --- .../app/outbound/src/http/breaker/unified.rs | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/linkerd/app/outbound/src/http/breaker/unified.rs b/linkerd/app/outbound/src/http/breaker/unified.rs index 20636dbf05..cdc978c713 100644 --- a/linkerd/app/outbound/src/http/breaker/unified.rs +++ b/linkerd/app/outbound/src/http/breaker/unified.rs @@ -1341,6 +1341,125 @@ mod tests { } } + // After a long idle period (many multiples of the decay window), cold-start + // protection re-engages so that one post-idle sample cannot trip the circuit. + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn idle_reengages_cold_start_with_success() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, + threshold: 0.5, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + time::advance(time::Duration::from_millis(1)).await; + + for _ in 0..3 { + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "One failure among successes should not trip (EWMA still above 0.5)" + ); + + // Long idle: 200s = 20x the decay window, well above the 3x + // cold-start re-engagement threshold. + time::advance(time::Duration::from_secs(200)).await; + + // Cold-start re-engaged: request_count reset to 0. A post-idle + // success brings it to 1, well below min_requests=3. + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle success: cold-start active (1 < min_requests) and EWMA near 1.0" + ); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle failure: cold-start still active (2 < min_requests)" + ); + } + + #[tokio::test(flavor = "current_thread", start_paused = true)] + async fn idle_reengages_cold_start_with_failure() { + let _trace = linkerd_tracing::test::trace_init(); + + let (params, gate_tx, rsps) = gate::Params::channel(1); + + let breaker = UnifiedBreaker::new(UnifiedBreakerConfig { + max_failures: 100, + threshold: 0.5, + min_requests: 3, + decay: time::Duration::from_secs(10), + ..default_test_config(gate_tx, rsps) + }); + let mut task = task::spawn(breaker.run()); + + assert_pending!(task.poll()); + time::advance(time::Duration::from_millis(1)).await; + + for _ in 0..3 { + send_ok(¶ms, http::StatusCode::OK); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + } + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_secs(1)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "One failure among successes should not trip (EWMA still above 0.5)" + ); + + // Long idle: 200s = 20x the decay window + time::advance(time::Duration::from_secs(200)).await; + + // Cold-start re-engaged: request_count reset to 0. Failures after + // idle cannot trip until min_requests new observations accumulate. + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle failure 1/3: cold-start active (1 < min_requests)" + ); + + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_open(), + "Post-idle failure 2/3: cold-start active (2 < min_requests)" + ); + + // Third failure reaches min_requests=3 with EWMA near 0.0 + send_err(¶ms, http::StatusCode::BAD_GATEWAY); + time::advance(time::Duration::from_millis(10)).await; + assert_pending!(task.poll()); + assert!( + params.gate.is_shut(), + "Post-idle failure 3/3: cold-start satisfied, EWMA near 0.0 < threshold 0.5" + ); + } + // A closed response channel must end the breaker task even while it is // parked in the backoff wait. recv() on a closed channel yields None right // away, so the drain step has to treat None as teardown rather than looping