diff --git a/changelog.d/tag_cardinality_per_tag_cache_size.enhancement.md b/changelog.d/tag_cardinality_per_tag_cache_size.enhancement.md new file mode 100644 index 0000000000000..6f271d1042e18 --- /dev/null +++ b/changelog.d/tag_cardinality_per_tag_cache_size.enhancement.md @@ -0,0 +1,3 @@ +Adds a per-tag `cache_size_per_key` option to configuration options in probabilistic mode. Previously, per-tag overrides always inherited the bloom filter cache size from the enclosing config, which could cause a higher false positive rate when the per-tag `value_limit` is higher than the global or per-metric `value_limit`. When omitted, the cache size value from the enclosing config is used. Only valid in `probabilistic` mode — using it in `exact` mode will cause a configuration error. + +authors: ArunPiduguDD diff --git a/src/transforms/tag_cardinality_limit/config.rs b/src/transforms/tag_cardinality_limit/config.rs index a1471746972fa..b4f09783f35ce 100644 --- a/src/transforms/tag_cardinality_limit/config.rs +++ b/src/transforms/tag_cardinality_limit/config.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; +use snafu::Snafu; use vector_lib::configurable::configurable_component; use crate::{ @@ -212,6 +213,10 @@ impl OverrideMode { /// environment: /// mode: limit_override # track with a per-tag cap /// value_limit: 3 +/// high_cardinality_tag: +/// mode: limit_override +/// value_limit: 1000 +/// cache_size_per_key: 102400 # larger bloom filter for this tag /// trace_id: /// mode: excluded # opt out of tracking entirely /// ``` @@ -225,19 +230,25 @@ pub struct PerTagConfig { /// Mode applied to a specific tag key within a per-metric override. /// -/// The tracking algorithm (`exact`/`probabilistic`), `cache_size_per_key`, -/// `limit_exceeded_action`, and `internal_metrics` are always inherited from the -/// enclosing per-metric configuration. +/// The tracking algorithm (`exact`/`probabilistic`), `limit_exceeded_action`, and +/// `internal_metrics` are inherited from the enclosing per-metric (or global) configuration. +/// `cache_size_per_key` may optionally be overridden per tag when probabilistic mode is in use. #[configurable_component] #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[serde(tag = "mode", rename_all = "snake_case", deny_unknown_fields)] #[configurable(metadata(docs::enum_tag_description = "Controls how this tag key is handled."))] pub enum PerTagMode { - /// Track this tag with a per-tag value limit. The enclosing per-metric tracking - /// algorithm and all other settings still apply. + /// Track this tag with a per-tag value limit. All other settings are inherited from + /// the enclosing config. LimitOverride { /// Maximum number of distinct values to accept for this tag key. value_limit: usize, + + /// Override the bloom filter cache size for this specific tag key. + /// Only valid in `probabilistic` mode; setting this in `exact` mode is a configuration error. + /// Inherits from the enclosing config when unset. + #[serde(default)] + cache_size_per_key: Option, }, /// Opt this tag out of cardinality tracking entirely. All values pass through /// without being recorded or checked against any `value_limit`. @@ -330,10 +341,58 @@ impl GenerateConfig for Config { } } +#[derive(Debug, Snafu)] +pub enum BuildError { + #[snafu(display( + "cache_size_per_key set on per-tag entry `{tag_key}` but the inherited mode is \ + `exact`, where it has no effect. Remove the field or switch to `probabilistic` mode." + ))] + CacheSizeInExactMode { tag_key: String }, +} + +impl Config { + fn validate(&self) -> crate::Result<()> { + // Global per_tag_limits: validate against the global mode. + if self.global.mode == Mode::Exact { + for (tag_key, tag_cfg) in &self.per_tag_limits { + if let PerTagMode::LimitOverride { + cache_size_per_key: Some(_), + .. + } = tag_cfg.mode + { + return Err(Box::new(BuildError::CacheSizeInExactMode { + tag_key: tag_key.clone(), + })); + } + } + } + + // Per-metric per_tag_limits: validate against each per-metric mode. + for per_metric in self.per_metric_limits.values() { + if per_metric.config.mode == OverrideMode::Exact { + for (tag_key, tag_cfg) in &per_metric.per_tag_limits { + if let PerTagMode::LimitOverride { + cache_size_per_key: Some(_), + .. + } = tag_cfg.mode + { + return Err(Box::new(BuildError::CacheSizeInExactMode { + tag_key: tag_key.clone(), + })); + } + } + } + } + + Ok(()) + } +} + #[async_trait::async_trait] #[typetag::serde(name = "tag_cardinality_limit")] impl TransformConfig for Config { async fn build(&self, _context: &TransformContext) -> crate::Result { + self.validate()?; Ok(Transform::event_task(TagCardinalityLimit::new( self.clone(), ))) diff --git a/src/transforms/tag_cardinality_limit/mod.rs b/src/transforms/tag_cardinality_limit/mod.rs index 755a23c8d1006..e43738694912e 100644 --- a/src/transforms/tag_cardinality_limit/mod.rs +++ b/src/transforms/tag_cardinality_limit/mod.rs @@ -26,6 +26,17 @@ use crate::event::metric::TagValueSet; type MetricId = (Option, String); +/// Replaces the bloom filter size in a `Probabilistic` mode with the override. No-op when +/// `override_size` is `None`. +const fn apply_cache_size_override(mode: Mode, override_size: Option) -> Mode { + match (mode, override_size) { + (Mode::Probabilistic(_), Some(size)) => Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: size, + }), + _ => mode, + } +} + /// Outcome of applying tag cardinality tracking to a tag value. #[derive(Debug, Eq, PartialEq)] enum AcceptResult { @@ -126,13 +137,14 @@ impl TagCardinalityLimit { if let Some(per_tag) = per_metric.per_tag_limits.get(tag_key) { match per_tag.mode { PerTagMode::Excluded => return TagSettings::Excluded, - PerTagMode::LimitOverride { value_limit } => { - // Tracking algorithm and all other settings are always inherited - // from the per-metric config. + PerTagMode::LimitOverride { + value_limit, + cache_size_per_key, + } => { return TagSettings::Tracked(Inner { value_limit, limit_exceeded_action, - mode: metric_mode, + mode: apply_cache_size_override(metric_mode, cache_size_per_key), internal_metrics, }); } @@ -152,8 +164,12 @@ impl TagCardinalityLimit { let global = self.config.global; match self.config.per_tag_limits.get(tag_key).map(|c| c.mode) { Some(PerTagMode::Excluded) => TagSettings::Excluded, - Some(PerTagMode::LimitOverride { value_limit }) => TagSettings::Tracked(Inner { + Some(PerTagMode::LimitOverride { + value_limit, + cache_size_per_key, + }) => TagSettings::Tracked(Inner { value_limit, + mode: apply_cache_size_override(global.mode, cache_size_per_key), ..global }), None => TagSettings::Tracked(global), diff --git a/src/transforms/tag_cardinality_limit/tests.rs b/src/transforms/tag_cardinality_limit/tests.rs index b8a453a3c4844..ed97feca4b4d1 100644 --- a/src/transforms/tag_cardinality_limit/tests.rs +++ b/src/transforms/tag_cardinality_limit/tests.rs @@ -12,7 +12,7 @@ use vrl::compiler::prelude::Kind; use super::*; use crate::{ - config::{LogNamespace, schema::Definition}, + config::{LogNamespace, TransformConfig, TransformContext, schema::Definition}, event::{Event, Metric, MetricTags, metric, metric::TagValue}, test_util::components::assert_transform_compliance, transforms::{ @@ -849,7 +849,10 @@ fn max_tracked_keys_caps_across_per_metric_buckets() { fn make_per_tag(value_limit: usize) -> PerTagConfig { PerTagConfig { - mode: PerTagMode::LimitOverride { value_limit }, + mode: PerTagMode::LimitOverride { + value_limit, + cache_size_per_key: None, + }, } } @@ -1127,7 +1130,10 @@ fn tag_excluded_unbounded_sibling_limited() { #[test] fn per_tag_limit_override_caps_at_explicit_value() { let per_tag = PerTagConfig { - mode: PerTagMode::LimitOverride { value_limit: 2 }, + mode: PerTagMode::LimitOverride { + value_limit: 2, + cache_size_per_key: None, + }, }; let config = make_transform_hashset_with_per_metric_limits( 500, @@ -1210,7 +1216,13 @@ per_metric_limits: let per_metric = parsed.per_metric_limits.get("metric_a").unwrap(); let capped = per_metric.per_tag_limits.get("capped_tag").unwrap(); - assert_eq!(capped.mode, PerTagMode::LimitOverride { value_limit: 10 }); + assert_eq!( + capped.mode, + PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: None, + } + ); let excluded = per_metric.per_tag_limits.get("excluded_tag").unwrap(); assert_eq!(excluded.mode, PerTagMode::Excluded); @@ -1456,8 +1468,222 @@ per_tag_limits: let parsed: Config = serde_yaml::from_str(yaml).expect("yaml should deserialize"); let capped = parsed.per_tag_limits.get("capped_tag").unwrap(); - assert_eq!(capped.mode, PerTagMode::LimitOverride { value_limit: 10 }); + assert_eq!( + capped.mode, + PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: None, + } + ); let excluded = parsed.per_tag_limits.get("excluded_tag").unwrap(); assert_eq!(excluded.mode, PerTagMode::Excluded); } + +/// `apply_cache_size_override` replaces the bloom size when mode is probabilistic and an +/// override is given; leaves the mode unchanged in all other cases. +#[test] +fn apply_cache_size_override_probabilistic_with_some() { + let base = Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: default_cache_size(), + }); + let result = apply_cache_size_override(base, Some(1024)); + assert_eq!( + result, + Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: 1024, + }) + ); +} + +#[test] +fn apply_cache_size_override_exact_with_some_is_noop() { + let result = apply_cache_size_override(Mode::Exact, Some(1024)); + assert_eq!(result, Mode::Exact); +} + +#[test] +fn apply_cache_size_override_probabilistic_with_none_inherits() { + let base = Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: default_cache_size(), + }); + let result = apply_cache_size_override(base, None); + assert_eq!( + result, + Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: default_cache_size(), + }) + ); +} + +/// A per-metric `limit_override` with `cache_size_per_key` set deserializes correctly. +#[test] +fn per_tag_cache_size_per_key_deserializes() { + let yaml = r#" +value_limit: 5 +mode: probabilistic +cache_size_per_key: 5120 +per_metric_limits: + metric_a: + mode: probabilistic + cache_size_per_key: 5120 + per_tag_limits: + big_tag: + mode: limit_override + value_limit: 100 + cache_size_per_key: 32768 + default_tag: + mode: limit_override + value_limit: 10 +"#; + let parsed: Config = serde_yaml::from_str(yaml).expect("yaml should deserialize"); + let per_metric = parsed.per_metric_limits.get("metric_a").unwrap(); + + let big_tag = per_metric.per_tag_limits.get("big_tag").unwrap(); + assert_eq!( + big_tag.mode, + PerTagMode::LimitOverride { + value_limit: 100, + cache_size_per_key: Some(32768), + } + ); + + // Omitting the field defaults to None (inherits from enclosing config). + let default_tag = per_metric.per_tag_limits.get("default_tag").unwrap(); + assert_eq!( + default_tag.mode, + PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: None, + } + ); +} + +/// A global `per_tag_limits` `limit_override` with `cache_size_per_key` set deserializes correctly. +#[test] +fn global_per_tag_cache_size_per_key_deserializes() { + let yaml = r#" +value_limit: 5 +mode: probabilistic +cache_size_per_key: 5120 +per_tag_limits: + big_tag: + mode: limit_override + value_limit: 100 + cache_size_per_key: 32768 + default_tag: + mode: limit_override + value_limit: 10 +"#; + let parsed: Config = serde_yaml::from_str(yaml).expect("yaml should deserialize"); + + let big_tag = parsed.per_tag_limits.get("big_tag").unwrap(); + assert_eq!( + big_tag.mode, + PerTagMode::LimitOverride { + value_limit: 100, + cache_size_per_key: Some(32768), + } + ); + + let default_tag = parsed.per_tag_limits.get("default_tag").unwrap(); + assert_eq!( + default_tag.mode, + PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: None, + } + ); +} + +// ============================================================================ +// Config validation tests +// ============================================================================ + +/// cache_size_per_key on a global per-tag entry in exact mode is a build error. +#[tokio::test] +async fn validation_rejects_cache_size_in_global_exact_mode() { + let config = make_transform_with_global_per_tag_limits( + 500, + LimitExceededAction::DropTag, + Mode::Exact, + HashMap::from([( + "tag1".to_string(), + PerTagConfig { + mode: PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: Some(1024), + }, + }, + )]), + ); + assert!(config.build(&TransformContext::default()).await.is_err()); +} + +/// cache_size_per_key on a per-metric per-tag entry in exact mode is a build error. +#[tokio::test] +async fn validation_rejects_cache_size_in_per_metric_exact_mode() { + let config = make_transform_hashset_with_per_metric_limits( + 500, + LimitExceededAction::DropTag, + HashMap::from([( + "metricA".to_string(), + make_per_metric( + 10, + LimitExceededAction::DropTag, + HashMap::from([( + "tag1".to_string(), + PerTagConfig { + mode: PerTagMode::LimitOverride { + value_limit: 5, + cache_size_per_key: Some(1024), + }, + }, + )]), + ), + )]), + ); + assert!(config.build(&TransformContext::default()).await.is_err()); +} + +/// cache_size_per_key in probabilistic mode is valid. +#[tokio::test] +async fn validation_allows_cache_size_in_probabilistic_mode() { + let config = make_transform_with_global_per_tag_limits( + 500, + LimitExceededAction::DropTag, + Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: default_cache_size(), + }), + HashMap::from([( + "tag1".to_string(), + PerTagConfig { + mode: PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: Some(1024), + }, + }, + )]), + ); + assert!(config.build(&TransformContext::default()).await.is_ok()); +} + +/// cache_size_per_key: None in exact mode is fine (no regression). +#[tokio::test] +async fn validation_allows_no_cache_size_override_in_exact_mode() { + let config = make_transform_with_global_per_tag_limits( + 500, + LimitExceededAction::DropTag, + Mode::Exact, + HashMap::from([( + "tag1".to_string(), + PerTagConfig { + mode: PerTagMode::LimitOverride { + value_limit: 10, + cache_size_per_key: None, + }, + }, + )]), + ); + assert!(config.build(&TransformContext::default()).await.is_ok()); +} diff --git a/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue b/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue index fe47c37655f90..440fd7fbd1a1d 100644 --- a/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue +++ b/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue @@ -154,6 +154,16 @@ generated: components: transforms: tag_cardinality_limit: configuration: { description: "An individual tag configuration." required: true type: object: options: { + cache_size_per_key: { + description: """ + Override the bloom filter cache size for this specific tag key. + Only valid in `probabilistic` mode; setting this in `exact` mode is a configuration error. + Inherits from the enclosing config when unset. + """ + relevant_when: "mode = \"limit_override\"" + required: false + type: uint: {} + } mode: { description: "Controls how this tag key is handled." required: true @@ -163,8 +173,8 @@ generated: components: transforms: tag_cardinality_limit: configuration: { without being recorded or checked against any `value_limit`. """ limit_override: """ - Track this tag with a per-tag value limit. The enclosing per-metric tracking - algorithm and all other settings still apply. + Track this tag with a per-tag value limit. All other settings are inherited from + the enclosing config. """ } } @@ -199,6 +209,16 @@ generated: components: transforms: tag_cardinality_limit: configuration: { description: "An individual tag configuration." required: true type: object: options: { + cache_size_per_key: { + description: """ + Override the bloom filter cache size for this specific tag key. + Only valid in `probabilistic` mode; setting this in `exact` mode is a configuration error. + Inherits from the enclosing config when unset. + """ + relevant_when: "mode = \"limit_override\"" + required: false + type: uint: {} + } mode: { description: "Controls how this tag key is handled." required: true @@ -208,8 +228,8 @@ generated: components: transforms: tag_cardinality_limit: configuration: { without being recorded or checked against any `value_limit`. """ limit_override: """ - Track this tag with a per-tag value limit. The enclosing per-metric tracking - algorithm and all other settings still apply. + Track this tag with a per-tag value limit. All other settings are inherited from + the enclosing config. """ } }