diff --git a/changelog.d/tag_cardinality_limit_fingerprint_mode.feature.md b/changelog.d/tag_cardinality_limit_fingerprint_mode.feature.md new file mode 100644 index 0000000000000..9372f2b910fda --- /dev/null +++ b/changelog.d/tag_cardinality_limit_fingerprint_mode.feature.md @@ -0,0 +1,7 @@ +The `tag_cardinality_limit` transform now supports `mode: exact_fingerprint`, a new storage +mode that can reduce memory usage for high-cardinality tag values compared to +`mode: exact`. Instead of storing the full tag-value strings, only a 64-bit fingerprint hash of +each value is kept. The trade-off is that throughput is slightly impacted due to extra hashing +operations, and there is technically an (unlikely) chance of collisions at very high cardinalities. + +authors: ArunPiduguDD diff --git a/src/transforms/tag_cardinality_limit/config.rs b/src/transforms/tag_cardinality_limit/config.rs index a1471746972fa..a7a83ba399ee6 100644 --- a/src/transforms/tag_cardinality_limit/config.rs +++ b/src/transforms/tag_cardinality_limit/config.rs @@ -114,6 +114,13 @@ pub enum Mode { /// metrics with new tags after the limit has been hit. Exact, + /// This mode operates similarly to `exact` mode except it tracks cardinality using 64-bit hash fingerprints + /// of tag values instead of the original strings. This leads to lower memory requirements in most + /// scenarios (assuming average tag value size is greater than 8 bytes) at the cost of slightly + /// reduced throughput due to extra hashing operations and a very small chance of collisions at + /// very high cardinalities. + ExactFingerprint, + /// Tracks cardinality probabilistically. /// /// This mode has lower memory requirements than `exact`, but may occasionally allow metric @@ -183,6 +190,9 @@ pub enum OverrideMode { /// Tracks cardinality exactly. See `Mode::Exact` for details. Exact, + /// Tracks cardinality using 64-bit hash fingerprints. See `Mode::ExactFingerprint` for details. 
+ ExactFingerprint, + /// Tracks cardinality probabilistically. See `Mode::Probabilistic` for details. Probabilistic(BloomFilterConfig), @@ -196,6 +206,7 @@ impl OverrideMode { pub const fn as_mode(&self) -> Option<Mode> { match self { OverrideMode::Exact => Some(Mode::Exact), + OverrideMode::ExactFingerprint => Some(Mode::ExactFingerprint), OverrideMode::Probabilistic(b) => Some(Mode::Probabilistic(*b)), OverrideMode::Excluded => None, } diff --git a/src/transforms/tag_cardinality_limit/tag_value_set.rs b/src/transforms/tag_cardinality_limit/tag_value_set.rs index 0d9bbc216db25..a223bb3ffbedc 100644 --- a/src/transforms/tag_cardinality_limit/tag_value_set.rs +++ b/src/transforms/tag_cardinality_limit/tag_value_set.rs @@ -1,10 +1,24 @@ -use std::{collections::HashSet, fmt}; +use std::{ + collections::{HashSet, hash_map::RandomState}, + fmt, + hash::BuildHasher, +}; use bloomy::BloomFilter; +use hash_hasher::HashedSet; use crate::{event::metric::TagValueSet, transforms::tag_cardinality_limit::config::Mode}; /// Container for storing the set of accepted values for a given tag key. +/// +/// # Storage backend selection +/// +/// | `Mode` | Storage | +/// |----------------------|---------------------------------| +/// | `Exact` | `HashSet<TagValueSet>` | +/// | `ExactFingerprint` | `HashedSet<u64>` (fingerprints) | +/// | `Probabilistic` | `BloomFilter` | + #[derive(Debug)] pub struct AcceptedTagValueSet { storage: TagValueSetStorage, @@ -13,6 +27,8 @@ pub struct AcceptedTagValueSet { enum TagValueSetStorage { Set(HashSet<TagValueSet>), Bloom(BloomFilterStorage), + /// Stores 64-bit hash fingerprints of accepted tag values. + Fingerprint(FingerprintStorage), } /// A bloom filter that tracks the number of items inserted into it. @@ -49,19 +65,50 @@ impl BloomFilterStorage { } } +#[derive(Default)] +struct FingerprintStorage { + fingerprints: HashedSet<u64>, + /// Per-instance randomized hasher state. Each instance gets a distinct seed, making + /// pre-computed collision attacks infeasible. 
+ seed: RandomState, +} + +impl FingerprintStorage { + fn fingerprint(&self, value: &TagValueSet) -> u64 { + self.seed.hash_one(value) + } + + fn insert(&mut self, value: &TagValueSet) { + self.fingerprints.insert(self.fingerprint(value)); + } + + fn contains(&self, value: &TagValueSet) -> bool { + self.fingerprints.contains(&self.fingerprint(value)) + } + + fn len(&self) -> usize { + self.fingerprints.len() + } +} + impl fmt::Debug for TagValueSetStorage { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { TagValueSetStorage::Set(set) => write!(f, "Set({set:?})"), TagValueSetStorage::Bloom(_) => write!(f, "Bloom"), + TagValueSetStorage::Fingerprint(_) => write!(f, "Fingerprint"), } } } impl AcceptedTagValueSet { + /// Create a new `AcceptedTagValueSet` for the given mode. pub fn new(mode: &Mode) -> Self { let storage = match &mode { Mode::Exact => TagValueSetStorage::Set(HashSet::new()), + Mode::ExactFingerprint => { + TagValueSetStorage::Fingerprint(FingerprintStorage::default()) + } Mode::Probabilistic(config) => { TagValueSetStorage::Bloom(BloomFilterStorage::new(config.cache_size_per_key)) } @@ -73,6 +120,7 @@ impl AcceptedTagValueSet { match &self.storage { TagValueSetStorage::Set(set) => set.contains(value), TagValueSetStorage::Bloom(bloom) => bloom.contains(value), + TagValueSetStorage::Fingerprint(fp) => fp.contains(value), } } @@ -80,6 +128,7 @@ impl AcceptedTagValueSet { match &self.storage { TagValueSetStorage::Set(set) => set.len(), TagValueSetStorage::Bloom(bloom) => bloom.count(), + TagValueSetStorage::Fingerprint(fp) => fp.len(), } } @@ -89,6 +138,7 @@ impl AcceptedTagValueSet { set.insert(value); } TagValueSetStorage::Bloom(bloom) => bloom.insert(&value), + TagValueSetStorage::Fingerprint(fp) => fp.insert(&value), }; } } @@ -96,7 +146,10 @@ impl AcceptedTagValueSet { #[cfg(test)] mod tests { use super::*; - use crate::{event::metric::TagValueSet, transforms::tag_cardinality_limit::config::Mode}; + use crate::{ + 
+ event::metric::TagValueSet, + transforms::tag_cardinality_limit::config::{BloomFilterConfig, Mode}, + }; #[test] fn test_accepted_tag_value_set_exact() { @@ -116,7 +169,11 @@ mod tests { #[test] fn test_accepted_tag_value_set_probabilistic() { - let mut accepted_tag_value_set = AcceptedTagValueSet::new(&Mode::Exact); + // Use `Mode::Probabilistic` so this test exercises the bloom-filter backend, not the exact set. + let mut accepted_tag_value_set = + AcceptedTagValueSet::new(&Mode::Probabilistic(BloomFilterConfig { + cache_size_per_key: 5 * 1024, + })); assert!(!accepted_tag_value_set.contains(&TagValueSet::from(["value1".to_string()]))); assert_eq!(accepted_tag_value_set.len(), 0); @@ -134,4 +191,71 @@ mod tests { assert_eq!(accepted_tag_value_set.len(), 2); assert!(accepted_tag_value_set.contains(&TagValueSet::from(["value2".to_string()]))); } + + #[test] + fn test_accepted_tag_value_set_fingerprint() { + let mut set = AcceptedTagValueSet::new(&Mode::ExactFingerprint); + + assert!(!set.contains(&TagValueSet::from(["value1".to_string()]))); + assert_eq!(set.len(), 0); + + set.insert(TagValueSet::from(["value1".to_string()])); + assert_eq!(set.len(), 1); + assert!(set.contains(&TagValueSet::from(["value1".to_string()]))); + + // Inserting the same value again must not increase the count. + set.insert(TagValueSet::from(["value1".to_string()])); + assert_eq!(set.len(), 1); + + set.insert(TagValueSet::from(["value2".to_string()])); + assert_eq!(set.len(), 2); + assert!(set.contains(&TagValueSet::from(["value2".to_string()]))); + + // An un-inserted value must not appear to be contained. + assert!(!set.contains(&TagValueSet::from(["value3".to_string()]))); + + // Within-instance consistency: a value inserted into a set is found in that same set. 
+ let mut set2 = AcceptedTagValueSet::new(&Mode::ExactFingerprint); + set2.insert(TagValueSet::from(["value1".to_string()])); + assert!(set2.contains(&TagValueSet::from(["value1".to_string()]))); + assert!(!set2.contains(&TagValueSet::from(["value3".to_string()]))); + } + + #[test] + fn test_fingerprint_storage_uses_independent_seeds() { + // Two fresh FingerprintStorage instances must normally produce different fingerprints + // for the same value, proving that the per-instance random seed is active and no + // shared fixed seed exists that an attacker could exploit. + // + // Collision probability across two independent instances is ~2^-64; a failure here + // would indicate the seed is not being randomised. + let probe = TagValueSet::from(["probe-value".to_string()]); + let mut s1 = AcceptedTagValueSet::new(&Mode::ExactFingerprint); + let s2 = AcceptedTagValueSet::new(&Mode::ExactFingerprint); + // Insert into s1 only: s2 hashes with its own seed, so the same value must + // not appear in s2 (different seed → different fingerprint). + s1.insert(probe.clone()); + assert!( + !s2.contains(&probe), + "distinct FingerprintStorage instances must use independent random seeds" + ); + } + + #[test] + fn test_fingerprint_distribution_no_collisions() { + // Empirically guards the "good distribution" claim: inserting many distinct values + // must yield an equal number of distinct fingerprints. At 64 bits the birthday + // collision probability for 100k values is ~2.7e-10, so any collision here would + // indicate a badly-distributed hash rather than bad luck. 
+ let mut set = AcceptedTagValueSet::new(&Mode::ExactFingerprint); + let n = 100_000; + for i in 0..n { + set.insert(TagValueSet::from([format!("tag-value-{i}")])); + } + assert_eq!( + set.len(), + n, + "distinct values must produce distinct fingerprints" + ); + } } diff --git a/src/transforms/tag_cardinality_limit/tests.rs b/src/transforms/tag_cardinality_limit/tests.rs index b8a453a3c4844..423f65f8412bd 100644 --- a/src/transforms/tag_cardinality_limit/tests.rs +++ b/src/transforms/tag_cardinality_limit/tests.rs @@ -121,6 +121,24 @@ fn make_transform_bloom_with_per_metric_limits( } } +fn make_transform_fingerprint( + value_limit: usize, + limit_exceeded_action: LimitExceededAction, +) -> Config { + Config { + global: Inner { + value_limit, + limit_exceeded_action, + mode: Mode::ExactFingerprint, + internal_metrics: InternalMetricsConfig::default(), + }, + tracking_scope: TrackingScope::default(), + max_tracked_keys: None, + per_metric_limits: HashMap::new(), + per_tag_limits: HashMap::new(), + } +} + fn make_transform_with_global_per_tag_limits( value_limit: usize, limit_exceeded_action: LimitExceededAction, @@ -151,6 +169,15 @@ async fn tag_cardinality_limit_drop_event_bloom() { drop_event(make_transform_bloom(2, LimitExceededAction::DropEvent)).await; } +#[tokio::test] +async fn tag_cardinality_limit_drop_event_fingerprint() { + drop_event(make_transform_fingerprint( + 2, + LimitExceededAction::DropEvent, + )) + .await; +} + async fn drop_event(config: Config) { assert_transform_compliance(async move { let mut event1 = make_metric(metric_tags!("tag1" => "val1")); @@ -203,6 +230,11 @@ async fn tag_cardinality_limit_drop_tag_bloom() { drop_tag(make_transform_bloom(2, LimitExceededAction::DropTag)).await; } +#[tokio::test] +async fn tag_cardinality_limit_drop_tag_fingerprint() { + drop_tag(make_transform_fingerprint(2, LimitExceededAction::DropTag)).await; +} + async fn drop_tag(config: Config) { assert_transform_compliance(async move { let tags1 = 
metric_tags!("tag1" => "val1", "tag2" => "val1"); @@ -1235,6 +1267,11 @@ fn global_per_tag_excluded_drop_tag_passthrough_bloom() { })); } +#[test] +fn global_per_tag_excluded_drop_tag_passthrough_fingerprint() { + global_per_tag_excluded_drop_tag_passthrough(Mode::ExactFingerprint); +} + /// A globally-excluded tag passes through unchanged on every metric, even when its values /// would have exceeded `value_limit`. Sibling non-excluded tags still respect the limit. fn global_per_tag_excluded_drop_tag_passthrough(mode: Mode) { @@ -1287,6 +1324,11 @@ fn global_per_tag_excluded_drop_event_passthrough_bloom() { })); } +#[test] +fn global_per_tag_excluded_drop_event_passthrough_fingerprint() { + global_per_tag_excluded_drop_event_passthrough(Mode::ExactFingerprint); +} + /// Under `DropEvent`, a globally-excluded tag never triggers a drop, but a non-excluded /// tag exceeding `value_limit` still does. fn global_per_tag_excluded_drop_event_passthrough(mode: Mode) { @@ -1461,3 +1503,71 @@ per_tag_limits: let excluded = parsed.per_tag_limits.get("excluded_tag").unwrap(); assert_eq!(excluded.mode, PerTagMode::Excluded); } + +/// A re-sent already-accepted tag value must pass through even after the limit is hit, +/// for both DropTag and DropEvent actions. +#[test] +fn fingerprint_accepted_value_passes_through_after_limit() { + for action in [LimitExceededAction::DropTag, LimitExceededAction::DropEvent] { + let mut transform = TagCardinalityLimit::new(make_transform_fingerprint(2, action)); + transform + .transform_one(make_metric(metric_tags!("env" => "prod"))) + .unwrap(); + transform + .transform_one(make_metric(metric_tags!("env" => "staging"))) + .unwrap(); + // Limit now hit; re-send of an already-accepted value must still pass through. 
+ let e = transform + .transform_one(make_metric(metric_tags!("env" => "prod"))) + .unwrap(); + assert_eq!("prod", e.as_metric().tags().unwrap().get("env").unwrap()); + } +} + +/// Fingerprint mode must never allocate a tracking entry for a tag that is globally +/// excluded, matching the `Mode::Exact` "never allocate" contract. +#[test] +fn fingerprint_excluded_tag_never_populates_cache() { + let config = make_transform_with_global_per_tag_limits( + 2, + LimitExceededAction::DropTag, + Mode::ExactFingerprint, + HashMap::from([("kube_pod_name".to_string(), make_per_tag_excluded())]), + ); + let mut transform = TagCardinalityLimit::new(config); + + for i in 0..10 { + let event = make_metric(metric_tags!( + "kube_pod_name" => format!("pod-{i}").as_str(), + "tag1" => "val1" + )); + transform.transform_one(event).unwrap(); + } + + let bucket = transform + .accepted_tags + .get(&None) + .expect("non-excluded tag1 should still allocate a global bucket"); + assert!( + bucket.contains_key("tag1"), + "non-excluded tag must still be tracked" + ); + assert!( + !bucket.contains_key("kube_pod_name"), + "excluded tag key must never enter the fingerprint cache" + ); +} + +/// Fingerprint mode YAML round-trips: `mode: exact_fingerprint` deserializes cleanly. 
+#[test] +fn fingerprint_mode_deserializes() { + let yaml = "mode: exact_fingerprint"; + let mode: Mode = serde_yaml::from_str(yaml).expect("exact_fingerprint should deserialize"); + assert_eq!(mode, Mode::ExactFingerprint); + + let serialized = serde_yaml::to_string(&mode).expect("should serialize"); + assert!( + serialized.contains("exact_fingerprint"), + "serialized form should contain 'exact_fingerprint'" + ); +} diff --git a/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue b/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue index fe47c37655f90..7d4ebe3368e50 100644 --- a/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue +++ b/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue @@ -66,6 +66,13 @@ generated: components: transforms: tag_cardinality_limit: configuration: { This mode has higher memory requirements than `probabilistic`, but never falsely outputs metrics with new tags after the limit has been hit. """ + exact_fingerprint: """ + This mode operates similarly to `exact` mode except it tracks cardinality using 64-bit hash fingerprints + of tag values instead of the original strings. This leads to lower memory requirements in most + scenarios (assuming average tag value size is greater than 8 bytes) at the cost of slightly + reduced throughput due to extra hashing operations and a very small chance of collisions at + very high cardinalities. + """ probabilistic: """ Tracks cardinality probabilistically. @@ -126,7 +133,8 @@ generated: components: transforms: tag_cardinality_limit: configuration: { description: "Controls the approach taken for tracking tag cardinality." required: true type: string: enum: { - exact: "Tracks cardinality exactly. See `Mode::Exact` for details." + exact: "Tracks cardinality exactly. See `Mode::Exact` for details." + exact_fingerprint: "Tracks cardinality using 64-bit hash fingerprints. 
See `Mode::ExactFingerprint` for details." excluded: """ Skip cardinality tracking for this metric. All tag values pass through and nothing is limited. Other fields in this per-metric configuration are ignored when this is selected. diff --git a/website/cue/reference/components/transforms/tag_cardinality_limit.cue b/website/cue/reference/components/transforms/tag_cardinality_limit.cue index 5aad8656d8eff..ac2d475e1f9c2 100644 --- a/website/cue/reference/components/transforms/tag_cardinality_limit.cue +++ b/website/cue/reference/components/transforms/tag_cardinality_limit.cue @@ -125,6 +125,10 @@ components: transforms: tag_cardinality_limit: { metrics) ``` + Mode `exact_fingerprint` behaves like `exact` but stores an 8-byte hash of each + value instead of the value itself, so use the same formula with `8` in place of + the average tag value length. + In mode `probabilistic`, rather than storing all values seen for each key, each distinct key has a bloom filter which can probabilistically determine whether a given value has been seen for that key. The formula for estimating memory