From 4e9ba858845014f30129421669e423336fbd683c Mon Sep 17 00:00:00 2001 From: "Geoffrey M. Oxberry" Date: Wed, 20 May 2026 20:32:01 -0700 Subject: [PATCH] feat(payload): adopt AtLeastOneHundredth for Config::unique_tag_ratio Change the public `Config::unique_tag_ratio` field from `f32` to the `AtLeastOneHundredth` alias of `BoundedProbability<{ f32::to_bits(0.01) }>`. The `try_from` impl enforces the finite + `[0.01, 1.0]` invariant at deserialize time, so the redundant `MIN_UNIQUE_TAG_RATIO..=MAX_UNIQUE_TAG_RATIO` range check in `common::tags::Generator::new` is removed. The WARN-level check for values in `[MIN, WARN_UNIQUE_TAG_RATIO]` is preserved. The new type threads through `MemberGenerator::new`, `common::tags::Generator::new`, `dogstatsd::common::tags::Generator::new`, and `opentelemetry::common::TagGenerator::new`. The OTel `UNIQUE_TAG_RATIO` constant becomes a `const AtLeastOneHundredth` via a `match` on `try_new`. `MAX_UNIQUE_TAG_RATIO`, no longer referenced outside tests, is now `#[cfg(test)]`. At the comparison site in `<Generator as crate::Generator>::generate`, `.get()` extracts the inner `f32` so RNG sequences and bit-exact output are preserved. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- lading_payload/src/common/tags.rs | 41 +++++++++------------ lading_payload/src/dogstatsd.rs | 19 +++++----- lading_payload/src/dogstatsd/common/tags.rs | 20 +++++----- lading_payload/src/opentelemetry/common.rs | 14 +++++-- 4 files changed, 49 insertions(+), 45 deletions(-) diff --git a/lading_payload/src/common/tags.rs b/lading_payload/src/common/tags.rs index 2c02b2b25..c8230587d 100644 --- a/lading_payload/src/common/tags.rs +++ b/lading_payload/src/common/tags.rs @@ -9,13 +9,14 @@ use rand::{SeedableRng, rngs::SmallRng}; use tracing::warn; use crate::common::{ - config::ConfRange, + config::{AtLeastOneHundredth, ConfRange}, strings::{Pool, PoolKind}, }; use super::strings::Handle; pub(crate) const MIN_UNIQUE_TAG_RATIO: f32 = 0.01; +#[cfg(test)] pub(crate) const MAX_UNIQUE_TAG_RATIO: f32 = 1.00; pub(crate) const WARN_UNIQUE_TAG_RATIO: f32 = 0.10; pub(crate) const MIN_TAG_LENGTH: u16 = 3; @@ -120,13 +121,13 @@ impl TagGenerator { /// produce only a limited, deterministic set of tags while avoiding needing to /// allocate them all in one shot. /// -/// The `unique_tag_ratio` is a value between 0.10 and 1.0. It represents the -/// ratio of new tags to existing tags. If the value is 1.0, then all tags will -/// be new. If the value 0.0 were allowed, it would conceptually mean "always -/// use an existing tag", however this is a degenerate case as there would never -/// be a new tag generated. Therefore a minimum value is enforced. Despite this -/// minimum, if the configuration is overly restrictive, it may result in -/// non-unique tagsets. +/// The `unique_tag_ratio` is the ratio of new tags to existing tags. If the +/// value is 1.0, then all tags will be new. If the value 0.0 were allowed, it +/// would conceptually mean "always use an existing tag", however this is a +/// degenerate case as there would never be a new tag generated. The +/// [`AtLeastOneHundredth`] type enforces a closed range of `[0.01, 1.0]`. 
+/// Despite this minimum, if the configuration is overly restrictive, it may +/// result in non-unique tagsets. /// /// As an example: /// `unique_tag_probability`: 0.10 @@ -147,7 +148,7 @@ pub(crate) struct Generator { num_tagsets: usize, // Maximum number of unique tagsets that will ever be generated tags_per_msg: ConfRange, // Maximum number of tags per individually generated tagset tags: TagGenerator, - unique_tag_probability: f32, + unique_tag_probability: AtLeastOneHundredth, tag_store: RefCell, } @@ -165,7 +166,6 @@ impl Generator { /// # Errors /// - If `tags_per_msg` is invalid or exceeds the maximum /// - If `tag_length` is invalid or has minimum value less than 3 - /// - If `unique_tag_probability` is not between 0.10 and 1.0 pub(crate) fn new( seed: u64, tags_per_msg: ConfRange, @@ -173,7 +173,7 @@ impl Generator { num_tagsets: usize, str_pool: Rc, tag_pool: Rc, - unique_tag_probability: f32, + unique_tag_probability: AtLeastOneHundredth, ) -> Result { let (tag_length_valid, tag_length_valid_msg) = tag_length.valid(); if !tag_length_valid { @@ -188,13 +188,7 @@ impl Generator { ))); } - if !(MIN_UNIQUE_TAG_RATIO..=MAX_UNIQUE_TAG_RATIO).contains(&unique_tag_probability) { - return Err(Error::InvalidConstruction(format!( - "Unique tag ratio must be between {MIN_UNIQUE_TAG_RATIO} and {MAX_UNIQUE_TAG_RATIO}" - ))); - } - - if (MIN_UNIQUE_TAG_RATIO..=WARN_UNIQUE_TAG_RATIO).contains(&unique_tag_probability) { + if (MIN_UNIQUE_TAG_RATIO..=WARN_UNIQUE_TAG_RATIO).contains(&unique_tag_probability.get()) { warn!( "unique_tag_probability is less than {WARN_UNIQUE_TAG_RATIO}. 
This may result in non-unique tagsets" ); @@ -274,7 +268,7 @@ impl<'a> crate::Generator<'a> for Generator { // For remaining tags, decide whether to reuse existing tags or generate new ones while tagset.len() < tags_count { let choose_existing_prob: f32 = OpenClosed01.sample(&mut *rng); - let should_reuse = choose_existing_prob > self.unique_tag_probability; + let should_reuse = choose_existing_prob > self.unique_tag_probability.get(); if should_reuse && !tag_store.is_empty() { // Reuse an existing tag @@ -305,7 +299,7 @@ mod test { use super::{MAX_UNIQUE_TAG_RATIO, MIN_TAG_LENGTH, WARN_UNIQUE_TAG_RATIO}; use crate::Generator; - use crate::common::config::ConfRange; + use crate::common::config::{AtLeastOneHundredth, ConfRange}; use crate::common::strings::{Handle, PoolKind, RandomStringPool}; proptest! { @@ -331,7 +325,7 @@ mod test { num_tagsets, str_pool, tag_pool, - 1.0 + AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"), ).expect("Tag generator to be valid"); for _ in 0..num_tagsets { @@ -370,7 +364,7 @@ mod test { num_tagsets, str_pool, tag_pool, - 1.0 + AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"), ).expect("Tag generator to be valid"); // First batch with fresh RNG @@ -420,7 +414,8 @@ mod test { desired_num_tagsets, str_pool, tag_pool, - unique_tag_ratio + AtLeastOneHundredth::try_new(unique_tag_ratio) + .expect("unique_tag_ratio drawn from [WARN, MAX], which is within [0.01, 1.0]"), ).expect("Tag generator to be valid"); let mut unique_tagsets: HashSet> = HashSet::new(); diff --git a/lading_payload/src/dogstatsd.rs b/lading_payload/src/dogstatsd.rs index 68ba806d5..007b8620d 100644 --- a/lading_payload/src/dogstatsd.rs +++ b/lading_payload/src/dogstatsd.rs @@ -9,7 +9,7 @@ use tracing::{debug, warn}; use crate::{ Serialize, common::{ - config::{ConfRange, Probability}, + config::{AtLeastOneHundredth, ConfRange, Probability}, strings, strings::{random_strings_with_length, random_strings_with_length_range}, }, @@ -185,12 
+185,12 @@ pub struct Config { /// a 4-byte header that is a little-endian u32 representing the /// total length of the data block. pub length_prefix_framed: bool, - /// This is a ratio between 0.10 and 1.0 which determines how many - /// individual tags are unique vs re-used tags. - /// If this is 1, then every single tag will be unique. - /// If this is 0.10, then most of the tags (90%) will be re-used - /// from existing tags. - pub unique_tag_ratio: f32, + /// This is a ratio that determines how many individual tags are unique vs + /// re-used tags. If this is 1, then every single tag will be unique. If + /// this is 0.10, then most of the tags (90%) will be re-used from existing + /// tags. The type enforces the closed range `[0.01, 1.0]` at deserialize + /// time. + pub unique_tag_ratio: AtLeastOneHundredth, /// A list of possible metric names to generate pub metric_names: Vec, /// A list of possible tag names to generate @@ -278,7 +278,8 @@ impl Default for Config { value: ValueConf::default(), // This should be enabled for UDS-streams, but not for UDS-datagram nor UDP length_prefix_framed: false, - unique_tag_ratio: 0.11, + unique_tag_ratio: AtLeastOneHundredth::try_new(0.11) + .expect("0.11 is in [0.01, 1.0]"), metric_names: Vec::default(), tag_names: Vec::default(), tag_values: Vec::default(), @@ -425,7 +426,7 @@ impl MemberGenerator { kind_weights: KindWeights, metric_weights: MetricWeights, value_conf: ValueConf, - unique_tag_ratio: f32, + unique_tag_ratio: AtLeastOneHundredth, metric_names: &[String], tag_names: &[String], tag_values: &[String], diff --git a/lading_payload/src/dogstatsd/common/tags.rs b/lading_payload/src/dogstatsd/common/tags.rs index 88ce717a4..77122228d 100644 --- a/lading_payload/src/dogstatsd/common/tags.rs +++ b/lading_payload/src/dogstatsd/common/tags.rs @@ -1,6 +1,6 @@ //! 
Tag generation for dogstatsd payloads use crate::{ - common::{strings::PoolKind, tags}, + common::{config::AtLeastOneHundredth, strings::PoolKind, tags}, dogstatsd::ConfRange, }; use std::rc::Rc; @@ -29,7 +29,6 @@ impl Generator { /// # Errors /// - If `tags_per_msg` is invalid or exceeds the maximum /// - If `tag_length` is invalid or has minimum value less than 3 - /// - If `unique_tag_probability` is not between 0.10 and 1.0 pub(crate) fn new( seed: u64, tags_per_msg: ConfRange, @@ -37,7 +36,7 @@ impl Generator { num_tagsets: usize, key_pool: Rc, tag_pool: Rc, - unique_tag_probability: f32, + unique_tag_probability: AtLeastOneHundredth, ) -> Result { // Adjust tag_length range to account for the colon separator let adjusted_tag_length = ConfRange::Inclusive { @@ -86,6 +85,7 @@ mod test { use rand::{SeedableRng, rngs::SmallRng}; use crate::Generator; + use crate::common::config::AtLeastOneHundredth; use crate::common::strings::{Handle, PoolKind, RandomStringPool, StringListPool}; use crate::common::tags::{MAX_UNIQUE_TAG_RATIO, Tag, WARN_UNIQUE_TAG_RATIO}; use crate::dogstatsd::{ConfRange, tags}; @@ -178,7 +178,7 @@ mod test { let tag_size_range = ConfRange::Inclusive { min: 3, max: 128 }; let tag_pool = Rc::clone(&str_pool); let generator = - tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, 1.0) + tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]")) .expect("Tag generator to be valid"); let first_batch = (0..num_tagsets) @@ -224,7 +224,7 @@ mod test { .collect(); let tag_pool = Rc::new(PoolKind::StringListPool(StringListPool::new(&tag_list, 10_000).expect("valid patterns"))); let generator = - tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, 1.0) + tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, 
AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]")) .expect("Tag generator to be valid"); let first_batch = (0..num_tagsets) @@ -270,7 +270,7 @@ mod test { desired_num_tagsets, str_pool, tag_pool, - 1.0, + AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"), ) .expect("Tag generator to be valid"); @@ -310,7 +310,8 @@ mod test { desired_num_tagsets, str_pool, tag_pool, - unique_tag_ratio + AtLeastOneHundredth::try_new(unique_tag_ratio) + .expect("unique_tag_ratio drawn from [WARN, MAX), which is within [0.01, 1.0]"), ) .expect("Tag generator to be valid"); @@ -355,7 +356,7 @@ mod test { desired_num_tagsets, str_pool, tag_pool, - 1.0, + AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"), ) .expect("Tag generator to be valid"); @@ -404,7 +405,8 @@ mod test { desired_num_tagsets, str_pool, tag_pool, - unique_tag_ratio + AtLeastOneHundredth::try_new(unique_tag_ratio) + .expect("unique_tag_ratio drawn from [WARN, MAX), which is within [0.01, 1.0]"), ) .expect("Tag generator to be valid"); diff --git a/lading_payload/src/opentelemetry/common.rs b/lading_payload/src/opentelemetry/common.rs index e0d313c92..fc5a0b302 100644 --- a/lading_payload/src/opentelemetry/common.rs +++ b/lading_payload/src/opentelemetry/common.rs @@ -6,7 +6,10 @@ pub(crate) mod templates; use crate::{ Error, Generator, SizedGenerator, - common::{config::ConfRange, strings, tags}, + common::{ + config::{AtLeastOneHundredth, ConfRange}, + strings, tags, + }, }; use opentelemetry_proto::tonic::common::v1::{AnyValue, KeyValue, any_value}; use prost::Message; @@ -24,7 +27,11 @@ pub enum GeneratorError { } /// Ratio of unique tags to use in tag generation -pub(crate) const UNIQUE_TAG_RATIO: f32 = 0.75; +pub(crate) const UNIQUE_TAG_RATIO: AtLeastOneHundredth = + match AtLeastOneHundredth::try_new(0.75) { + Ok(p) => p, + Err(_) => unreachable!(), + }; /// Smallest useful `KeyValue` protobuf, determined by experimentation and enforced in tests pub(crate) const 
SMALLEST_KV_PROTOBUF: usize = 10; @@ -41,14 +48,13 @@ impl TagGenerator { /// # Errors /// - If `tags_per_msg` is invalid or exceeds the maximum /// - If `tag_length` is invalid or has minimum value less than 3 - /// - If `unique_tag_probability` is not between 0.10 and 1.0 pub(crate) fn new( seed: u64, tags_per_msg: ConfRange, tag_length: ConfRange, num_tagsets: usize, str_pool: &Rc, - unique_tag_probability: f32, + unique_tag_probability: AtLeastOneHundredth, ) -> Result { let str_pool_kind = Rc::new(strings::PoolKind::RandomStringPool((**str_pool).clone())); let tag_pool = Rc::clone(&str_pool_kind);