Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 18 additions & 23 deletions lading_payload/src/common/tags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ use rand::{SeedableRng, rngs::SmallRng};
use tracing::warn;

use crate::common::{
config::ConfRange,
config::{AtLeastOneHundredth, ConfRange},
strings::{Pool, PoolKind},
};

use super::strings::Handle;

pub(crate) const MIN_UNIQUE_TAG_RATIO: f32 = 0.01;
#[cfg(test)]
pub(crate) const MAX_UNIQUE_TAG_RATIO: f32 = 1.00;
pub(crate) const WARN_UNIQUE_TAG_RATIO: f32 = 0.10;
pub(crate) const MIN_TAG_LENGTH: u16 = 3;
Expand Down Expand Up @@ -120,13 +121,13 @@ impl TagGenerator {
/// produce only a limited, deterministic set of tags while avoiding needing to
/// allocate them all in one shot.
///
/// The `unique_tag_ratio` is a value between 0.10 and 1.0. It represents the
/// ratio of new tags to existing tags. If the value is 1.0, then all tags will
/// be new. If the value 0.0 were allowed, it would conceptually mean "always
/// use an existing tag", however this is a degenerate case as there would never
/// be a new tag generated. Therefore a minimum value is enforced. Despite this
/// minimum, if the configuration is overly restrictive, it may result in
/// non-unique tagsets.
/// The `unique_tag_ratio` is the probability that a generated tag is new
/// rather than a re-use of an existing tag. If the value is 1.0, then all tags
/// will be new. If the value 0.0 were allowed, it would conceptually mean
/// "always use an existing tag"; however, this is a degenerate case as there
/// would never be a new tag generated. The [`AtLeastOneHundredth`] type
/// enforces a closed range of `[0.01, 1.0]`. Despite this minimum, if the
/// configuration is overly restrictive, it may result in non-unique tagsets.
///
/// As an example:
/// `unique_tag_probability`: 0.10
Expand All @@ -147,7 +148,7 @@ pub(crate) struct Generator {
num_tagsets: usize, // Maximum number of unique tagsets that will ever be generated
tags_per_msg: ConfRange<u8>, // Maximum number of tags per individually generated tagset
tags: TagGenerator,
unique_tag_probability: f32,
unique_tag_probability: AtLeastOneHundredth,
tag_store: RefCell<TagStore>,
}

Expand All @@ -165,15 +166,14 @@ impl Generator {
/// # Errors
/// - If `tags_per_msg` is invalid or exceeds the maximum
/// - If `tag_length` is invalid or has minimum value less than 3
/// - If `unique_tag_probability` is not between 0.10 and 1.0
pub(crate) fn new(
seed: u64,
tags_per_msg: ConfRange<u8>,
tag_length: ConfRange<u16>,
num_tagsets: usize,
str_pool: Rc<PoolKind>,
tag_pool: Rc<PoolKind>,
unique_tag_probability: f32,
unique_tag_probability: AtLeastOneHundredth,
) -> Result<Self, Error> {
let (tag_length_valid, tag_length_valid_msg) = tag_length.valid();
if !tag_length_valid {
Expand All @@ -188,13 +188,7 @@ impl Generator {
)));
}

if !(MIN_UNIQUE_TAG_RATIO..=MAX_UNIQUE_TAG_RATIO).contains(&unique_tag_probability) {
return Err(Error::InvalidConstruction(format!(
"Unique tag ratio must be between {MIN_UNIQUE_TAG_RATIO} and {MAX_UNIQUE_TAG_RATIO}"
)));
}

if (MIN_UNIQUE_TAG_RATIO..=WARN_UNIQUE_TAG_RATIO).contains(&unique_tag_probability) {
if (MIN_UNIQUE_TAG_RATIO..=WARN_UNIQUE_TAG_RATIO).contains(&unique_tag_probability.get()) {
warn!(
"unique_tag_probability is less than {WARN_UNIQUE_TAG_RATIO}. This may result in non-unique tagsets"
);
Expand Down Expand Up @@ -274,7 +268,7 @@ impl<'a> crate::Generator<'a> for Generator {
// For remaining tags, decide whether to reuse existing tags or generate new ones
while tagset.len() < tags_count {
let choose_existing_prob: f32 = OpenClosed01.sample(&mut *rng);
let should_reuse = choose_existing_prob > self.unique_tag_probability;
let should_reuse = choose_existing_prob > self.unique_tag_probability.get();

if should_reuse && !tag_store.is_empty() {
// Reuse an existing tag
Expand Down Expand Up @@ -305,7 +299,7 @@ mod test {

use super::{MAX_UNIQUE_TAG_RATIO, MIN_TAG_LENGTH, WARN_UNIQUE_TAG_RATIO};
use crate::Generator;
use crate::common::config::ConfRange;
use crate::common::config::{AtLeastOneHundredth, ConfRange};
use crate::common::strings::{Handle, PoolKind, RandomStringPool};

proptest! {
Expand All @@ -331,7 +325,7 @@ mod test {
num_tagsets,
str_pool,
tag_pool,
1.0
AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"),
).expect("Tag generator to be valid");

for _ in 0..num_tagsets {
Expand Down Expand Up @@ -370,7 +364,7 @@ mod test {
num_tagsets,
str_pool,
tag_pool,
1.0
AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"),
).expect("Tag generator to be valid");

// First batch with fresh RNG
Expand Down Expand Up @@ -420,7 +414,8 @@ mod test {
desired_num_tagsets,
str_pool,
tag_pool,
unique_tag_ratio
AtLeastOneHundredth::try_new(unique_tag_ratio)
.expect("unique_tag_ratio drawn from [WARN, MAX], which is within [0.01, 1.0]"),
).expect("Tag generator to be valid");

let mut unique_tagsets: HashSet<Vec<(Handle, Handle)>> = HashSet::new();
Expand Down
19 changes: 10 additions & 9 deletions lading_payload/src/dogstatsd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
use crate::{
Serialize,
common::{
config::{ConfRange, Probability},
config::{AtLeastOneHundredth, ConfRange, Probability},
strings,
strings::{random_strings_with_length, random_strings_with_length_range},
},
Expand Down Expand Up @@ -185,12 +185,12 @@
/// a 4-byte header that is a little-endian u32 representing the
/// total length of the data block.
pub length_prefix_framed: bool,
/// This is a ratio between 0.10 and 1.0 which determines how many
/// individual tags are unique vs re-used tags.
/// If this is 1, then every single tag will be unique.
/// If this is 0.10, then most of the tags (90%) will be re-used
/// from existing tags.
pub unique_tag_ratio: f32,
/// This is a ratio that determines how many individual tags are unique vs
/// re-used tags. If this is 1, then every single tag will be unique. If
/// this is 0.10, then most of the tags (90%) will be re-used from existing
/// tags. The type enforces the closed range `[0.01, 1.0]` at deserialization
/// time.
pub unique_tag_ratio: AtLeastOneHundredth,
/// A list of possible metric names to generate
pub metric_names: Vec<String>,
/// A list of possible tag names to generate
Expand Down Expand Up @@ -275,10 +275,11 @@
sampling_probability: Probability::try_new(0.5).expect("0.5 is in [0.0, 1.0]"),
kind_weights: KindWeights::default(),
metric_weights: MetricWeights::default(),
value: ValueConf::default(),

Check warning on line 278 in lading_payload/src/dogstatsd.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (ubuntu-latest, fmt)

Diff in /home/runner/work/lading/lading/lading_payload/src/dogstatsd.rs

Check warning on line 278 in lading_payload/src/dogstatsd.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (ubuntu-latest, fmt)

Diff in /home/runner/work/lading/lading/lading_payload/src/dogstatsd.rs

Check warning on line 278 in lading_payload/src/dogstatsd.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (macos-latest, fmt)

Diff in /Users/runner/work/lading/lading/lading_payload/src/dogstatsd.rs

Check warning on line 278 in lading_payload/src/dogstatsd.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (macos-latest, fmt)

Diff in /Users/runner/work/lading/lading/lading_payload/src/dogstatsd.rs
// This should be enabled for UDS-streams, but not for UDS-datagram nor UDP
length_prefix_framed: false,
unique_tag_ratio: 0.11,
unique_tag_ratio: AtLeastOneHundredth::try_new(0.11)
.expect("0.11 is in [0.01, 1.0]"),
metric_names: Vec::default(),
tag_names: Vec::default(),
tag_values: Vec::default(),
Expand Down Expand Up @@ -425,7 +426,7 @@
kind_weights: KindWeights,
metric_weights: MetricWeights,
value_conf: ValueConf,
unique_tag_ratio: f32,
unique_tag_ratio: AtLeastOneHundredth,
metric_names: &[String],
tag_names: &[String],
tag_values: &[String],
Expand Down
20 changes: 11 additions & 9 deletions lading_payload/src/dogstatsd/common/tags.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Tag generation for dogstatsd payloads
use crate::{
common::{strings::PoolKind, tags},
common::{config::AtLeastOneHundredth, strings::PoolKind, tags},
dogstatsd::ConfRange,
};
use std::rc::Rc;
Expand Down Expand Up @@ -29,15 +29,14 @@ impl Generator {
/// # Errors
/// - If `tags_per_msg` is invalid or exceeds the maximum
/// - If `tag_length` is invalid or has minimum value less than 3
/// - If `unique_tag_probability` is not between 0.10 and 1.0
pub(crate) fn new(
seed: u64,
tags_per_msg: ConfRange<u8>,
tag_length: ConfRange<u16>,
num_tagsets: usize,
key_pool: Rc<PoolKind>,
tag_pool: Rc<PoolKind>,
unique_tag_probability: f32,
unique_tag_probability: AtLeastOneHundredth,
) -> Result<Self, Error> {
// Adjust tag_length range to account for the colon separator
let adjusted_tag_length = ConfRange::Inclusive {
Expand Down Expand Up @@ -86,6 +85,7 @@ mod test {
use rand::{SeedableRng, rngs::SmallRng};

use crate::Generator;
use crate::common::config::AtLeastOneHundredth;
use crate::common::strings::{Handle, PoolKind, RandomStringPool, StringListPool};
use crate::common::tags::{MAX_UNIQUE_TAG_RATIO, Tag, WARN_UNIQUE_TAG_RATIO};
use crate::dogstatsd::{ConfRange, tags};
Expand Down Expand Up @@ -178,7 +178,7 @@ mod test {
let tag_size_range = ConfRange::Inclusive { min: 3, max: 128 };
let tag_pool = Rc::clone(&str_pool);
let generator =
tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, 1.0)
tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"))
.expect("Tag generator to be valid");

let first_batch = (0..num_tagsets)
Expand Down Expand Up @@ -224,7 +224,7 @@ mod test {
.collect();
let tag_pool = Rc::new(PoolKind::StringListPool(StringListPool::new(&tag_list, 10_000).expect("valid patterns")));
let generator =
tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, 1.0)
tags::Generator::new(seed, tags_per_msg_range, tag_size_range, num_tagsets, str_pool, tag_pool, AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"))
.expect("Tag generator to be valid");

let first_batch = (0..num_tagsets)
Expand Down Expand Up @@ -270,7 +270,7 @@ mod test {
desired_num_tagsets,
str_pool,
tag_pool,
1.0,
AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"),
)
.expect("Tag generator to be valid");

Expand Down Expand Up @@ -310,7 +310,8 @@ mod test {
desired_num_tagsets,
str_pool,
tag_pool,
unique_tag_ratio
AtLeastOneHundredth::try_new(unique_tag_ratio)
.expect("unique_tag_ratio drawn from [WARN, MAX), which is within [0.01, 1.0]"),
)
.expect("Tag generator to be valid");

Expand Down Expand Up @@ -355,7 +356,7 @@ mod test {
desired_num_tagsets,
str_pool,
tag_pool,
1.0,
AtLeastOneHundredth::try_new(1.0).expect("1.0 is in [0.01, 1.0]"),
)
.expect("Tag generator to be valid");

Expand Down Expand Up @@ -404,7 +405,8 @@ mod test {
desired_num_tagsets,
str_pool,
tag_pool,
unique_tag_ratio
AtLeastOneHundredth::try_new(unique_tag_ratio)
.expect("unique_tag_ratio drawn from [WARN, MAX), which is within [0.01, 1.0]"),
)
.expect("Tag generator to be valid");

Expand Down
14 changes: 10 additions & 4 deletions lading_payload/src/opentelemetry/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

use crate::{
Error, Generator, SizedGenerator,
common::{config::ConfRange, strings, tags},
common::{
config::{AtLeastOneHundredth, ConfRange},
strings, tags,
},
};
use opentelemetry_proto::tonic::common::v1::{AnyValue, KeyValue, any_value};
use prost::Message;
Expand All @@ -21,10 +24,14 @@
/// Failed to generate string
#[error("Failed to generate string")]
StringGenerate,
}

Check warning on line 27 in lading_payload/src/opentelemetry/common.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (ubuntu-latest, fmt)

Diff in /home/runner/work/lading/lading/lading_payload/src/opentelemetry/common.rs

Check warning on line 27 in lading_payload/src/opentelemetry/common.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (ubuntu-latest, fmt)

Diff in /home/runner/work/lading/lading/lading_payload/src/opentelemetry/common.rs

Check warning on line 27 in lading_payload/src/opentelemetry/common.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (macos-latest, fmt)

Diff in /Users/runner/work/lading/lading/lading_payload/src/opentelemetry/common.rs

Check warning on line 27 in lading_payload/src/opentelemetry/common.rs

View workflow job for this annotation

GitHub Actions / Rust Actions (Check/Fmt/Clippy) (macos-latest, fmt)

Diff in /Users/runner/work/lading/lading/lading_payload/src/opentelemetry/common.rs

/// Ratio of unique tags to use in tag generation
pub(crate) const UNIQUE_TAG_RATIO: f32 = 0.75;
pub(crate) const UNIQUE_TAG_RATIO: AtLeastOneHundredth =
match AtLeastOneHundredth::try_new(0.75) {
Ok(p) => p,
// 0.75 lies within the `[0.01, 1.0]` range the type accepts, so construction
// cannot fail; this initializer is evaluated in a const context.
Err(_) => unreachable!(),
};

/// Smallest useful `KeyValue` protobuf, determined by experimentation and enforced in tests
pub(crate) const SMALLEST_KV_PROTOBUF: usize = 10;
Expand All @@ -41,14 +48,13 @@
/// # Errors
/// - If `tags_per_msg` is invalid or exceeds the maximum
/// - If `tag_length` is invalid or has minimum value less than 3
/// - If `unique_tag_probability` is not between 0.10 and 1.0
pub(crate) fn new(
seed: u64,
tags_per_msg: ConfRange<u8>,
tag_length: ConfRange<u16>,
num_tagsets: usize,
str_pool: &Rc<strings::RandomStringPool>,
unique_tag_probability: f32,
unique_tag_probability: AtLeastOneHundredth,
) -> Result<Self, Error> {
let str_pool_kind = Rc::new(strings::PoolKind::RandomStringPool((**str_pool).clone()));
let tag_pool = Rc::clone(&str_pool_kind);
Expand Down
Loading