From ab44cfa5731e566b91dd76da0e2af7ab263e0344 Mon Sep 17 00:00:00 2001 From: Luke Steensen Date: Wed, 17 Jun 2026 18:16:18 -0500 Subject: [PATCH 1/5] test(codecs): avoid mutating global log schema in syslog tests --- lib/codecs/src/decoding/format/syslog.rs | 17 +---------------- lib/codecs/src/encoding/format/syslog.rs | 6 +++--- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/lib/codecs/src/decoding/format/syslog.rs b/lib/codecs/src/decoding/format/syslog.rs index 4d473ed6235bf..29026f9fd0bf2 100644 --- a/lib/codecs/src/decoding/format/syslog.rs +++ b/lib/codecs/src/decoding/format/syslog.rs @@ -480,14 +480,12 @@ fn insert_fields_from_syslog( #[cfg(test)] mod tests { - use vector_core::config::{LogSchema, init_log_schema, log_schema}; + use vector_core::config::log_schema; use super::*; #[test] fn deserialize_syslog_legacy_namespace() { - init(); - let input = Bytes::from("<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - MSG"); let deserializer = SyslogDeserializer::default(); @@ -505,8 +503,6 @@ mod tests { #[test] fn deserialize_syslog_vector_namespace() { - init(); - let input = Bytes::from("<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - MSG"); let deserializer = SyslogDeserializer::default(); @@ -516,15 +512,4 @@ mod tests { assert_eq!(events[0].as_log()["message"], "MSG".into()); assert!(events[0].as_log()["timestamp"].is_timestamp()); } - - fn init() { - let mut schema = LogSchema::default(); - schema.set_message_key(Some(OwnedTargetPath::event(owned_value_path!( - "legacy_message" - )))); - schema.set_message_key(Some(OwnedTargetPath::event(owned_value_path!( - "legacy_timestamp" - )))); - init_log_schema(schema, false); - } } diff --git a/lib/codecs/src/encoding/format/syslog.rs b/lib/codecs/src/encoding/format/syslog.rs index 188ecb0d4cb52..77a85b7644252 100644 --- a/lib/codecs/src/encoding/format/syslog.rs +++ b/lib/codecs/src/encoding/format/syslog.rs @@ -612,7 +612,7 @@ mod tests { use bytes::BytesMut; use chrono::NaiveDate; use std::sync::Arc; - use vector_core::config::LogNamespace; + use vector_core::config::{LogNamespace, log_schema}; use vector_core::event::Event::Metric; use vector_core::event::{Event, MetricKind, MetricValue, StatisticKind}; use vrl::path::parse_target_path; @@ -765,7 +765,7 @@ mod tests { let mut log = create_simple_log(); log.insert( - event_path!("message"), + log_schema().message_key_target_path().unwrap(), "A\nB\tC, Привіт D, E\u{0007}F", //newline, tab, unicode ); @@ -889,7 +889,7 @@ mod tests { .unwrap(); let mut log = create_simple_log(); - log.insert(event_path!("message"), ""); + log.insert(log_schema().message_key_target_path().unwrap(), ""); log.insert(event_path!("structured_data"), value!({})); let output = run_encode(config, Event::Log(log)); From fc27be0c045ff85012ddf08432efd55552dadb8d Mon Sep 17 00:00:00 2001 From: Luke Steensen Date: Wed, 17 Jun 2026 18:16:28 -0500 Subject: [PATCH 2/5] test(vector-core): keep empty DDSketch fixtures canonical --- lib/vector-core/src/event/arbitrary_impl.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/vector-core/src/event/arbitrary_impl.rs b/lib/vector-core/src/event/arbitrary_impl.rs index fe02b846aa9b2..daff5aafee8bc 100644 --- a/lib/vector-core/src/event/arbitrary_impl.rs +++ b/lib/vector-core/src/event/arbitrary_impl.rs @@ -243,7 +243,9 @@ impl Arbitrary for MetricValue { let mut sketch = AgentDDSketch::with_agent_defaults(); sketch.insert_many(&samples); #[cfg(feature = "generate-fixtures")] - sketch.set_sum_avg(f64_for_arbitrary(g), f64_for_arbitrary(g)); + if !sketch.is_empty() { + sketch.set_sum_avg(f64_for_arbitrary(g), f64_for_arbitrary(g)); + } MetricValue::Sketch { sketch: MetricSketch::AgentDDSketch(sketch), From 16effac21eceeb8496245ce9ffad363e9d7f269e Mon Sep 17 00:00:00 2001 From: Luke Steensen Date: Wed, 17 Jun 2026 18:16:37 -0500 Subject: [PATCH 3/5] refactor: adapt Vector to VRL ObjectMap --- Cargo.lock | 8 ++++- Cargo.toml | 2 +- benches/codecs/encoder.rs | 5 +-- lib/codecs/src/encoding/format/avro.rs | 6 ++-- lib/codecs/src/encoding/format/cef.rs | 5 +-- lib/codecs/src/encoding/format/csv.rs | 12 +++---- lib/codecs/src/encoding/format/gelf.rs | 32 +++++++++---------- lib/codecs/src/encoding/format/json.rs | 18 +++++++---- lib/codecs/src/encoding/format/logfmt.rs | 5 +-- lib/codecs/src/encoding/format/native_json.rs | 9 +++--- lib/codecs/src/encoding/transformer.rs | 3 +- lib/opentelemetry-proto/src/spans.rs | 6 ++-- lib/vector-common/src/byte_size_of.rs | 8 ++++- lib/vector-core/src/event/arbitrary_impl.rs | 12 +++++-- .../event/estimated_json_encoded_size_of.rs | 19 ++++++++++- .../src/event/util/log/all_fields.rs | 5 +-- lib/vector-core/src/event/vrl_target.rs | 13 ++++---- lib/vector-core/src/schema/definition.rs | 14 ++++---- .../metrics/src/aggregate_vector_metrics.rs | 2 +- lib/vector-vrl/metrics/src/common.rs | 4 +-- .../metrics/src/find_vector_metrics.rs | 2 +- .../metrics/src/get_vector_metric.rs | 2 +- lib/vector-vrl/tests/src/docs.rs | 2 +- lib/vector-vrl/web-playground/src/lib.rs | 2 +- src/config/unit_test/mod.rs | 2 +- src/sinks/datadog/logs/sink.rs | 4 +-- src/sinks/elasticsearch/config.rs | 4 +-- src/sinks/util/encoding.rs | 32 +++++++++---------- src/sources/fluent/message.rs | 4 +-- src/sources/logstash.rs | 4 +-- src/sources/socket/mod.rs | 6 ++-- src/sources/util/http/headers.rs | 10 +++--- src/transforms/exclusive_route/tests.rs | 5 +-- src/transforms/metric_to_log.rs | 2 +- src/transforms/trace_to_log.rs | 5 +-- 35 files changed, 161 insertions(+), 113 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4041c9a1f47e..991e8995558c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3788,6 +3788,12 @@ dependencies = [ "spki", ] +[[package]] +name = "ecow" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78e4f79b296fbaab6ce2e22d52cb4c7f010fe0ebe7a32e34fa25885fd797bd02" + [[package]] name = "ed25519" version = "2.2.3" @@ -13475,7 +13481,6 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "vrl" version = "0.33.1" -source = "git+https://github.com/vectordotdev/vrl.git?branch=main#74e3e74582aa6b4445c1a94004bb3f14276d07df" dependencies = [ "aes", "aes-siv", @@ -13505,6 +13510,7 @@ dependencies = [ "dns-lookup", "domain", "dyn-clone", + "ecow", "encoding_rs", "exitcode", "fancy-regex", diff --git a/Cargo.toml b/Cargo.toml index 324843447172d..06edeedeb6684 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -225,7 +225,7 @@ vector-common-macros = { path = "lib/vector-common-macros" } vector-lib = { path = "lib/vector-lib", default-features = false, features = ["vrl"] } vector-vrl-category = { path = "lib/vector-vrl/category" } vector-vrl-functions = { path = "lib/vector-vrl/functions", default-features = false } -vrl = { git = "https://github.com/vectordotdev/vrl.git", branch = "main", default-features = false, features = ["arbitrary", "cli", "test", "test_framework", "stdlib-base"] } +vrl = { path = "../vrl", default-features = false, features = ["arbitrary", "cli", "test", "test_framework", "stdlib-base"] } mock_instant = { version = "0.6" } serial_test = { version = "3.4" } strum = { version = "0.28", features = ["derive"] } diff --git a/benches/codecs/encoder.rs b/benches/codecs/encoder.rs index 7826e8fc714cb..3af445ab75a87 100644 --- a/benches/codecs/encoder.rs +++ b/benches/codecs/encoder.rs @@ -12,6 +12,7 @@ use vector_lib::{ byte_size_of::ByteSizeOf, codecs::{JsonSerializerConfig, NewlineDelimitedEncoder, encoding::Framer}, }; +use vrl::value::ObjectMap; #[derive(Debug, Clone)] pub struct JsonLogSerializer; @@ -45,11 +46,11 @@ fn encoder(c: &mut Criterion) { let mut group: BenchmarkGroup = c.benchmark_group("encoder"); group.sampling_mode(SamplingMode::Auto); - let input: Event = Event::Log(LogEvent::from(btreemap! { + let input: Event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "key1" => "value1", "key2" => "value2", "key3" => "value3" - })); + }))); group.throughput(Throughput::Bytes(input.size_of() as u64)); group.bench_with_input("JsonLogVecSerializer::encode", &(), |b, ()| { diff --git a/lib/codecs/src/encoding/format/avro.rs b/lib/codecs/src/encoding/format/avro.rs index 304dcc2e7cc90..49bf1f41963f3 100644 --- a/lib/codecs/src/encoding/format/avro.rs +++ b/lib/codecs/src/encoding/format/avro.rs @@ -83,15 +83,15 @@ mod tests { use bytes::BytesMut; use indoc::indoc; use vector_core::event::{LogEvent, Value}; - use vrl::btreemap; + use vrl::{btreemap, value::ObjectMap}; use super::*; #[test] fn serialize_avro() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "foo" => Value::from("bar") - })); + }))); let schema = indoc! {r#" { "type": "record", diff --git a/lib/codecs/src/encoding/format/cef.rs b/lib/codecs/src/encoding/format/cef.rs index 9ecc95821a492..bd7a3c3b17c13 100644 --- a/lib/codecs/src/encoding/format/cef.rs +++ b/lib/codecs/src/encoding/format/cef.rs @@ -392,6 +392,7 @@ mod tests { use ordered_float::NotNan; use vector_common::btreemap; use vector_core::event::{Event, LogEvent, Value}; + use vrl::value::ObjectMap; use super::*; @@ -470,7 +471,7 @@ mod tests { #[test] fn serialize_extensions() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "cef" => Value::from(btreemap! { "severity" => Value::from(1), "name" => Value::from("Event name"), @@ -484,7 +485,7 @@ mod tests { "quote" => Value::from("the \"quote\" should be escaped"), "bool" => Value::from(true), "other" => Value::from("data"), - })); + }))); let extensions = HashMap::from([ ( diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index d967b0d33a567..c2f84138731b9 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -341,7 +341,7 @@ mod tests { #[test] fn serialize_fields() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "foo" => Value::from("bar"), "int" => Value::from(123), "comma" => Value::from("abc,bcd"), @@ -351,7 +351,7 @@ mod tests { "quote" => Value::from("the \"quote\" should be escaped"), "bool" => Value::from(true), "other" => Value::from("data"), - })); + }))); let fields = vec![ "foo".into(), "int".into(), @@ -382,13 +382,13 @@ mod tests { #[test] fn serialize_order() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "field1" => Value::from("value1"), "field2" => Value::from("value2"), "field3" => Value::from("value3"), "field4" => Value::from("value4"), "field5" => Value::from("value5"), - })); + }))); let fields = vec![ "field1".into(), "field5".into(), @@ -414,12 +414,12 @@ mod tests { #[test] fn correct_quoting() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "field1" => Value::from("hello world"), "field2" => Value::from(1), "field3" => Value::from("foo\"bar"), "field4" => Value::from("baz,bas"), - })); + }))); let fields = vec![ "field1".into(), "field2".into(), diff --git a/lib/codecs/src/encoding/format/gelf.rs b/lib/codecs/src/encoding/format/gelf.rs index 966126df4f895..4c83187bd341c 100644 --- a/lib/codecs/src/encoding/format/gelf.rs +++ b/lib/codecs/src/encoding/format/gelf.rs @@ -281,7 +281,7 @@ mod tests { use chrono::NaiveDateTime; use vector_core::event::{Event, EventMetadata}; use vrl::{ - btreemap, + objectmap, value::{ObjectMap, Value}, }; @@ -310,7 +310,7 @@ mod tests { fn gelf_serde_json_to_value_supported_success() { let serializer = SerializerConfig::Gelf(Default::default()).build().unwrap(); - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", HOST => "example.org", SHORT_MESSAGE => "Some message", @@ -324,7 +324,7 @@ mod tests { #[test] fn gelf_serde_json_to_value_supported_failure_to_encode() { let serializer = SerializerConfig::Gelf(Default::default()).build().unwrap(); - let event_fields = btreemap! {}; + let event_fields = objectmap! {}; let log_event: Event = LogEvent::from_map(event_fields, EventMetadata::default()).into(); assert!(serializer.supports_json()); assert!(serializer.to_json_value(log_event).is_err()); @@ -332,7 +332,7 @@ mod tests { #[test] fn gelf_serializing_valid() { - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", HOST => "example.org", SHORT_MESSAGE => "Some message", @@ -354,7 +354,7 @@ mod tests { fn gelf_serializing_coerced() { // no underscore { - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", HOST => "example.org", SHORT_MESSAGE => "Some message", @@ -367,7 +367,7 @@ mod tests { // "message" => SHORT_MESSAGE { - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", HOST => "example.org", log_schema().message_key().unwrap().to_string() => "Some message", @@ -386,7 +386,7 @@ mod tests { NaiveDateTime::parse_from_str("1970-01-01 00:00:00.1", "%Y-%m-%d %H:%M:%S%.f"); let dt = naive_dt.unwrap().and_utc(); - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", SHORT_MESSAGE => "Some message", HOST => "example.org", @@ -404,7 +404,7 @@ mod tests { NaiveDateTime::parse_from_str("1970-01-01 00:00:00.0", "%Y-%m-%d %H:%M:%S%.f"); let dt = naive_dt.unwrap().and_utc(); - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", SHORT_MESSAGE => "Some message", HOST => "example.org", @@ -421,7 +421,7 @@ mod tests { fn gelf_serializing_invalid_error() { // no host { - let event_fields = btreemap! { + let event_fields = objectmap! { VERSION => "1.1", SHORT_MESSAGE => "Some message", }; @@ -429,7 +429,7 @@ mod tests { } // no message { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", }; @@ -437,7 +437,7 @@ mod tests { } // expected string { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", SHORT_MESSAGE => 0, @@ -446,7 +446,7 @@ mod tests { } // expected integer { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", SHORT_MESSAGE => "Some message", @@ -456,7 +456,7 @@ mod tests { } // expected float { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", SHORT_MESSAGE => "Some message", @@ -466,7 +466,7 @@ mod tests { } // invalid field name { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", SHORT_MESSAGE => "Some message", @@ -476,7 +476,7 @@ mod tests { } // invalid additional value type - bool { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", SHORT_MESSAGE => "Some message", @@ -486,7 +486,7 @@ mod tests { } // invalid additional value type - null { - let event_fields = btreemap! { + let event_fields = objectmap! { HOST => "example.org", VERSION => "1.1", SHORT_MESSAGE => "Some message", diff --git a/lib/codecs/src/encoding/format/json.rs b/lib/codecs/src/encoding/format/json.rs index 44d2ed52deaf8..47da29f625f62 100644 --- a/lib/codecs/src/encoding/format/json.rs +++ b/lib/codecs/src/encoding/format/json.rs @@ -125,16 +125,17 @@ mod tests { metric_tags, }; use vrl::btreemap; + use vrl::value::ObjectMap; use super::*; #[test] fn serialize_json_log() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "x" => Value::from("23"), "z" => Value::from(25), "a" => Value::from("0"), - })); + }))); let bytes = serialize(JsonSerializerConfig::default(), event); assert_eq!(bytes, r#"{"a":"0","x":"23","z":25}"#); @@ -209,9 +210,9 @@ mod tests { #[test] fn serialize_equals_to_json_value() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "foo" => Value::from("bar") - })); + }))); let mut serializer = JsonSerializerConfig::default().build(); let mut bytes = BytesMut::new(); @@ -283,6 +284,7 @@ mod tests { metric_tags, }; use vrl::btreemap; + use vrl::value::ObjectMap; use super::*; @@ -295,9 +297,9 @@ mod tests { #[test] fn serialize_json_log() { - let event = Event::Log(LogEvent::from( + let event = Event::Log(LogEvent::from(ObjectMap::from( btreemap! {"x" => Value::from("23"),"z" => Value::from(25),"a" => Value::from("0"),}, - )); + ))); let bytes = serialize(get_pretty_json_config(), event); assert_eq!( bytes, @@ -399,7 +401,9 @@ mod tests { } #[test] fn serialize_equals_to_json_value() { - let event = Event::Log(LogEvent::from(btreemap! {"foo" => Value::from("bar")})); + let event = Event::Log(LogEvent::from(ObjectMap::from( + btreemap! {"foo" => Value::from("bar")}, + ))); let mut serializer = get_pretty_json_config().build(); let mut bytes = BytesMut::new(); serializer.encode(event.clone(), &mut bytes).unwrap(); diff --git a/lib/codecs/src/encoding/format/logfmt.rs b/lib/codecs/src/encoding/format/logfmt.rs index 972f19eaf35e0..d8db22c270e22 100644 --- a/lib/codecs/src/encoding/format/logfmt.rs +++ b/lib/codecs/src/encoding/format/logfmt.rs @@ -53,14 +53,15 @@ mod tests { use bytes::BytesMut; use vector_core::event::{LogEvent, Value}; use vrl::btreemap; + use vrl::value::ObjectMap; use super::*; #[test] fn serialize_logfmt() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "foo" => Value::from("bar") - })); + }))); let mut serializer = LogfmtSerializer; let mut bytes = BytesMut::new(); diff --git a/lib/codecs/src/encoding/format/native_json.rs b/lib/codecs/src/encoding/format/native_json.rs index d224fdb20df87..42414c161e5bc 100644 --- a/lib/codecs/src/encoding/format/native_json.rs +++ b/lib/codecs/src/encoding/format/native_json.rs @@ -52,14 +52,15 @@ mod tests { event::{LogEvent, Metric, MetricKind, MetricValue, Value}, }; use vrl::btreemap; + use vrl::value::ObjectMap; use super::*; #[test] fn serialize_json() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "foo" => Value::from("bar") - })); + }))); let mut serializer = NativeJsonSerializer; let mut bytes = BytesMut::new(); @@ -70,9 +71,9 @@ mod tests { #[test] fn serialize_equals_to_json_value() { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "foo" => Value::from("bar") - })); + }))); let mut serializer = NativeJsonSerializer; let mut bytes = BytesMut::new(); diff --git a/lib/codecs/src/encoding/transformer.rs b/lib/codecs/src/encoding/transformer.rs index 2fb84f00e6fd1..2e660c515e05d 100644 --- a/lib/codecs/src/encoding/transformer.rs +++ b/lib/codecs/src/encoding/transformer.rs @@ -126,7 +126,8 @@ impl Transformer { fn apply_only_fields(&self, log: &mut LogEvent) { if let Some(only_fields) = self.only_fields.as_ref() { - let mut old_value = std::mem::replace(log.value_mut(), Value::Object(BTreeMap::new())); + let mut old_value = + std::mem::replace(log.value_mut(), Value::Object(BTreeMap::new().into())); for field in only_fields { if let Some(value) = old_value.remove(field, true) { diff --git a/lib/opentelemetry-proto/src/spans.rs b/lib/opentelemetry-proto/src/spans.rs index 40644138fe9dc..c6de4d87661b0 100644 --- a/lib/opentelemetry-proto/src/spans.rs +++ b/lib/opentelemetry-proto/src/spans.rs @@ -130,7 +130,7 @@ impl From for Value { "dropped_attributes_count".into(), Value::Integer(ev.dropped_attributes_count as i64), ); - Value::Object(obj) + Value::Object(obj.into()) } } @@ -145,7 +145,7 @@ impl From for Value { "dropped_attributes_count".into(), Value::Integer(link.dropped_attributes_count as i64), ); - Value::Object(obj) + Value::Object(obj.into()) } } @@ -154,6 +154,6 @@ impl From for Value { let mut obj: BTreeMap = BTreeMap::new(); obj.insert("message".into(), status.message.into()); obj.insert("code".into(), status.code.into()); - Value::Object(obj) + Value::Object(obj.into()) } } diff --git a/lib/vector-common/src/byte_size_of.rs b/lib/vector-common/src/byte_size_of.rs index aed57b8ff590b..5a3fb86f95b7d 100644 --- a/lib/vector-common/src/byte_size_of.rs +++ b/lib/vector-common/src/byte_size_of.rs @@ -7,7 +7,7 @@ use bytes::{Bytes, BytesMut}; use chrono::{DateTime, Utc}; use serde_json::{Value as JsonValue, value::RawValue}; use smallvec::SmallVec; -use vrl::value::{KeyString, Value}; +use vrl::value::{KeyString, ObjectMap, Value}; pub trait ByteSizeOf { /// Returns the in-memory size of this type @@ -179,6 +179,12 @@ impl ByteSizeOf for JsonValue { } } +impl ByteSizeOf for ObjectMap { + fn allocated_bytes(&self) -> usize { + self.iter().map(|(k, v)| k.size_of() + v.size_of()).sum() + } +} + impl ByteSizeOf for Value { fn allocated_bytes(&self) -> usize { match self { diff --git a/lib/vector-core/src/event/arbitrary_impl.rs b/lib/vector-core/src/event/arbitrary_impl.rs index daff5aafee8bc..b6367e1fb18e5 100644 --- a/lib/vector-core/src/event/arbitrary_impl.rs +++ b/lib/vector-core/src/event/arbitrary_impl.rs @@ -101,11 +101,14 @@ impl Arbitrary for Event { impl Arbitrary for LogEvent { fn arbitrary(g: &mut Gen) -> Self { + use std::collections::BTreeMap; + use vrl::value::KeyString; + #[cfg(feature = "generate-fixtures")] let mut generator = Gen::from_size_and_seed(MAX_MAP_SIZE, u64::arbitrary(g)); #[cfg(not(feature = "generate-fixtures"))] let mut generator = Gen::new(MAX_MAP_SIZE); - let map: ObjectMap = ObjectMap::arbitrary(&mut generator); + let map: ObjectMap = BTreeMap::::arbitrary(&mut generator).into(); let metadata: EventMetadata = EventMetadata::arbitrary(g); LogEvent::from_map(map, metadata) } @@ -127,12 +130,15 @@ impl Arbitrary for TraceEvent { } fn shrink(&self) -> Box> { + use std::collections::BTreeMap; + use vrl::value::KeyString; let (fields, metadata) = self.clone().into_parts(); + let fields_btree: BTreeMap = fields.into_iter().collect(); Box::new( - fields + fields_btree .shrink() - .map(move |x| TraceEvent::from_parts(x, metadata.clone())), + .map(move |x| TraceEvent::from_parts(x.into(), metadata.clone())), ) } } diff --git a/lib/vector-core/src/event/estimated_json_encoded_size_of.rs b/lib/vector-core/src/event/estimated_json_encoded_size_of.rs index c6e8c17bc95b2..df39be260c564 100644 --- a/lib/vector-core/src/event/estimated_json_encoded_size_of.rs +++ b/lib/vector-core/src/event/estimated_json_encoded_size_of.rs @@ -5,7 +5,7 @@ use chrono::{DateTime, Timelike, Utc}; use ordered_float::NotNan; use smallvec::SmallVec; use vector_common::json_size::JsonSize; -use vrl::value::{KeyString, Value}; +use vrl::value::{KeyString, ObjectMap, Value}; const NULL_SIZE: JsonSize = JsonSize::new(4); const TRUE_SIZE: JsonSize = JsonSize::new(4); @@ -183,6 +183,23 @@ where } } +impl EstimatedJsonEncodedSizeOf for ObjectMap { + fn estimated_json_encoded_size_of(&self) -> JsonSize { + let size = self.iter().fold(BRACES_SIZE, |acc, (k, v)| { + acc + k.as_ref().estimated_json_encoded_size_of().get() + + COLON_SIZE + + v.estimated_json_encoded_size_of().get() + + COMMA_SIZE + }); + + JsonSize::new(if size > BRACES_SIZE { + size - COMMA_SIZE + } else { + size + }) + } +} + impl EstimatedJsonEncodedSizeOf for Vec where V: EstimatedJsonEncodedSizeOf, diff --git a/lib/vector-core/src/event/util/log/all_fields.rs b/lib/vector-core/src/event/util/log/all_fields.rs index 2e1511c124e42..90a50d9c0e87c 100644 --- a/lib/vector-core/src/event/util/log/all_fields.rs +++ b/lib/vector-core/src/event/util/log/all_fields.rs @@ -1,10 +1,11 @@ -use std::{collections::btree_map, fmt::Write as _, iter, slice, sync::LazyLock}; +use std::{fmt::Write as _, iter, slice, sync::LazyLock}; use regex::Regex; use serde::{Serialize, Serializer}; use vrl::path::PathPrefix; use crate::event::{KeyString, ObjectMap, Value}; +use vrl::value::ObjectMapIter; static IS_VALID_PATH_SEGMENT: LazyLock = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9_]+$").unwrap()); @@ -41,7 +42,7 @@ pub fn all_fields_skip_array_elements(fields: &ObjectMap) -> FieldsIter<'_> { #[derive(Clone, Debug)] enum LeafIter<'a> { Root((&'a Value, bool)), - Map(btree_map::Iter<'a, KeyString, Value>), + Map(ObjectMapIter<'a>), Array(iter::Enumerate>), } diff --git a/lib/vector-core/src/event/vrl_target.rs b/lib/vector-core/src/event/vrl_target.rs index 64e81b2281331..9ba4ecfd9f557 100644 --- a/lib/vector-core/src/event/vrl_target.rs +++ b/lib/vector-core/src/event/vrl_target.rs @@ -823,7 +823,7 @@ mod test { ]; for (value, path, expect) in cases { - let value: ObjectMap = value; + let value: ObjectMap = value.into(); let info = ProgramInfo { fallible: false, abortable: false, @@ -927,7 +927,7 @@ mod test { ]; for (object, path, value, expect, result) in cases { - let object: ObjectMap = object; + let object: ObjectMap = object.into(); let info = ProgramInfo { fallible: false, abortable: false, @@ -935,7 +935,7 @@ mod test { target_assignments: vec![], }; let mut target = VrlTarget::new(Event::Log(LogEvent::from(object)), &info, false); - let expect = LogEvent::from(expect); + let expect = LogEvent::from(ObjectMap::from(expect)); let value: Value = value; let path = OwnedTargetPath::event(path); @@ -1027,6 +1027,7 @@ mod test { ]; for (object, path, compact, expect) in cases { + let object: ObjectMap = object.into(); let info = ProgramInfo { fallible: false, abortable: false, @@ -1101,7 +1102,7 @@ mod test { }, expect .into_iter() - .map(|v| Event::Log(LogEvent::from_map(v, metadata.clone()))) + .map(|v| Event::Log(LogEvent::from_map(v.into(), metadata.clone()))) .collect::>() ); } @@ -1278,7 +1279,7 @@ mod test { let mut target = VrlTarget::new(Event::Metric(metric), &info, false); let _result = target.target_insert( &OwnedTargetPath::event(owned_value_path!("tags")), - Value::Object(BTreeMap::from([("a".into(), "b".into())])), + Value::Object(ObjectMap::from([("a".into(), "b".into())])), ); match target { @@ -1398,7 +1399,7 @@ mod test { assert_eq!( vrl_tags_value, - &Value::Object(BTreeMap::from([( + &Value::Object(ObjectMap::from([( "foo".into(), Value::Array(vec!["a".into(), "".into(), Value::Null, "b".into()]) )])) diff --git a/lib/vector-core/src/schema/definition.rs b/lib/vector-core/src/schema/definition.rs index 2d01632fb48b2..f5afe1aa3eb23 100644 --- a/lib/vector-core/src/schema/definition.rs +++ b/lib/vector-core/src/schema/definition.rs @@ -617,7 +617,7 @@ mod tests { use std::collections::{BTreeMap, HashMap}; use lookup::{lookup_v2::parse_target_path, owned_value_path}; - use vrl::value::Value; + use vrl::value::{ObjectMap, Value}; use super::*; use crate::event::{Event, EventMetadata, LogEvent}; @@ -640,7 +640,7 @@ mod tests { TestCase { title: "match", definition: Definition::new(Kind::any(), Kind::any(), [LogNamespace::Legacy]), - event: Event::Log(LogEvent::from(BTreeMap::new())), + event: Event::Log(LogEvent::from(ObjectMap::new())), valid: true, }, TestCase { @@ -650,7 +650,7 @@ mod tests { Kind::any(), [LogNamespace::Legacy], ), - event: Event::Log(LogEvent::from(BTreeMap::from([("foo".into(), 4.into())]))), + event: Event::Log(LogEvent::from(ObjectMap::from([("foo".into(), 4.into())]))), valid: false, }, TestCase { @@ -661,9 +661,9 @@ mod tests { [LogNamespace::Legacy], ), event: Event::Log(LogEvent::from_parts( - Value::Object(BTreeMap::new()), + Value::Object(ObjectMap::new()), EventMetadata::default_with_value( - BTreeMap::from([("foo".into(), 4.into())]).into(), + ObjectMap::from([("foo".into(), 4.into())]).into(), ), )), valid: false, @@ -671,7 +671,7 @@ mod tests { TestCase { title: "wrong log namespace", definition: Definition::new(Kind::any(), Kind::any(), []), - event: Event::Log(LogEvent::from(BTreeMap::new())), + event: Event::Log(LogEvent::from(ObjectMap::new())), valid: false, }, TestCase { @@ -681,7 +681,7 @@ mod tests { Kind::any(), [LogNamespace::Legacy], ), - event: Event::Log(LogEvent::from(BTreeMap::from([( + event: Event::Log(LogEvent::from(ObjectMap::from([( "foo".into(), Value::Null, )]))), diff --git a/lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs b/lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs index 645334cbccc5e..7c08aa572c07f 100644 --- a/lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs +++ b/lib/vector-vrl/metrics/src/aggregate_vector_metrics.rs @@ -11,7 +11,7 @@ use crate::common::resolve_tags; use crate::common::validate_tags; use crate::common::{Error, MetricsStorage}; -static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new())); +static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new().into())); static PARAMETERS: LazyLock> = LazyLock::new(|| { vec![ Parameter::required("function", kind::BYTES, "The aggregation function to apply to the matched metrics.") diff --git a/lib/vector-vrl/metrics/src/common.rs b/lib/vector-vrl/metrics/src/common.rs index 8f765c486d193..2091bd6d922e3 100644 --- a/lib/vector-vrl/metrics/src/common.rs +++ b/lib/vector-vrl/metrics/src/common.rs @@ -202,7 +202,7 @@ pub(crate) fn resolve_tags( mod tests { use vector_core::{ compile_vrl, - event::{Event, LogEvent, MetricKind, MetricTags, VrlTarget}, + event::{Event, LogEvent, MetricKind, MetricTags, ObjectMap, VrlTarget}, }; use vrl::{ compiler::{ @@ -245,7 +245,7 @@ mod tests { } fn assert_metric_matches( - metric: &BTreeMap, + metric: &ObjectMap, name: &str, value: f64, tags: Option>, diff --git a/lib/vector-vrl/metrics/src/find_vector_metrics.rs b/lib/vector-vrl/metrics/src/find_vector_metrics.rs index 7b278a17c2fb7..7d4a8a305e249 100644 --- a/lib/vector-vrl/metrics/src/find_vector_metrics.rs +++ b/lib/vector-vrl/metrics/src/find_vector_metrics.rs @@ -23,7 +23,7 @@ fn find_metrics( )) } -static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new())); +static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new().into())); static PARAMETERS: LazyLock> = LazyLock::new(|| { vec![ diff --git a/lib/vector-vrl/metrics/src/get_vector_metric.rs b/lib/vector-vrl/metrics/src/get_vector_metric.rs index 08de641c67548..84c814bf07057 100644 --- a/lib/vector-vrl/metrics/src/get_vector_metric.rs +++ b/lib/vector-vrl/metrics/src/get_vector_metric.rs @@ -20,7 +20,7 @@ fn get_metric( Ok(value) } -static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new())); +static DEFAULT_TAGS: LazyLock = LazyLock::new(|| Value::Object(BTreeMap::new().into())); static PARAMETERS: LazyLock> = LazyLock::new(|| { vec![ diff --git a/lib/vector-vrl/tests/src/docs.rs b/lib/vector-vrl/tests/src/docs.rs index 8c25745e5aefc..ddbba0030a2c8 100644 --- a/lib/vector-vrl/tests/src/docs.rs +++ b/lib/vector-vrl/tests/src/docs.rs @@ -136,7 +136,7 @@ fn test_from_cue_example(category: &'static str, name: String, example: Example) Some(event) => { serde_json::from_value::(serde_json::Value::Object(event.log)).unwrap() } - None => Value::Object(BTreeMap::default()), + None => Value::Object(BTreeMap::default().into()), }; if returns.is_some() && output.is_some() { diff --git a/lib/vector-vrl/web-playground/src/lib.rs b/lib/vector-vrl/web-playground/src/lib.rs index 07fcb4bb6b096..c366bf31ed455 100644 --- a/lib/vector-vrl/web-playground/src/lib.rs +++ b/lib/vector-vrl/web-playground/src/lib.rs @@ -104,7 +104,7 @@ fn compile( let mut target_value = TargetValue { value: event.clone(), - metadata: Value::Object(BTreeMap::new()), + metadata: Value::Object(BTreeMap::new().into()), secrets: Secrets::new(), }; diff --git a/src/config/unit_test/mod.rs b/src/config/unit_test/mod.rs index aadd8c893d5ac..5a04e7b4a0100 100644 --- a/src/config/unit_test/mod.rs +++ b/src/config/unit_test/mod.rs @@ -662,7 +662,7 @@ fn build_input_event(input: &TestInput) -> Result { let mut target = TargetValue { value: value!({}), - metadata: value::Value::Object(BTreeMap::new()), + metadata: value::Value::Object(BTreeMap::new().into()), secrets: value::Secrets::default(), }; diff --git a/src/sinks/datadog/logs/sink.rs b/src/sinks/datadog/logs/sink.rs index 5fb5e2189fa10..a0e35454936d8 100644 --- a/src/sinks/datadog/logs/sink.rs +++ b/src/sinks/datadog/logs/sink.rs @@ -170,8 +170,8 @@ pub fn normalize_as_agent_event(event: &mut Event) { .cloned() .collect::>(); for key in keys_to_move { - if let Some((entry_k, entry_v)) = object_map.remove_entry(key.as_str()) { - local_root.insert(entry_k, entry_v); + if let Some(entry_v) = object_map.remove(key.as_str()) { + local_root.insert(key, entry_v); } } // .. nest this object at the root under the reserved key named 'message' diff --git a/src/sinks/elasticsearch/config.rs b/src/sinks/elasticsearch/config.rs index 8c2c617e82a08..5e8d5783c1ce4 100644 --- a/src/sinks/elasticsearch/config.rs +++ b/src/sinks/elasticsearch/config.rs @@ -479,13 +479,13 @@ impl DataStreamConfig { let namespace = self.namespace(&*log); if log.as_map().is_none() { - *log.value_mut() = Value::Object(BTreeMap::new()); + *log.value_mut() = Value::Object(BTreeMap::new().into()); } let existing = log .as_map_mut() .expect("must be a map") .entry("data_stream".into()) - .or_insert_with(|| Value::Object(BTreeMap::new())) + .or_insert_with(|| Value::Object(BTreeMap::new().into())) .as_object_mut_unwrap(); if let Some(dtype) = dtype { diff --git a/src/sinks/util/encoding.rs b/src/sinks/util/encoding.rs index 63f8407cef53b..d33151054dc8d 100644 --- a/src/sinks/util/encoding.rs +++ b/src/sinks/util/encoding.rs @@ -215,7 +215,7 @@ where #[cfg(test)] mod tests { - use std::{collections::BTreeMap, env, path::PathBuf}; + use std::{env, path::PathBuf}; use bytes::{BufMut, Bytes}; use cfg_if::cfg_if; @@ -229,7 +229,7 @@ mod tests { internal_event::CountByteSize, json_size::JsonSize, }; - use vrl::value::{KeyString, Value}; + use vrl::value::{KeyString, ObjectMap, Value}; cfg_if! { if #[cfg(feature = "codecs-arrow")] { @@ -276,7 +276,7 @@ mod tests { ); let mut writer = Vec::new(); - let input = vec![Event::Log(LogEvent::from(BTreeMap::from([( + let input = vec![Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value"), )])))]; @@ -304,15 +304,15 @@ mod tests { ); let input = vec![ - Event::Log(LogEvent::from(BTreeMap::from([( + Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value1"), )]))), - Event::Log(LogEvent::from(BTreeMap::from([( + Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value2"), )]))), - Event::Log(LogEvent::from(BTreeMap::from([( + Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value3"), )]))), @@ -367,7 +367,7 @@ mod tests { ); let mut writer = Vec::new(); - let input = vec![Event::Log(LogEvent::from(BTreeMap::from([( + let input = vec![Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value"), )])))]; @@ -395,15 +395,15 @@ mod tests { let mut writer = Vec::new(); let input = vec![ - Event::Log(LogEvent::from(BTreeMap::from([( + Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value1"), )]))), - Event::Log(LogEvent::from(BTreeMap::from([( + Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value2"), )]))), - Event::Log(LogEvent::from(BTreeMap::from([( + Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value3"), )]))), @@ -431,7 +431,7 @@ mod tests { ); let mut writer = Vec::new(); - let input = Event::Log(LogEvent::from(BTreeMap::from([( + let input = Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("key"), Value::from("value"), )]))); @@ -452,7 +452,7 @@ mod tests { ); let mut writer = Vec::new(); - let input = Event::Log(LogEvent::from(BTreeMap::from([( + let input = Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("message"), Value::from("value"), )]))); @@ -498,7 +498,7 @@ mod tests { ); let mut writer = Vec::new(); - let input = vec![Event::Log(LogEvent::from(BTreeMap::from([ + let input = vec![Event::Log(LogEvent::from(ObjectMap::from([ (KeyString::from("id"), Value::from("123")), (KeyString::from("name"), Value::from("Alice")), (KeyString::from("age"), Value::from(30)), @@ -554,7 +554,7 @@ mod tests { let mut writer = Vec::new(); let input = vec![ - Event::Log(LogEvent::from(BTreeMap::from([ + Event::Log(LogEvent::from(ObjectMap::from([ (KeyString::from("id"), Value::from("123")), (KeyString::from("name"), Value::from("Alice")), (KeyString::from("age"), Value::from(30)), @@ -563,7 +563,7 @@ mod tests { Value::from(vec!["alice@example.com", "alice@work.com"]), ), ]))), - Event::Log(LogEvent::from(BTreeMap::from([ + Event::Log(LogEvent::from(ObjectMap::from([ (KeyString::from("id"), Value::from("123")), (KeyString::from("name"), Value::from("Alice")), (KeyString::from("age"), Value::from(30)), @@ -600,7 +600,7 @@ mod tests { let encoder = BatchEncoder::new(BatchSerializer::Arrow(serializer)); let encoding = (Transformer::default(), encoder); - let event = Event::Log(LogEvent::from(BTreeMap::from([( + let event = Event::Log(LogEvent::from(ObjectMap::from([( KeyString::from("message"), Value::from("not_an_integer"), )]))); diff --git a/src/sources/fluent/message.rs b/src/sources/fluent/message.rs index a096fabbd2c32..792d3ace7ae4b 100644 --- a/src/sources/fluent/message.rs +++ b/src/sources/fluent/message.rs @@ -224,8 +224,6 @@ impl From for Value { #[cfg(test)] mod test { - use std::collections::BTreeMap; - use approx::assert_relative_eq; use quickcheck::quickcheck; use vrl::value::{ObjectMap, Value}; @@ -330,7 +328,7 @@ mod test { let actual_inner: Vec<(rmpv::Value, rmpv::Value)> = input.into_iter().map(|(k,v)| (key_fn(k), val_fn(v))).collect(); let actual = rmpv::Value::Map(actual_inner); - let expected = Value::Object(BTreeMap::new()); + let expected = Value::Object(ObjectMap::new()); assert_eq!(Value::from(FluentValue(actual)), expected); } diff --git a/src/sources/logstash.rs b/src/sources/logstash.rs index a8b6c6b389e28..ac764663aaef7 100644 --- a/src/sources/logstash.rs +++ b/src/sources/logstash.rs @@ -20,7 +20,7 @@ use vector_lib::{ lookup::{OwnedValuePath, event_path, metadata_path, owned_value_path, path}, schema::Definition, }; -use vrl::value::{KeyString, Kind, kind::Collection}; +use vrl::value::{KeyString, Kind, ObjectMap, kind::Collection}; use super::util::net::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ @@ -774,7 +774,7 @@ impl From for Event { .fields .into_iter() .map(|(key, value)| (key, Value::from(value))) - .collect::>(), + .collect::(), )) } } diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index f0cfa3f561839..638de434280f6 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -672,7 +672,8 @@ mod test { let tls_meta: ObjectMap = btreemap!( "subject" => "CN=localhost,OU=Vector,O=Datadog,L=New York,ST=New York,C=US" - ); + ) + .into(); assert_eq!(event.as_log()["tls_peer"], tls_meta.clone().into(),); @@ -737,7 +738,8 @@ mod test { let tls_meta: ObjectMap = btreemap!( "subject" => "CN=localhost,OU=Vector,O=Datadog,L=New York,ST=New York,C=US" - ); + ) + .into(); assert_eq!( event_meta diff --git a/src/sources/util/http/headers.rs b/src/sources/util/http/headers.rs index 1e03ba0033012..c20638f188839 100644 --- a/src/sources/util/http/headers.rs +++ b/src/sources/util/http/headers.rs @@ -83,7 +83,7 @@ mod tests { use chrono::{DateTime, Utc}; use std::time::SystemTime; use vector_lib::config::LogNamespace; - use vrl::{path, value}; + use vrl::{path, value, value::ObjectMap}; use warp::http::HeaderMap; use crate::event::{Event, MetricKind, MetricTags, MetricValue}; @@ -147,12 +147,12 @@ mod tests { "test", ); - let mut trace = [TraceEvent::from(btreemap! { + let mut trace = [TraceEvent::from(ObjectMap::from(btreemap! { "span_id" => "abc123", "trace_id" => "xyz789", "span_name" => "test-span", "service" => "my-service", - }) + })) .into()]; add_headers( @@ -268,12 +268,12 @@ mod tests { "Checking metric contains Content-Encoding header" ); - let mut trace = [TraceEvent::from(btreemap! { + let mut trace = [TraceEvent::from(ObjectMap::from(btreemap! { "span_id" => "abc123", "trace_id" => "xyz789", "span_name" => "test-span", "service" => "my-service", - }) + })) .into()]; add_headers( diff --git a/src/transforms/exclusive_route/tests.rs b/src/transforms/exclusive_route/tests.rs index 1f8445c7ed408..22e2672ebda8f 100644 --- a/src/transforms/exclusive_route/tests.rs +++ b/src/transforms/exclusive_route/tests.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use indoc::indoc; use vector_lib::transform::TransformOutputsBuf; +use vrl::value::ObjectMap; use crate::{ config::{ConfigBuilder, DataType, TransformOutput, build_unit_tests}, @@ -50,9 +51,9 @@ fn exclusive_routes() { let (output_names, mut outputs) = get_outputs_buf(); for service in ["a", "b", "c"] { - let event = Event::Log(LogEvent::from(btreemap! { + let event = Event::Log(LogEvent::from(ObjectMap::from(btreemap! { "service" => service - })); + }))); transform.transform(event.clone(), &mut outputs); for name in output_names.clone() { let mut events: Vec<_> = outputs.drain_named(name).collect(); diff --git a/src/transforms/metric_to_log.rs b/src/transforms/metric_to_log.rs index 551f37cffda67..d8fb5a25bf4dc 100644 --- a/src/transforms/metric_to_log.rs +++ b/src/transforms/metric_to_log.rs @@ -325,7 +325,7 @@ impl MetricToLog { // This can be removed once metrics support namespacing. log.insert( (PathPrefix::Metadata, path!("vector")), - vrl::value::Value::Object(BTreeMap::new()), + vrl::value::Value::Object(BTreeMap::new().into()), ); } Some(log) diff --git a/src/transforms/trace_to_log.rs b/src/transforms/trace_to_log.rs index 278bd0b0d4251..515196428b6ef 100644 --- a/src/transforms/trace_to_log.rs +++ b/src/transforms/trace_to_log.rs @@ -110,13 +110,14 @@ mod tests { #[tokio::test] async fn transform_trace() { use vrl::btreemap; + use vrl::value::ObjectMap; - let trace = TraceEvent::from(btreemap! { + let trace = TraceEvent::from(ObjectMap::from(btreemap! { "span_id" => "abc123", "trace_id" => "xyz789", "span_name" => "test-span", "service" => "my-service", - }); + })); let (expected_map, _) = trace.clone().into_parts(); From abfb8337641bd4bb62090d811e96d76928ef9d73 Mon Sep 17 00:00:00 2001 From: Luke Steensen Date: Wed, 17 Jun 2026 18:16:42 -0500 Subject: [PATCH 4/5] chore(bench): add ObjectMap benchmark helpers --- Dockerfile.bench | 36 ++++++++++++ SMP_NOTES.md | 140 +++++++++++++++++++++++++++++++++++++++++++++++ bench-all.sh | 77 ++++++++++++++++++++++++++ bench.sh | 87 +++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+) create mode 100644 Dockerfile.bench create mode 100644 SMP_NOTES.md create mode 100755 bench-all.sh create mode 100755 bench.sh diff --git a/Dockerfile.bench b/Dockerfile.bench new file mode 100644 index 0000000000000..0846025f7d5ae --- /dev/null +++ b/Dockerfile.bench @@ -0,0 +1,36 @@ +# +# VECTOR BUILDER (local bench builds with VRL override) +# +# Build with: +# docker buildx build --build-context vrl=../vrl -f Dockerfile.bench -t vector: . +# +FROM rust:1.92-bookworm AS builder +RUN apt-get update && apt-get -y --no-install-recommends install \ + cmake \ + libclang-dev \ + libsasl2-dev \ + libssl-dev \ + pkg-config \ + protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /vector +COPY --from=vrl . /vrl +COPY . . +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/vector/target \ + cargo build --bin vector --release && \ + cp target/release/vector . + +# +# TARGET +# +FROM debian:trixie-slim +RUN apt-get update && apt-get dist-upgrade -y && apt-get -y --no-install-recommends install zlib1g ca-certificates libsasl2-2 && rm -rf /var/lib/apt/lists/* +COPY --from=builder /vector/vector /usr/bin/vector +RUN mkdir --parents --mode=0777 /var/lib/vector + +# Smoke test +RUN ["/usr/bin/vector", "--version"] + +ENTRYPOINT ["/usr/bin/vector"] diff --git a/SMP_NOTES.md b/SMP_NOTES.md new file mode 100644 index 0000000000000..2cf1f51c0a6d5 --- /dev/null +++ b/SMP_NOTES.md @@ -0,0 +1,140 @@ +# Local Regression Benchmarking Notes + +## What we're testing + +We're measuring the performance impact of several VRL (Vector Remap Language) optimizations on Vector's throughput and memory usage. The optimizations are: + +1. **Flat object map** — Replace BTreeMap with a flat array-backed map for storing event fields. Better cache locality, avoids tree traversal. +2. **Copy elimination** — Remove unnecessary `String` allocations when constructing map keys in VRL. +3. **KeyString as EcoString** — Change the map key type to `EcoString`, a reference-counted immutable string with cheap cloning. +4. **KeyString as CompactString** — Change the map key type to `CompactString`, an inline small-string-optimized type that avoids heap allocation for short strings. + +The object map implementation is selected at runtime via the `VRL_OBJECT_MAP` env var (`btree`, `vec`, or the default `flat`). The KeyString and copy elimination changes are compile-time (different VRL builds). + +## Tooling + +### `bench.sh` — Build and run benchmarks + +```bash +./bench.sh build # Build image from current source + ../vrl +./bench.sh derive KEY=VAL ... # Layer env vars on an existing image (instant) +./bench.sh run ... # Run smp local comparison +./bench.sh cases # List regression cases +``` + +The script auto-detects Colima and sets `DOCKER_HOST` and `TMPDIR` accordingly. + +### `Dockerfile.bench` — Local build Dockerfile + +Two-stage build using `rust:1.92-bookworm` (for native ARM64 on Apple Silicon) with BuildKit named context `--build-context vrl=../vrl` to include the local VRL checkout. Uses cache mounts for fast incremental rebuilds. + +**Important**: The VRL repo at `../vrl` needs a `.dockerignore` excluding `target/` — without it, Docker sends ~25GB of build artifacts as context. + +### `bench-all.sh` — Batch runner + +Runs all regression cases across multiple variants. Skips completed runs (checks for `Δ mean` in output logs) and skips `file_to_blackhole`/`file_100_to_blackhole` (need FUSE, unavailable in Colima). Tolerates individual failures without aborting. + +### `smp local run` + +The actual comparison tool. Runs 3 replicates of each variant with 270 samples at 1Hz. Each case takes ~15 min (except `splunk_hec_route_s3` at ~32 min). Raw capture data (parquet) goes to `comparative-captures//`. + +## Docker image inventory + +All images use the same Vector source; they differ only in VRL version and the `VRL_OBJECT_MAP` env var. + +| Image | VRL version | Map type | Description | +|---|---|---|---| +| `vector:btree` | baseline (no opts) | btree | **The baseline for all comparisons** | +| `vector:flat` | baseline | flat | Flat map only | +| `vector:btree-ks` | + EcoString | btree | Old keystring on btree | +| `vector:flat-ks` | + EcoString | flat | Old keystring on flat | +| `vector:btree-co` | + copy elim | btree | Copy elimination only | +| `vector:flat-co` | + copy elim | flat | Copy elimination only | +| `vector:btree-co-ks` | + copy elim + EcoString | btree | Copy elim + EcoString | +| `vector:flat-co-ks` | + copy elim + EcoString | flat | Copy elim + EcoString | +| `vector:btree-co-ksc` | + copy elim + CompactString | btree | Copy elim + CompactString | +| `vector:flat-co-ksc` | + copy elim + CompactString | flat | Copy elim + CompactString | + +To rebuild: the VRL repo (`../vrl`) has copy elimination committed on `main`. KeyString variants are managed via `git stash`: +- **EcoString**: `git stash pop` the older stash +- **CompactString**: dirty working tree state (or stash pop the newer stash) +- **No keystring**: `git stash` everything + +## Results + +### Complete data: flat map across all cases + +Throughput vs btree baseline, from `bench-results/full/`: + +| Tier | Cases | Flat map Δ | +|---|---|---| +| High (heavy VRL) | datadog_agent_remap_blackhole | +35.5% | +| | datadog_agent_remap_blackhole_acks | +30.1% | +| | syslog_humio_logs | +25.8% | +| | syslog_log2metric_splunk_hec_metrics | +19.8% | +| | syslog_splunk_hec_logs | +18.4% | +| | syslog_loki | +18.3% | +| | datadog_agent_remap_datadog_logs_acks | +18.5% | +| | syslog_log2metric_humio_metrics | +17.1% | +| | syslog_log2metric_tag_cardinality_limit_blackhole | +16.9% | +| | datadog_agent_remap_datadog_logs | +16.2% | +| | syslog_regex_logs2metric_ddmetrics | +15.0% | +| Medium | http_text_to_http_json | +13.8% | +| | statsd_to_datadog_metrics | +1.6% | +| Low (passthrough) | all http_to_http, socket, splunk_hec, fluent, otlp | ~0% (no regression) | + +Memory (RSS) is 3–8% lower with flat map on VRL-heavy workloads. + +### Optimization interaction study (datadog_agent_remap_blackhole) + +All measurements vs old btree baseline: + +| Configuration | Throughput Δ | +|---|---| +| flat map only | +29.2% | +| btree + copy elim | -0.8% | +| btree + copy elim + EcoString | +7.4% | +| btree + copy elim + CompactString | +2.7% | +| flat + copy elim + EcoString | +22.8% | +| flat + copy elim + CompactString | +29.2% | + +Key findings: +- **EcoString helps btree (+7.4%) but hurts flat (-9.6% vs flat alone)**. The reference-counting indirection undermines flat map cache locality. +- **CompactString is neutral on flat**. Inline small-string storage preserves cache behavior. Full +29.2% recovered. +- **Copy elimination is near-zero on its own** but is a prerequisite for the KeyString changes. + +### Partially complete: full matrix + +`bench-results/full/` has results for `flat` (all 25 working cases) and `flat-co-ks` (15 of 25 cases). The remaining variants (`flat-co-ksc`, `btree-co-ks`, `btree-co-ksc`) have not been run across all cases yet. + +## Resuming the batch run + +```bash +# The script skips completed runs automatically +caffeinate -d -i -s bash -c './bench-all.sh' +``` + +Colima must be running with 10 CPUs and sufficient disk: + +```bash +colima start --cpu 10 --memory 16 --disk 60 +``` + +## Infrastructure notes + +- **Colima on Apple Silicon**: The `timberio/vector-dev` image is amd64-only. We use `rust:1.92-bookworm` instead (has native arm64). +- **Docker buildx**: Required for `--build-context`. Install: `brew install docker-buildx`, then `mkdir -p ~/.docker/cli-plugins && ln -sfn $(brew --prefix docker-buildx)/bin/docker-buildx ~/.docker/cli-plugins/docker-buildx`. +- **smp + Colima**: smp needs `DOCKER_HOST` set to the Colima socket, and `TMPDIR` under `$HOME` (Colima doesn't mount `/var/folders`). The `bench.sh` script handles this automatically. +- **FUSE cases**: `file_to_blackhole` and `file_100_to_blackhole` need FUSE, unavailable in Colima VMs. Skipped. +- **Disk space**: The Colima VM disk fills up from Docker build cache. Run `docker builder prune --all -f` before large rebuilds. +- **VRL .dockerignore**: Must contain `target` to avoid sending 25GB of build artifacts as Docker context. + +## File locations + +- `bench.sh` — Main benchmarking script +- `Dockerfile.bench` — Build Dockerfile +- `bench-all.sh` — Batch runner for full matrix +- `bench-results/` — Ad-hoc comparison logs from early exploration +- `bench-results/full/` — Systematic matrix results (`--.log`) +- `comparative-captures/` — Raw smp parquet data (overwritten by each run; only latest is available) +- `../vrl/.dockerignore` — Must exist with `target` entry diff --git a/bench-all.sh b/bench-all.sh new file mode 100755 index 0000000000000..d11b30956ec1f --- /dev/null +++ b/bench-all.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -uo pipefail + +BENCH="/Users/luke.steensen/code/vector/bench.sh" +RESULTS_DIR="/Users/luke.steensen/code/vector/bench-results/full" +mkdir -p "$RESULTS_DIR" + +VARIANTS=( + flat + flat-co-ks + flat-co-ksc + btree-co-ks + btree-co-ksc +) + +CASES=( + datadog_agent_remap_blackhole + datadog_agent_remap_blackhole_acks + datadog_agent_remap_datadog_logs + datadog_agent_remap_datadog_logs_acks + file_100_to_blackhole + file_to_blackhole + fluent_elasticsearch + http_elasticsearch + http_text_to_http_json + http_to_http_acks + http_to_http_disk_buffer + http_to_http_json + http_to_http_noack + http_to_s3 + otlp_grpc_to_blackhole + otlp_http_to_blackhole + socket_to_socket_blackhole + splunk_hec_indexer_ack_blackhole + splunk_hec_route_s3 + splunk_hec_to_splunk_hec_logs_acks + splunk_hec_to_splunk_hec_logs_noack + statsd_to_datadog_metrics + syslog_humio_logs + syslog_log2metric_humio_metrics + syslog_log2metric_splunk_hec_metrics + syslog_log2metric_tag_cardinality_limit_blackhole + syslog_loki + syslog_regex_logs2metric_ddmetrics + syslog_splunk_hec_logs +) + +total=$(( ${#VARIANTS[@]} * ${#CASES[@]} )) +n=0 + +for variant in "${VARIANTS[@]}"; do + for case_name in "${CASES[@]}"; do + n=$((n + 1)) + outfile="${RESULTS_DIR}/${case_name}--${variant}.log" + + # Skip if already completed + if [[ -f "$outfile" ]] && grep -q "Δ mean" "$outfile" 2>/dev/null; then + echo "[$n/$total] SKIP (already done): ${case_name} -- btree vs ${variant}" + continue + fi + + # Skip known-broken cases (need FUSE) + if [[ "$case_name" == "file_100_to_blackhole" || "$case_name" == "file_to_blackhole" ]]; then + echo "[$n/$total] SKIP (needs FUSE): ${case_name} -- btree vs ${variant}" + continue + fi + + echo "[$n/$total] Running: ${case_name} -- btree vs ${variant}" + docker rm -f $(docker ps -aq) 2>/dev/null || true + if ! "${BENCH}" run "${case_name}" btree "${variant}" 2>&1 | tee "${outfile}"; then + echo "[$n/$total] FAILED: ${case_name} -- btree vs ${variant}" + fi + echo "" + done +done + +echo "=== ALL DONE ===" diff --git a/bench.sh b/bench.sh new file mode 100755 index 0000000000000..b215abf6e893d --- /dev/null +++ b/bench.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Auto-detect Colima Docker socket and set TMPDIR to a path the VM can mount +COLIMA_SOCK="$HOME/.colima/default/docker.sock" +if [[ -z "${DOCKER_HOST:-}" && -S "${COLIMA_SOCK}" ]]; then + export DOCKER_HOST="unix://${COLIMA_SOCK}" +fi +if [[ -n "${DOCKER_HOST:-}" && "${DOCKER_HOST}" == *colima* ]]; then + # macOS /var/folders tmp dir isn't mounted in Colima; use a home-dir path + mkdir -p "$HOME/.tmp-smp" + export TMPDIR="$HOME/.tmp-smp" +fi + +usage() { + cat < [args...] + +Commands: + build Build a Vector image from current source + derive KEY=VAL ... Create a variant image with env vars + run .. Run an smp comparison + cases List available regression cases +EOF + exit 1 +} + +cmd_build() { + local tag="${1:?Usage: $0 build }" + echo "Building vector:${tag} ..." + DOCKER_BUILDKIT=1 docker buildx build \ + --build-context "vrl=${SCRIPT_DIR}/../vrl" \ + -f "${SCRIPT_DIR}/Dockerfile.bench" \ + -t "vector:${tag}" \ + "${SCRIPT_DIR}" +} + +cmd_derive() { + if [[ $# -lt 3 ]]; then + echo "Usage: $0 derive KEY=VAL [KEY=VAL ...]" >&2 + exit 1 + fi + local base="$1"; shift + local new_tag="$1"; shift + + local dockerfile="FROM vector:${base}" + for kv in "$@"; do + dockerfile="${dockerfile}"$'\n'"ENV ${kv}" + done + + echo "Deriving vector:${new_tag} from vector:${base} ..." + echo "${dockerfile}" | docker buildx build -t "vector:${new_tag}" - +} + +cmd_run() { + if [[ $# -lt 3 ]]; then + echo "Usage: $0 run [smp args...]" >&2 + exit 1 + fi + local case_name="$1"; shift + local baseline="$1"; shift + local comparison="$1"; shift + + smp local run \ + --experiment-dir "${SCRIPT_DIR}/regression" \ + --case "${case_name}" \ + --baseline-image "vector:${baseline}" \ + --comparison-image "vector:${comparison}" \ + "$@" +} + +cmd_cases() { + ls "${SCRIPT_DIR}/regression/cases/" +} + +[[ $# -lt 1 ]] && usage + +command="$1"; shift +case "${command}" in + build) cmd_build "$@" ;; + derive) cmd_derive "$@" ;; + run) cmd_run "$@" ;; + cases) cmd_cases ;; + *) usage ;; +esac From 1b1ac03a64f1e8e8d400552b762d87fc229d4812 Mon Sep 17 00:00:00 2001 From: Luke Steensen Date: Wed, 17 Jun 2026 18:24:45 -0500 Subject: [PATCH 5/5] chore: use git VRL ObjectMap dependency --- Cargo.lock | 1 + Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 991e8995558c8..cf17a39d0c6c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13481,6 +13481,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "vrl" version = "0.33.1" +source = "git+https://github.com/lukesteensen/vrl.git?branch=experiment%2Fobjectmap-backends#d243198bc134ab19ea6293322d36055fd54c2fe2" dependencies = [ "aes", "aes-siv", diff --git a/Cargo.toml b/Cargo.toml index 06edeedeb6684..e73228c0cbe85 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -225,7 +225,7 @@ vector-common-macros = { path = "lib/vector-common-macros" } vector-lib = { path = "lib/vector-lib", default-features = false, features = ["vrl"] } vector-vrl-category = { path = "lib/vector-vrl/category" } vector-vrl-functions = { path = "lib/vector-vrl/functions", default-features = false } -vrl = { path = "../vrl", default-features = false, features = ["arbitrary", "cli", "test", "test_framework", "stdlib-base"] } +vrl = { git = "https://github.com/lukesteensen/vrl.git", branch = "experiment/objectmap-backends", default-features = false, features = ["arbitrary", "cli", "test", "test_framework", "stdlib-base"] } mock_instant = { version = "0.6" } serial_test = { version = "3.4" } strum = { version = "0.28", features = ["derive"] }