featherbread
diff --git a/‎benches/README.md‎
Lines changed: 47 additions & 11 deletions b/‎benches/README.md‎
Lines changed: 47 additions & 11 deletions
diff --git a/‎benches/criterion.rs‎
Lines changed: 72 additions & 7 deletions b/‎benches/criterion.rs‎
Lines changed: 72 additions & 7 deletions
diff --git a/‎benches/k8s-job.json‎
Lines changed: 0 additions & 25 deletions b/‎benches/k8s-job.json‎
Lines changed: 0 additions & 25 deletions
diff --git a/‎benches/k8s-job.yaml‎
Lines changed: 15 additions & 0 deletions b/‎benches/k8s-job.yaml‎
Lines changed: 15 additions & 0 deletions
@@ -40,7 +40,7 @@ cargo bench json
 The argument to `cargo bench` is a substring match against the full benchmark
 names of the form `{size}_{format}/{source}`.
 
-- **size**: `small` (see below)
+- **size**: `small` or `medium` (see below)
 - **format**: A full format name as given to xt's `-f` or `-t` (e.g. `json`)
 - **source**: `buffer` (non-streaming) or `reader` (streaming)
 
@@ -50,22 +50,58 @@ benchmark run, including charts and comparisons with any previous run.
 
 ## Test Inputs
 
-The small input, `k8s-job.json`, is a simple Kubernetes `Job` that runs the
+Each benchmark loads test data into an in-memory buffer by translating a
+"default" version of the input with xt. This approach limits the size of the xt
+repository and ensures that disk I/O performance doesn't influence the results.
+However, it allows changes to xt's output formatting (e.g. whitespace, quoting)
+to influence the results. I expect such changes to be rare, at least compared
+to other changes whose impact is worth benchmarking.
+
+### Small
+
+The small input, `k8s-job.yaml`, is a simple Kubernetes `Job` that runs the
 Docker `hello-world` image. Translation time is usually a few microseconds for
 even the slowest input formats, so each benchmark runs in just a few seconds.
 This provides relatively fast feedback as you work.
 
+### Medium
+
+The medium input, `k8s-kyverno.yaml`, is a full set of Kubernetes manifests for
+deploying [Kyverno][kyverno] v1.16.2, generated from version 3.6.2 of the
+official chart using Helm v4.1.0 on `darwin/arm64`:
+
+```sh
+helm template kyverno kyverno/kyverno \
+  --version 3.6.2 \
+  --set admissionController.replicas=1 \
+  --set backgroundController.replicas=1 \
+  --set reportsController.replicas=1 \
+  --set cleanupController.replicas=1 \
+  --set webhooksCleanup.image.pullPolicy=IfNotPresent
+```
+
+To ensure TOML compatibility:
+
+1. The above `--set` options were chosen to eliminate all `null` values.
+2. The benchmark harness processes the raw Helm output by turning the stream of
+   YAML documents into a single object, with a single `manifests` field
+   containing an array of the documents. It does this by creating a small
+   MessagePack "header" to set up the object structure and type-length marker
+   for an array, then translating the YAML documents with xt. It then
+   translates the complete object to the final format for benchmarking.
+
+The strategy for generating the medium input is intended to be reproducible and
+auditable. The size of the input was chosen to balance space requirements for
+an xt repository checkout with the desire to avoid non-human-readable encodings.
+
+### Large (removed)
+
 The benchmarks previously included a 20 - 30 MB large input based on a sample of
 GitHub events, which was included in the xt repository (and remains in its
 history) as a Zstandard compressed archive of MessagePack data. Based on the
 reveal of the xz-utils backdoor that was obfuscated in part as compressed test
-data, **I have chosen to temporarily eliminate the large benchmarks** until they
-are reimplemented to rely exclusively on human-readable inputs, ideally without
-bloating the size of xt repository checkouts.
+data, **I have chosen to eliminate the large benchmarks** until they are
+reimplemented to rely exclusively on human-readable inputs, ideally without
+bloating the size of xt repository checkouts too much.
 
-Each benchmark loads test data into an in-memory buffer by translating a
-"default" version of the input with xt. This approach reduces the size of the xt
-repository and ensures that disk I/O performance does not influence the
-benchmark results. However, it allows changes to xt's output formatting
-(whitespace, quoting, etc.) to influence the results. I expect such changes to
-be rare, at least compared to other changes whose impact is worth benchmarking.
+[kyverno]: https://kyverno.io/
@@ -1,17 +1,24 @@
 use std::hint::black_box;
+use std::time::Duration;
 
 use criterion::{Criterion, criterion_group, criterion_main};
 
 use xt::Format;
 
-criterion_main!(small);
+criterion_main!(small, medium);
 
 criterion_group! {
 	name = small;
 	config = Criterion::default();
 	targets = small_json, small_yaml, small_toml, small_msgpack
 }
 
+criterion_group! {
+	name = medium;
+	config = Criterion::default().measurement_time(Duration::from_secs(20));
+	targets = medium_json, medium_yaml, medium_toml, medium_msgpack
+}
+
 macro_rules! xt_benchmark {
 	(
 		name = $name:ident;
@@ -73,13 +80,71 @@ xt_benchmark! {
 }
 
 fn load_small_data(format: Format) -> Vec<u8> {
-	// The Kubernetes Job expands to a few hundred bytes regardless of format.
-	load_test_data(include_bytes!("k8s-job.json"), format, 512)
+	let input: &[u8] = include_bytes!("k8s-job.yaml");
+
+	let mut output = Vec::with_capacity(512);
+	xt::translate_slice(input, Some(Format::Yaml), format, &mut output)
+		.expect("k8s-job.yaml should be valid YAML");
+
+	output
 }
 
-fn load_test_data(input: &[u8], format: Format, capacity: usize) -> Vec<u8> {
-	let mut output = Vec::with_capacity(capacity);
-	xt::translate_slice(input, Some(Format::Json), format, &mut output)
-		.expect("failed to translate test data");
+xt_benchmark! {
+	name = medium_json;
+	sources = buffer, reader;
+	loader = load_medium_data;
+	translation = Format::Json => Format::Msgpack;
+}
+
+xt_benchmark! {
+	name = medium_yaml;
+	sources = buffer, reader;
+	loader = load_medium_data;
+	translation = Format::Yaml => Format::Json;
+}
+
+xt_benchmark! {
+	name = medium_toml;
+	sources = buffer;
+	loader = load_medium_data;
+	translation = Format::Toml => Format::Json;
+}
+
+xt_benchmark! {
+	name = medium_msgpack;
+	sources = buffer, reader;
+	loader = load_medium_data;
+	translation = Format::Msgpack => Format::Json;
+}
+
+fn load_medium_data(format: Format) -> Vec<u8> {
+	// These manifests were generated using a `helm template` command that should be reproducible
+	// given the correct version of the original chart.
+	let input: &[u8] = include_bytes!("k8s-kyverno.yaml");
+
+	// For TOML compatibility, we need to take this stream of Kubernetes manifests and put them
+	// into a single object. Since MessagePack doesn't use characters or indentation for structure,
+	// it's (surprisingly) the easiest way I can think of to do this.
+	//
+	// See https://github.com/msgpack/msgpack/blob/master/spec.md for a description of the bytes.
+	let mut packed = Vec::new();
+
+	packed.push(0x81); // Map of 1 element; key and value follow.
+
+	packed.push(0xa9); // String of 9 characters.
+	packed.extend(b"manifests");
+
+	packed.push(0xdc); // Array; 16-bit size to follow.
+	packed.extend(79u16.to_be_bytes()); // `xt k8s-kyverno.yaml | jq -s length`
+
+	// The 79 elements of the array.
+	xt::translate_slice(input, Some(Format::Yaml), Format::Msgpack, &mut packed)
+		.expect("k8s-kyverno.yaml should be valid YAML");
+
+	// Now, translate that {"manifests": [...]} object to the final output format.
+	let mut output = Vec::new();
+	xt::translate_slice(&packed, Some(Format::Msgpack), format, &mut output)
+		.expect("packed object should be valid");
+
 	output
 }
@@ -0,0 +1,15 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: hello-world
+spec:
+  template:
+    metadata:
+      labels:
+        job: hello-world
+    spec:
+      restartPolicy: Never
+      containers:
+      - name: hello-world
+        image: docker.io/library/hello-world:latest