From 4e33856871092f663ef0aa8dea48e086b29247a9 Mon Sep 17 00:00:00 2001 From: pshu Date: Wed, 27 May 2026 10:59:54 +0800 Subject: [PATCH 1/2] chore(bench): split specifier microbenches into a separate bench binary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `specifier/realistic[rw/hash-only]` (~340 instructions) and `specifier/branches[fragment/short]` (~170 instructions) are short enough that the cold instruction-cache fill on each CodSpeed measurement iteration dominates the result: ~200 cycles of fixed overhead translates to +5% to +10% deltas under any unrelated binary-layout shift, even when the parser itself is unchanged or faster. CodSpeed surfaces these as false-positive regressions. Move the four `specifier/*` bench groups into their own `benches/specifier.rs` and register it as a second `[[bench]]` in Cargo.toml. Each `[[bench]]` runs in its own process, so the specifier binary gets a fresh, much smaller instruction-cache footprint instead of competing with the large `bench_resolver` code for cache lines. The per-case Ir is unchanged — what changes is the working-set the kernel and the L1/LL caches see before measurement starts, which makes cold-start misses predictable across runs. - `benches/specifier.rs`: new file. Allocator wrapper mirrors the one in `bench_resolver` so allocation costs are measured identically. - `benches/resolver.rs`: drops `specifier/*` groups, helpers, unused imports. - `Cargo.toml`: adds `[[bench]] name = "specifier"`. No code paths in `Specifier::parse` are touched; this is purely test-infrastructure stabilization. 
--- Cargo.toml | 10 ++ benches/resolver.rs | 244 ++------------------------------------ benches/specifier.rs | 273 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 290 insertions(+), 237 deletions(-) create mode 100644 benches/specifier.rs diff --git a/Cargo.toml b/Cargo.toml index 529adb90..44f5447c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,16 @@ harness = false name = "resolver" required-features = ["__internal_bench"] +# Specifier microbenchmarks are split into their own bench binary so they +# don't share an instruction cache with the much larger `resolver` bench +# code. Cases like `specifier/realistic[rw/hash-only]` only execute a few +# hundred instructions per iteration; cold-start cache misses in a combined +# binary previously surfaced as false-positive CodSpeed regressions. +[[bench]] +harness = false +name = "specifier" +required-features = ["__internal_bench"] + [lints.clippy] all = { level = "warn", priority = -1 } cargo = { level = "warn", priority = -1 } diff --git a/benches/resolver.rs b/benches/resolver.rs index 2a66e6aa..08aa09cc 100644 --- a/benches/resolver.rs +++ b/benches/resolver.rs @@ -46,10 +46,8 @@ unsafe impl GlobalAlloc for NeverGrowInPlaceAllocator { } } -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use rspack_resolver::{ - FileSystemOptions, FileSystemOs, ResolveOptions, Resolver, __BenchSpecifier as Specifier, -}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use rspack_resolver::{FileSystemOptions, FileSystemOs, ResolveOptions, Resolver}; use serde_json::Value; use tokio::{ runtime::{self, Builder}, @@ -415,238 +413,10 @@ fn bench_resolver(c: &mut Criterion) { ); } -// ============================================================================ -// Specifier micro-benchmarks -// ---------------------------------------------------------------------------- -// `parse_query_framgment` lives behind `Specifier::parse`. 
The wrapper does a -// single byte read + length check, so benchmarking `parse` is effectively -// benchmarking the query/fragment scanner. Cases are split into four groups: -// branch matrix, length sweep, escape scaling, and realistic specimens. -// ============================================================================ - -// `path` is repeated to reach `len`, then optional `?query` / `#fragment` -// suffix is appended. Lets us scale a single shape across short/medium/long -// inputs without changing branch coverage. -fn specifier_shaped(base: &str, len: usize, query: Option<&str>, fragment: Option<&str>) -> String { - let mut s = String::with_capacity(len + 64); - while s.len() < len { - s.push_str(base); - } - s.truncate(len); - if let Some(q) = query { - s.push('?'); - s.push_str(q); - } - if let Some(f) = fragment { - s.push('#'); - s.push_str(f); - } - s -} - -// Webpack-style synthesized escapes: inserts `\0#` sequences inside the path -// so the scanner hits the `prev == '\0'` branch and is forced onto the -// Cow::Owned slow path. -fn specifier_with_escapes(parts: &[&str]) -> String { - parts.join("\0#") -} - -fn specifier_branch_cases() -> Vec<(&'static str, String)> { - vec![ - // 1. (None, None) — fast path, Cow::Borrowed - ("none/short", "./foo.js".to_string()), - ( - "none/medium", - "./packages/utils/src/internal/helpers/normalizePath.ts".to_string(), - ), - // 2. (Some, None) — query only - ("query/short", "./a.js?vue".to_string()), - ( - "query/medium", - "./Button.tsx?vue&type=script&lang=ts&scoped=true&hash=abc12345".to_string(), - ), - // 3. (None, Some) — fragment only, scanner breaks early - ("fragment/short", "./a.js#main".to_string()), - ( - "fragment/medium", - "./pages/Home.tsx#section-introduction-to-the-rspack-resolver".to_string(), - ), - // 4. 
(Some, Some) — query then fragment - ("query+fragment/short", "./a.js?x#y".to_string()), - ( - "query+fragment/medium", - "./Widget.vue?vue&type=template&lang=html#root".to_string(), - ), - // 5. multiple `?`, only first becomes query_start - ( - "multi-question", - "./a.js?one?two?three?four?five?six?seven".to_string(), - ), - // 6. `\0#` escape → slow path (Cow::Owned + char_indices filter) - ( - "escape/single", - specifier_with_escapes(&["path/", "real-hash"]), - ), - // 7. multiple escapes — repeats the slow path several times in one input - ( - "escape/many", - specifier_with_escapes(&["./pkg/", "repo", "repo2", "repo3", "repo4#hash"]), - ), - // 8. leading `/`, `.`, `#` → offset=1; the first char is skipped intentionally - ("leading-slash", "/abs/path/to/file.mjs?q#f".to_string()), - ("leading-hash", "#alias/module.cjs?q#f".to_string()), - // 9. bare module — offset=0, scan starts at index 0 - ( - "bare-module", - "@scope/package/sub/path/index.js?q#f".to_string(), - ), - // 10. `?` inside a fragment must NOT be promoted to query — scanner already - // broke at the `#`, but worth pinning so a future refactor can't regress it. - ( - "fragment-with-question", - "./a.js#frag?not-a-query&also-not".to_string(), - ), - ] -} - -// Same shape, four sizes — measures how each branch scales with input length. -// Sizes chosen at ~5x steps so codspeed renders a clear curve. -const SPECIFIER_LENGTH_TIERS: &[(&str, usize)] = &[ - ("len_8", 8), - ("len_64", 64), - ("len_256", 256), - ("len_1536", 1536), -]; - -#[allow(clippy::type_complexity)] -fn specifier_length_shapes() -> Vec<( - &'static str, - &'static str, - Option<&'static str>, - Option<&'static str>, -)> { - vec![ - // Pure path: stresses the loop body without any branch hits. - ("path-only", "./a/b/c/d/e/", None, None), - // Query at the very end: full scan before query_start is set. 
- ( - "query-tail", - "./a/b/c/d/e/", - Some("vue&type=script&lang=ts"), - None, - ), - // Fragment at the very end: full scan, then early break at last char. - ("frag-tail", "./a/b/c/d/e/", None, Some("section-end")), - // Both at the tail. - ( - "both-tail", - "./a/b/c/d/e/", - Some("vue&type=script"), - Some("hash"), - ), - ] -} - -// Hand-picked from typical rspack/webpack loader chains; these are what the -// parser actually sees in a production resolve flow. -fn specifier_realistic_cases() -> Vec<(&'static str, &'static str)> { - vec![ - ("rw/loader-chain", - "./node_modules/.pnpm/vue-loader@17.0.0/node_modules/vue-loader/dist/templateLoader.js?vue&type=template&id=2f8c6e7a&scoped=true&lang=html"), - ("rw/css-modules", - "./src/components/Sidebar/Sidebar.module.css?ngGlobalStyle&hash=d41d8cd98f00b204e9800998ecf8427e"), - ("rw/asset-query", - "./public/assets/images/hero@2x.png?as=webp&w=1920&h=1080&quality=80&format=webp"), - ("rw/hash-only", - "./shared/utils/index.ts#tree-shaken-export-marker-do-not-strip"), - ("rw/inline-loader", - "!!./node_modules/css-loader/dist/cjs.js??ref--6-oneOf-1-1!./node_modules/postcss-loader/dist/cjs.js??ref--6-oneOf-1-2!./src/App.vue?vue&type=style&index=0&id=7ba5bd90&scoped=true&lang=css"), - ] -} - -fn bench_specifier_branches(c: &mut Criterion) { - let mut group = c.benchmark_group("specifier/branches"); - for (label, input) in specifier_branch_cases() { - group.throughput(Throughput::Bytes(input.len() as u64)); - group.bench_with_input(BenchmarkId::from_parameter(label), &input, |b, s| { - b.iter(|| { - let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); - black_box(parsed); - }); - }); - } - group.finish(); -} - -fn bench_specifier_length_sweep(c: &mut Criterion) { - let mut group = c.benchmark_group("specifier/length"); - for (shape_label, base, query, fragment) in specifier_length_shapes() { - for (len_label, len) in SPECIFIER_LENGTH_TIERS { - let input = specifier_shaped(base, *len, query, fragment); - let id 
= BenchmarkId::new(shape_label, len_label); - group.throughput(Throughput::Bytes(input.len() as u64)); - group.bench_with_input(id, &input, |b, s| { - b.iter(|| { - let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); - black_box(parsed); - }); - }); - } - } - group.finish(); -} - -fn bench_specifier_escape_scaling(c: &mut Criterion) { - // Slow path scales with both input length AND the number of escapes (the - // filter closure does an O(n*k) `escaped_indexes.contains(&i)` per char). - // Worth a dedicated knob so the optimizer can target it. - let mut group = c.benchmark_group("specifier/escapes"); - for &n in &[1usize, 4, 16, 64] { - // `parts.len()` must equal `n + 1` so that `join("\0#")` inserts exactly - // `n` separators (= `n` escape markers in the input). The first element - // is a path prefix, the last is the real `#fragment`, and we pad the - // middle with `n - 1` filler segments. - let mut parts = vec!["./pkg/"]; - for _ in 0..n.saturating_sub(1) { - parts.push("segment"); - } - parts.push("real#hash"); - let input = specifier_with_escapes(&parts); - group.throughput(Throughput::Bytes(input.len() as u64)); - group.bench_with_input( - BenchmarkId::from_parameter(format!("escapes_{n}")), - &input, - |b, s| { - b.iter(|| { - let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); - black_box(parsed); - }); - }, - ); - } - group.finish(); -} - -fn bench_specifier_realistic(c: &mut Criterion) { - let mut group = c.benchmark_group("specifier/realistic"); - for (label, input) in specifier_realistic_cases() { - group.throughput(Throughput::Bytes(input.len() as u64)); - group.bench_with_input(BenchmarkId::from_parameter(label), input, |b, s| { - b.iter(|| { - let parsed = Specifier::parse(black_box(s)).unwrap(); - black_box(parsed); - }); - }); - } - group.finish(); -} +// Specifier microbenchmarks live in `benches/specifier.rs` (separate +// `[[bench]]` binary) so the very short `specifier/*` cases get a fresh +// instruction cache instead 
of competing with the resolver bench code for +// cache lines. See that file for the rationale. -criterion_group!( - resolver, - bench_resolver, - bench_specifier_branches, - bench_specifier_length_sweep, - bench_specifier_escape_scaling, - bench_specifier_realistic -); +criterion_group!(resolver, bench_resolver); criterion_main!(resolver); diff --git a/benches/specifier.rs b/benches/specifier.rs new file mode 100644 index 00000000..e28ff8aa --- /dev/null +++ b/benches/specifier.rs @@ -0,0 +1,273 @@ +//! Microbenchmarks for `Specifier::parse`. +//! +//! Kept in a separate bench binary from `bench_resolver` for measurement +//! stability: each `[[bench]]` runs in its own process, so the short +//! `specifier/*` cases get a fresh, predictable instruction cache instead of +//! competing with the much larger resolver bench code for cache lines. This +//! keeps cold-start cache misses out of the per-case CodSpeed deltas. + +#[cfg(target_family = "wasm")] +use std::alloc::System; +use std::alloc::{GlobalAlloc, Layout}; + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use rspack_resolver::__BenchSpecifier as Specifier; + +#[global_allocator] +#[cfg(not(target_family = "wasm"))] +static GLOBAL: NeverGrowInPlaceAllocator<mimalloc::MiMalloc> = + NeverGrowInPlaceAllocator::new(mimalloc::MiMalloc); + +#[global_allocator] +#[cfg(target_family = "wasm")] +static GLOBAL: NeverGrowInPlaceAllocator<System> = NeverGrowInPlaceAllocator::new(System); + +/// Mirrors the allocator wrapper in `bench_resolver` so allocation costs are +/// measured identically across both bench binaries. See `benches/resolver.rs` +/// for the rationale. +struct NeverGrowInPlaceAllocator<A> { + allocator: A, +} + +impl<A> NeverGrowInPlaceAllocator<A> { + const fn new(allocator: A) -> Self { + Self { allocator } + } +} + +// SAFETY: Methods simply delegate to the wrapped allocator. 
+unsafe impl<A: GlobalAlloc> GlobalAlloc for NeverGrowInPlaceAllocator<A> { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + self.allocator.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + self.allocator.dealloc(ptr, layout) + } +} + +// `base` is repeated to reach `len`, then optional `?query` / `#fragment` +// suffix is appended. Lets us scale a single shape across short/medium/long +// inputs without changing branch coverage. +fn specifier_shaped(base: &str, len: usize, query: Option<&str>, fragment: Option<&str>) -> String { + let mut s = String::with_capacity(len + 64); + while s.len() < len { + s.push_str(base); + } + s.truncate(len); + if let Some(q) = query { + s.push('?'); + s.push_str(q); + } + if let Some(f) = fragment { + s.push('#'); + s.push_str(f); + } + s +} + +// Webpack-style synthesized escapes: inserts `\0#` sequences inside the path +// so the scanner hits the `prev == '\0'` branch and is forced onto the +// Cow::Owned slow path. +fn specifier_with_escapes(parts: &[&str]) -> String { + parts.join("\0#") +} + +fn specifier_branch_cases() -> Vec<(&'static str, String)> { + vec![ + // 1. (None, None) — fast path, Cow::Borrowed + ("none/short", "./foo.js".to_string()), + ( + "none/medium", + "./packages/utils/src/internal/helpers/normalizePath.ts".to_string(), + ), + // 2. (Some, None) — query only + ("query/short", "./a.js?vue".to_string()), + ( + "query/medium", + "./Button.tsx?vue&type=script&lang=ts&scoped=true&hash=abc12345".to_string(), + ), + // 3. (None, Some) — fragment only, scanner breaks early + ("fragment/short", "./a.js#main".to_string()), + ( + "fragment/medium", + "./pages/Home.tsx#section-introduction-to-the-rspack-resolver".to_string(), + ), + // 4. (Some, Some) — query then fragment + ("query+fragment/short", "./a.js?x#y".to_string()), + ( + "query+fragment/medium", + "./Widget.vue?vue&type=template&lang=html#root".to_string(), + ), + // 5. 
multiple `?`, only first becomes query_start + ( + "multi-question", + "./a.js?one?two?three?four?five?six?seven".to_string(), + ), + // 6. `\0#` escape → slow path (Cow::Owned + char_indices filter) + ( + "escape/single", + specifier_with_escapes(&["path/", "real-hash"]), + ), + // 7. multiple escapes — repeats the slow path several times in one input + ( + "escape/many", + specifier_with_escapes(&["./pkg/", "repo", "repo2", "repo3", "repo4#hash"]), + ), + // 8. leading `/`, `.`, `#` → offset=1; the first char is skipped intentionally + ("leading-slash", "/abs/path/to/file.mjs?q#f".to_string()), + ("leading-hash", "#alias/module.cjs?q#f".to_string()), + // 9. bare module — offset=0, scan starts at index 0 + ( + "bare-module", + "@scope/package/sub/path/index.js?q#f".to_string(), + ), + // 10. `?` inside a fragment must NOT be promoted to query — scanner already + // broke at the `#`, but worth pinning so a future refactor can't regress it. + ( + "fragment-with-question", + "./a.js#frag?not-a-query&also-not".to_string(), + ), + ] +} + +// Same shape, four sizes — measures how each branch scales with input length. +// Sizes chosen at ~5x steps so codspeed renders a clear curve. +const SPECIFIER_LENGTH_TIERS: &[(&str, usize)] = &[ + ("len_8", 8), + ("len_64", 64), + ("len_256", 256), + ("len_1536", 1536), +]; + +#[allow(clippy::type_complexity)] +fn specifier_length_shapes() -> Vec<( + &'static str, + &'static str, + Option<&'static str>, + Option<&'static str>, +)> { + vec![ + // Pure path: stresses the loop body without any branch hits. + ("path-only", "./a/b/c/d/e/", None, None), + // Query at the very end: full scan before query_start is set. + ( + "query-tail", + "./a/b/c/d/e/", + Some("vue&type=script&lang=ts"), + None, + ), + // Fragment at the very end: full scan, then early break at last char. + ("frag-tail", "./a/b/c/d/e/", None, Some("section-end")), + // Both at the tail. 
+ ( + "both-tail", + "./a/b/c/d/e/", + Some("vue&type=script"), + Some("hash"), + ), + ] +} + +// Hand-picked from typical rspack/webpack loader chains; these are what the +// parser actually sees in a production resolve flow. +fn specifier_realistic_cases() -> Vec<(&'static str, &'static str)> { + vec![ + ("rw/loader-chain", + "./node_modules/.pnpm/vue-loader@17.0.0/node_modules/vue-loader/dist/templateLoader.js?vue&type=template&id=2f8c6e7a&scoped=true&lang=html"), + ("rw/css-modules", + "./src/components/Sidebar/Sidebar.module.css?ngGlobalStyle&hash=d41d8cd98f00b204e9800998ecf8427e"), + ("rw/asset-query", + "./public/assets/images/hero@2x.png?as=webp&w=1920&h=1080&quality=80&format=webp"), + ("rw/hash-only", + "./shared/utils/index.ts#tree-shaken-export-marker-do-not-strip"), + ("rw/inline-loader", + "!!./node_modules/css-loader/dist/cjs.js??ref--6-oneOf-1-1!./node_modules/postcss-loader/dist/cjs.js??ref--6-oneOf-1-2!./src/App.vue?vue&type=style&index=0&id=7ba5bd90&scoped=true&lang=css"), + ] +} + +fn bench_specifier_branches(c: &mut Criterion) { + let mut group = c.benchmark_group("specifier/branches"); + for (label, input) in specifier_branch_cases() { + group.throughput(Throughput::Bytes(input.len() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), &input, |b, s| { + b.iter(|| { + let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); + black_box(parsed); + }); + }); + } + group.finish(); +} + +fn bench_specifier_length_sweep(c: &mut Criterion) { + let mut group = c.benchmark_group("specifier/length"); + for (shape_label, base, query, fragment) in specifier_length_shapes() { + for (len_label, len) in SPECIFIER_LENGTH_TIERS { + let input = specifier_shaped(base, *len, query, fragment); + let id = BenchmarkId::new(shape_label, len_label); + group.throughput(Throughput::Bytes(input.len() as u64)); + group.bench_with_input(id, &input, |b, s| { + b.iter(|| { + let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); + 
black_box(parsed); + }); + }); + } + } + group.finish(); +} + +fn bench_specifier_escape_scaling(c: &mut Criterion) { + // Slow path scales with both input length AND the number of escapes (the + // filter closure does an O(n*k) `escaped_indexes.contains(&i)` per char). + // Worth a dedicated knob so the optimizer can target it. + let mut group = c.benchmark_group("specifier/escapes"); + for &n in &[1usize, 4, 16, 64] { + // `parts.len()` must equal `n + 1` so that `join("\0#")` inserts exactly + // `n` separators (= `n` escape markers in the input). The first element + // is a path prefix, the last is the real `#fragment`, and we pad the + // middle with `n - 1` filler segments. + let mut parts = vec!["./pkg/"]; + for _ in 0..n.saturating_sub(1) { + parts.push("segment"); + } + parts.push("real#hash"); + let input = specifier_with_escapes(&parts); + group.throughput(Throughput::Bytes(input.len() as u64)); + group.bench_with_input( + BenchmarkId::from_parameter(format!("escapes_{n}")), + &input, + |b, s| { + b.iter(|| { + let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); + black_box(parsed); + }); + }, + ); + } + group.finish(); +} + +fn bench_specifier_realistic(c: &mut Criterion) { + let mut group = c.benchmark_group("specifier/realistic"); + for (label, input) in specifier_realistic_cases() { + group.throughput(Throughput::Bytes(input.len() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), input, |b, s| { + b.iter(|| { + let parsed = Specifier::parse(black_box(s)).unwrap(); + black_box(parsed); + }); + }); + } + group.finish(); +} + +criterion_group!( + specifier, + bench_specifier_branches, + bench_specifier_length_sweep, + bench_specifier_escape_scaling, + bench_specifier_realistic +); +criterion_main!(specifier); From 68da90c63000de2be16050744eac0f5b4e65d570 Mon Sep 17 00:00:00 2001 From: pshu Date: Wed, 27 May 2026 17:14:27 +0800 Subject: [PATCH 2/2] chore(bench): warm parse before b.iter to isolate cold i-cache misses 
CodSpeed's `WARMUP_RUNS=5` inside `b.iter` primes the harness but does not absorb the single cold I-fetch miss (~105 estimated cycles) that a binary-layout shift can introduce on short cases like `specifier/realistic[rw/hash-only]`. Add a per-input `warm_parse` setup pass that runs 32 parses outside the Callgrind instrumentation window, paging in parse code, lazy-initializing the allocator, and training the branch predictor on the actual input before measurement begins. --- benches/specifier.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/benches/specifier.rs b/benches/specifier.rs index e28ff8aa..019fa18c 100644 --- a/benches/specifier.rs +++ b/benches/specifier.rs @@ -5,6 +5,15 @@ //! `specifier/*` cases get a fresh, predictable instruction cache instead of //! competing with the much larger resolver bench code for cache lines. This //! keeps cold-start cache misses out of the per-case CodSpeed deltas. +//! +//! Each `bench_with_input` additionally runs a per-input `warm_parse` setup +//! pass before `b.iter`. CodSpeed's `WARMUP_RUNS=5` happens inside `b.iter` +//! and is intended to prime the harness, not absorb cold cache misses from +//! a freshly-relayouted binary. The pre-`b.iter` warmup pages in parse code, +//! lazy-inits the allocator, and trains the branch predictor on the actual +//! input *before* CodSpeed flips on Callgrind instrumentation, so a single +//! cold I-fetch miss (~105 estimated cycles) no longer dominates the short +//! `specifier/realistic[*]` deltas. #[cfg(target_family = "wasm")] use std::alloc::System; @@ -186,11 +195,26 @@ fn specifier_realistic_cases() -> Vec<(&'static str, &'static str)> { ] } +// Number of pre-`b.iter` parse calls used to warm i-cache, branch predictor +// and allocator state for the input under measurement. 
Sized to run +// the parse hot path (~few KB of instructions) many times over and to dwarf +// CodSpeed's internal `WARMUP_RUNS=5`, which alone is not enough to absorb +// the single cold-fetch miss caused by binary-layout shifts. +const WARM_PARSE_ITERS: usize = 32; + +#[inline(never)] +fn warm_parse(s: &str) { + for _ in 0..WARM_PARSE_ITERS { + let _ = black_box(Specifier::parse(black_box(s))); + } +} + fn bench_specifier_branches(c: &mut Criterion) { let mut group = c.benchmark_group("specifier/branches"); for (label, input) in specifier_branch_cases() { group.throughput(Throughput::Bytes(input.len() as u64)); group.bench_with_input(BenchmarkId::from_parameter(label), &input, |b, s| { + warm_parse(s.as_str()); b.iter(|| { let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); black_box(parsed); @@ -208,6 +232,7 @@ fn bench_specifier_length_sweep(c: &mut Criterion) { let id = BenchmarkId::new(shape_label, len_label); group.throughput(Throughput::Bytes(input.len() as u64)); group.bench_with_input(id, &input, |b, s| { + warm_parse(s.as_str()); b.iter(|| { let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); black_box(parsed); @@ -239,6 +264,7 @@ fn bench_specifier_escape_scaling(c: &mut Criterion) { BenchmarkId::from_parameter(format!("escapes_{n}")), &input, |b, s| { + warm_parse(s.as_str()); b.iter(|| { let parsed = Specifier::parse(black_box(s.as_str())).unwrap(); black_box(parsed); @@ -254,6 +280,7 @@ fn bench_specifier_realistic(c: &mut Criterion) { for (label, input) in specifier_realistic_cases() { group.throughput(Throughput::Bytes(input.len() as u64)); group.bench_with_input(BenchmarkId::from_parameter(label), input, |b, s| { + warm_parse(s); b.iter(|| { let parsed = Specifier::parse(black_box(s)).unwrap(); black_box(parsed);