rstackjs · stormslowly · May 28, 2026 · May 27, 2026 · May 27, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -25,6 +25,16 @@ harness           = false
 name              = "resolver"
 required-features = ["__internal_bench"]
 
+# Specifier microbenchmarks are built as their own bench binary so their code
+# does not compete for instruction-cache lines with the much larger `resolver`
+# bench code. Cases like `specifier/realistic[rw/hash-only]` execute only a few
+# hundred instructions per iteration, so cold-start cache misses in a combined
+# binary previously surfaced as false-positive CodSpeed regressions.
+[[bench]]
+harness           = false
+name              = "specifier"
+required-features = ["__internal_bench"]
+
 [lints.clippy]
 all   = { level = "warn", priority = -1 }
 cargo = { level = "warn", priority = -1 }

diff --git a/benches/resolver.rs b/benches/resolver.rs
@@ -46,10 +46,8 @@ unsafe impl<A: GlobalAlloc> GlobalAlloc for NeverGrowInPlaceAllocator<A> {
   }
 }
 
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
-use rspack_resolver::{
-  FileSystemOptions, FileSystemOs, ResolveOptions, Resolver, __BenchSpecifier as Specifier,
-};
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use rspack_resolver::{FileSystemOptions, FileSystemOs, ResolveOptions, Resolver};
 use serde_json::Value;
 use tokio::{
   runtime::{self, Builder},
@@ -415,238 +413,10 @@ fn bench_resolver(c: &mut Criterion) {
   );
 }
 
-// ============================================================================
-// Specifier micro-benchmarks
-// ----------------------------------------------------------------------------
-// `parse_query_framgment` lives behind `Specifier::parse`. The wrapper does a
-// single byte read + length check, so benchmarking `parse` is effectively
-// benchmarking the query/fragment scanner. Cases are split into four groups:
-// branch matrix, length sweep, escape scaling, and realistic specimens.
-// ============================================================================
-
-// `path` is repeated to reach `len`, then optional `?query` / `#fragment`
-// suffix is appended. Lets us scale a single shape across short/medium/long
-// inputs without changing branch coverage.
-fn specifier_shaped(base: &str, len: usize, query: Option<&str>, fragment: Option<&str>) -> String {
-  let mut s = String::with_capacity(len + 64);
-  while s.len() < len {
-    s.push_str(base);
-  }
-  s.truncate(len);
-  if let Some(q) = query {
-    s.push('?');
-    s.push_str(q);
-  }
-  if let Some(f) = fragment {
-    s.push('#');
-    s.push_str(f);
-  }
-  s
-}
-
-// Webpack-style synthesized escapes: inserts `\0#` sequences inside the path
-// so the scanner hits the `prev == '\0'` branch and is forced onto the
-// Cow::Owned slow path.
-fn specifier_with_escapes(parts: &[&str]) -> String {
-  parts.join("\0#")
-}
-
-fn specifier_branch_cases() -> Vec<(&'static str, String)> {
-  vec![
-    // 1. (None, None) — fast path, Cow::Borrowed
-    ("none/short", "./foo.js".to_string()),
-    (
-      "none/medium",
-      "./packages/utils/src/internal/helpers/normalizePath.ts".to_string(),
-    ),
-    // 2. (Some, None) — query only
-    ("query/short", "./a.js?vue".to_string()),
-    (
-      "query/medium",
-      "./Button.tsx?vue&type=script&lang=ts&scoped=true&hash=abc12345".to_string(),
-    ),
-    // 3. (None, Some) — fragment only, scanner breaks early
-    ("fragment/short", "./a.js#main".to_string()),
-    (
-      "fragment/medium",
-      "./pages/Home.tsx#section-introduction-to-the-rspack-resolver".to_string(),
-    ),
-    // 4. (Some, Some) — query then fragment
-    ("query+fragment/short", "./a.js?x#y".to_string()),
-    (
-      "query+fragment/medium",
-      "./Widget.vue?vue&type=template&lang=html#root".to_string(),
-    ),
-    // 5. multiple `?`, only first becomes query_start
-    (
-      "multi-question",
-      "./a.js?one?two?three?four?five?six?seven".to_string(),
-    ),
-    // 6. `\0#` escape → slow path (Cow::Owned + char_indices filter)
-    (
-      "escape/single",
-      specifier_with_escapes(&["path/", "real-hash"]),
-    ),
-    // 7. multiple escapes — repeats the slow path several times in one input
-    (
-      "escape/many",
-      specifier_with_escapes(&["./pkg/", "repo", "repo2", "repo3", "repo4#hash"]),
-    ),
-    // 8. leading `/`, `.`, `#` → offset=1; the first char is skipped intentionally
-    ("leading-slash", "/abs/path/to/file.mjs?q#f".to_string()),
-    ("leading-hash", "#alias/module.cjs?q#f".to_string()),
-    // 9. bare module — offset=0, scan starts at index 0
-    (
-      "bare-module",
-      "@scope/package/sub/path/index.js?q#f".to_string(),
-    ),
-    // 10. `?` inside a fragment must NOT be promoted to query — scanner already
-    //     broke at the `#`, but worth pinning so a future refactor can't regress it.
-    (
-      "fragment-with-question",
-      "./a.js#frag?not-a-query&also-not".to_string(),
-    ),
-  ]
-}
-
-// Same shape, four sizes — measures how each branch scales with input length.
-// Sizes chosen at ~5x steps so codspeed renders a clear curve.
-const SPECIFIER_LENGTH_TIERS: &[(&str, usize)] = &[
-  ("len_8", 8),
-  ("len_64", 64),
-  ("len_256", 256),
-  ("len_1536", 1536),
-];
-
-#[allow(clippy::type_complexity)]
-fn specifier_length_shapes() -> Vec<(
-  &'static str,
-  &'static str,
-  Option<&'static str>,
-  Option<&'static str>,
-)> {
-  vec![
-    // Pure path: stresses the loop body without any branch hits.
-    ("path-only", "./a/b/c/d/e/", None, None),
-    // Query at the very end: full scan before query_start is set.
-    (
-      "query-tail",
-      "./a/b/c/d/e/",
-      Some("vue&type=script&lang=ts"),
-      None,
-    ),
-    // Fragment at the very end: full scan, then early break at last char.
-    ("frag-tail", "./a/b/c/d/e/", None, Some("section-end")),
-    // Both at the tail.
-    (
-      "both-tail",
-      "./a/b/c/d/e/",
-      Some("vue&type=script"),
-      Some("hash"),
-    ),
-  ]
-}
-
-// Hand-picked from typical rspack/webpack loader chains; these are what the
-// parser actually sees in a production resolve flow.
-fn specifier_realistic_cases() -> Vec<(&'static str, &'static str)> {
-  vec![
-    ("rw/loader-chain",
-     "./node_modules/.pnpm/vue-loader@17.0.0/node_modules/vue-loader/dist/templateLoader.js?vue&type=template&id=2f8c6e7a&scoped=true&lang=html"),
-    ("rw/css-modules",
-     "./src/components/Sidebar/Sidebar.module.css?ngGlobalStyle&hash=d41d8cd98f00b204e9800998ecf8427e"),
-    ("rw/asset-query",
-     "./public/assets/images/hero@2x.png?as=webp&w=1920&h=1080&quality=80&format=webp"),
-    ("rw/hash-only",
-     "./shared/utils/index.ts#tree-shaken-export-marker-do-not-strip"),
-    ("rw/inline-loader",
-     "!!./node_modules/css-loader/dist/cjs.js??ref--6-oneOf-1-1!./node_modules/postcss-loader/dist/cjs.js??ref--6-oneOf-1-2!./src/App.vue?vue&type=style&index=0&id=7ba5bd90&scoped=true&lang=css"),
-  ]
-}
-
-fn bench_specifier_branches(c: &mut Criterion) {
-  let mut group = c.benchmark_group("specifier/branches");
-  for (label, input) in specifier_branch_cases() {
-    group.throughput(Throughput::Bytes(input.len() as u64));
-    group.bench_with_input(BenchmarkId::from_parameter(label), &input, |b, s| {
-      b.iter(|| {
-        let parsed = Specifier::parse(black_box(s.as_str())).unwrap();
-        black_box(parsed);
-      });
-    });
-  }
-  group.finish();
-}
-
-fn bench_specifier_length_sweep(c: &mut Criterion) {
-  let mut group = c.benchmark_group("specifier/length");
-  for (shape_label, base, query, fragment) in specifier_length_shapes() {
-    for (len_label, len) in SPECIFIER_LENGTH_TIERS {
-      let input = specifier_shaped(base, *len, query, fragment);
-      let id = BenchmarkId::new(shape_label, len_label);
-      group.throughput(Throughput::Bytes(input.len() as u64));
-      group.bench_with_input(id, &input, |b, s| {
-        b.iter(|| {
-          let parsed = Specifier::parse(black_box(s.as_str())).unwrap();
-          black_box(parsed);
-        });
-      });
-    }
-  }
-  group.finish();
-}
-
-fn bench_specifier_escape_scaling(c: &mut Criterion) {
-  // Slow path scales with both input length AND the number of escapes (the
-  // filter closure does an O(n*k) `escaped_indexes.contains(&i)` per char).
-  // Worth a dedicated knob so the optimizer can target it.
-  let mut group = c.benchmark_group("specifier/escapes");
-  for &n in &[1usize, 4, 16, 64] {
-    // `parts.len()` must equal `n + 1` so that `join("\0#")` inserts exactly
-    // `n` separators (= `n` escape markers in the input). The first element
-    // is a path prefix, the last is the real `#fragment`, and we pad the
-    // middle with `n - 1` filler segments.
-    let mut parts = vec!["./pkg/"];
-    for _ in 0..n.saturating_sub(1) {
-      parts.push("segment");
-    }
-    parts.push("real#hash");
-    let input = specifier_with_escapes(&parts);
-    group.throughput(Throughput::Bytes(input.len() as u64));
-    group.bench_with_input(
-      BenchmarkId::from_parameter(format!("escapes_{n}")),
-      &input,
-      |b, s| {
-        b.iter(|| {
-          let parsed = Specifier::parse(black_box(s.as_str())).unwrap();
-          black_box(parsed);
-        });
-      },
-    );
-  }
-  group.finish();
-}
-
-fn bench_specifier_realistic(c: &mut Criterion) {
-  let mut group = c.benchmark_group("specifier/realistic");
-  for (label, input) in specifier_realistic_cases() {
-    group.throughput(Throughput::Bytes(input.len() as u64));
-    group.bench_with_input(BenchmarkId::from_parameter(label), input, |b, s| {
-      b.iter(|| {
-        let parsed = Specifier::parse(black_box(s)).unwrap();
-        black_box(parsed);
-      });
-    });
-  }
-  group.finish();
-}
+// Specifier microbenchmarks live in `benches/specifier.rs` (a separate
+// `[[bench]]` binary) so the very short `specifier/*` cases do not compete
+// with the resolver bench code for instruction-cache lines. See that file
+// for the rationale.
 
-criterion_group!(
-  resolver,
-  bench_resolver,
-  bench_specifier_branches,
-  bench_specifier_length_sweep,
-  bench_specifier_escape_scaling,
-  bench_specifier_realistic
-);
+criterion_group!(resolver, bench_resolver);
 criterion_main!(resolver);