optave · carlos-alm · Apr 5, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 5, 2026
diff --git a/generated/benchmarks/QUERY-BENCHMARKS.md b/generated/benchmarks/QUERY-BENCHMARKS.md
@@ -5,6 +5,8 @@ Latencies are median over 5 runs. Hub target = most-connected node.
 
 | Version | Engine | fnDeps d1 | fnDeps d3 | fnDeps d5 | fnImpact d1 | fnImpact d3 | fnImpact d5 | diffImpact |
 |---------|--------|----------:|----------:|----------:|------------:|------------:|------------:|-----------:|
+| 3.9.0 | native | 27.4 ↑182% | 27.5 ↑178% | 27.5 ↑184% | 4 ↑11% | 4 ↑11% | 4 ↑14% | 9.3ms ↑4% |
+| 3.9.0 | wasm | 26.9 ↑177% | 26.9 ↑174% | 26.9 ↑177% | 4 ↑14% | 4 ↑14% | 3.9 ↑8% | 7.9ms ↑8% |
 | 3.7.0 | native | 9.7 ↑3% | 9.9 ↑3% | 9.7 ↑3% | 3.6 ↑6% | 3.6 ↑6% | 3.5 ↑6% | 8.9ms ↑7% |
 | 3.7.0 | wasm | 9.7 ~ | 9.8 ~ | 9.7 ~ | 3.5 ↑3% | 3.5 ↑3% | 3.6 ↑6% | 7.3ms ↓19% |
 | 3.6.0 | native | 9.4 | 9.6 | 9.4 | 3.4 | 3.4 | 3.3 | 8.3ms |
@@ -43,39 +45,39 @@ Latencies are median over 5 runs. Hub target = most-connected node.
 
 ### Latest results
 
-**Version:** 3.7.0 | **Date:** 2026-04-01
-
-> **Note:** v3.8.1 query data was removed — it was measured before the `findCallersBatch` fix
-> and showed artificially inflated fnDeps latencies (25ms vs 10ms baseline). The next benchmark
-> run will record accurate post-fix numbers.
+**Version:** 3.9.0 | **Date:** 2026-04-04
 
 #### Native (Rust)
 
 **Targets:** hub=`buildGraph`, mid=`node`, leaf=`docs`
 
 | Metric | Value |
 |--------|------:|
-| fnDeps depth 1 | 9.7ms |
-| fnDeps depth 3 | 9.9ms |
-| fnDeps depth 5 | 9.7ms |
-| fnImpact depth 1 | 3.6ms |
-| fnImpact depth 3 | 3.6ms |
-| fnImpact depth 5 | 3.5ms |
-| diffImpact latency | 8.9ms |
+| fnDeps depth 1 | 27.4ms |
+| fnDeps depth 3 | 27.5ms |
+| fnDeps depth 5 | 27.5ms |
+| fnImpact depth 1 | 4ms |
+| fnImpact depth 3 | 4ms |
+| fnImpact depth 5 | 4ms |
+| diffImpact latency | 9.3ms |
+| diffImpact affected functions | 0 |
+| diffImpact affected files | 0 |
 
 #### WASM
 
 **Targets:** hub=`buildGraph`, mid=`node`, leaf=`docs`
 
 | Metric | Value |
 |--------|------:|
-| fnDeps depth 1 | 9.7ms |
-| fnDeps depth 3 | 9.8ms |
-| fnDeps depth 5 | 9.7ms |
-| fnImpact depth 1 | 3.5ms |
-| fnImpact depth 3 | 3.5ms |
-| fnImpact depth 5 | 3.6ms |
-| diffImpact latency | 7.3ms |
+| fnDeps depth 1 | 26.9ms |
+| fnDeps depth 3 | 26.9ms |
+| fnDeps depth 5 | 26.9ms |
+| fnImpact depth 1 | 4ms |
+| fnImpact depth 3 | 4ms |
+| fnImpact depth 5 | 3.9ms |
+| diffImpact latency | 7.9ms |
+| diffImpact affected functions | 0 |
+| diffImpact affected files | 0 |
 
 <!-- NOTES_START -->
 
@@ -92,7 +94,56 @@ Latencies are median over 5 runs. Hub target = most-connected node.
 **Note (3.3.1):** The ↑157-192% fnDeps/fnImpact deltas for 3.3.1 vs 3.3.0 are not comparable. PR #528 changed the hub target from auto-selected `src/types.ts` (shallow type-barrel) to pinned `buildGraph` (deep orchestration function with 2-3x more edges). There is no engine regression — `diffImpact` improved 20-44% in the same release. Future version comparisons (3.3.1+) are stable and meaningful.
 <!-- NOTES_END -->
 
-<!-- QUERY_BENCHMARK_DATA [
+<!-- QUERY_BENCHMARK_DATA
+[
+  {
+    "version": "3.9.0",
+    "date": "2026-04-04",
+    "wasm": {
+      "targets": {
+        "hub": "buildGraph",
+        "mid": "node",
+        "leaf": "docs"
+      },
+      "fnDeps": {
+        "depth1Ms": 26.9,
+        "depth3Ms": 26.9,
+        "depth5Ms": 26.9
+      },
+      "fnImpact": {
+        "depth1Ms": 4,
+        "depth3Ms": 4,
+        "depth5Ms": 3.9
+      },
+      "diffImpact": {
+        "latencyMs": 7.9,
+        "affectedFunctions": 0,
+        "affectedFiles": 0
+      }
+    },
+    "native": {
+      "targets": {
+        "hub": "buildGraph",
+        "mid": "node",
+        "leaf": "docs"
+      },
+      "fnDeps": {
+        "depth1Ms": 27.4,
+        "depth3Ms": 27.5,
+        "depth5Ms": 27.5
+      },
+      "fnImpact": {
+        "depth1Ms": 4,
+        "depth3Ms": 4,
+        "depth5Ms": 4
+      },
+      "diffImpact": {
+        "latencyMs": 9.3,
+        "affectedFunctions": 0,
+        "affectedFiles": 0
+      }
+    }
+  },
   {
     "version": "3.7.0",
     "date": "2026-04-01",
@@ -936,4 +987,5 @@ Latencies are median over 5 runs. Hub target = most-connected node.
       }
     }
   }
-] -->
+]
+-->
diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts
@@ -113,6 +113,42 @@ function minorGap(a: string, b: string): number {
   return Math.abs(sa[0] * 100 + sa[1] - (sb[0] * 100 + sb[1]));
 }
 
+/**
+ * Distance between two versions for baseline selection: the raw `minorGap`
+ * widened by one for every SKIP_VERSIONS entry lying strictly between them.
+ * When intermediate releases are skipped (e.g. 3.8.0 and 3.8.1), the pair
+ * spans a larger real gap than the minor-version distance alone suggests,
+ * because the baselines in between were invalidated.
+ * @param a - First version string; the pair is ordered internally before skipped versions are counted.
+ * @param b - Second version string.
+ * @returns `minorGap(a, b)` plus the skipped count, or Infinity if the raw gap is Infinity or either version fails `parseSemver`.
+ */
+function effectiveGap(a: string, b: string): number {
+  const raw = minorGap(a, b);
+  if (raw === Infinity) return Infinity;
+  const sa = parseSemver(a);
+  const sb = parseSemver(b);
+  if (!sa || !sb) return Infinity; // unparseable input is treated as infinitely far apart
+  const [lo, hi] = [a, b].sort((x, y) => { // sorts a fresh two-element array, lower version first
+    const px = parseSemver(x)!; // non-null: x and y are a and b, which both parsed above
+    const py = parseSemver(y)!;
+    return px[0] * 10000 + px[1] * 100 + px[2] - (py[0] * 10000 + py[1] * 100 + py[2]);
+  });
+  const loSv = parseSemver(lo)!;
+  const hiSv = parseSemver(hi)!;
+  const loVal = loSv[0] * 10000 + loSv[1] * 100 + loSv[2]; // packed major/minor/patch; ordering holds only while minor and patch stay below 100
+  const hiVal = hiSv[0] * 10000 + hiSv[1] * 100 + hiSv[2];
+  const skippedBetween = new Set( // distinct skipped versions that fall strictly between lo and hi
+    [...SKIP_VERSIONS].filter((v) => {
+      const sv = parseSemver(v);
+      if (!sv) return false; // skip entries that are not parseable versions
+      const val = sv[0] * 10000 + sv[1] * 100 + sv[2];
+      return val > loVal && val < hiVal; // exclusive on both ends: the endpoints themselves are not counted
+    }),
+  );
+  return raw + skippedBetween.size;
+}
+
 /**
  * Find the latest entry for a given engine, then the next non-dev
  * entry with data for that engine (the "previous release").
@@ -121,31 +157,34 @@ function findLatestPair<T extends { version: string }>(
   history: T[],
   hasEngine: (entry: T) => boolean,
 ): { latest: T; previous: T } | null {
-  // Find the latest entry, skipping versions with unreliable data
-  let latestIdx = -1;
-  for (let i = 0; i < history.length; i++) {
-    if (SKIP_VERSIONS.has(history[i].version)) continue;
-    if (hasEngine(history[i])) {
-      latestIdx = i;
-      break;
+  // Try each candidate as "latest", starting from the most recent.
+  // If the latest entry has no valid baseline within the effective gap,
+  // fall through to the next candidate — this ensures we always find
+  // the most recent *comparable* pair rather than giving up when the
+  // newest entry spans a large feature-expansion gap.
+  for (let latestIdx = 0; latestIdx < history.length; latestIdx++) {
+    if (SKIP_VERSIONS.has(history[latestIdx].version)) continue;
+    if (!hasEngine(history[latestIdx])) continue;
+
+    const latestVersion = history[latestIdx].version;
+
+    // Find previous non-dev entry with data for this engine, skipping
+    // versions with known unreliable benchmark data and versions that
+    // are too far apart for meaningful comparison.  The effective gap
+    // includes skipped versions between the pair — when intermediate
+    // releases are in SKIP_VERSIONS, the real distance is larger than
+    // the raw minor-version count.
+    for (let i = latestIdx + 1; i < history.length; i++) {
+      const entry = history[i];
+      if (entry.version === 'dev') continue;
+      if (SKIP_VERSIONS.has(entry.version)) continue;
+      if (!hasEngine(entry)) continue;
+      if (effectiveGap(latestVersion, entry.version) > MAX_VERSION_GAP) continue;
+      return { latest: history[latestIdx], previous: entry };
     }
+    // No valid baseline for this latest — try the next candidate
   }
-  if (latestIdx < 0) return null;
-
-  const latestVersion = history[latestIdx].version;
-
-  // Find previous non-dev entry with data for this engine, skipping
-  // versions with known unreliable benchmark data and versions that
-  // are too far apart for meaningful comparison.
-  for (let i = latestIdx + 1; i < history.length; i++) {
-    const entry = history[i];
-    if (entry.version === 'dev') continue;
-    if (SKIP_VERSIONS.has(entry.version)) continue;
-    if (!hasEngine(entry)) continue;
-    if (minorGap(latestVersion, entry.version) > MAX_VERSION_GAP) continue;
-    return { latest: history[latestIdx], previous: entry };
-  }
-  return null; // No suitable baseline to compare against
+  return null; // No suitable pair found anywhere in the history
 }
 
 /**