diff --git a/generated/benchmarks/QUERY-BENCHMARKS.md b/generated/benchmarks/QUERY-BENCHMARKS.md index b9b10e08..bc33c72c 100644 --- a/generated/benchmarks/QUERY-BENCHMARKS.md +++ b/generated/benchmarks/QUERY-BENCHMARKS.md @@ -5,6 +5,8 @@ Latencies are median over 5 runs. Hub target = most-connected node. | Version | Engine | fnDeps d1 | fnDeps d3 | fnDeps d5 | fnImpact d1 | fnImpact d3 | fnImpact d5 | diffImpact | |---------|--------|----------:|----------:|----------:|------------:|------------:|------------:|-----------:| +| 3.9.0 | native | 27.4 ↑182% | 27.5 ↑178% | 27.5 ↑184% | 4 ↑11% | 4 ↑11% | 4 ↑14% | 9.3ms ↑4% | +| 3.9.0 | wasm | 26.9 ↑177% | 26.9 ↑174% | 26.9 ↑177% | 4 ↑14% | 4 ↑14% | 3.9 ↑8% | 7.9ms ↑8% | | 3.7.0 | native | 9.7 ↑3% | 9.9 ↑3% | 9.7 ↑3% | 3.6 ↑6% | 3.6 ↑6% | 3.5 ↑6% | 8.9ms ↑7% | | 3.7.0 | wasm | 9.7 ~ | 9.8 ~ | 9.7 ~ | 3.5 ↑3% | 3.5 ↑3% | 3.6 ↑6% | 7.3ms ↓19% | | 3.6.0 | native | 9.4 | 9.6 | 9.4 | 3.4 | 3.4 | 3.3 | 8.3ms | @@ -43,11 +45,7 @@ Latencies are median over 5 runs. Hub target = most-connected node. ### Latest results -**Version:** 3.7.0 | **Date:** 2026-04-01 - -> **Note:** v3.8.1 query data was removed — it was measured before the `findCallersBatch` fix -> and showed artificially inflated fnDeps latencies (25ms vs 10ms baseline). The next benchmark -> run will record accurate post-fix numbers. +**Version:** 3.9.0 | **Date:** 2026-04-04 #### Native (Rust) @@ -55,13 +53,15 @@ Latencies are median over 5 runs. Hub target = most-connected node. | Metric | Value | |--------|------:| -| fnDeps depth 1 | 9.7ms | -| fnDeps depth 3 | 9.9ms | -| fnDeps depth 5 | 9.7ms | -| fnImpact depth 1 | 3.6ms | -| fnImpact depth 3 | 3.6ms | -| fnImpact depth 5 | 3.5ms | -| diffImpact latency | 8.9ms | +| fnDeps depth 1 | 27.4ms | +| fnDeps depth 3 | 27.5ms | +| fnDeps depth 5 | 27.5ms | +| fnImpact depth 1 | 4ms | +| fnImpact depth 3 | 4ms | +| fnImpact depth 5 | 4ms | +| diffImpact latency | 9.3ms | +| diffImpact affected functions | 0 | +| diffImpact affected files | 0 | #### WASM @@ -69,13 +69,15 @@ Latencies are median over 5 runs. Hub target = most-connected node. | Metric | Value | |--------|------:| -| fnDeps depth 1 | 9.7ms | -| fnDeps depth 3 | 9.8ms | -| fnDeps depth 5 | 9.7ms | -| fnImpact depth 1 | 3.5ms | -| fnImpact depth 3 | 3.5ms | -| fnImpact depth 5 | 3.6ms | -| diffImpact latency | 7.3ms | +| fnDeps depth 1 | 26.9ms | +| fnDeps depth 3 | 26.9ms | +| fnDeps depth 5 | 26.9ms | +| fnImpact depth 1 | 4ms | +| fnImpact depth 3 | 4ms | +| fnImpact depth 5 | 3.9ms | +| diffImpact latency | 7.9ms | +| diffImpact affected functions | 0 | +| diffImpact affected files | 0 | @@ -92,7 +94,56 @@ Latencies are median over 5 runs. Hub target = most-connected node. **Note (3.3.1):** The ↑157-192% fnDeps/fnImpact deltas for 3.3.1 vs 3.3.0 are not comparable. PR #528 changed the hub target from auto-selected `src/types.ts` (shallow type-barrel) to pinned `buildGraph` (deep orchestration function with 2-3x more edges). There is no engine regression — `diffImpact` improved 20-44% in the same release. Future version comparisons (3.3.1+) are stable and meaningful. - +] +--> diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 26240301..06218d0b 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -113,6 +113,42 @@ function minorGap(a: string, b: string): number { return Math.abs(sa[0] * 100 + sa[1] - (sb[0] * 100 + sb[1])); } +/** + * Count the effective version gap between two versions, including + * skipped versions between them. When multiple intermediate versions + * are in SKIP_VERSIONS (e.g. 3.8.0 and 3.8.1), the comparison spans + * a larger real gap than the raw minor-version distance suggests. + * Adding skipped-version count to the minor gap prevents comparing + * across feature-expansion boundaries where intermediate baselines + * were invalidated. + */ +function effectiveGap(a: string, b: string): number { + const raw = minorGap(a, b); + if (raw === Infinity) return Infinity; + const sa = parseSemver(a); + const sb = parseSemver(b); + if (!sa || !sb) return Infinity; + const [lo, hi] = [a, b].sort((x, y) => { + const px = parseSemver(x)!; + const py = parseSemver(y)!; + return px[0] * 10000 + px[1] * 100 + px[2] - (py[0] * 10000 + py[1] * 100 + py[2]); + }); + const loSv = parseSemver(lo)!; + const hiSv = parseSemver(hi)!; + const loVal = loSv[0] * 10000 + loSv[1] * 100 + loSv[2]; + const hiVal = hiSv[0] * 10000 + hiSv[1] * 100 + hiSv[2]; + // Count distinct skipped versions that fall between lo and hi + const skippedBetween = new Set( + [...SKIP_VERSIONS].filter((v) => { + const sv = parseSemver(v); + if (!sv) return false; + const val = sv[0] * 10000 + sv[1] * 100 + sv[2]; + return val > loVal && val < hiVal; + }), + ); + return raw + skippedBetween.size; +} + /** * Find the latest entry for a given engine, then the next non-dev * entry with data for that engine (the "previous release"). @@ -121,31 +157,34 @@ function findLatestPair( history: T[], hasEngine: (entry: T) => boolean, ): { latest: T; previous: T } | null { - // Find the latest entry, skipping versions with unreliable data - let latestIdx = -1; - for (let i = 0; i < history.length; i++) { - if (SKIP_VERSIONS.has(history[i].version)) continue; - if (hasEngine(history[i])) { - latestIdx = i; - break; + // Try each candidate as "latest", starting from the most recent. + // If the latest entry has no valid baseline within the effective gap, + // fall through to the next candidate — this ensures we always find + // the most recent *comparable* pair rather than giving up when the + // newest entry spans a large feature-expansion gap. + for (let latestIdx = 0; latestIdx < history.length; latestIdx++) { + if (SKIP_VERSIONS.has(history[latestIdx].version)) continue; + if (!hasEngine(history[latestIdx])) continue; + + const latestVersion = history[latestIdx].version; + + // Find previous non-dev entry with data for this engine, skipping + // versions with known unreliable benchmark data and versions that + // are too far apart for meaningful comparison. The effective gap + // includes skipped versions between the pair — when intermediate + // releases are in SKIP_VERSIONS, the real distance is larger than + // the raw minor-version count. + for (let i = latestIdx + 1; i < history.length; i++) { + const entry = history[i]; + if (entry.version === 'dev') continue; + if (SKIP_VERSIONS.has(entry.version)) continue; + if (!hasEngine(entry)) continue; + if (effectiveGap(latestVersion, entry.version) > MAX_VERSION_GAP) continue; + return { latest: history[latestIdx], previous: entry }; } + // No valid baseline for this latest — try the next candidate } - if (latestIdx < 0) return null; - - const latestVersion = history[latestIdx].version; - - // Find previous non-dev entry with data for this engine, skipping - // versions with known unreliable benchmark data and versions that - // are too far apart for meaningful comparison. - for (let i = latestIdx + 1; i < history.length; i++) { - const entry = history[i]; - if (entry.version === 'dev') continue; - if (SKIP_VERSIONS.has(entry.version)) continue; - if (!hasEngine(entry)) continue; - if (minorGap(latestVersion, entry.version) > MAX_VERSION_GAP) continue; - return { latest: history[latestIdx], previous: entry }; - } - return null; // No suitable baseline to compare against + return null; // No suitable pair found anywhere in the history } /**