Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2bf7115
perf(pm): bump manifests-concurrency-limit 64 → 256 + add fetch break…
elrrrrrrr May 8, 2026
8ac97ae
chore(p1): revert concurrency 256 → 64 + restore manifest-bench
elrrrrrrr May 8, 2026
5690a9b
ci(p1): wire manifest-bench standalone HTTP sweep into bench-phases-l…
elrrrrrrr May 8, 2026
94af458
perf(ruborist): inline JSON parse, drop rayon::spawn dispatch
elrrrrrrr May 8, 2026
ee5f5f4
perf(ruborist): switch JSON parse to tokio spawn_blocking
elrrrrrrr May 8, 2026
16404fc
perf(ruborist): switch extract_core_version to spawn_blocking too
elrrrrrrr May 8, 2026
460a538
revert + instrument(ruborist): post-build phase timing
elrrrrrrr May 8, 2026
58d49aa
instrument(ruborist): preload main loop dispatch + result split
elrrrrrrr May 8, 2026
8114bf4
perf(pm): grow rayon pool to max(num_cpus, 8) to drain p1 extract queue
elrrrrrrr May 8, 2026
394f6c9
perf(pm): skip preload for p1 path; BFS does per-level parallel prefetch
elrrrrrrr May 8, 2026
596cd20
perf(pm): fast_preload bypasses UnifiedRegistry for utoo deps path
elrrrrrrr May 8, 2026
2e74bba
perf(pm): dispatch fast_preload settle to rayon to free tokio runtime
elrrrrrrr May 8, 2026
04c9ec3
perf(pm): bump manifests-concurrency-limit 64 → 96 (manifest-bench best)
elrrrrrrr May 8, 2026
6455852
perf(pm): fast_preload populates (name, spec) cache slot for BFS fast…
elrrrrrrr May 8, 2026
4bbcae8
perf(pm): fuse primary settle into fetch task to drop dispatch RTT
elrrrrrrr May 8, 2026
671ac98
perf(pm): combined-parse fetch path eliminates per-fetch double simd_…
elrrrrrrr May 8, 2026
542d7f1
perf(pm): bump manifests-concurrency-limit 96 → 128
elrrrrrrr May 8, 2026
c8768ac
revert(pm): manifests-concurrency-limit back to 96
elrrrrrrr May 8, 2026
3be7487
perf(pm): mb_resolve experimental fetch path (parallel track to fast_…
elrrrrrrr May 9, 2026
02cc12e
perf(pm): mb_resolve v3 — two-phase pure HTTP + rayon batch parse
elrrrrrrr May 9, 2026
24165fb
fix(pm): mb_resolve v3 — restore spec-level dedup to terminate
elrrrrrrr May 9, 2026
41822b0
perf(pm): preload-bench — self-contained streaming preload baseline
elrrrrrrr May 9, 2026
01d1513
perf(pm): integrate standalone preload into ruborist for lockfile-onl…
elrrrrrrr May 9, 2026
05486b5
experiment(pm): swap DiskManifestStore for NoopStore
elrrrrrrr May 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions .github/workflows/pm-e2e-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,43 @@ jobs:
name: utoo-linux-x64
path: target/x86_64-unknown-linux-gnu/release/utoo
retention-days: 1
# manifest-bench: standalone, HTTP-only fetch sweeper that serves as the
# network-only baseline for p1_resolve perf work. The `if` gate below
# restricts the build to runs where the phases bench will execute
# (PR labeled `benchmark`, or a manual dispatch targeting
# pm-bench-phases / pm-bench-pcap), so ordinary PR builds do not pay
# for compiling the extra crate.
- name: Build manifest-bench (p1 baseline)
  if: >
    (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
    (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
  run: cargo build --release --target x86_64-unknown-linux-gnu -p manifest-bench
# Publish the release binary as a short-lived artifact (1 day) so a
# later job can download it. Gated on the bench label / dispatch target.
- name: Upload manifest-bench binary
  if: >
    (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
    (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
  uses: actions/upload-artifact@v4
  with:
    name: manifest-bench-linux-x64
    path: target/x86_64-unknown-linux-gnu/release/manifest-bench
    retention-days: 1
# preload-bench uses the same HTTP setup as manifest-bench, but finds
# package names by walking transitive deps from a package.json root.
# It checks whether a fully self-contained streaming preload can match
# standalone manifest-bench's wall time on the workload that ruborist's
# path runs at ~2.18s.
- name: Build preload-bench
  if: >
    (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
    (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
  run: cargo build --release --target x86_64-unknown-linux-gnu -p preload-bench
# Publish the preload-bench release binary as a short-lived artifact
# (1 day). Gated on the bench label / dispatch target.
- name: Upload preload-bench binary
  if: >
    (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
    (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
  uses: actions/upload-artifact@v4
  with:
    name: preload-bench-linux-x64
    path: target/x86_64-unknown-linux-gnu/release/preload-bench
    retention-days: 1
# Piggyback on the already-built target/ from the step above: when the
# PR is labeled `benchmark`, overlay origin/next's tree onto the current
# workdir and re-run cargo build. cargo's incremental compile only
Expand Down Expand Up @@ -516,6 +553,33 @@ jobs:
mv /tmp/utoo-next-dist/utoo /tmp/utoo-next
echo "Baseline utoo (next) version: $(/tmp/utoo-next --version)"
echo "UTOO_NEXT_BIN=/tmp/utoo-next" >> $GITHUB_ENV
# Fetch the manifest-bench binary produced by build-linux. It is the
# network-only baseline for p1_resolve work: parse, BFS, dedup and
# lockfile write are stripped out, so its wall time is pure HTTP fetch.
- name: Download manifest-bench binary
  uses: actions/download-artifact@v4
  with:
    name: manifest-bench-linux-x64
    path: /tmp/manifest-bench-dist
# Make the downloaded binary executable, move it to a stable path, and
# export that path as MANIFEST_BENCH_BIN for later steps in this job.
- name: Install manifest-bench
  run: |
    chmod +x /tmp/manifest-bench-dist/manifest-bench
    mv /tmp/manifest-bench-dist/manifest-bench /tmp/manifest-bench
    # Quote the redirect target so a runner path containing spaces or
    # glob characters cannot be word-split (ShellCheck SC2086).
    echo "MANIFEST_BENCH_BIN=/tmp/manifest-bench" >> "$GITHUB_ENV"
# preload-bench is the self-contained streaming preload bench: same
# HTTP setup as manifest-bench, but names are discovered through a
# transitive walk from a package.json. It is used to check whether a
# fully isolated path can match standalone manifest-bench's wall time
# on the same workload.
- name: Download preload-bench binary
  uses: actions/download-artifact@v4
  with:
    name: preload-bench-linux-x64
    path: /tmp/preload-bench-dist
# Make the downloaded binary executable, move it to a stable path, and
# export that path as PRELOAD_BENCH_BIN for later steps in this job.
- name: Install preload-bench
  run: |
    chmod +x /tmp/preload-bench-dist/preload-bench
    mv /tmp/preload-bench-dist/preload-bench /tmp/preload-bench
    # Quote the redirect target so a runner path containing spaces or
    # glob characters cannot be word-split (ShellCheck SC2086).
    echo "PRELOAD_BENCH_BIN=/tmp/preload-bench" >> "$GITHUB_ENV"
- name: Verify tools
run: |
hyperfine --version
Expand Down Expand Up @@ -565,6 +629,91 @@ jobs:
run: |
mkdir -p /tmp/pm-bench-output
bash bench/pm-bench-phases.sh 2>&1 | tee /tmp/pm-bench-output/bench-phases-npmmirror.log
# Standalone HTTP-only sweep: measures the network-only ceiling on the
# same lockfile-derived workload the phases bench just used. The output
# only goes into the bench-logs artifact (no PR comment surface), so the
# step is best-effort: missing inputs skip it and individual bench runs
# are allowed to fail.
- name: Standalone manifest-bench (HTTP-only sweep)
  env:
    PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
    REGISTRY: 'https://registry.npmjs.org'
  run: |
    set -eu
    mkdir -p /tmp/pm-bench-output
    PROJECT_DIR="/tmp/pm-bench/$PROJECT"
    if [ ! -d "$PROJECT_DIR" ]; then
      mkdir -p /tmp/pm-bench
      # The fallback clone hard-codes the ant-design GitHub org, so it
      # can only succeed for projects hosted there. Skip (rather than
      # fail the job under `set -e`) when the clone does not work,
      # matching the other "skipping" exits in this step.
      git clone --depth 1 "https://github.com/ant-design/$PROJECT" "$PROJECT_DIR" || {
        echo "could not clone $PROJECT; skipping manifest-bench"
        exit 0
      }
    fi
    cd "$PROJECT_DIR"
    if [ ! -f package-lock.json ]; then
      echo "==> generating lockfile via utoo (one-shot, untimed)"
      utoo deps --registry "$REGISTRY" || true
    fi
    ls -la package-lock.json || { echo "no lockfile; skipping manifest-bench"; exit 0; }

    # run_mb CONCURRENCY [extra flags...]
    # One manifest-bench invocation against the lockfile with 2 reps.
    # Failures are tolerated so one bad configuration does not abort
    # the rest of the sweep.
    run_mb() {
      cap="$1"
      shift
      "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \
        --concurrency "$cap" --reps 2 "$@" || true
    }

    MB_LOG=/tmp/pm-bench-output/manifest-bench-npmjs.log
    {
      echo "============================================================"
      echo "manifest-bench: HTTP-only fetch (no parse, no resolver)"
      echo " Goal: isolate reqwest/rustls/tokio behaviour from"
      echo " ruborist's resolver pipeline. Same metric shape as"
      echo " ruborist's p1-breakdown line."
      echo "============================================================"
      # Concurrency sweep over HTTP/1.1 with the full manifest endpoint.
      for CAP in 32 64 96 128 192 256; do
        echo
        echo "--- concurrency=$CAP, h1, full manifest, default UA ---"
        run_mb "$CAP" --http1-only
      done
      # Single-variable comparisons at a fixed concurrency of 128.
      echo
      echo "--- concurrency=128, h2 negotiate, full manifest, default UA ---"
      run_mb 128
      echo
      echo "--- concurrency=128, h1, single-version endpoint ---"
      run_mb 128 --http1-only --single-version
      echo
      echo "--- concurrency=128, h1, UA=Bun/1.2.21 ---"
      run_mb 128 --http1-only --user-agent "Bun/1.2.21"
    } 2>&1 | tee "$MB_LOG"
# Self-contained streaming preload: a transitive walk starting from
# package.json, with the same HTTP setup as manifest-bench but using a
# streaming FuturesUnordered and per-future parse. It tests whether a
# path that is fully independent of ruborist can reach standalone
# manifest-bench's wall time on the same project workload.
- name: Standalone preload-bench (transitive walk sweep)
  env:
    PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
    REGISTRY: 'https://registry.npmjs.org'
  run: |
    set -eu
    mkdir -p /tmp/pm-bench-output

    # Guard clauses: without a checked-out project or its package.json
    # there is nothing to walk, so exit successfully.
    PROJECT_DIR="/tmp/pm-bench/$PROJECT"
    [ -d "$PROJECT_DIR" ] || { echo "no project dir; skipping preload-bench"; exit 0; }
    PKG_JSON="$PROJECT_DIR/package.json"
    [ -f "$PKG_JSON" ] || { echo "no package.json; skipping preload-bench"; exit 0; }

    LOG_FILE=/tmp/pm-bench-output/preload-bench-npmjs.log
    {
      echo "============================================================"
      echo "preload-bench: streaming transitive-walk preload"
      echo " Self-contained (no ruborist deps). Same HTTP setup as"
      echo " manifest-bench, but discovers names by walking transitive"
      echo " deps from package.json instead of consuming a flat list."
      echo "============================================================"
      # NOTE(review): the banner below says "h1" but no HTTP/1-only flag
      # is passed — confirm preload-bench defaults to HTTP/1.1.
      for CAP in 64 96 128; do
        echo
        echo "--- concurrency=$CAP, h1, transitive walk ---"
        # Best-effort: a failed run must not abort the remaining sweep.
        "$PRELOAD_BENCH_BIN" --package-json "$PKG_JSON" --registry "$REGISTRY" \
          --concurrency "$CAP" --reps 4 || true
      done
    } 2>&1 | tee "$LOG_FILE"
- name: Upload bench logs
if: always()
uses: actions/upload-artifact@v4
Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[workspace]
resolver = "2"
members = [
"crates/manifest-bench",
"crates/preload-bench",
"crates/pack-api",
"crates/pack-cli",
"crates/pack-core",
Expand Down
7 changes: 7 additions & 0 deletions bench/pm-bench-phases.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ UTOO_NEXT_CACHE="${UTOO_NEXT_CACHE:-/tmp/utoo-next-bench-cache}"
BUN_CACHE="${BUN_CACHE:-/tmp/bun-bench-cache}"
export BUN_INSTALL_CACHE_DIR="$BUN_CACHE"

# The utoo path defaults to fast_preload (combined-parse), which gives
# us a stable baseline to compare against. preload-bench is run
# separately, as a standalone tool, by the CI workflow: its wall time is
# the self-contained streaming reference, while ruborist's utoo
# p1_resolve wall time is the integrated path. The gap between the two
# is what remains to close.

# Drop optional baselines from the PM list when their binary is not wired
# up — UTOO_NPM_BIN is set by CI's "Install utoo@npm" step, UTOO_NEXT_BIN
# by the optional "Build next branch utoo" step. Local runs without them
Expand Down
37 changes: 37 additions & 0 deletions crates/manifest-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Internal benchmark crate; `publish = false` keeps it off the registry.
[package]
name = "manifest-bench"
version = "0.0.0"
edition = "2024"
license = "MIT"
publish = false
description = """\
Standalone HTTP-only manifest fetch benchmark, \
isolating network behaviour from ruborist's resolver pipeline.\
"""

# Single binary target. CI builds it with `-p manifest-bench` and uploads
# `release/manifest-bench`, so the binary name must stay in sync with the
# workflow.
[[bin]]
name = "manifest-bench"
path = "src/main.rs"

# tombi: format.rules.table-keys-order.disabled = true
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true }
futures = "0.3"
serde = { version = "1", features = ["derive"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "fs", "time"] }

# HTTP/TLS stack. Kept identical to ruborist's TLS / DNS choices so the
# benchmark measures the *protocol* characteristics of the same stack
# rather than those of a different implementation.
reqwest = { version = "0.12", default-features = false, features = [
    "brotli",
    "gzip",
    "http2",
    "rustls-tls-native-roots-no-provider",
    "socks",
] }
rustls = { version = "0.23", default-features = false, features = [
    "aws-lc-rs",
    "logging",
    "std",
    "tls12",
] }
rustls-native-certs = "0.8"
Loading
Loading