utooland · elrrrrrrr · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/.github/workflows/pm-e2e-bench.yml b/.github/workflows/pm-e2e-bench.yml
@@ -143,6 +143,43 @@ jobs:
           name: utoo-linux-x64
           path: target/x86_64-unknown-linux-gnu/release/utoo
           retention-days: 1
+      # manifest-bench is a standalone HTTP-only fetch sweeper used as
+      # the network-only baseline for p1_resolve perf work. Built only
+      # when phases bench is going to run (label or dispatch), so plain
+      # PR builds aren't slowed by the extra crate.
+      - name: Build manifest-bench (p1 baseline)  # runs only for `benchmark`-labelled PRs or phases/pcap dispatches
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        run: cargo build --release --target x86_64-unknown-linux-gnu -p manifest-bench
+      - name: Upload manifest-bench binary  # gated by the same condition as the build step above
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        uses: actions/upload-artifact@v4
+        with:
+          name: manifest-bench-linux-x64  # requested by this name in the later download-artifact step
+          path: target/x86_64-unknown-linux-gnu/release/manifest-bench
+          retention-days: 1
+      # preload-bench: same HTTP setup as manifest-bench, but discovers
+      # names by walking transitive deps from a package.json root —
+      # tests whether a fully self-contained streaming preload can match
+      # standalone manifest-bench's wall on the same workload that
+      # ruborist's path runs at ~2.18s.
+      - name: Build preload-bench  # runs only for `benchmark`-labelled PRs or phases/pcap dispatches
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        run: cargo build --release --target x86_64-unknown-linux-gnu -p preload-bench
+      - name: Upload preload-bench binary  # gated by the same condition as the build step above
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        uses: actions/upload-artifact@v4
+        with:
+          name: preload-bench-linux-x64  # requested by this name in the later download-artifact step
+          path: target/x86_64-unknown-linux-gnu/release/preload-bench
+          retention-days: 1
       # Piggyback on the already-built target/ from the step above: when the
       # PR is labeled `benchmark`, overlay origin/next's tree onto the current
       # workdir and re-run cargo build. cargo's incremental compile only
@@ -516,6 +553,33 @@ jobs:
           mv /tmp/utoo-next-dist/utoo /tmp/utoo-next
           echo "Baseline utoo (next) version: $(/tmp/utoo-next --version)"
           echo "UTOO_NEXT_BIN=/tmp/utoo-next" >> $GITHUB_ENV
+      # Download the manifest-bench binary built by build-linux. Used as
+      # the network-only baseline for p1_resolve work — strips out parse,
+      # BFS, dedup, lockfile write so the wall is pure HTTP fetch.
+      - name: Download manifest-bench binary  # NOTE(review): the upload is label/dispatch-gated; confirm this job is gated the same way
+        uses: actions/download-artifact@v4
+        with:
+          name: manifest-bench-linux-x64
+          path: /tmp/manifest-bench-dist
+      - name: Install manifest-bench
+        run: |
+          chmod +x /tmp/manifest-bench-dist/manifest-bench  # uploaded artifacts do not keep the executable bit
+          mv /tmp/manifest-bench-dist/manifest-bench /tmp/manifest-bench
+          echo "MANIFEST_BENCH_BIN=/tmp/manifest-bench" >> "$GITHUB_ENV"  # quoted redirect target; exposes the path to later steps
+      # Self-contained streaming preload bench — same HTTP setup as
+      # manifest-bench but discovers names via transitive walk from a
+      # package.json. Used to test whether a fully-isolated path can
+      # match standalone manifest-bench's wall on the same workload.
+      - name: Download preload-bench binary  # NOTE(review): the upload is label/dispatch-gated; confirm this job is gated the same way
+        uses: actions/download-artifact@v4
+        with:
+          name: preload-bench-linux-x64
+          path: /tmp/preload-bench-dist
+      - name: Install preload-bench
+        run: |
+          chmod +x /tmp/preload-bench-dist/preload-bench  # uploaded artifacts do not keep the executable bit
+          mv /tmp/preload-bench-dist/preload-bench /tmp/preload-bench
+          echo "PRELOAD_BENCH_BIN=/tmp/preload-bench" >> "$GITHUB_ENV"  # quoted redirect target; exposes the path to later steps
       - name: Verify tools
         run: |
           hyperfine --version
@@ -565,6 +629,91 @@ jobs:
         run: |
           mkdir -p /tmp/pm-bench-output
           bash bench/pm-bench-phases.sh 2>&1 | tee /tmp/pm-bench-output/bench-phases-npmmirror.log
+      # Standalone HTTP-only sweep — sweeps the network-only ceiling
+      # against the same lockfile-derived workload phase-bench just used.
+      # Output goes into the bench logs artifact; no PR comment surface.
+      - name: Standalone manifest-bench (HTTP-only sweep)
+        env:  # inputs reach the script as shell variables rather than inline expression expansion
+          PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
+          REGISTRY: 'https://registry.npmjs.org'
+        run: |
+          set -eu
+          mkdir -p /tmp/pm-bench-output
+          PROJECT_DIR="/tmp/pm-bench/$PROJECT"
+          if [ ! -d "$PROJECT_DIR" ]; then  # clone only when the checkout is not already present
+            mkdir -p /tmp/pm-bench
+            git clone --depth 1 "https://github.com/ant-design/$PROJECT" "$PROJECT_DIR"  # NOTE(review): org is hard-coded; PROJECT must live under ant-design/
+          fi
+          cd "$PROJECT_DIR"
+          if [ ! -f package-lock.json ]; then
+            echo "==> generating lockfile via utoo (one-shot, untimed)"
+            utoo deps --registry "$REGISTRY" || true  # best-effort; the lockfile check below handles a failure
+          fi
+          ls -la package-lock.json || { echo "no lockfile; skipping manifest-bench"; exit 0; }
+
+          MB_LOG=/tmp/pm-bench-output/manifest-bench-npmjs.log
+          {
+            echo "============================================================"
+            echo "manifest-bench: HTTP-only fetch (no parse, no resolver)"
+            echo "  Goal: isolate reqwest/rustls/tokio behaviour from"
+            echo "  ruborist's resolver pipeline. Same metric shape as"
+            echo "  ruborist's p1-breakdown line."
+            echo "============================================================"
+            for CAP in 32 64 96 128 192 256; do  # h1 concurrency sweep; `|| true` keeps going if one run fails
+              echo
+              echo "--- concurrency=$CAP, h1, full manifest, default UA ---"
+              "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \
+                --concurrency "$CAP" --reps 2 --http1-only || true
+            done
+            echo
+            echo "--- concurrency=128, h2 negotiate, full manifest, default UA ---"  # same run without --http1-only
+            "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \
+              --concurrency 128 --reps 2 || true
+            echo
+            echo "--- concurrency=128, h1, single-version endpoint ---"
+            "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \
+              --concurrency 128 --reps 2 --http1-only --single-version || true
+            echo
+            echo "--- concurrency=128, h1, UA=Bun/1.2.21 ---"
+            "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \
+              --concurrency 128 --reps 2 --http1-only --user-agent "Bun/1.2.21" || true
+          } 2>&1 | tee "$MB_LOG"  # stdout+stderr go to the log file and to the job output
+      # Self-contained streaming preload (transitive walk from
+      # package.json) — same HTTP setup as manifest-bench but with a
+      # streaming FuturesUnordered + per-future parse. This tests
+      # whether a fully ruborist-independent path can hit standalone
+      # manifest-bench's wall under the same project workload.
+      - name: Standalone preload-bench (transitive walk sweep)
+        env:  # inputs reach the script as shell variables rather than inline expression expansion
+          PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
+          REGISTRY: 'https://registry.npmjs.org'
+        run: |
+          set -eu
+          mkdir -p /tmp/pm-bench-output
+          PROJECT_DIR="/tmp/pm-bench/$PROJECT"
+          if [ ! -d "$PROJECT_DIR" ]; then  # no clone here: relies on an earlier step having created the checkout
+            echo "no project dir; skipping preload-bench"; exit 0
+          fi
+          PJ="$PROJECT_DIR/package.json"
+          if [ ! -f "$PJ" ]; then  # a missing manifest is a skip (exit 0), not a job failure
+            echo "no package.json; skipping preload-bench"; exit 0
+          fi
+
+          PB_LOG=/tmp/pm-bench-output/preload-bench-npmjs.log
+          {
+            echo "============================================================"
+            echo "preload-bench: streaming transitive-walk preload"
+            echo "  Self-contained (no ruborist deps). Same HTTP setup as"
+            echo "  manifest-bench, but discovers names by walking transitive"
+            echo "  deps from package.json instead of consuming a flat list."
+            echo "============================================================"
+            for CAP in 64 96 128; do  # three caps, 4 reps each; `|| true` keeps going if one run fails
+              echo
+              echo "--- concurrency=$CAP, h1, transitive walk ---"  # NOTE(review): label says h1 but no --http1-only is passed below; confirm the tool's default
+              "$PRELOAD_BENCH_BIN" --package-json "$PJ" --registry "$REGISTRY" \
+                --concurrency "$CAP" --reps 4 || true
+            done
+          } 2>&1 | tee "$PB_LOG"  # stdout+stderr go to the log file and to the job output
       - name: Upload bench logs
         if: always()
         uses: actions/upload-artifact@v4

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,8 @@
 [workspace]
 resolver = "2"
 members  = [
+  "crates/manifest-bench",
+  "crates/preload-bench",
   "crates/pack-api",
   "crates/pack-cli",
   "crates/pack-core",

diff --git a/bench/pm-bench-phases.sh b/bench/pm-bench-phases.sh
@@ -22,6 +22,13 @@ UTOO_NEXT_CACHE="${UTOO_NEXT_CACHE:-/tmp/utoo-next-bench-cache}"
 BUN_CACHE="${BUN_CACHE:-/tmp/bun-bench-cache}"
 export BUN_INSTALL_CACHE_DIR="$BUN_CACHE"
 
+# The utoo path defaults to fast_preload (combined-parse), which gives
+# us a stable baseline to compare against. preload-bench is run
+# separately, as a standalone tool, by the CI workflow: its wall time
+# is the self-contained streaming reference, while ruborist's utoo
+# p1_resolve wall time is the integrated path. The gap between the
+# two is what remains to close.
+
 # Drop optional baselines from the PM list when their binary is not wired
 # up — UTOO_NPM_BIN is set by CI's "Install utoo@npm" step, UTOO_NEXT_BIN
 # by the optional "Build next branch utoo" step. Local runs without them

diff --git a/crates/manifest-bench/Cargo.toml b/crates/manifest-bench/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name        = "manifest-bench"
+version     = "0.0.0"
+edition     = "2024"
+license     = "MIT"
+publish     = false  # keeps this bench-only crate from being published to a registry
+description = "Standalone HTTP-only manifest fetch benchmark, isolating network behaviour from ruborist's resolver pipeline."
+
+[[bin]]
+name = "manifest-bench"
+path = "src/main.rs"
+
+# tombi: format.rules.table-keys-order.disabled = true
+[dependencies]
+anyhow      = { workspace = true }
+clap        = { workspace = true }
+futures     = "0.3"
+serde       = { version = "1", features = ["derive"] }
+serde_json  = { workspace = true }
+tokio       = { workspace = true, features = ["macros", "rt-multi-thread", "fs", "time"] }
+
+# Identical TLS / DNS choices to ruborist so we measure the *protocol*
+# characteristics of the same stack, not a different implementation.
+reqwest             = { version = "0.12", default-features = false, features = [
+  "brotli",
+  "gzip",
+  "http2",
+  "rustls-tls-native-roots-no-provider", # reqwest brings no crypto backend here; NOTE(review): confirm main.rs installs the aws-lc-rs provider enabled on rustls below
+  "socks"
+] }
+rustls              = { version = "0.23", default-features = false, features = [
+  "aws-lc-rs", # the only crypto provider feature enabled for this crate
+  "logging",
+  "std",
+  "tls12"
+] }
+rustls-native-certs = "0.8"