From 1a06501d588333f991e77f317100a69e7b08f89d Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 7 Jun 2026 06:03:25 -0300 Subject: [PATCH 1/3] =?UTF-8?q?bench(atomicassets):=20add=20http-bench.mjs?= =?UTF-8?q?=20=E2=80=94=20e2e=20HTTP=20load=20harness=20(WormDB=20vs=20ato?= =?UTF-8?q?micassets-api)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit End-to-end served-latency + throughput driver for the AtomicAssets read path. Cycle B made WormDB serve the identical eosio-contract-api shape + query params as the reference Postgres atomicassets-api, so the same URL corpus hits both targets. - Samples a real corpus (asset_ids / owners / collections / (coll,schema) pairs) from a source endpoint, then runs a weighted mixed workload (point / collection / owner / faceted / browse / account) against each target under C concurrent workers. - Reports per-query-type + overall p50/p95/p99 latency and sustained req/s; warms caches first; runs targets sequentially so the client doesn't self-contend. - Env-driven: WORMDB / ATOMIC base URLs, N, C, SAMPLE, SAMPLE_FROM. Portable ESM (node or bun). Resource use (CPU/RSS) is sampled separately per host while it runs. Validated against the jungle4 wormdb-aa endpoint (0 errors, full per-type breakdown). The WAX-232M side-by-side vs the production atomicassets-api is the proving run (remote env). 
--- .../atomicassets/validate/http-bench.mjs | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 benchmark/atomicassets/validate/http-bench.mjs diff --git a/benchmark/atomicassets/validate/http-bench.mjs b/benchmark/atomicassets/validate/http-bench.mjs new file mode 100644 index 0000000..0643dc7 --- /dev/null +++ b/benchmark/atomicassets/validate/http-bench.mjs @@ -0,0 +1,117 @@ +#!/usr/bin/env node +// http-bench.mjs — end-to-end HTTP latency + throughput for the AtomicAssets read path, comparing one or +// more endpoints under the SAME query corpus. Cycle B made WormDB serve the identical eosio-contract-api +// shape + query params as the reference Postgres atomicassets-api, so the same URLs hit both. +// +// WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io N=10000 C=50 node http-bench.mjs +// +// Env: WORMDB / ATOMIC = base URLs of the targets (set one or both); N = requests per target; C = +// concurrency; SAMPLE = corpus size; SAMPLE_FROM = base URL to sample the corpus from (default WORMDB). +// +// It samples a REAL corpus (asset_ids / owners / collections / (coll,schema) pairs) from a source, then +// runs a weighted mixed workload (point / collection / owner / faceted / browse / account) against each +// target and reports per-query-type + overall p50/p95/p99 latency and sustained req/s. Latency is the +// client-observed served-HTTP time (what a consumer sees) — the apples-to-apples number across both. +// Resource use (CPU/RSS) is measured separately on each host (e.g. `docker stats`) while this runs. + +const env = process.env; +const N = Number(env.N ?? 10000); +const C = Number(env.C ?? 50); +const SAMPLE = Number(env.SAMPLE ?? 
400); +const P = "/atomicassets/v1"; + +const norm = (u) => u.replace(/\/+$/, ""); +const targets = []; +if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB) }); +if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC) }); +if (!targets.length) { + console.error("set WORMDB and/or ATOMIC to the target base URLs"); + process.exit(1); +} +const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC); + +const now = () => performance.now(); +const pick = (a) => a[(Math.random() * a.length) | 0]; +async function getJson(url) { + try { + const r = await fetch(url); + return r.ok ? await r.json() : null; + } catch { + return null; + } +} + +// ── 1) sample a real corpus from one endpoint ── +console.log(`[bench] sampling ${SAMPLE} assets from ${sampleBase} …`); +const seed = await getJson(`${sampleBase}${P}/assets?limit=${SAMPLE}`); +const rows = seed?.data ?? []; +if (!rows.length) { + console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`); + process.exit(1); +} +const collOf = (a) => a.collection?.collection_name ?? a.collection_name ?? null; +const schemaOf = (a) => a.schema?.schema_name ?? a.schema_name ?? 
null; +const ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))]; +const owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))]; +const colls = [...new Set(rows.map(collOf).filter(Boolean))]; +const csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()]; +console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`); + +// ── 2) weighted query mix (roughly real-API-traffic-shaped; per-type latency is reported separately) ── +const MIX = [ + { type: "point", w: 35, url: () => `${P}/assets/${pick(ids)}` }, + { type: "coll", w: 25, url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` }, + { type: "owner", w: 15, url: () => `${P}/assets?owner=${pick(owners)}&limit=100` }, + { type: "faceted", w: 10, url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } }, + { type: "browse", w: 8, url: () => `${P}/assets?limit=100` }, + { type: "account", w: 7, url: () => `${P}/accounts/${pick(owners)}` }, +].filter((m) => m.type === "point" || m.type === "browse" || (m.type === "faceted" ? csPairs.length : m.type === "account" || m.type === "owner" ? owners.length : colls.length)); +const totalW = MIX.reduce((s, m) => s + m.w, 0); +function pickMix() { + let r = Math.random() * totalW; + for (const m of MIX) if ((r -= m.w) < 0) return m; + return MIX[0]; +} + +const pctile = (arr, p) => (arr.length ? arr.sort((a, b) => a - b)[Math.min(arr.length - 1, Math.floor(arr.length * p))] : NaN); +const f = (x) => (Number.isFinite(x) ? 
x.toFixed(2) : "—"); + +async function run(target) { + const lat = Object.fromEntries(MIX.map((m) => [m.type, []])); + const all = []; + let errs = 0; + const per = Math.ceil(N / C); + // warm up (fill caches / JIT) before measuring + await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetch(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } })); + const t0 = now(); + await Promise.all( + Array.from({ length: C }, async () => { + for (let i = 0; i < per; i++) { + const m = pickMix(); + const s = now(); + const ok = await fetch(`${target.base}${m.url()}`).then((r) => r.text()).then(() => true).catch(() => false); + const d = now() - s; + if (ok) { lat[m.type].push(d); all.push(d); } else errs++; + } + }), + ); + const wall = now() - t0; + const done = per * C; + console.log(`\n══ ${target.name} (${target.base}) ══`); + console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${((done / wall) * 1000).toFixed(0)} req/s (c=${C}) errors=${errs}`); + console.log(` type n p50 p95 p99 (ms)`); + for (const m of MIX) { + const a = lat[m.type]; + console.log(` ${m.type.padEnd(8)} ${String(a.length).padStart(6)} ${f(pctile(a, 0.5)).padStart(6)} ${f(pctile(a, 0.95)).padStart(6)} ${f(pctile(a, 0.99)).padStart(6)}`); + } + console.log(` ${"OVERALL".padEnd(8)} ${String(all.length).padStart(6)} ${f(pctile(all, 0.5)).padStart(6)} ${f(pctile(all, 0.95)).padStart(6)} ${f(pctile(all, 0.99)).padStart(6)}`); + return { name: target.name, reqps: (done / wall) * 1000, p50: pctile(all, 0.5), p99: pctile(all, 0.99) }; +} + +const results = []; +for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client + +if (results.length > 1) { + console.log(`\n══ side-by-side ══`); + for (const r of results) console.log(` ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s p50=${f(r.p50)}ms p99=${f(r.p99)}ms`); +} From ccc502ea63127f1f74d11dc0551e2d413a7b6354 Mon Sep 17 
00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 7 Jun 2026 06:08:15 -0300 Subject: [PATCH 2/3] =?UTF-8?q?bench(atomicassets):=20harden=20http-bench?= =?UTF-8?q?=20=E2=80=94=20duration=20mode,=20docker-stats,=20results=20fil?= =?UTF-8?q?es,=20mix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up hardening on the load harness: - DURATION=<seconds> steady-state mode (each worker loops to a deadline) alongside N-per-target. - STATS_WORMDB / STATS_ATOMIC sample container CPU%/RSS via `docker stats` during that target's run (self-scheduling --no-stream polls; silently skipped if docker is absent). - Writes <OUT>.json + <OUT>.md — per-type + overall p50/95/99, min/mean/max, a latency histogram, resource use, and a side-by-side table — a committable proving artifact. - MIX=type=w,… overrides the query weights; corpus now sampled across newest+oldest pages for collection/owner variety. - README: a benchmark section (env table + the proving-run caveat: WAX-232M on native Linux is the real test; a Windows-loopback jungle4 run only validates the harness). Validated on jungle4 wormdb-aa: 8s/c20 -> 6.5k req/s, p50 2.5ms / p99 10ms, RSS ~74MiB, 0 errors; JSON+MD emitted with the histogram + resource sample. --- benchmark/atomicassets/validate/README.md | 32 +++ .../atomicassets/validate/http-bench.mjs | 185 ++++++++++++++---- 2 files changed, 179 insertions(+), 38 deletions(-) diff --git a/benchmark/atomicassets/validate/README.md b/benchmark/atomicassets/validate/README.md index 61f990d..312c94e 100644 --- a/benchmark/atomicassets/validate/README.md +++ b/benchmark/atomicassets/validate/README.md @@ -43,3 +43,35 @@ exact matches vs `test.wax.api.atomicassets.io`. On-disk footprint (WiredTiger-c fully indexed ≈ **8 GB**, dominated by the `data.$**` wildcard. (Report on-disk `storageSize`, not the uncompressed `size`.) 
The one parity fix this surfaced: asset `float`/`double` attributes render as strings on the live API (templates keep numbers) — handled in `map_asset`. + +## HTTP load benchmark (WormDB vs the Postgres atomicassets-api) + +`http-bench.mjs` measures **end-to-end served latency + throughput** for the read path. Cycle B made +WormDB serve the identical eosio-contract-api shape + query params as the reference Postgres +`atomicassets-api`, so the **same URL corpus hits both** and the comparison is apples-to-apples. + +```sh +# one or both targets; same corpus, run sequentially so the client never self-contends +WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io \ + N=50000 C=100 STATS_WORMDB=aa-wormdb OUT=wax-run node http-bench.mjs +``` + +It samples a real corpus (ids / owners / collections / (coll,schema) pairs, newest+oldest pages), runs a +weighted mix — `point` `/assets/:id`, `coll`, `owner`, `faceted` (coll+schema), `browse`, `account` — +and reports per-type + overall **p50/p95/p99** (min/mean/max + a latency histogram in the JSON) and +**req/s**. Writes `<OUT>.json` + `<OUT>.md`. + +| env | meaning | +|---|---| +| `WORMDB` / `ATOMIC` | target base URLs (set one or both) | +| `N` / `DURATION` | requests per target, or seconds of steady-state load (`DURATION` wins) | +| `C` | concurrency | +| `SAMPLE` / `SAMPLE_FROM` | corpus size / base URL to sample from (default `WORMDB`, falling back to `ATOMIC`) | +| `MIX` | override weights, e.g. `MIX=point=50,coll=20,owner=10,faceted=10,browse=5,account=5` | +| `STATS_WORMDB` / `STATS_ATOMIC` | container to sample CPU/RSS via `docker stats` during that run | +| `OUT` | results-file prefix (default `bench-results`) | + +**Proving run = WAX 232M on native Linux**, both targets on the same data. Note that latency on the +**Windows Docker-Desktop loopback adds ~2–4 ms** and a tiny testnet segment makes postings trivial — so a +jungle4 run validates the harness but is *not* a proving number. 
The WSEG micro-bench already shows the +storage win (~33×) + µs in-process lookups; this harness is the served-HTTP p50/95/99 + throughput half. diff --git a/benchmark/atomicassets/validate/http-bench.mjs b/benchmark/atomicassets/validate/http-bench.mjs index 0643dc7..2c9b6a3 100644 --- a/benchmark/atomicassets/validate/http-bench.mjs +++ b/benchmark/atomicassets/validate/http-bench.mjs @@ -3,27 +3,38 @@ // more endpoints under the SAME query corpus. Cycle B made WormDB serve the identical eosio-contract-api // shape + query params as the reference Postgres atomicassets-api, so the same URLs hit both. // -// WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io N=10000 C=50 node http-bench.mjs +// WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io \ +// N=50000 C=100 STATS_WORMDB=aa-wormdb OUT=wax-run node http-bench.mjs // -// Env: WORMDB / ATOMIC = base URLs of the targets (set one or both); N = requests per target; C = -// concurrency; SAMPLE = corpus size; SAMPLE_FROM = base URL to sample the corpus from (default WORMDB). +// Targets: WORMDB / ATOMIC = base URLs (set one or both). +// Load: N = requests per target (ignored if DURATION set); DURATION = seconds of steady-state load +// per target; C = concurrency. +// Corpus: SAMPLE = corpus size; SAMPLE_FROM = base URL to sample from (default WORMDB). Sampled across +// newest+oldest pages for collection/owner variety. +// Mix: MIX = override weights, e.g. MIX=point=50,coll=20,owner=10,faceted=10,browse=5,account=5 +// Resource: STATS_WORMDB / STATS_ATOMIC = container name to sample CPU/RSS via `docker stats` during +// that target's run (skipped if docker is absent / the name is unset). +// Output: OUT = results-file prefix (default "bench-results") -> writes .json + .md. 
// -// It samples a REAL corpus (asset_ids / owners / collections / (coll,schema) pairs) from a source, then -// runs a weighted mixed workload (point / collection / owner / faceted / browse / account) against each -// target and reports per-query-type + overall p50/p95/p99 latency and sustained req/s. Latency is the -// client-observed served-HTTP time (what a consumer sees) — the apples-to-apples number across both. -// Resource use (CPU/RSS) is measured separately on each host (e.g. `docker stats`) while this runs. +// Reports per-query-type + overall p50/p95/p99 (min/mean/max + a latency histogram in the JSON) and +// sustained req/s. Latency is the client-observed served-HTTP time — the apples-to-apples consumer number. +// Portable ESM (node or bun). Targets run sequentially so the client never self-contends. + +import { spawn } from "node:child_process"; +import { writeFileSync } from "node:fs"; const env = process.env; const N = Number(env.N ?? 10000); const C = Number(env.C ?? 50); +const DURATION = env.DURATION ? Number(env.DURATION) : 0; // seconds; >0 => duration-based const SAMPLE = Number(env.SAMPLE ?? 400); +const OUT = env.OUT ?? "bench-results"; const P = "/atomicassets/v1"; const norm = (u) => u.replace(/\/+$/, ""); const targets = []; -if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB) }); -if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC) }); +if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB), stats: env.STATS_WORMDB }); +if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC), stats: env.STATS_ATOMIC }); if (!targets.length) { console.error("set WORMDB and/or ATOMIC to the target base URLs"); process.exit(1); @@ -32,6 +43,8 @@ const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? 
env.ATOMIC); const now = () => performance.now(); const pick = (a) => a[(Math.random() * a.length) | 0]; +const sum = (a) => a.reduce((x, y) => x + y, 0); +const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—"); async function getJson(url) { try { const r = await fetch(url); @@ -41,10 +54,21 @@ async function getJson(url) { } } -// ── 1) sample a real corpus from one endpoint ── -console.log(`[bench] sampling ${SAMPLE} assets from ${sampleBase} …`); -const seed = await getJson(`${sampleBase}${P}/assets?limit=${SAMPLE}`); -const rows = seed?.data ?? []; +// ── 1) sample a real corpus (newest + oldest pages for collection/owner variety) ── +async function sampleCorpus(base, want) { + const out = []; + for (let page = 1; out.length < want && page <= 25; page++) { + const j = await getJson(`${base}${P}/assets?limit=200&order=desc&page=${page}`); + const d = j?.data ?? []; + if (!d.length) break; + out.push(...d); + } + const asc = await getJson(`${base}${P}/assets?limit=200&order=asc`); + if (asc?.data) out.push(...asc.data); + return out; +} +console.log(`[bench] sampling ~${SAMPLE} assets from ${sampleBase} …`); +const rows = await sampleCorpus(sampleBase, SAMPLE); if (!rows.length) { console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`); process.exit(1); @@ -55,63 +79,148 @@ const ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))]; const owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))]; const colls = [...new Set(rows.map(collOf).filter(Boolean))]; const csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()]; +const corpus = { ids: ids.length, owners: owners.length, collections: colls.length, csPairs: csPairs.length }; console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`); -// ── 2) weighted query mix 
(roughly real-API-traffic-shaped; per-type latency is reported separately) ── +// ── 2) weighted query mix (default ~real-API-traffic-shaped; override with MIX=type=w,…) ── +const W = { point: 35, coll: 25, owner: 15, faceted: 10, browse: 8, account: 7 }; +if (env.MIX) for (const part of env.MIX.split(",")) { const [k, v] = part.split("="); if (k in W && Number.isFinite(+v)) W[k] = +v; } +const have = { point: ids.length, coll: colls.length, owner: owners.length, faceted: csPairs.length, browse: 1, account: owners.length }; const MIX = [ - { type: "point", w: 35, url: () => `${P}/assets/${pick(ids)}` }, - { type: "coll", w: 25, url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` }, - { type: "owner", w: 15, url: () => `${P}/assets?owner=${pick(owners)}&limit=100` }, - { type: "faceted", w: 10, url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } }, - { type: "browse", w: 8, url: () => `${P}/assets?limit=100` }, - { type: "account", w: 7, url: () => `${P}/accounts/${pick(owners)}` }, -].filter((m) => m.type === "point" || m.type === "browse" || (m.type === "faceted" ? csPairs.length : m.type === "account" || m.type === "owner" ? 
owners.length : colls.length)); -const totalW = MIX.reduce((s, m) => s + m.w, 0); + { type: "point", url: () => `${P}/assets/${pick(ids)}` }, + { type: "coll", url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` }, + { type: "owner", url: () => `${P}/assets?owner=${pick(owners)}&limit=100` }, + { type: "faceted", url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } }, + { type: "browse", url: () => `${P}/assets?limit=100` }, + { type: "account", url: () => `${P}/accounts/${pick(owners)}` }, +].map((m) => ({ ...m, w: W[m.type] })).filter((m) => m.w > 0 && have[m.type] > 0); +const totalW = sum(MIX.map((m) => m.w)); function pickMix() { let r = Math.random() * totalW; for (const m of MIX) if ((r -= m.w) < 0) return m; return MIX[0]; } -const pctile = (arr, p) => (arr.length ? arr.sort((a, b) => a - b)[Math.min(arr.length - 1, Math.floor(arr.length * p))] : NaN); -const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—"); +// ── stats helpers ── +const pctile = (a, p) => (a.length ? 
a[Math.min(a.length - 1, Math.floor(a.length * p))] : NaN); +const HBINS = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]; +function stats(arr) { + if (!arr.length) return { n: 0 }; + const a = [...arr].sort((x, y) => x - y); + const histo = {}; + let lo = 0; + for (const hi of HBINS) { histo[`<${hi}`] = a.filter((x) => x >= lo && x < hi).length; lo = hi; } + histo[`>=${HBINS[HBINS.length - 1]}`] = a.filter((x) => x >= HBINS[HBINS.length - 1]).length; + return { n: a.length, min: a[0], mean: sum(a) / a.length, p50: pctile(a, 0.5), p95: pctile(a, 0.95), p99: pctile(a, 0.99), max: a[a.length - 1], histo }; +} + +// ── docker-stats resource sampler (self-scheduling --no-stream polls) ── +function startStats(container) { + if (!container) return null; + const samples = []; + let stopped = false; + (function tick() { + if (stopped) return; + let out = ""; + let proc; + try { + proc = spawn("docker", ["stats", "--no-stream", "--format", "{{.CPUPerc}};{{.MemUsage}}", container], { stdio: ["ignore", "pipe", "ignore"] }); + } catch { + stopped = true; + return; + } + proc.stdout.on("data", (d) => (out += d.toString())); + proc.on("error", () => (stopped = true)); // docker not installed + proc.on("close", () => { + const m = out.trim().match(/([\d.]+)%\s*;\s*([\d.]+)\s*([KMGi]+)/i); + if (m) { + let mem = parseFloat(m[2]); + const u = m[3].toLowerCase(); + if (u.startsWith("g")) mem *= 1024; + else if (u.startsWith("k")) mem /= 1024; // -> MiB + samples.push({ cpu: parseFloat(m[1]), mem }); + } + if (!stopped) setTimeout(tick, 250); + }); + })(); + return { + stop() { + stopped = true; + if (!samples.length) return null; + const cpu = samples.map((s) => s.cpu), mem = samples.map((s) => s.mem); + return { container, samples: samples.length, cpuAvgPct: sum(cpu) / cpu.length, cpuPeakPct: Math.max(...cpu), memAvgMiB: sum(mem) / mem.length, memPeakMiB: Math.max(...mem) }; + }, + }; +} async function run(target) { const lat = Object.fromEntries(MIX.map((m) => [m.type, []])); const 
all = []; - let errs = 0; - const per = Math.ceil(N / C); - // warm up (fill caches / JIT) before measuring + let errs = 0, done = 0; + // warm up (fill caches / JIT) before measuring + before sampling resources await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetch(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } })); + const sampler = startStats(target.stats); const t0 = now(); + const deadline = DURATION ? t0 + DURATION * 1000 : 0; + const per = DURATION ? Infinity : Math.ceil(N / C); await Promise.all( Array.from({ length: C }, async () => { for (let i = 0; i < per; i++) { + if (DURATION && now() >= deadline) break; const m = pickMix(); const s = now(); const ok = await fetch(`${target.base}${m.url()}`).then((r) => r.text()).then(() => true).catch(() => false); const d = now() - s; + done++; if (ok) { lat[m.type].push(d); all.push(d); } else errs++; } }), ); const wall = now() - t0; - const done = per * C; + const res = sampler ? 
sampler.stop() : null; + const perType = Object.fromEntries(MIX.map((m) => [m.type, stats(lat[m.type])])); + const overall = stats(all); + const reqps = (done / wall) * 1000; + console.log(`\n══ ${target.name} (${target.base}) ══`); - console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${((done / wall) * 1000).toFixed(0)} req/s (c=${C}) errors=${errs}`); - console.log(` type n p50 p95 p99 (ms)`); - for (const m of MIX) { - const a = lat[m.type]; - console.log(` ${m.type.padEnd(8)} ${String(a.length).padStart(6)} ${f(pctile(a, 0.5)).padStart(6)} ${f(pctile(a, 0.95)).padStart(6)} ${f(pctile(a, 0.99)).padStart(6)}`); - } - console.log(` ${"OVERALL".padEnd(8)} ${String(all.length).padStart(6)} ${f(pctile(all, 0.5)).padStart(6)} ${f(pctile(all, 0.95)).padStart(6)} ${f(pctile(all, 0.99)).padStart(6)}`); - return { name: target.name, reqps: (done / wall) * 1000, p50: pctile(all, 0.5), p99: pctile(all, 0.99) }; + console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${reqps.toFixed(0)} req/s (c=${C}${DURATION ? 
`, ${DURATION}s` : ""}) errors=${errs}`); + console.log(` type n p50 p95 p99 max (ms)`); + for (const m of MIX) { const t = perType[m.type]; console.log(` ${m.type.padEnd(8)} ${String(t.n).padStart(6)} ${f(t.p50).padStart(6)} ${f(t.p95).padStart(6)} ${f(t.p99).padStart(6)} ${f(t.max).padStart(6)}`); } + console.log(` ${"OVERALL".padEnd(8)} ${String(overall.n).padStart(6)} ${f(overall.p50).padStart(6)} ${f(overall.p95).padStart(6)} ${f(overall.p99).padStart(6)} ${f(overall.max).padStart(6)}`); + if (res) console.log(` resource (${res.container}): cpu avg ${f(res.cpuAvgPct)}% peak ${f(res.cpuPeakPct)}% | rss avg ${f(res.memAvgMiB)}MiB peak ${f(res.memPeakMiB)}MiB (${res.samples} samples)`); + return { name: target.name, base: target.base, reqps, errors: errs, wallMs: wall, concurrency: C, overall, perType, resource: res }; } const results = []; for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client +// ── side-by-side + results files ── if (results.length > 1) { console.log(`\n══ side-by-side ══`); - for (const r of results) console.log(` ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s p50=${f(r.p50)}ms p99=${f(r.p99)}ms`); + for (const r of results) console.log(` ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s p50=${f(r.overall.p50)}ms p95=${f(r.overall.p95)}ms p99=${f(r.overall.p99)}ms`); +} + +const report = { generatedBy: "http-bench.mjs", params: { N, C, DURATION, SAMPLE, mix: W, sampleBase }, corpus, targets: results }; +writeFileSync(`${OUT}.json`, JSON.stringify(report, null, 2)); + +const md = []; +md.push(`# AtomicAssets HTTP benchmark`); +md.push(""); +md.push(`Load: ${DURATION ? `${DURATION}s steady-state` : `${N} reqs`} per target, concurrency ${C}. Corpus: ${corpus.ids} ids / ${corpus.owners} owners / ${corpus.collections} collections / ${corpus.csPairs} (coll,schema) pairs sampled from \`${sampleBase}\`. 
Latency = client-observed served-HTTP time (ms).`); +md.push(""); +md.push(`| target | req/s | p50 | p95 | p99 | max | errors | cpu avg/peak | rss avg/peak (MiB) |`); +md.push(`|---|---:|---:|---:|---:|---:|---:|---:|---:|`); +for (const r of results) { + const o = r.overall, res = r.resource; + md.push(`| ${r.name} | ${f(r.reqps)} | ${f(o.p50)} | ${f(o.p95)} | ${f(o.p99)} | ${f(o.max)} | ${r.errors} | ${res ? `${f(res.cpuAvgPct)}%/${f(res.cpuPeakPct)}%` : "—"} | ${res ? `${f(res.memAvgMiB)}/${f(res.memPeakMiB)}` : "—"} |`); +} +md.push(""); +for (const r of results) { + md.push(`## ${r.name} — per query type`); + md.push(`| type | n | p50 | p95 | p99 | max |`); + md.push(`|---|---:|---:|---:|---:|---:|`); + for (const m of MIX) { const t = r.perType[m.type]; md.push(`| ${m.type} | ${t.n} | ${f(t.p50)} | ${f(t.p95)} | ${f(t.p99)} | ${f(t.max)} |`); } + md.push(""); } +writeFileSync(`${OUT}.md`, md.join("\n")); +console.log(`\n[bench] wrote ${OUT}.json + ${OUT}.md`); From a22769aa063a6dbf1623cb3cb6405fc0d99a21fe Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 7 Jun 2026 06:54:19 -0300 Subject: [PATCH 3/3] =?UTF-8?q?bench(atomicassets):=20address=20PR=20#12?= =?UTF-8?q?=20review=20=E2=80=94=20correctness,=20fairness,=20clean=20exit?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bot review (Codex/Gemini/Copilot) + an adversarial multi-lens self-review: - Count non-2xx/timeout/network failures as errors, never as fast responses: drain the body and return r.ok; per-request timeout via AbortController+clearTimeout (fetchT). - req/s counts SUCCESSFUL requests only, so fast error pages can't inflate throughput; a nonzero error count loudly flags the run as suspect. - Percentile = nearest-rank Math.round(p*(n-1)) — no p99==max collapse on small n. - docker-stats mem regex handles B/KiB/MiB/GiB/TiB (was MiB/GiB only). 
- Empty query mix fails fast with a clear message. - Wrap execution in main() + process.exitCode (no abrupt process.exit while undici sockets are open — that tripped a libuv "handle closing" assertion on Windows). - Fairness (adversarial review, confirmed): with 2+ targets, reduce the corpus to the cross-target INTERSECTION so a divergent dataset (live API vs lagging local) can't make targets do different work for the same URL; record dropped counts in JSON `coverage` so any divergence is visible. README documents the same-data assumption. Verified on jungle4: single-target unaffected (intersection gated off); 2-target run (same endpoint) drops 0, emits the side-by-side + coverage; all fatal paths exit cleanly. --- benchmark/atomicassets/validate/README.md | 6 + .../atomicassets/validate/http-bench.mjs | 318 +++++++++++------- 2 files changed, 207 insertions(+), 117 deletions(-) diff --git a/benchmark/atomicassets/validate/README.md b/benchmark/atomicassets/validate/README.md index 312c94e..87f849d 100644 --- a/benchmark/atomicassets/validate/README.md +++ b/benchmark/atomicassets/validate/README.md @@ -71,6 +71,12 @@ and reports per-type + overall **p50/p95/p99** (min/mean/max + a latency histogr | `STATS_WORMDB` / `STATS_ATOMIC` | container to sample CPU/RSS via `docker stats` during that run | | `OUT` | results-file prefix (default `bench-results`) | +**Same-data assumption.** A fair side-by-side requires both targets on the same chain/head. With 2+ +targets the harness reduces the corpus to the **cross-target intersection** (entities present on every +target) so a divergent dataset can't make them do different work for the same URL; the dropped counts are +recorded in the JSON `coverage` block so any divergence is visible. (Point misses already surface as +errors; the intersection is what protects the list queries, whose misses are HTTP 200 with smaller pages.) + **Proving run = WAX 232M on native Linux**, both targets on the same data. 
Note that latency on the **Windows Docker-Desktop loopback adds ~2–4 ms** and a tiny testnet segment makes postings trivial — so a jungle4 run validates the harness but is *not* a proving number. The WSEG micro-bench already shows the diff --git a/benchmark/atomicassets/validate/http-bench.mjs b/benchmark/atomicassets/validate/http-bench.mjs index 2c9b6a3..aeb0815 100644 --- a/benchmark/atomicassets/validate/http-bench.mjs +++ b/benchmark/atomicassets/validate/http-bench.mjs @@ -8,17 +8,22 @@ // // Targets: WORMDB / ATOMIC = base URLs (set one or both). // Load: N = requests per target (ignored if DURATION set); DURATION = seconds of steady-state load -// per target; C = concurrency. +// per target; C = concurrency; TIMEOUT_MS = per-request deadline (a timeout counts as error). // Corpus: SAMPLE = corpus size; SAMPLE_FROM = base URL to sample from (default WORMDB). Sampled across -// newest+oldest pages for collection/owner variety. +// newest+oldest pages for collection/owner variety. With 2+ targets the corpus is reduced to +// the cross-target INTERSECTION (entities present on every target) so a divergent dataset (a +// live API vs a lagging local) can't make targets do different work for the same URL; the +// dropped counts land in the JSON `coverage` so any divergence is visible. Fair side-by-sides +// still assume both targets are on the same chain/head. // Mix: MIX = override weights, e.g. MIX=point=50,coll=20,owner=10,faceted=10,browse=5,account=5 // Resource: STATS_WORMDB / STATS_ATOMIC = container name to sample CPU/RSS via `docker stats` during // that target's run (skipped if docker is absent / the name is unset). // Output: OUT = results-file prefix (default "bench-results") -> writes .json + .md. // // Reports per-query-type + overall p50/p95/p99 (min/mean/max + a latency histogram in the JSON) and -// sustained req/s. Latency is the client-observed served-HTTP time — the apples-to-apples consumer number. -// Portable ESM (node or bun). 
Targets run sequentially so the client never self-contends. +// sustained req/s (successful only). Latency is the client-observed served-HTTP time — the apples-to- +// apples consumer number. Portable ESM (node or bun). Targets run sequentially so the client never +// self-contends. Non-2xx, timeouts, and network errors are counted as failures, never as fast responses. import { spawn } from "node:child_process"; import { writeFileSync } from "node:fs"; @@ -27,34 +32,38 @@ const env = process.env; const N = Number(env.N ?? 10000); const C = Number(env.C ?? 50); const DURATION = env.DURATION ? Number(env.DURATION) : 0; // seconds; >0 => duration-based +const TIMEOUT_MS = Number(env.TIMEOUT_MS ?? 10000); // per-request deadline; a timeout counts as an error const SAMPLE = Number(env.SAMPLE ?? 400); const OUT = env.OUT ?? "bench-results"; const P = "/atomicassets/v1"; const norm = (u) => u.replace(/\/+$/, ""); -const targets = []; -if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB), stats: env.STATS_WORMDB }); -if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC), stats: env.STATS_ATOMIC }); -if (!targets.length) { - console.error("set WORMDB and/or ATOMIC to the target base URLs"); - process.exit(1); -} -const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC); - const now = () => performance.now(); const pick = (a) => a[(Math.random() * a.length) | 0]; const sum = (a) => a.reduce((x, y) => x + y, 0); const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—"); + +// fetch with a per-request deadline. A manual AbortController + clearTimeout (not AbortSignal.timeout) so +// the timer is freed the instant the request settles — no lingering 10s handles piling up over a big run. 
+async function fetchT(url) { + const ac = new AbortController(); + const timer = setTimeout(() => ac.abort(), TIMEOUT_MS); + try { + return await fetch(url, { signal: ac.signal }); + } finally { + clearTimeout(timer); + } +} async function getJson(url) { try { - const r = await fetch(url); + const r = await fetchT(url); return r.ok ? await r.json() : null; } catch { return null; } } -// ── 1) sample a real corpus (newest + oldest pages for collection/owner variety) ── +// sample a real corpus (newest + oldest pages for collection/owner variety) async function sampleCorpus(base, want) { const out = []; for (let page = 1; out.length < want && page <= 25; page++) { @@ -67,42 +76,33 @@ async function sampleCorpus(base, want) { if (asc?.data) out.push(...asc.data); return out; } -console.log(`[bench] sampling ~${SAMPLE} assets from ${sampleBase} …`); -const rows = await sampleCorpus(sampleBase, SAMPLE); -if (!rows.length) { - console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`); - process.exit(1); -} -const collOf = (a) => a.collection?.collection_name ?? a.collection_name ?? null; -const schemaOf = (a) => a.schema?.schema_name ?? a.schema_name ?? 
null; -const ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))]; -const owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))]; -const colls = [...new Set(rows.map(collOf).filter(Boolean))]; -const csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()]; -const corpus = { ids: ids.length, owners: owners.length, collections: colls.length, csPairs: csPairs.length }; -console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`); - -// ── 2) weighted query mix (default ~real-API-traffic-shaped; override with MIX=type=w,…) ── -const W = { point: 35, coll: 25, owner: 15, faceted: 10, browse: 8, account: 7 }; -if (env.MIX) for (const part of env.MIX.split(",")) { const [k, v] = part.split("="); if (k in W && Number.isFinite(+v)) W[k] = +v; } -const have = { point: ids.length, coll: colls.length, owner: owners.length, faceted: csPairs.length, browse: 1, account: owners.length }; -const MIX = [ - { type: "point", url: () => `${P}/assets/${pick(ids)}` }, - { type: "coll", url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` }, - { type: "owner", url: () => `${P}/assets?owner=${pick(owners)}&limit=100` }, - { type: "faceted", url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } }, - { type: "browse", url: () => `${P}/assets?limit=100` }, - { type: "account", url: () => `${P}/accounts/${pick(owners)}` }, -].map((m) => ({ ...m, w: W[m.type] })).filter((m) => m.w > 0 && have[m.type] > 0); -const totalW = sum(MIX.map((m) => m.w)); -function pickMix() { - let r = Math.random() * totalW; - for (const m of MIX) if ((r -= m.w) < 0) return m; - return MIX[0]; + +// Restrict a candidate list to entries that return data on EVERY target, so divergent datasets (a live +// API vs a lagging/pruned local WormDB) 
can't make the two targets do different work for the "same" URL. +// Bounded concurrency; a getJson failure/timeout counts as absent (conservatively dropped). This is the +// fairness guard for list queries (owner/collection/faceted), where a miss is an HTTP 200 with a smaller +// page rather than a flagged 404 — so without it, data-shape divergence would silently skew percentiles. +async function keepPresentOnAll(items, toUrl, bases) { + const kept = []; + let dropped = 0, idx = 0; + async function worker() { + while (idx < items.length) { + const it = items[idx++]; + let present = true; + for (const b of bases) { + const j = await getJson(`${b}${toUrl(it)}`); + const has = j && (Array.isArray(j.data) ? j.data.length > 0 : j.data != null); + if (!has) { present = false; break; } + } + if (present) kept.push(it); else dropped++; + } + } + await Promise.all(Array.from({ length: Math.min(12, items.length) }, worker)); + return { kept, dropped }; } // ── stats helpers ── -const pctile = (a, p) => (a.length ? a[Math.min(a.length - 1, Math.floor(a.length * p))] : NaN); +const pctile = (a, p) => (a.length ? 
a[Math.round(p * (a.length - 1))] : NaN); // nearest-rank; 1.0 -> max, no collapse on small n const HBINS = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]; function stats(arr) { if (!arr.length) return { n: 0 }; @@ -132,12 +132,16 @@ function startStats(container) { proc.stdout.on("data", (d) => (out += d.toString())); proc.on("error", () => (stopped = true)); // docker not installed proc.on("close", () => { - const m = out.trim().match(/([\d.]+)%\s*;\s*([\d.]+)\s*([KMGi]+)/i); + // docker MemUsage unit is B / KiB / MiB / GiB / TiB (the "used" side, before the " / limit") + const m = out.trim().match(/([\d.]+)%\s*;\s*([\d.]+)\s*([A-Za-z]+)/); if (m) { let mem = parseFloat(m[2]); const u = m[3].toLowerCase(); - if (u.startsWith("g")) mem *= 1024; - else if (u.startsWith("k")) mem /= 1024; // -> MiB + if (u.startsWith("t")) mem *= 1024 * 1024; + else if (u.startsWith("g")) mem *= 1024; + else if (u.startsWith("m")) mem *= 1; // already MiB + else if (u.startsWith("k")) mem /= 1024; + else mem /= 1024 * 1024; // plain bytes -> MiB samples.push({ cpu: parseFloat(m[1]), mem }); } if (!stopped) setTimeout(tick, 250); @@ -153,74 +157,154 @@ function startStats(container) { }; } -async function run(target) { - const lat = Object.fromEntries(MIX.map((m) => [m.type, []])); - const all = []; - let errs = 0, done = 0; - // warm up (fill caches / JIT) before measuring + before sampling resources - await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetch(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } })); - const sampler = startStats(target.stats); - const t0 = now(); - const deadline = DURATION ? t0 + DURATION * 1000 : 0; - const per = DURATION ? 
Infinity : Math.ceil(N / C); - await Promise.all( - Array.from({ length: C }, async () => { - for (let i = 0; i < per; i++) { - if (DURATION && now() >= deadline) break; - const m = pickMix(); - const s = now(); - const ok = await fetch(`${target.base}${m.url()}`).then((r) => r.text()).then(() => true).catch(() => false); - const d = now() - s; - done++; - if (ok) { lat[m.type].push(d); all.push(d); } else errs++; - } - }), - ); - const wall = now() - t0; - const res = sampler ? sampler.stop() : null; - const perType = Object.fromEntries(MIX.map((m) => [m.type, stats(lat[m.type])])); - const overall = stats(all); - const reqps = (done / wall) * 1000; - - console.log(`\n══ ${target.name} (${target.base}) ══`); - console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${reqps.toFixed(0)} req/s (c=${C}${DURATION ? `, ${DURATION}s` : ""}) errors=${errs}`); - console.log(` type n p50 p95 p99 max (ms)`); - for (const m of MIX) { const t = perType[m.type]; console.log(` ${m.type.padEnd(8)} ${String(t.n).padStart(6)} ${f(t.p50).padStart(6)} ${f(t.p95).padStart(6)} ${f(t.p99).padStart(6)} ${f(t.max).padStart(6)}`); } - console.log(` ${"OVERALL".padEnd(8)} ${String(overall.n).padStart(6)} ${f(overall.p50).padStart(6)} ${f(overall.p95).padStart(6)} ${f(overall.p99).padStart(6)} ${f(overall.max).padStart(6)}`); - if (res) console.log(` resource (${res.container}): cpu avg ${f(res.cpuAvgPct)}% peak ${f(res.cpuPeakPct)}% | rss avg ${f(res.memAvgMiB)}MiB peak ${f(res.memPeakMiB)}MiB (${res.samples} samples)`); - return { name: target.name, base: target.base, reqps, errors: errs, wallMs: wall, concurrency: C, overall, perType, resource: res }; -} +async function main() { + const targets = []; + if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB), stats: env.STATS_WORMDB }); + if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC), stats: env.STATS_ATOMIC }); + if (!targets.length) { + console.error("set WORMDB and/or ATOMIC to the target base URLs"); + 
process.exitCode = 1; + return; + } + const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC); -const results = []; -for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client + // ── 1) sample a real corpus ── + console.log(`[bench] sampling ~${SAMPLE} assets from ${sampleBase} …`); + const rows = await sampleCorpus(sampleBase, SAMPLE); + if (!rows.length) { + console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`); + process.exitCode = 1; + return; + } + const collOf = (a) => a.collection?.collection_name ?? a.collection_name ?? null; + const schemaOf = (a) => a.schema?.schema_name ?? a.schema_name ?? null; + let ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))]; + let owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))]; + let colls = [...new Set(rows.map(collOf).filter(Boolean))]; + let csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()]; + console.log(`[bench] sampled: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`); -// ── side-by-side + results files ── -if (results.length > 1) { - console.log(`\n══ side-by-side ══`); - for (const r of results) console.log(` ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s p50=${f(r.overall.p50)}ms p95=${f(r.overall.p95)}ms p99=${f(r.overall.p99)}ms`); -} + // With 2+ targets, restrict the corpus to entities present on ALL of them so each target serves the + // same rows even if the datasets diverge (live API vs lagging local). Dropped counts make divergence + // visible; residual list-query page-size differences (a present owner with different counts across + // out-of-sync targets) are not fully equalized — reflected in `coverage` for the reader to judge. 
+ let coverage = null; + if (targets.length > 1) { + const bases = targets.map((t) => t.base); + console.log(`[bench] cross-target intersection over ${bases.length} targets…`); + const ri = await keepPresentOnAll(ids, (id) => `${P}/assets/${id}`, bases); + const ro = await keepPresentOnAll(owners, (o) => `${P}/assets?owner=${o}&limit=1`, bases); + const rc = await keepPresentOnAll(colls, (c) => `${P}/assets?collection_name=${c}&limit=1`, bases); + const rcs = await keepPresentOnAll(csPairs, (cs) => `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=1`, bases); + coverage = { idsDropped: ri.dropped, ownersDropped: ro.dropped, collectionsDropped: rc.dropped, csPairsDropped: rcs.dropped }; + ids = ri.kept; owners = ro.kept; colls = rc.kept; csPairs = rcs.kept; + const totalDropped = ri.dropped + ro.dropped + rc.dropped + rcs.dropped; + if (totalDropped) console.log(`[bench] ⚠ dropped ${totalDropped} entities not present on all targets (ids -${ri.dropped}, owners -${ro.dropped}, colls -${rc.dropped}, pairs -${rcs.dropped}) — targets are NOT fully in sync; list-query page sizes may still differ`); + if (!ids.length && !owners.length && !colls.length) { + console.error("[bench] the targets' datasets do not overlap — nothing common to benchmark; are they on the same chain/head?"); + process.exitCode = 1; + return; + } + } + const corpus = { ids: ids.length, owners: owners.length, collections: colls.length, csPairs: csPairs.length, coverage }; + console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`); -const report = { generatedBy: "http-bench.mjs", params: { N, C, DURATION, SAMPLE, mix: W, sampleBase }, corpus, targets: results }; -writeFileSync(`${OUT}.json`, JSON.stringify(report, null, 2)); - -const md = []; -md.push(`# AtomicAssets HTTP benchmark`); -md.push(""); -md.push(`Load: ${DURATION ? `${DURATION}s steady-state` : `${N} reqs`} per target, concurrency ${C}. 
Corpus: ${corpus.ids} ids / ${corpus.owners} owners / ${corpus.collections} collections / ${corpus.csPairs} (coll,schema) pairs sampled from \`${sampleBase}\`. Latency = client-observed served-HTTP time (ms).`); -md.push(""); -md.push(`| target | req/s | p50 | p95 | p99 | max | errors | cpu avg/peak | rss avg/peak (MiB) |`); -md.push(`|---|---:|---:|---:|---:|---:|---:|---:|---:|`); -for (const r of results) { - const o = r.overall, res = r.resource; - md.push(`| ${r.name} | ${f(r.reqps)} | ${f(o.p50)} | ${f(o.p95)} | ${f(o.p99)} | ${f(o.max)} | ${r.errors} | ${res ? `${f(res.cpuAvgPct)}%/${f(res.cpuPeakPct)}%` : "—"} | ${res ? `${f(res.memAvgMiB)}/${f(res.memPeakMiB)}` : "—"} |`); -} -md.push(""); -for (const r of results) { - md.push(`## ${r.name} — per query type`); - md.push(`| type | n | p50 | p95 | p99 | max |`); - md.push(`|---|---:|---:|---:|---:|---:|`); - for (const m of MIX) { const t = r.perType[m.type]; md.push(`| ${m.type} | ${t.n} | ${f(t.p50)} | ${f(t.p95)} | ${f(t.p99)} | ${f(t.max)} |`); } + // ── 2) weighted query mix (default ~real-API-traffic-shaped; override with MIX=type=w,…) ── + const W = { point: 35, coll: 25, owner: 15, faceted: 10, browse: 8, account: 7 }; + if (env.MIX) for (const part of env.MIX.split(",")) { const [k, v] = part.split("="); if (k in W && Number.isFinite(+v)) W[k] = +v; } + const have = { point: ids.length, coll: colls.length, owner: owners.length, faceted: csPairs.length, browse: 1, account: owners.length }; + const MIX = [ + { type: "point", url: () => `${P}/assets/${pick(ids)}` }, + { type: "coll", url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` }, + { type: "owner", url: () => `${P}/assets?owner=${pick(owners)}&limit=100` }, + { type: "faceted", url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } }, + { type: "browse", url: () => `${P}/assets?limit=100` }, + { type: "account", url: () => `${P}/accounts/${pick(owners)}` }, + ].map((m) 
=> ({ ...m, w: W[m.type] })).filter((m) => m.w > 0 && have[m.type] > 0); + if (!MIX.length) { + console.error("[bench] empty query mix — the sampled corpus has no usable dimensions, or every MIX weight is 0"); + process.exitCode = 1; + return; + } + const totalW = sum(MIX.map((m) => m.w)); + const pickMix = () => { + let r = Math.random() * totalW; + for (const m of MIX) if ((r -= m.w) < 0) return m; + return MIX[0]; + }; + + async function run(target) { + const lat = Object.fromEntries(MIX.map((m) => [m.type, []])); + const all = []; + let errs = 0, done = 0; + // warm up (fill caches / JIT) before measuring + before sampling resources + await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetchT(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } })); + const sampler = startStats(target.stats); + const t0 = now(); + const deadline = DURATION ? t0 + DURATION * 1000 : 0; + const per = DURATION ? Infinity : Math.ceil(N / C); + await Promise.all( + Array.from({ length: C }, async () => { + for (let i = 0; i < per; i++) { + if (DURATION && now() >= deadline) break; + const m = pickMix(); + const s = now(); + // drain the body either way (frees the connection); count non-2xx + timeouts + network errors as failures + const ok = await fetchT(`${target.base}${m.url()}`).then((r) => r.text().then(() => r.ok)).catch(() => false); + const d = now() - s; + done++; + if (ok) { lat[m.type].push(d); all.push(d); } else errs++; + } + }), + ); + const wall = now() - t0; + const res = sampler ? 
sampler.stop() : null; + const perType = Object.fromEntries(MIX.map((m) => [m.type, stats(lat[m.type])])); + const overall = stats(all); + const reqps = (all.length / wall) * 1000; // successful only — fast error pages must not inflate throughput + + console.log(`\n══ ${target.name} (${target.base}) ══`); + console.log(`${all.length} ok / ${done} sent in ${wall.toFixed(0)}ms → ${reqps.toFixed(0)} req/s (c=${C}${DURATION ? `, ${DURATION}s` : ""}) errors=${errs}${errs ? " ⚠ results suspect — investigate errors" : ""}`); + console.log(` type n p50 p95 p99 max (ms)`); + for (const m of MIX) { const t = perType[m.type]; console.log(` ${m.type.padEnd(8)} ${String(t.n).padStart(6)} ${f(t.p50).padStart(6)} ${f(t.p95).padStart(6)} ${f(t.p99).padStart(6)} ${f(t.max).padStart(6)}`); } + console.log(` ${"OVERALL".padEnd(8)} ${String(overall.n).padStart(6)} ${f(overall.p50).padStart(6)} ${f(overall.p95).padStart(6)} ${f(overall.p99).padStart(6)} ${f(overall.max).padStart(6)}`); + if (res) console.log(` resource (${res.container}): cpu avg ${f(res.cpuAvgPct)}% peak ${f(res.cpuPeakPct)}% | rss avg ${f(res.memAvgMiB)}MiB peak ${f(res.memPeakMiB)}MiB (${res.samples} samples)`); + return { name: target.name, base: target.base, reqps, sent: done, errors: errs, wallMs: wall, concurrency: C, overall, perType, resource: res }; + } + + const results = []; + for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client + + // ── side-by-side + results files ── + if (results.length > 1) { + console.log(`\n══ side-by-side ══`); + for (const r of results) console.log(` ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s p50=${f(r.overall.p50)}ms p95=${f(r.overall.p95)}ms p99=${f(r.overall.p99)}ms${r.errors ? 
` (errors=${r.errors}!)` : ""}`); + } + + const report = { generatedBy: "http-bench.mjs", params: { N, C, DURATION, SAMPLE, timeoutMs: TIMEOUT_MS, mix: W, sampleBase }, corpus, targets: results }; + writeFileSync(`${OUT}.json`, JSON.stringify(report, null, 2)); + + const md = []; + md.push(`# AtomicAssets HTTP benchmark`); + md.push(""); + md.push(`Load: ${DURATION ? `${DURATION}s steady-state` : `${N} reqs`} per target, concurrency ${C}. Corpus: ${corpus.ids} ids / ${corpus.owners} owners / ${corpus.collections} collections / ${corpus.csPairs} (coll,schema) pairs sampled from \`${sampleBase}\`. Latency = client-observed served-HTTP time (ms); req/s = successful only.`); md.push(""); + md.push(`| target | req/s | p50 | p95 | p99 | max | errors | cpu avg/peak | rss avg/peak (MiB) |`); + md.push(`|---|---:|---:|---:|---:|---:|---:|---:|---:|`); + for (const r of results) { + const o = r.overall, res = r.resource; + md.push(`| ${r.name} | ${f(r.reqps)} | ${f(o.p50)} | ${f(o.p95)} | ${f(o.p99)} | ${f(o.max)} | ${r.errors} | ${res ? `${f(res.cpuAvgPct)}%/${f(res.cpuPeakPct)}%` : "—"} | ${res ? `${f(res.memAvgMiB)}/${f(res.memPeakMiB)}` : "—"} |`); + } + md.push(""); + for (const r of results) { + md.push(`## ${r.name} — per query type`); + md.push(`| type | n | p50 | p95 | p99 | max |`); + md.push(`|---|---:|---:|---:|---:|---:|`); + for (const m of MIX) { const t = r.perType[m.type]; md.push(`| ${m.type} | ${t.n} | ${f(t.p50)} | ${f(t.p95)} | ${f(t.p99)} | ${f(t.max)} |`); } + md.push(""); + } + writeFileSync(`${OUT}.md`, md.join("\n")); + console.log(`\n[bench] wrote ${OUT}.json + ${OUT}.md`); } -writeFileSync(`${OUT}.md`, md.join("\n")); -console.log(`\n[bench] wrote ${OUT}.json + ${OUT}.md`); + +await main();