Skip to content

Commit d7db28e

Browse files
authored
[trees] improve tree initialization and rebuild performance (#429)
* [trees] Add core tree primitives benchmark

  Add benchmarkTreeCorePrimitives.ts for realistic createTree/rebuildTree measurement. Includes:
  - Realistic init path: createTree → setMounted(true) → rebuildTree()
  - Feature profiles: minimal, root-default, virtualized-card
  - Rebuild modes: unchanged and expanded-copy (changed-state)
  - Large-set focus: large-wide (8k), large-monorepo (2.4k), linux (93k)
  - Deterministic checksum guards for drift detection
  - JSON output with --compare baseline diffing
  - Shared benchmark helpers extracted from fileListToTree benchmarks

  Also refactors benchmarkFileListToTree.ts to use shared helpers and adds expanded folder fixture support for realistic expansion state.

* [trees] Optimize large tree initialization (~66% faster)

  Two allocation-reduction optimizations that significantly reduce initialization cost for large trees (93k linux kernel file set):

  1. getItemsMeta traversal (tree/feature.ts): Replace per-node path.concat() and path.includes() with a reusable lineage stack + Set for circular-reference detection. Eliminates O(depth) array allocations and O(depth²) includes scans per node.
  2. buildStaticInstance finalization (build-static-instance.ts): Replace Object.entries(definition) with for-in iteration to avoid allocating a key-value pair array on every instance method wiring call. This is the hot path during rebuildItemMeta for large trees.

  Benchmark results (linux 93k file set, virtualized-card profile):
  - Initialize median: ~201ms → ~69ms (-66%)
  - Changed-state rebuild median: ~28ms → ~21ms (-25%)

  All existing tests pass with identical behavior.
1 parent 1a35f2f commit d7db28e

File tree

9 files changed

+1220
-207
lines changed

9 files changed

+1220
-207
lines changed

packages/trees/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ From `packages/trees`:
206206
```bash
207207
bun test
208208
bun run benchmark
209+
bun run benchmark:core
209210
bun run test:e2e
210211
bun run tsc
211212
bun run build
@@ -265,6 +266,41 @@ bun ws trees benchmark -- --case=linux --compare tmp/fileListToTree-baseline.jso
265266
mismatches. That makes it useful both for performance regressions and for
266267
catching accidental behavior changes while refactoring.
267268

269+
For core tree primitive profiling, use the dedicated benchmark runner:
270+
271+
```bash
272+
bun ws trees benchmark:core
273+
```
274+
275+
If you care most about large datasets, run a filtered large-shape subset:
276+
277+
```bash
278+
bun ws trees benchmark:core -- --case=large-wide --case=large-monorepo --case=linux
279+
```
280+
281+
This benchmark isolates core tree costs by preparing fixture-backed tree data up
282+
front and timing only primitive calls. The `createTree` timing reflects the real
283+
initialization path (`createTree` + `setMounted(true)` + initial `rebuildTree`).
284+
`rebuildTree` can run either as unchanged hot rebuilds or as changed-state
285+
rebuilds via `--rebuild-mode=expanded-copy`.
286+
287+
To better mirror the trees-dev virtualization workload, benchmark cases are
288+
built with `sort: false` and `flattenEmptyDirectories: true`.
289+
290+
It also supports `--json`, `--compare`, and `--case` filters, plus:
291+
292+
- `--create-iterations` to batch multiple create+mount+initial-rebuild calls per
293+
measured sample
294+
- `--rebuild-iterations` to batch multiple `rebuildTree` calls per measured
295+
sample
296+
- `--rebuild-mode` to choose unchanged rebuilds or a changed-state mode
297+
(`expanded-copy`) with stronger update-path signal
298+
- `--feature-profile` to switch between `virtualized-card` realism,
299+
`root-default`, and `minimal` core-only feature overhead
300+
301+
Those batching flags improve confidence for fast operations by reducing timer
302+
jitter while still reporting per-call milliseconds.
303+
268304
# Credits and Acknowledgements
269305

270306
The core of this library's underlying tree implementation started as a hard fork

packages/trees/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
"scripts": {
3636
"build": "tsdown --clean",
3737
"benchmark": "bun run ./scripts/benchmarkFileListToTree.ts",
38+
"benchmark:file-list-to-tree": "bun run ./scripts/benchmarkFileListToTree.ts",
39+
"benchmark:core": "bun run ./scripts/benchmarkTreeCorePrimitives.ts",
3840
"dev": "echo 'Watching for changes…' && tsdown --watch --log-level error",
3941
"test": "bun test",
4042
"coverage": "bun test --coverage",

packages/trees/scripts/benchmarkFileListToTree.ts

Lines changed: 15 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,29 @@
11
import { readFileSync } from 'node:fs';
22
import { resolve } from 'node:path';
33

4-
import type { FileTreeData } from '../src/types';
54
import {
65
benchmarkFileListToTreeStages,
76
type FileListToTreeStageName,
87
} from '../src/utils/fileListToTree';
8+
import {
9+
type BenchmarkEnvironment,
10+
calculateDeltaPercent,
11+
formatMs,
12+
formatSignedMs,
13+
formatSignedPercent,
14+
getEnvironment,
15+
parseNonNegativeInteger,
16+
parsePositiveInteger,
17+
printTable,
18+
summarizeSamples,
19+
type TimingSummary,
20+
} from './lib/benchmarkUtils';
921
import {
1022
type FileListToTreeBenchmarkCase,
1123
filterBenchmarkCases,
1224
getFileListToTreeBenchmarkCases,
1325
} from './lib/fileListToTreeBenchmarkData';
26+
import { checksumFileTreeData } from './lib/treeBenchmarkChecksums';
1427

1528
interface BenchmarkConfig {
1629
runs: number;
@@ -20,22 +33,6 @@ interface BenchmarkConfig {
2033
comparePath?: string;
2134
}
2235

23-
interface BenchmarkEnvironment {
24-
bunVersion: string;
25-
platform: string;
26-
arch: string;
27-
}
28-
29-
interface TimingSummary {
30-
runs: number;
31-
meanMs: number;
32-
medianMs: number;
33-
p95Ms: number;
34-
minMs: number;
35-
maxMs: number;
36-
stdDevMs: number;
37-
}
38-
3936
interface CaseSummary extends TimingSummary {
4037
name: string;
4138
source: FileListToTreeBenchmarkCase['source'];
@@ -121,26 +118,6 @@ const STAGE_ORDER: FileListToTreeStageName[] = [
121118
'hashTreeKeys',
122119
];
123120

124-
function parsePositiveInteger(value: string, flagName: string): number {
125-
const parsed = Number.parseInt(value, 10);
126-
if (!Number.isFinite(parsed) || parsed <= 0) {
127-
throw new Error(
128-
`Invalid ${flagName} value '${value}'. Expected a positive integer.`
129-
);
130-
}
131-
return parsed;
132-
}
133-
134-
function parseNonNegativeInteger(value: string, flagName: string): number {
135-
const parsed = Number.parseInt(value, 10);
136-
if (!Number.isFinite(parsed) || parsed < 0) {
137-
throw new Error(
138-
`Invalid ${flagName} value '${value}'. Expected a non-negative integer.`
139-
);
140-
}
141-
return parsed;
142-
}
143-
144121
function printHelpAndExit(): never {
145122
console.log('Usage: bun ws trees benchmark -- [options]');
146123
console.log('');
@@ -209,136 +186,6 @@ function parseArgs(argv: string[]): BenchmarkConfig {
209186
return config;
210187
}
211188

212-
function percentile(sortedValues: number[], percentileRank: number): number {
213-
if (sortedValues.length === 0) {
214-
return 0;
215-
}
216-
217-
const rank = (sortedValues.length - 1) * percentileRank;
218-
const lowerIndex = Math.floor(rank);
219-
const upperIndex = Math.ceil(rank);
220-
const lower = sortedValues[lowerIndex] ?? sortedValues[0] ?? 0;
221-
const upper =
222-
sortedValues[upperIndex] ?? sortedValues[sortedValues.length - 1] ?? lower;
223-
if (lowerIndex === upperIndex) {
224-
return lower;
225-
}
226-
227-
const interpolation = rank - lowerIndex;
228-
return lower + (upper - lower) * interpolation;
229-
}
230-
231-
function summarizeSamples(samples: number[]): TimingSummary {
232-
if (samples.length === 0) {
233-
return {
234-
runs: 0,
235-
meanMs: 0,
236-
medianMs: 0,
237-
p95Ms: 0,
238-
minMs: 0,
239-
maxMs: 0,
240-
stdDevMs: 0,
241-
};
242-
}
243-
244-
const sortedSamples = [...samples].sort((left, right) => left - right);
245-
const total = samples.reduce((sum, value) => sum + value, 0);
246-
const mean = total / samples.length;
247-
const variance =
248-
samples.reduce((sum, value) => sum + (value - mean) ** 2, 0) /
249-
samples.length;
250-
251-
return {
252-
runs: samples.length,
253-
meanMs: mean,
254-
medianMs: percentile(sortedSamples, 0.5),
255-
p95Ms: percentile(sortedSamples, 0.95),
256-
minMs: sortedSamples[0] ?? 0,
257-
maxMs: sortedSamples[sortedSamples.length - 1] ?? 0,
258-
stdDevMs: Math.sqrt(variance),
259-
};
260-
}
261-
262-
function formatMs(value: number): string {
263-
return value.toFixed(3);
264-
}
265-
266-
function formatSignedMs(value: number): string {
267-
const prefix = value > 0 ? '+' : '';
268-
return `${prefix}${value.toFixed(3)}`;
269-
}
270-
271-
function formatSignedPercent(value: number): string {
272-
if (!Number.isFinite(value)) {
273-
return value > 0 ? '+inf%' : value < 0 ? '-inf%' : '0.0%';
274-
}
275-
276-
const prefix = value > 0 ? '+' : '';
277-
return `${prefix}${value.toFixed(1)}%`;
278-
}
279-
280-
function checksumTree(tree: FileTreeData): number {
281-
let checksum = 0;
282-
283-
for (const [id, node] of Object.entries(tree)) {
284-
checksum += id.length;
285-
checksum += node.name.length;
286-
checksum += node.path.length;
287-
288-
if (node.children != null) {
289-
checksum += node.children.direct.length;
290-
for (const child of node.children.direct) {
291-
checksum += child.length;
292-
}
293-
if (node.children.flattened != null) {
294-
checksum += node.children.flattened.length;
295-
for (const child of node.children.flattened) {
296-
checksum += child.length;
297-
}
298-
}
299-
}
300-
301-
if (node.flattens != null) {
302-
checksum += node.flattens.length;
303-
for (const path of node.flattens) {
304-
checksum += path.length;
305-
}
306-
}
307-
}
308-
309-
return checksum;
310-
}
311-
312-
function printTable(rows: Record<string, string>[], headers: string[]): void {
313-
const widths = headers.map((header) => {
314-
const valueWidth = rows.reduce(
315-
(max, row) => Math.max(max, row[header]?.length ?? 0),
316-
header.length
317-
);
318-
return valueWidth;
319-
});
320-
321-
const formatRow = (row: Record<string, string>) =>
322-
headers
323-
.map((header, index) => (row[header] ?? '').padEnd(widths[index]))
324-
.join(' ')
325-
.trimEnd();
326-
327-
const headerRow = Object.fromEntries(
328-
headers.map((header) => [header, header])
329-
);
330-
console.log(formatRow(headerRow));
331-
console.log(
332-
widths
333-
.map((width) => '-'.repeat(width))
334-
.join(' ')
335-
.trimEnd()
336-
);
337-
for (const row of rows) {
338-
console.log(formatRow(row));
339-
}
340-
}
341-
342189
function createStageSampleStorage(): Record<FileListToTreeStageName, number[]> {
343190
return {
344191
buildPathGraph: [],
@@ -348,22 +195,6 @@ function createStageSampleStorage(): Record<FileListToTreeStageName, number[]> {
348195
};
349196
}
350197

351-
function getEnvironment(): BenchmarkEnvironment {
352-
return {
353-
bunVersion: Bun.version,
354-
platform: process.platform,
355-
arch: process.arch,
356-
};
357-
}
358-
359-
function calculateDeltaPercent(current: number, baseline: number): number {
360-
if (baseline === 0) {
361-
return current === 0 ? 0 : Number.POSITIVE_INFINITY;
362-
}
363-
364-
return ((current - baseline) / baseline) * 100;
365-
}
366-
367198
// Benchmarks only stay comparable when the output payload has the same shape.
368199
// Load and validate the previous JSON run up front so comparison failures are
369200
// immediate instead of producing misleading deltas later on.
@@ -608,7 +439,7 @@ function main() {
608439
const startTime = performance.now();
609440
const result = benchmarkFileListToTreeStages(caseConfig.files);
610441
const elapsedMs = performance.now() - startTime;
611-
const resultChecksum = checksumTree(result.tree);
442+
const resultChecksum = checksumFileTreeData(result.tree);
612443
const existingChecksum = caseChecksums[caseIndex];
613444

614445
if (existingChecksum == null) {

0 commit comments

Comments
 (0)