From f6c90a244a49e5271e906bc5b58fb7790808cb90 Mon Sep 17 00:00:00 2001 From: Joel Arbuckle Date: Wed, 3 Jun 2026 07:26:45 -0700 Subject: [PATCH 1/3] Plans and initial experiments --- .../core/src/rendering/webgpu/graphs/nodes.ts | 366 ++++++++++++++++++ .../webgpu/pipelines/binding-graphs.ts | 116 ++++++ .../src/rendering/webgpu/plan-2026-06-01.md | 184 +++++++++ 3 files changed, 666 insertions(+) create mode 100644 packages/core/src/rendering/webgpu/graphs/nodes.ts create mode 100644 packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts create mode 100644 packages/core/src/rendering/webgpu/plan-2026-06-01.md diff --git a/packages/core/src/rendering/webgpu/graphs/nodes.ts b/packages/core/src/rendering/webgpu/graphs/nodes.ts new file mode 100644 index 00000000..b73795c6 --- /dev/null +++ b/packages/core/src/rendering/webgpu/graphs/nodes.ts @@ -0,0 +1,366 @@ +class GraphNode { + constructor(public id: string, public type: string, public data: any, public children: GraphNode[] = []) {} +} + +/** + * Merges two DAGs into a single DAG that preserves every ancestor-descendant + * relationship present in either source graph. Concretely, if node A is an + * ancestor of node B in either input graph it will remain an ancestor of B in + * the merged result. + * + * When the same node `id` appears in both graphs the first graph's `type` and + * `data` are kept; edges from both graphs are unioned. + * + * @throws if the union of edges from both graphs contains a cycle, which + * means the two graphs impose contradictory ordering constraints and a + * consistent merge is impossible. + * + * @param rootsA - Root nodes of the first graph. + * @param rootsB - Root nodes of the second graph. + * @returns Root nodes of the merged graph (nodes with no incoming edges). 
+ */ +function mergeGraphs(rootsA: GraphNode[], rootsB: GraphNode[]): GraphNode[] { + // ------------------------------------------------------------------ + // Step 1 – Collect all unique nodes from both graphs. + // A fresh `seen` set is used per graph so that a node shared between the + // two graphs is fully traversed in both contexts (important when the node + // has graph-specific children that live below it in only one graph). + // ------------------------------------------------------------------ + const nodeMap = new Map(); + + function gatherNodes(roots: GraphNode[]): void { + const seen = new Set(); + function visit(node: GraphNode): void { + if (seen.has(node.id)) return; // TODO: this should actually be an error, since it indicates that the input graph has a cycle + seen.add(node.id); + if (!nodeMap.has(node.id)) { + // Children are intentionally omitted here; they are wired up + // in Step 4 after the full edge union is known. + nodeMap.set(node.id, new GraphNode(node.id, node.type, node.data)); + } + for (const child of node.children) visit(child); + } + for (const root of roots) visit(root); + } + + gatherNodes(rootsA); + gatherNodes(rootsB); + + // ------------------------------------------------------------------ + // Step 2 – Build the union of directed edges from both graphs. + // Again a per-graph `seen` set ensures that shared nodes contribute + // their edges from both graphs even though they share the same id. + // ------------------------------------------------------------------ + const edges = new Map>(); + + function gatherEdges(roots: GraphNode[]): void { + const seen = new Set(); + function visit(node: GraphNode): void { + if (seen.has(node.id)) return; // TODO: this should actually be an error, since it indicates that the input graph has a cycle + seen.add(node.id); + + // TODO: will want to restructure this to avoid .get() for every child, maybe? 
want this to be as memory/cpu efficient as possible + if (!edges.has(node.id)) edges.set(node.id, new Set()); + for (const child of node.children) { + const childSet = edges.get(node.id); + if (childSet) { + childSet.add(child.id); + } + visit(child); + } + } + for (const root of roots) visit(root); + } + + gatherEdges(rootsA); + gatherEdges(rootsB); + + // ------------------------------------------------------------------ + // Step 3 – Topological sort via Kahn's algorithm. + // Serves two purposes: produces a valid processing order AND detects + // cycles. A cycle in the union means the two graphs have contradictory + // ordering constraints (A before B in one graph, B before A in the other). + // ------------------------------------------------------------------ + const inDegree = new Map(); + for (const id of nodeMap.keys()) inDegree.set(id, 0); + + for (const [, childIds] of edges) { + for (const childId of childIds) { + inDegree.set(childId, (inDegree.get(childId) ?? 0) + 1); + } + } + + const queue: string[] = []; + for (const [id, deg] of inDegree) { + if (deg === 0) queue.push(id); + } + + const topoOrder: string[] = []; + while (queue.length > 0) { + const id = queue.shift()!; + topoOrder.push(id); + for (const childId of edges.get(id) ?? []) { + const newDeg = inDegree.get(childId)! - 1; + inDegree.set(childId, newDeg); + if (newDeg === 0) queue.push(childId); + } + } + + if (topoOrder.length !== nodeMap.size) { + throw new Error( + 'Cannot merge graphs: the combined edge set contains a cycle. ' + + 'The two graphs impose contradictory ordering constraints.' + ); + } + + // ------------------------------------------------------------------ + // Step 4 – Wire up children on the merged nodes using the combined edges. 
+ // ------------------------------------------------------------------ + // TODO: this is a problem: rather than potentially inserting nodes in between ancestors and descendants, this is just reconnecting + // them, causing "gaps" in the traversal logic of the new graph + for (const [parentId, childIds] of edges) { + const parent = nodeMap.get(parentId)!; + parent.children = [...childIds].map(id => nodeMap.get(id)!); + } + + // ------------------------------------------------------------------ + // Step 5 – Return root nodes (those that have no incoming edges). + // ------------------------------------------------------------------ + const hasParent = new Set(); + for (const [, childIds] of edges) { + for (const childId of childIds) hasParent.add(childId); + } + + return [...nodeMap.keys()] + .filter(id => !hasParent.has(id)) + .map(id => nodeMap.get(id)!); +} + +/** + * Merges a WebGPU Command Graph (CG) with a Bind Group Graph (BGG) to + * produce a command hierarchy suitable for driving a GPURenderPassEncoder. + * + * ── Why this is different from mergeGraphs ───────────────────────────── + * + * mergeGraphs performs a topological union of edges: every node is shared + * by ID and every edge from both graphs is preserved. That strategy fails + * for this domain in two ways: + * + * 1. Command ordering. A plain edge union leaves BindGroup and Drawable + * nodes as unordered siblings under their Pipeline node. WebGPU + * requires setBindGroup calls to precede draw calls in the encoded + * command stream. + * + * 2. Multiple pipeline occurrences. The same Pipeline may be referenced + * from several points in the CG (each heading a distinct draw batch). + * In a shared-node DAG the Pipeline (and therefore its bind-group + * sub-tree) would be visited exactly once during traversal, so each + * later batch would never re-emit setBindGroup — producing incorrect + * GPU state. 
Each occurrence must own an independent copy of the + * bind-group sub-tree so the encoder visits it the correct number of + * times. + * + * ── Join point ───────────────────────────────────────────────────────── + * + * The two graphs share Pipeline nodes as their only common node type: + * + * BGG ── Pipeline → BindGroup(s) → Resource(s) + * CG ── … → Pipeline → Drawable(s) [ → other state commands ] + * + * For every Pipeline occurrence found during CG traversal the algorithm: + * + * a. Clones the corresponding BGG sub-tree independently (one clone per + * CG occurrence) so that each pipeline occurrence is self-contained. + * + * b. Identifies the leaf nodes of the cloned BGG sub-tree. Leaves + * represent fully-specified bind-group states; Drawables placed there + * are guaranteed to fire after all ancestor setBindGroup calls. + * + * c. Distributes the Drawable children of the CG Pipeline node across the + * appropriate BGG leaf nodes via the configurable `matchDrawablesToLeaf` + * callback (default: attach all Drawables to every leaf, which is + * correct when all draws under a pipeline share a single bind-group + * path). + * + * d. Prepends any non-Drawable CG children of the Pipeline (viewport + * settings, scissor rects, etc.) before the BGG sub-tree so that + * general state commands are encoded before bind-group / draw commands. + * + * ── Efficiency preservation ──────────────────────────────────────────── + * + * The BGG tree structure encodes the setBindGroup efficiency strategy: bind + * groups shared across many draws sit high in the tree (set once per batch) + * while per-draw bind groups sit at the leaves. Cloning the sub-tree + * wholesale preserves this structure verbatim, so no redundant setBindGroup + * calls are introduced within a single pipeline batch. + * + * @param commandRoots - Root nodes of the Command Graph. + * @param bindGroupRoots - Root nodes of the Bind Group Graph. 
+ * @param options + * @param options.pipelineType - Node type string identifying Pipeline + * nodes (default: `'Pipeline'`). + * @param options.drawableType - Node type string identifying Drawable + * nodes (default: `'Drawable'`). + * @param options.matchDrawablesToLeaf - Optional callback invoked for each + * leaf of the cloned BGG sub-tree. Receives the leaf node and the full + * list of Drawable siblings from the CG; returns the subset to attach + * under that leaf. Override when different Drawables require different + * bind-group states (i.e. when the BGG sub-tree has multiple leaves, each + * representing a distinct fully-specified configuration). + * + * @returns Root nodes of the merged graph. + */ +function mergeCommandAndBindGroupGraphs( + commandRoots: GraphNode[], + bindGroupRoots: GraphNode[], + { + pipelineType = 'Pipeline', + drawableType = 'Drawable', + matchDrawablesToLeaf = (_leaf: GraphNode, drawables: GraphNode[]) => drawables, + }: { + pipelineType?: string; + drawableType?: string; + matchDrawablesToLeaf?: (leaf: GraphNode, drawables: GraphNode[]) => GraphNode[]; + } = {} +): GraphNode[] { + // ── Step 1 ───────────────────────────────────────────────────────────── + // Index the BGG: for each Pipeline node record its direct children + // (= the roots of its bind-group sub-tree). One pass, DAG-safe. + // ─────────────────────────────────────────────────────────────────────── + const bgSubtrees = new Map(); + { + const visited = new Set(); + function indexBGG(node: GraphNode): void { + if (visited.has(node.id)) return; + visited.add(node.id); + if (node.type === pipelineType) { + bgSubtrees.set(node.id, node.children); + } + for (const child of node.children) indexBGG(child); + } + for (const root of bindGroupRoots) indexBGG(root); + } + + // ── Step 2 ───────────────────────────────────────────────────────────── + // Deep-clone a BGG sub-tree. + // + // Each CG occurrence of a pipeline needs its own independent copy. 
+ // If two CG contexts shared the same BGG sub-tree nodes, a traversal + // that drives command encoding would only visit the bind-group nodes + // once (whichever context was reached first), leaving the second + // context without the required setBindGroup preamble. + // ─────────────────────────────────────────────────────────────────────── + function cloneSubtree(node: GraphNode): GraphNode { + return new GraphNode( + node.id, + node.type, + node.data, + node.children.map(cloneSubtree) + ); + } + + // ── Step 3 ───────────────────────────────────────────────────────────── + // Collect the leaf nodes of an array of sub-tree roots. + // + // Leaves are the deepest bind-group nodes in the BGG sub-tree. + // Drawable nodes appended here will always be reached after all + // ancestor setBindGroup commands have been encoded. + // ─────────────────────────────────────────────────────────────────────── + function collectLeaves(roots: GraphNode[]): GraphNode[] { + const leaves: GraphNode[] = []; + function walk(node: GraphNode): void { + if (node.children.length === 0) { + leaves.push(node); + } else { + for (const child of node.children) walk(child); + } + } + for (const root of roots) walk(root); + return leaves; + } + + // ── Step 4 ───────────────────────────────────────────────────────────── + // Traverse the CG and construct the merged graph. + // + // Non-pipeline nodes: built once and cached by ID (standard DAG + // semantics — multiple parents can safely share a single merged node). + // + // Pipeline nodes: NEVER cached. A fresh GraphNode (with a freshly + // cloned BGG sub-tree) is created for every visit, which is what + // allows the same logical pipeline to appear multiple times in the + // merged output, each with its own independent bind-group preamble. 
+ // ─────────────────────────────────────────────────────────────────────── + const mergedCache = new Map(); + + function buildNode(node: GraphNode): GraphNode { + if (node.type === pipelineType) { + return buildPipelineNode(node); + } + + if (mergedCache.has(node.id)) { + return mergedCache.get(node.id)!; + } + + const merged = new GraphNode(node.id, node.type, node.data); + // Cache before recursing so that any (non-pipeline) diamond shapes + // in the CG are handled without double-building. + mergedCache.set(node.id, merged); + merged.children = node.children.map(buildNode); + return merged; + } + + function buildPipelineNode(pipeline: GraphNode): GraphNode { + // Partition this pipeline's CG children into: + // drawables – will become leaves of the BGG sub-tree + // otherState – viewport, scissor, etc.; must precede bind-group + // and draw commands in the encoded stream + const drawables = pipeline.children.filter(c => c.type === drawableType); + const otherState = pipeline.children + .filter(c => c.type !== drawableType) + .map(buildNode); + + const bgRoots = bgSubtrees.get(pipeline.id); + + let bindGroupSubtreeRoots: GraphNode[]; + + if (bgRoots && bgRoots.length > 0) { + // Clone the entire BGG sub-tree for this pipeline occurrence so + // it is independent of every other occurrence. + const clonedRoots = bgRoots.map(cloneSubtree); + const leaves = collectLeaves(clonedRoots); + + // Attach each leaf's assigned Drawables. + // + // Drawable nodes are themselves leaves (no children), so sharing + // original CG Drawable references across multiple BGG leaves is + // safe — the consumer will simply encode the draw call once per + // leaf that owns it. Provide matchDrawablesToLeaf to restrict + // assignment when different draws require different bind-group + // configurations. 
+ for (const leaf of leaves) { + const assigned = matchDrawablesToLeaf(leaf, drawables); + if (assigned.length > 0) { + leaf.children = [...leaf.children, ...assigned]; + } + } + + bindGroupSubtreeRoots = clonedRoots; + } else { + // No BGG entry for this pipeline — degenerate case, preserve + // the CG's Drawable children directly. + bindGroupSubtreeRoots = drawables; + } + + return new GraphNode( + pipeline.id, + pipeline.type, + pipeline.data, + // Non-Drawable state commands first, then the BGG sub-tree + // (with Drawables appended at its leaves). + [...otherState, ...bindGroupSubtreeRoots] + ); + } + + return commandRoots.map(buildNode); +} + diff --git a/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts b/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts new file mode 100644 index 00000000..a8c06b38 --- /dev/null +++ b/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts @@ -0,0 +1,116 @@ +import { WgslShader } from '../shaders'; +import { WgpuResource } from './resources'; + +export type BindingGraphResourceNode = { + __nodeType: 'resource'; + resource: Resource; + pipelines: BindingGraphPipelineNode[]; + label?: string; +}; + +export function isBindingGraphResourceNode(value: unknown): value is BindingGraphResourceNode { + return ( + typeof value === 'object' && value !== null && + '__nodeType' in value && value.__nodeType === 'resource' && + 'resource' in value && typeof value.resource === 'object' && + 'pipelines' in value && Array.isArray(value.pipelines) + ); +} + +export type BindingGraphGroupNode = { + __nodeType: 'group'; + resources: BindingGraphResourceNode[]; + subgroup?: BindingGraphGroupNode; + label?: string; +}; + +export type BindingGraphPipelineNode = { + __nodeType: 'pipeline'; + shader: WgslShader; + depthStencil?: GPUDepthStencilState; + multisample?: GPUMultisampleState; + primitive?: GPUPrimitiveTopology; + label?: string; +}; + +export type BindingGraph = { + groups: BindingGraphGroupNode[]; +}; + 
+function recursivelyCheckGroup(group: BindingGraphGroupNode, depth: number, depthLimit: number): boolean { + if (depth > depthLimit) { + return false; + } + if (group.subgroup) { + if (!recursivelyCheckGroup(group.subgroup, depth + 1, depthLimit)) { + return false; + } + } + return true; +} + +function checkGroupDepth(groups: BindingGraphGroupNode[], depthLimit: number): boolean { + for (const group of groups) { + if (group.__nodeType !== 'group') { + throw new Error('expected group node'); + } + if (!recursivelyCheckGroup(group, 1, depthLimit)) { + return false; + } + } + return true; +} + +export function makeBindingGraph(groups: BindingGraphGroupNode[], groupDepthLimit = 4): BindingGraph { + if (groupDepthLimit > 0) { + if (!checkGroupDepth(groups, groupDepthLimit)) { + throw new Error(`binding graph group depth exceeds the limit of ${groupDepthLimit}`); + } + } + return { groups }; +} + +export function isBindingGraph(value: unknown): value is BindingGraph { + return typeof value === 'object' && value !== null && 'groups' in value && Array.isArray(value.groups); +} + +export function group( + label: string | undefined, + resources: (WgpuResource | BindingGraphResourceNode)[], + subgroup?: BindingGraphGroupNode +): BindingGraphGroupNode { + return { + __nodeType: 'group', + label, + resources: resources.map((r) => (isBindingGraphResourceNode(r) ? 
r : resource(undefined, r, []))), + subgroup, + }; +} + +export function resource( + label: string | undefined, + resource: WgpuResource, + pipelines: BindingGraphPipelineNode[] +): BindingGraphResourceNode { + return { + __nodeType: 'resource', + label, + resource, + pipelines, + }; +} + +export function pipeline( + shader: WgslShader, + depthStencil?: GPUDepthStencilState, + multisample?: GPUMultisampleState, + primitive?: GPUPrimitiveTopology +): BindingGraphPipelineNode { + return { + __nodeType: 'pipeline', + shader, + depthStencil, + multisample, + primitive, + }; +} diff --git a/packages/core/src/rendering/webgpu/plan-2026-06-01.md b/packages/core/src/rendering/webgpu/plan-2026-06-01.md new file mode 100644 index 00000000..616b5307 --- /dev/null +++ b/packages/core/src/rendering/webgpu/plan-2026-06-01.md @@ -0,0 +1,184 @@ +# Plan: Stateful WebGPU Graph Encoder with Generalized No-Op Elision + Incremental Re-Encoding + +## TL;DR + +The merged graph (produced by `mergeCommandAndBindGroupGraphs`) contains redundant state-setting nodes. A **stateful encoder** tracks all active GPU state and skips no-op commands via a generalized `resolveFingerprint` callback. On top of this, a **persistent command cache** enables incremental re-encoding: unchanged subtrees (same node versions + same entry state) replay their cached command segment without re-traversing, re-resolving, or re-computing. A layout compatibility oracle handles bind-group invalidation on pipeline switch. + +## Context + +- File: `packages/core/src/rendering/webgpu/graphs/nodes.ts` +- The merged graph is a tree (pipelines are cloned per-occurrence) traversed via DFS to drive a `GPURenderPassEncoder`. +- Many node types represent idempotent state-setting commands; redundant calls are safe but wasteful. +- The encoder generalizes no-op detection: any node that resolves to the same `(stateKey, stateValue)` already active is skipped. 
+- **Incremental re-encoding**: structure is stable across frames; data (uniforms, textures, bind group contents) changes. The encoder caches subtree command segments and replays them when node versions and entry state match. + +## Steps + +### Phase 1: Types & Callbacks + +1. **`StateFingerprint`** — `{ key: string; value: string } | null` + - `key`: the state category/slot being set (e.g., `"pipeline"`, `"bindGroup:0"`, `"viewport"`, `"scissor"`, `"stencilRef"`) + - `value`: opaque identity for the specific value being set + - `null`: the node is not a state-setting command (e.g., Drawable) — always encode, never skip. + +2. **`ResolveStateFingerprint`** — `(node: GraphNode) => StateFingerprint` + User-supplied. Keeps the encoder decoupled from node data conventions. + +3. **`IsSlotCompatible`** — `(prevPipeline: GraphNode, newPipeline: GraphNode, slotKey: string) => boolean` + - Called on pipeline switch for every active `"bindGroup:*"` key. + - `true` → slot remains valid. `false` → slot invalidated. + - Default: invalidate all bind-group slots on any pipeline switch (conservative). + +4. **`EncodedCommand`** — a cached instruction: + ```ts + type EncodedCommand = + | { action: 'encode'; node: GraphNode } + | { action: 'skip'; node: GraphNode } + ``` + The command list is the output of the "what to encode" phase. Consumers iterate it and call the actual GPU APIs. + +5. **`SubtreeCacheEntry`** — persistent across frames: + ```ts + type SubtreeCacheEntry = { + valid: boolean; // false if markDirty() has been called + version: number; // node version when this was recorded + entryState: Map<string, string>; // active state snapshot at subtree entry + exitState: Map<string, string>; // active state snapshot at subtree exit + commands: EncodedCommand[]; // the command segment for this subtree + } + ``` + Keyed by node ID. Since structure is stable, a node's position in the DFS is deterministic — its cache entry is valid whenever `valid === true` and the entry state matches. + +6.
**`GraphNode` extension** — add a `version: number` field (or the user exposes it via a callback `getVersion(node: GraphNode) => number`). Bumped by the user whenever node data changes. + +7. **`EncodeGraphOptions`**: + ```ts + { + resolveFingerprint: ResolveStateFingerprint; + isSlotCompatible?: IsSlotCompatible; + getVersion: (node: GraphNode) => number; + pipelineType?: string; // default 'Pipeline' + onEncode: (node: GraphNode) => void; + onSkip?: (node: GraphNode) => void; + } + ``` + +### Phase 2: Two-Phase Encoder + +The encoder is split into two phases: + +**Phase 2a: Plan (produces command list)** + +8. `planEncode(roots, options, cache: Map<string, SubtreeCacheEntry>)`: + - DFS traversal maintaining `activeState: Map<string, string>`. + - At each node: + a. Check cache: if `cache[node.id].valid === true` AND `cache[node.id].entryState` deep-equals current `activeState`: + - **Cache hit** → append `cache[node.id].commands` to output, fast-forward `activeState` to `cache[node.id].exitState`, skip recursion into children. + b. **Cache miss** → resolve fingerprint, apply no-op elision logic (skip when the resolved `(key, value)` is already active), emit `{ action: 'encode' | 'skip', node }`, recurse into children. + - After processing a subtree root (on DFS exit), update cache: + - Record `{ valid: true, version, entryState (snapshot at entry), exitState (snapshot at exit), commands (segment produced during this subtree) }`. + +9. **Entry state snapshot**: taken *before* processing the node. If the node itself is a state command, `entryState` is the state before it fires. + +10. **Exit state snapshot**: taken *after* processing the node and all its descendants. This is the state the rest of the graph sees after the subtree. + +**Phase 2b: Execute (replays command list)** + +11. `executeCommands(commands: EncodedCommand[], onEncode, onSkip?)`: + - Iterate the flat command list. For each `'encode'` entry → call `onEncode(node)`. For each `'skip'` entry → call `onSkip(node)` if provided.
+ - This is a simple linear pass — no tree traversal, no fingerprint resolution, no state tracking. + +### Phase 3: Dirty Propagation & Structural Changes + +12. **Parent map**: The encoder builds an internal `parentMap: Map<string, string>` (child ID → parent ID) during the first full encode (and updates it on structural changes). This is a one-time O(N) cost, amortized across frames. + +13. **`markDirty(nodeId: string)`** — public API on `GraphEncoder`: + - Looks up `nodeId` in the parent map. + - Walks from `nodeId` up to root via parent pointers, marking each ancestor's cache entry as invalid (`entry.valid = false`). + - Cost: O(depth) per call. + - The user calls this whenever: + - A Drawable is added/removed from a node + - A node's data changes (alternative to version-bump; either mechanism works) + - Any structural change occurs + +14. **Cache hit logic (updated)**: + During `planEncode`, at each node: + - If `cache[node.id].valid === true` AND `cache[node.id].entryState` matches current `activeState`: + → **TRUE SKIP**: splice cached commands, fast-forward activeState to exitState, **do not visit children at all**. + - If `valid === false` OR entry-state mismatch: + → Process normally (resolve fingerprints, no-op elision, recurse into children). On DFS exit, rebuild cache entry with `valid = true`. + - Non-dirty subtrees are skipped in O(1) — no recursion, no fingerprint comparison. + +15. **Parent map maintenance on structural change**: + When `markDirty` is called due to child add/remove, the encoder also updates parent pointers for the affected children: + - New child added → `parentMap.set(child.id, parentId)`. + - Child removed → `parentMap.delete(child.id)`. + - The encoder detects the structural change on next visit to the dirty node (children IDs differ from cache) and reconciles the parent map for that subtree only. + +16.
**`getVersion` callback is still used** — but primarily as a secondary check within dirty subtrees to determine if individual node data changed (enabling no-op elision within re-traversed regions). Clean subtrees are never version-checked — they're skipped entirely. + +17. **Batch marking**: For common patterns (e.g., "re-sort all Drawables under this Pipeline"), expose `markSubtreeDirty(nodeId)` which marks the node and all its descendants as invalid. This avoids O(descendant_count) individual `markDirty` calls when the user knows an entire subtree is stale. + +### Phase 4: Pipeline-Switch & State Invalidation + +18. On pipeline switch (the `"pipeline"` key changes to a different value): + - Same pipeline ID → no invalidation (fast path). + - Different pipeline → call `isSlotCompatible` for each active `"bindGroup:*"` key. Delete incompatible entries from `activeState`. + - This interacts with caching: if a subtree's entry state included a bind-group slot that was subsequently invalidated by an upstream pipeline switch, the entry state won't match → cache miss → correct re-encoding. + +### Phase 5: Encoder Lifecycle + +19. The encoder is a **persistent object**: + ```ts + class GraphEncoder { + private cache: Map<string, SubtreeCacheEntry>; + private parentMap: Map<string, string>; + constructor(private options: EncodeGraphOptions) {} + + encode(roots: GraphNode[]): EncodedCommand[]; + markDirty(nodeId: string): void; + markSubtreeDirty(nodeId: string): void; + reset(): void; // full cache clear (graph rebuilt from scratch) + } + ``` + - First `encode()` call: full DFS, builds cache and parent map. + - Subsequent calls: skips clean subtrees in O(1), re-traverses only dirty paths. + - `markDirty()`: O(depth) propagation to root. + - `reset()`: for rare full structural rebuilds (e.g., graph is discarded and rebuilt).
+ +## Cost Summary + +| Operation | Cost | +|-----------|------| +| `markDirty(nodeId)` | O(depth) | +| `encode()` — nothing dirty | O(1) at root | +| `encode()` — one leaf dirty | O(depth) | +| `encode()` — k leaves dirty | O(k × depth) worst case | +| First encode (cold cache) | O(N) | +| `reset()` + next encode | O(N) | + +## Relevant files + +- `packages/core/src/rendering/webgpu/graphs/nodes.ts` — `GraphNode` type, `mergeCommandAndBindGroupGraphs` (unchanged) +- New file: `packages/core/src/rendering/webgpu/graphs/encoder.ts` — `GraphEncoder` class, types, `planEncode`, `executeCommands` + +## Verification + +1. Unit test: first encode → full DFS, all nodes processed, cache + parent map built. +2. Unit test: second encode with no `markDirty` calls → O(1) at root (cache hit), same command list replayed. +3. Unit test: `markDirty(leafId)` → only root-to-leaf path re-traversed; sibling subtrees skipped entirely (verify they are never visited). +4. Unit test: add a Drawable to a Pipeline, call `markDirty(pipelineId)` → Pipeline re-processed with new Drawable, parent chain re-processed, siblings untouched. +5. Unit test: remove a Drawable, call `markDirty(pipelineId)` → correct commands produced without the removed Drawable. +6. Unit test: pipeline switch on dirty path invalidates bind-group slot → downstream entry-state mismatch on a sibling that WAS clean → that sibling also re-encodes (correctly detected via entry-state check). +7. Unit test: `markSubtreeDirty(nodeId)` → entire subtree re-traversed. +8. Performance test: 1000-node graph, `markDirty` on one leaf → encode time proportional to depth, not node count. + +## Decisions + +- Merge algorithm (`mergeCommandAndBindGroupGraphs`) stays unchanged. +- Incremental invalidation via explicit `markDirty()` — O(depth) per change, O(1) skip for clean subtrees. +- Parent map maintained internally by the encoder (built on first encode, updated on structural changes). 
+- No-op elision still applies within dirty re-traversals (unchanged data nodes are still skipped). +- `getVersion` callback remains useful for no-op elision within dirty paths, but is NOT the primary cache invalidation mechanism (that's `markDirty`). +- Cache entry `valid` flag is the primary gate — no per-frame version computation for clean subtrees. +- Entry-state check is the secondary gate — catches cases where an upstream state change (pipeline switch) requires re-encoding a nominally "clean" subtree. From fdfcd85ceb5b391ec0893221fd77163507f0a6e0 Mon Sep 17 00:00:00 2001 From: Joel Arbuckle Date: Wed, 3 Jun 2026 07:27:40 -0700 Subject: [PATCH 2/3] Forgot one! --- .../rendering/webgpu/pipelines/resources.ts | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 packages/core/src/rendering/webgpu/pipelines/resources.ts diff --git a/packages/core/src/rendering/webgpu/pipelines/resources.ts b/packages/core/src/rendering/webgpu/pipelines/resources.ts new file mode 100644 index 00000000..8685c2b2 --- /dev/null +++ b/packages/core/src/rendering/webgpu/pipelines/resources.ts @@ -0,0 +1,63 @@ +export type WgpuResource = { + texture: GPUTexture; +} | { + buffer: GPUBuffer; +} | { + sampler: GPUSampler; +} + +/* +What do we need to know about resources at the start? + + +BindGroupLayout +- `binding` is obviously going to be set by the binding-graph traversal +- the `visibility` parameter will be determined by which shader stages are present in the pipeline(s) that use(s) that resource +- Resource Layout objects + - buffer + - type (optional, default: "uniform") + - uniform + - read-only-storage + - storage + - hasDynamicOffset (optional) + - minBindingSize (optional) + = can be specified in Resource definition + = maybe calculatable by shader code/webgpu-utils?
+ - externalTexture + = no properties + - storageTexture + - access (optional, default: "write-only") + - read-only + - read-write + - write-only + - format + - viewDimension (optional, default: "2d") + - 1d + - 2d + - 2d-array + - cube + - cube-array + - 3d + - texture + - multisampled (optional, default: false) + - sampleType (optional, default: "float") + - depth + - float + - sint + - uint + - unfilterable-float + - viewDimension (optional, default: "2d") + - 1d + - 2d + - 2d-array + - cube + - cube-array + - 3d + - sampler + - type (optional, default: "filtering") + - comparison + - filtering + - non-filtering + + +*/ \ No newline at end of file From d5f2bdb7e80ced9bef4ab4bee2f2c177868801a5 Mon Sep 17 00:00:00 2001 From: Joel Arbuckle Date: Thu, 4 Jun 2026 12:58:19 -0700 Subject: [PATCH 3/3] Work-in-progress and Agent plan --- .../core/src/rendering/webgpu/graphs/nodes.ts | 33 ++--- .../webgpu/pipelines/binding-graphs.ts | 16 ++- .../rendering/webgpu/pipelines/resources.ts | 19 +-- .../src/rendering/webgpu/plan-2026-06-01.md | 120 +++++++++--------- 4 files changed, 97 insertions(+), 91 deletions(-) diff --git a/packages/core/src/rendering/webgpu/graphs/nodes.ts b/packages/core/src/rendering/webgpu/graphs/nodes.ts index b73795c6..f5436acf 100644 --- a/packages/core/src/rendering/webgpu/graphs/nodes.ts +++ b/packages/core/src/rendering/webgpu/graphs/nodes.ts @@ -1,5 +1,10 @@ class GraphNode { - constructor(public id: string, public type: string, public data: any, public children: GraphNode[] = []) {} + constructor( + public id: string, + public type: string, + public data: any, + public children: GraphNode[] = [] + ) {} } /** @@ -59,7 +64,7 @@ function mergeGraphs(rootsA: GraphNode[], rootsB: GraphNode[]): GraphNode[] { if (seen.has(node.id)) return; // TODO: this should actually be an error, since it indicates that the input graph has a cycle seen.add(node.id); - // TODO: will want to restructure this to avoid .get() for every child, maybe? 
want this to be as memory/cpu efficient as possible + // TODO: will want to restructure this to avoid .get() for every child, maybe? want this to be as memory/cpu efficient as possible if (!edges.has(node.id)) edges.set(node.id, new Set()); for (const child of node.children) { const childSet = edges.get(node.id); @@ -109,7 +114,7 @@ function mergeGraphs(rootsA: GraphNode[], rootsB: GraphNode[]): GraphNode[] { if (topoOrder.length !== nodeMap.size) { throw new Error( 'Cannot merge graphs: the combined edge set contains a cycle. ' + - 'The two graphs impose contradictory ordering constraints.' + 'The two graphs impose contradictory ordering constraints.' ); } @@ -120,7 +125,7 @@ function mergeGraphs(rootsA: GraphNode[], rootsB: GraphNode[]): GraphNode[] { // them, causing "gaps" in the traversal logic of the new graph for (const [parentId, childIds] of edges) { const parent = nodeMap.get(parentId)!; - parent.children = [...childIds].map(id => nodeMap.get(id)!); + parent.children = [...childIds].map((id) => nodeMap.get(id)!); } // ------------------------------------------------------------------ @@ -131,9 +136,7 @@ function mergeGraphs(rootsA: GraphNode[], rootsB: GraphNode[]): GraphNode[] { for (const childId of childIds) hasParent.add(childId); } - return [...nodeMap.keys()] - .filter(id => !hasParent.has(id)) - .map(id => nodeMap.get(id)!); + return [...nodeMap.keys()].filter((id) => !hasParent.has(id)).map((id) => nodeMap.get(id)!); } /** @@ -251,12 +254,7 @@ function mergeCommandAndBindGroupGraphs( // context without the required setBindGroup preamble. 
// ─────────────────────────────────────────────────────────────────────── function cloneSubtree(node: GraphNode): GraphNode { - return new GraphNode( - node.id, - node.type, - node.data, - node.children.map(cloneSubtree) - ); + return new GraphNode(node.id, node.type, node.data, node.children.map(cloneSubtree)); } // ── Step 3 ───────────────────────────────────────────────────────────── @@ -314,10 +312,8 @@ function mergeCommandAndBindGroupGraphs( // drawables – will become leaves of the BGG sub-tree // otherState – viewport, scissor, etc.; must precede bind-group // and draw commands in the encoded stream - const drawables = pipeline.children.filter(c => c.type === drawableType); - const otherState = pipeline.children - .filter(c => c.type !== drawableType) - .map(buildNode); + const drawables = pipeline.children.filter((c) => c.type === drawableType); + const otherState = pipeline.children.filter((c) => c.type !== drawableType).map(buildNode); const bgRoots = bgSubtrees.get(pipeline.id); @@ -327,7 +323,7 @@ function mergeCommandAndBindGroupGraphs( // Clone the entire BGG sub-tree for this pipeline occurrence so // it is independent of every other occurrence. const clonedRoots = bgRoots.map(cloneSubtree); - const leaves = collectLeaves(clonedRoots); + const leaves = collectLeaves(clonedRoots); // Attach each leaf's assigned Drawables. 
// @@ -363,4 +359,3 @@ function mergeCommandAndBindGroupGraphs( return commandRoots.map(buildNode); } - diff --git a/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts b/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts index a8c06b38..692e77cb 100644 --- a/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts +++ b/packages/core/src/rendering/webgpu/pipelines/binding-graphs.ts @@ -1,5 +1,5 @@ -import { WgslShader } from '../shaders'; -import { WgpuResource } from './resources'; +import type { WgslShader } from '../shaders'; +import type { WgpuResource } from './resources'; export type BindingGraphResourceNode = { __nodeType: 'resource'; @@ -10,10 +10,14 @@ export type BindingGraphResourceNode = { export function isBindingGraphResourceNode(value: unknown): value is BindingGraphResourceNode { return ( - typeof value === 'object' && value !== null && - '__nodeType' in value && value.__nodeType === 'resource' && - 'resource' in value && typeof value.resource === 'object' && - 'pipelines' in value && Array.isArray(value.pipelines) + typeof value === 'object' && + value !== null && + '__nodeType' in value && + value.__nodeType === 'resource' && + 'resource' in value && + typeof value.resource === 'object' && + 'pipelines' in value && + Array.isArray(value.pipelines) ); } diff --git a/packages/core/src/rendering/webgpu/pipelines/resources.ts b/packages/core/src/rendering/webgpu/pipelines/resources.ts index 8685c2b2..0e6e9688 100644 --- a/packages/core/src/rendering/webgpu/pipelines/resources.ts +++ b/packages/core/src/rendering/webgpu/pipelines/resources.ts @@ -1,10 +1,13 @@ -export type WgpuResource = { - texture: GPUTexture; -} | { - buffer: GPUBuffer; -} | { - sampler: GPUSampler; -} +export type WgpuResource = + | { + texture: GPUTexture; + } + | { + buffer: GPUBuffer; + } + | { + sampler: GPUSampler; + }; /* What do we need to know about resources at the start? 
@@ -60,4 +63,4 @@ BindGroupLayout - non-filtering -*/ \ No newline at end of file +*/ diff --git a/packages/core/src/rendering/webgpu/plan-2026-06-01.md b/packages/core/src/rendering/webgpu/plan-2026-06-01.md index 616b5307..d4460c9e 100644 --- a/packages/core/src/rendering/webgpu/plan-2026-06-01.md +++ b/packages/core/src/rendering/webgpu/plan-2026-06-01.md @@ -17,51 +17,53 @@ The merged graph (produced by `mergeCommandAndBindGroupGraphs`) contains redunda ### Phase 1: Types & Callbacks 1. **`StateFingerprint`** — `{ key: string; value: string } | null` - - `key`: the state category/slot being set (e.g., `"pipeline"`, `"bindGroup:0"`, `"viewport"`, `"scissor"`, `"stencilRef"`) - - `value`: opaque identity for the specific value being set - - `null`: the node is not a state-setting command (e.g., Drawable) — always encode, never skip. + - `key`: the state category/slot being set (e.g., `"pipeline"`, `"bindGroup:0"`, `"viewport"`, `"scissor"`, `"stencilRef"`) + - `value`: opaque identity for the specific value being set + - `null`: the node is not a state-setting command (e.g., Drawable) — always encode, never skip. 2. **`ResolveStateFingerprint`** — `(node: GraphNode) => StateFingerprint` User-supplied. Keeps the encoder decoupled from node data conventions. 3. **`IsSlotCompatible`** — `(prevPipeline: GraphNode, newPipeline: GraphNode, slotKey: string) => boolean` - - Called on pipeline switch for every active `"bindGroup:*"` key. - - `true` → slot remains valid. `false` → slot invalidated. - - Default: invalidate all bind-group slots on any pipeline switch (conservative). + - Called on pipeline switch for every active `"bindGroup:*"` key. + - `true` → slot remains valid. `false` → slot invalidated. + - Default: invalidate all bind-group slots on any pipeline switch (conservative). 4. 
**`EncodedCommand`** — a cached instruction: - ```ts - type EncodedCommand = - | { action: 'encode'; node: GraphNode } - | { action: 'skip'; node: GraphNode } - ``` - The command list is the output of the "what to encode" phase. Consumers iterate it and call the actual GPU APIs. + + ```ts + type EncodedCommand = { action: 'encode'; node: GraphNode } | { action: 'skip'; node: GraphNode }; + ``` + + The command list is the output of the "what to encode" phase. Consumers iterate it and call the actual GPU APIs. 5. **`SubtreeCacheEntry`** — persistent across frames: - ```ts - type SubtreeCacheEntry = { - valid: boolean; // false if markDirty() has been called - version: number; // node version when this was recorded - entryState: Map<string, string>; // active state snapshot at subtree entry - exitState: Map<string, string>; // active state snapshot at subtree exit - commands: EncodedCommand[]; // the command segment for this subtree - } - ``` - Keyed by node ID. Since structure is stable, a node's position in the DFS is deterministic — its cache entry is valid whenever `valid === true` + entry state matches. + + ```ts + type SubtreeCacheEntry = { + valid: boolean; // false if markDirty() has been called + version: number; // node version when this was recorded + entryState: Map<string, string>; // active state snapshot at subtree entry + exitState: Map<string, string>; // active state snapshot at subtree exit + commands: EncodedCommand[]; // the command segment for this subtree + }; + ``` + + Keyed by node ID. Since structure is stable, a node's position in the DFS is deterministic — its cache entry is valid whenever `valid === true` + entry state matches. 6. **`GraphNode` extension** — add a `version: number` field (or the user exposes it via a callback `getVersion(node: GraphNode) => number`). Bumped by the user whenever node data changes. 7. 
**`EncodeGraphOptions`**: - ```ts - { - resolveFingerprint: ResolveStateFingerprint; - isSlotCompatible?: IsSlotCompatible; - getVersion: (node: GraphNode) => number; - pipelineType?: string; // default 'Pipeline' - onEncode: (node: GraphNode) => void; - onSkip?: (node: GraphNode) => void; - } - ``` + ```ts + { + resolveFingerprint: ResolveStateFingerprint; + isSlotCompatible?: IsSlotCompatible; + getVersion: (node: GraphNode) => number; + pipelineType?: string; // default 'Pipeline' + onEncode: (node: GraphNode) => void; + onSkip?: (node: GraphNode) => void; + } + ``` ### Phase 2: Two-Phase Encoder @@ -70,17 +72,17 @@ The encoder is split into two phases: **Phase 2a: Plan (produces command list)** 8. `planEncode(roots, options, cache: Map<string, SubtreeCacheEntry>)`: - - DFS traversal maintaining `activeState: Map<string, string>`. - - At each node: - a. Check cache: if `cache[node.id].valid === true` AND `cache[node.id].entryState` deep-equals current `activeState`: + - DFS traversal maintaining `activeState: Map<string, string>`. + - At each node: + a. Check cache: if `cache[node.id].valid === true` AND `cache[node.id].entryState` deep-equals current `activeState`: - **Cache hit** → append `cache[node.id].commands` to output, fast-forward `activeState` to `cache[node.id].exitState`, skip recursion into children. - b. **Cache miss** → resolve fingerprint, apply no-op elision logic (same as before), emit `{ action: 'encode' | 'skip', node }`, recurse into children. + b. **Cache miss** → resolve fingerprint, apply no-op elision logic (same as before), emit `{ action: 'encode' | 'skip', node }`, recurse into children. 
+ - After processing a subtree root (on DFS exit), update cache: + - Record `{ valid: true, version, entryState (snapshot at entry), exitState (snapshot at exit), commands (segment produced during this subtree) }`. -9. **Entry state snapshot**: taken *before* processing the node. If the node itself is a state command, `entryState` is the state before it fires. +9. **Entry state snapshot**: taken _before_ processing the node. If the node itself is a state command, `entryState` is the state before it fires. -10. **Exit state snapshot**: taken *after* processing the node and all its descendants. This is the state the rest of the graph sees after the subtree. +10. **Exit state snapshot**: taken _after_ processing the node and all its descendants. This is the state the rest of the graph sees after the subtree. **Phase 2b: Execute (replays command list)** @@ -97,9 +99,9 @@ The encoder is split into two phases: - Walks from `nodeId` up to root via parent pointers, marking each ancestor's cache entry as invalid (`entry.valid = false`). - Cost: O(depth) per call. - The user calls this whenever: - - A Drawable is added/removed from a node - - A node's data changes (alternative to version-bump; either mechanism works) - - Any structural change occurs + - A Drawable is added/removed from a node + - A node's data changes (alternative to version-bump; either mechanism works) + - Any structural change occurs 14. **Cache hit logic (updated)**: During `planEncode`, at each node: @@ -129,18 +131,20 @@ The encoder is split into two phases: ### Phase 5: Encoder Lifecycle 19. 
The encoder is a **persistent object**: + ```ts class GraphEncoder { - private cache: Map<string, SubtreeCacheEntry>; - private parentMap: Map; - constructor(private options: EncodeGraphOptions) {} - - encode(roots: GraphNode[]): EncodedCommand[]; - markDirty(nodeId: string): void; - markSubtreeDirty(nodeId: string): void; - reset(): void; // full cache clear (graph rebuilt from scratch) + private cache: Map<string, SubtreeCacheEntry>; + private parentMap: Map; + constructor(private options: EncodeGraphOptions) {} + + encode(roots: GraphNode[]): EncodedCommand[]; + markDirty(nodeId: string): void; + markSubtreeDirty(nodeId: string): void; + reset(): void; // full cache clear (graph rebuilt from scratch) } ``` + - First `encode()` call: full DFS, builds cache + parent map. - Subsequent calls: skips clean subtrees in O(1), re-traverses only dirty paths. - `markDirty()`: O(depth) propagation to root. @@ -148,14 +152,14 @@ The encoder is split into two phases: ## Cost Summary -| Operation | Cost | -|-----------|------| -| `markDirty(nodeId)` | O(depth) | -| `encode()` — nothing dirty | O(1) at root | -| `encode()` — one leaf dirty | O(depth) | +| Operation | Cost | +| --------------------------- | ----------------------- | +| `markDirty(nodeId)` | O(depth) | +| `encode()` — nothing dirty | O(1) at root | +| `encode()` — one leaf dirty | O(depth) | | `encode()` — k leaves dirty | O(k × depth) worst case | -| First encode (cold cache) | O(N) | -| `reset()` + next encode | O(N) | +| First encode (cold cache) | O(N) | +| `reset()` + next encode | O(N) | ## Relevant files