From 565f996e247f83cd1e2b205fdd4e058d8b0bc866 Mon Sep 17 00:00:00 2001 From: Evan Robertson Date: Fri, 17 Apr 2026 23:55:16 -0400 Subject: [PATCH] docs(design): add Phase 7 architectural guardrail appendices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two short written sketches that prove R10 (cross-file resolution) and R11 (flow analysis) remain reachable after the v1 ship, without any changes to the v1 public API of `oxabl_semantic::Semantic`. - `docs/design/semantic-v1-cross-file-sketch.md` (R10): shows how `Resolution::Unresolved { reason: External }` entries become `Resolved` via a workspace-level `CrossFileResolutions` side table, walked for USING / NEW / RUN entry points. No v1 shape changes required; existing lint rules consult the new map through a thin wrapper. - `docs/design/semantic-v1-flow-analysis-sketch.md` (R11): shows how a CFG attaches to the AST via the parser-assigned NodeIds and the source-order block bodies. Definite-assignment is a forward dataflow over `(Cfg, Semantic)`. Exception-sensitive flow and alias analysis are called out as future work that the v1 shape accommodates. Non-blocking for the v1 merge — the side-table + NodeId architecture *is* the contract; these docs are its written commentary. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/design/semantic-v1-cross-file-sketch.md | 175 ++++++++++++++++++ .../semantic-v1-flow-analysis-sketch.md | 138 ++++++++++++++ ...6-04-16-004-feat-semantic-layer-v1-plan.md | 2 +- 3 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 docs/design/semantic-v1-cross-file-sketch.md create mode 100644 docs/design/semantic-v1-flow-analysis-sketch.md diff --git a/docs/design/semantic-v1-cross-file-sketch.md b/docs/design/semantic-v1-cross-file-sketch.md new file mode 100644 index 0000000..a348f04 --- /dev/null +++ b/docs/design/semantic-v1-cross-file-sketch.md @@ -0,0 +1,175 @@ +--- +title: "Cross-File Resolution Sketch (R10)" +status: draft +date: 2026-04-17 +parent: docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md +--- + +# Cross-File Resolution Sketch (R10) + +## Purpose + +This appendix proves that R10 ("cross-file resolution is reachable without an +IR rewrite") remains viable after the v1 ship. The v1 +`oxabl_semantic::Semantic` type is designed so cross-file work adds a *side +table* without changing any per-file public type signature. + +It is an illustrated commentary on the architecture, not an implementation +plan. Reviewers are invited to stress-test it. + +## v1 per-file shape — reviewed for cross-file extensibility + +```rust +pub struct Semantic { + pub scope_tree: ScopeTree, + pub symbols: SymbolTable, + pub references: NodeIndexVec, + pub types: NodeIndexVec, + pub schema_revision: SchemaRevision, + pub diagnostics: Vec, +} + +pub enum Resolution { + Resolved(SymbolId), + Unresolved { name: OxablAtom, reason: UnresolvedReason }, +} + +pub enum UnresolvedReason { + NotInScope, + External, // USING, RUN "name", DYNAMIC-FUNCTION, NEW ExternalClass + NoSchema, +} +``` + +The contract: +- Every cross-file unresolved reference produces `Unresolved { reason: + External }`, never `NotInScope`. This is a **stable invariant** of the + resolve pass and is pinned by tests (`resolve_new_class_unknown_is_external`, + `cross_file_class_assignment_silent`). +- `SymbolId` is dense within a single `SymbolTable`. Cross-file resolution + introduces a disambiguating `(FileId, SymbolId)` key; the per-file + `Resolution::Resolved(SymbolId)` survives unchanged because the ambiguating + scope only lives in the new workspace-level map. + +## Where cross-file fits + +``` +┌─────────────────────────┐ ┌─────────────────────────────┐ +│ Semantic per file │ │ Workspace-level side table │ +│ (v1 shape, unchanged) │ ──▶ │ CrossFileResolutions │ +└─────────────────────────┘ └─────────────────────────────┘ + ▲ │ + │ ▼ + └─────── consumers (lint, analyze) ◀─────┘ +``` + +### New type (added post-v1, no v1 changes required) + +```rust +pub struct CrossFileResolutions { + /// For each External reference node in each file, maps to its + /// cross-file resolution (or leaves it External if still unresolved). + pub resolved_external: FxHashMap<(FileId, NodeId), CrossFileSymbol>, +} + +pub struct CrossFileSymbol { + pub file: FileId, + pub symbol: SymbolId, +} + +impl Workspace { + pub fn resolve_cross_file( + files: &FxHashMap, + ) -> CrossFileResolutions { + // ... walk each file's External references; look them up in + // the union of symbol tables; record hits. + todo!() + } +} +``` + +This side table **does not modify** any per-file `Semantic`. Lint rules +consume it via a thin wrapper: + +```rust +pub fn effective_resolution<'a>( + sem: &'a Semantic, + xfr: &'a CrossFileResolutions, + file: FileId, + node: NodeId, +) -> Option { + match sem.references.get(node) { + Some(Resolution::Resolved(s)) => + Some(Resolved::Local(*s)), + Some(Resolution::Unresolved { reason: UnresolvedReason::External, .. }) => { + xfr.resolved_external.get(&(file, node)) + .map(|xf| Resolved::CrossFile(*xf)) + } + _ => None, + } +} +``` + +## Two concrete entry points walked + +### 1. `USING pkg.Name` + `NEW Name(...)` + +v1 behavior (tested): +- Parser emits `StatementKind::Using { type_name: "pkg.Name" }` at file top. +- Declare pass does not bind USING names locally (intentional — it's a hint for + the cross-file resolver, not a declaration). +- Resolve pass sees `NEW Name(...)`, fails the local Types lookup, + records `Unresolved { name: "name", reason: External }` at the NEW expression's + NodeId. + +Cross-file flow (post-v1): +- `CrossFileResolutions::resolve_cross_file` walks every `Using` statement, + maps each fully-qualified name to its declaring file via a workspace-level + name index, and for each `Unresolved External` reference whose atom matches + a USING-imported short name, emits a `CrossFileSymbol`. +- `LINT0001 undefined-symbol` continues to *skip* `External` in v1; post-v1 it + consults `CrossFileResolutions` first, and only then decides: if still + External, downgrade to `LINT0001 cross-file-unresolved` (new code) or let it + slide — that's a product choice, not a representation choice. + +### 2. `RUN "other.p"` + +v1 behavior (tested): +- `RunTarget::Literal("other.p")` resolves to no symbol locally; resolve pass + does **not** record a reference entry (there's no NodeId to bind against + the bare string — RUN's target is a plain `String`, not an `Identifier`). +- This is an acknowledged v1 limitation: the diagnostic surface for undefined + procedures is zero because there's no NodeId to hang the resolution on. + +Cross-file flow (post-v1): +- Promote `RunTarget::Literal` from `String` to `RunTarget::Literal { path: + String, id: NodeId }` — a non-breaking AST addition (tuple → struct variant). +- Resolve records `Unresolved { name: , reason: External }` at the new + NodeId. +- `CrossFileResolutions::resolve_cross_file` walks each file's Run nodes, + tries `path == other_file.path.file_name()`, emits CrossFileSymbol on hit. + +## What cross-file does *not* touch + +- **Per-file `Semantic` type signature**: unchanged. +- **`NodeIndexVec` side table**: unchanged. No new variants on + `Resolution`; cross-file lives in its own map. +- **Lint rule signatures**: `run(program, sem, ctx)` remains. The new + cross-file map lives on `AnalysisContext` as an `Option<&CrossFileResolutions>` + field that pre-v1 callers can ignore. +- **Tests**: existing v1 tests stay green because the invariants they pin + (External emitted for cross-file-ish refs, local resolution preferred) are + exactly what cross-file preserves. + +## Breaking-change budget + +None required. The migration is additive: a new crate +`oxabl_workspace_semantic` (or equivalent module) exposes +`CrossFileResolutions`, and the existing lint rules opt into consulting it via +a one-line config on `AnalysisContext`. + +--- + +**Reviewers**: stress-test by trying to design a cross-file integration that +*would* require changing `Semantic`'s public fields. If you find one, the v1 +shape needs adjustment before ship. diff --git a/docs/design/semantic-v1-flow-analysis-sketch.md b/docs/design/semantic-v1-flow-analysis-sketch.md new file mode 100644 index 0000000..f9d06e7 --- /dev/null +++ b/docs/design/semantic-v1-flow-analysis-sketch.md @@ -0,0 +1,138 @@ +--- +title: "Flow Analysis Sketch (R11)" +status: draft +date: 2026-04-17 +parent: docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md +--- + +# Flow Analysis Sketch (R11) + +## Purpose + +This appendix proves that R11 ("flow analysis — definite assignment, dead +code, reachability — remains reachable after v1 without an IR rewrite") is +viable. The v1 architecture commits to two facts that make a control-flow +graph (CFG) a local, additive extension: + +1. Every `Statement` and `Expression` carries a stable parser-assigned + `NodeId` (Phase 1). +2. `StatementKind::Block(Vec)` / block bodies preserve source + order, so a statement's textual-successor relationship is trivially + reconstructible from the AST. + +No implementation lands in v1 — this is an illustrated contract. + +## The CFG that would attach to the AST + +```rust +pub struct Cfg { + pub blocks: Vec, + /// Entry block for each scope-owning AST node (procedure, function, + /// method, trigger, file root). + pub entry: FxHashMap, + /// Stable identity for CFG blocks. + pub block_of_stmt: NodeIndexVec, +} + +pub struct BasicBlock { + pub id: BlockId, + pub stmts: Vec, // indexes into AST by stable NodeId + pub successors: SmallVec<[BlockId; 2]>, +} + +pub struct CfgBuilder<'a> { + scope_tree: &'a ScopeTree, + symbols: &'a SymbolTable, +} + +impl<'a> CfgBuilder<'a> { + pub fn build(program: &[Statement], sem: &Semantic) -> Cfg { + // ... straightforward recursive walk matching on StatementKind. + todo!() + } +} +``` + +`Cfg` is a **separate value** alongside `Semantic`. It consumes NodeIds; it +does not modify the AST or the side tables. The AST's existing shape is +already the right thing: + +| StatementKind | CFG-build rule | +|-------------------------------------|-------------------------------------------------------| +| `Block(body)` | Sequence; last-stmt successor = enclosing successor | +| `If { then, else }` | Split: current → {then_entry, else_entry or join} | +| `Do { body, .. }` | Loop back-edge body_exit → body_entry | +| `Repeat { body, .. }` | Same as Do | +| `ForEach { body, .. }` | Same as Do with an implicit break on iterator end | +| `Case { when_branches, otherwise }` | Join over N+1 branches | +| `Leave / Next` | Jump to innermost loop's exit / latch | +| `Return` | Jump to enclosing function's exit | +| `Throw` | Jump to innermost CATCH's entry or function exit | +| `Catch / Finally` | Extra predecessor from every preceding THROW site | +| `ExpressionStatement`, declarations | Linear: current → single successor | + +All of these are already representable today — the parser puts bodies in a +`Vec` in source order, and every branch statement carries its own +NodeId. No AST changes needed. + +## Definite-assignment pass, sketched + +```rust +pub struct DefiniteAssignment { + /// For each `SymbolId`, the set of blocks in which the symbol is + /// provably assigned on *every* path that reaches the block's entry. + pub must_assigned_at_entry: NodeIndexVec>, +} + +pub fn definite_assignment(cfg: &Cfg, sem: &Semantic) -> DefiniteAssignment { + // Forward dataflow. Meet = intersection. Gen = writes in-block. + // in[b] = ∩ out[p] for p ∈ predecessors(b) + // out[b] = in[b] ∪ writes_in(b) + // Converge via worklist. + todo!() +} +``` + +Dataflow is parameterized purely over `Cfg` + `Semantic`. Nothing in the +passage reaches back into the AST; it reads: +- Writes in each block: existing `Symbol::write_count` has the count, but we + need the NodeIds. Those live in `Semantic.references` — every *target* of + an assignment is an `Identifier` expression whose NodeId is in the + reference side table resolved to a `SymbolId`. +- Reads in each block: symmetric — the same reference side table. + +So definite-assignment-for-READ-BEFORE-WRITE becomes: +``` +for each resolved read ref r at NodeId n in block b: + let sym = sem.references[n] === Resolution::Resolved(s) ⇒ s; + if sym ∉ must_assigned_at_entry[b]: + emit LINT0005 read-before-definite-assignment at n +``` + +## What's *not* reachable without changes + +Genuinely hard future work that the v1 shape does accommodate but doesn't +pre-solve: + +- **Exception-sensitive flow**. ABL `CATCH` / `FINALLY` / `UNDO, THROW` need + a subtle extra-edge model (every potentially-throwing stmt has an edge to + the innermost CATCH). The AST preserves enough shape — `Throw(expr)` is a + distinct node, `Catch { body }` is a distinct scope — but the CFG builder + has to walk scope ancestors at each throw site. This is O(n·d) on depth + `d` and is fine in practice (depth ≤ 5 in pcna-erp samples). +- **Alias analysis via `BUFFER-COPY` / dynamic handles**. These are + recorded as External in v1. Cross-file alias is a post-v1 problem and + requires both the cross-file sketch (R10) and a live-variable dataflow + on top. + +## Breaking-change budget + +Zero. `Cfg` is a new value; `DefiniteAssignment` is a new value; +`LINT0005 read-before-definite-assignment` is a new code in `oxabl_lint`. +No v1 public type changes. + +--- + +**Reviewers**: stress-test by imagining a dataflow question that the v1 +side-table model can't answer. If you find one that isn't "cross-file" or +"alias", the shape needs an adjustment. diff --git a/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md b/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md index 388b801..35bde95 100644 --- a/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md +++ b/docs/plans/2026-04-16-004-feat-semantic-layer-v1-plan.md @@ -1022,7 +1022,7 @@ Deferred to follow-up: - Exact-JSON goldens under a stable NodeId allocator (blocked on parser's NodeId-minting determinism under feature growth). -### Phase 7 — Architectural guardrail appendices +### Phase 7 — Architectural guardrail appendices ✅ Two short written sketches that prove R10 and R11 remain reachable. **Non-blocking** for the v1 merge — the side-table + NodeId architecture *is* the contract, and these docs are