From b598e5880f050935cda4205e655f5a84b6cabf87 Mon Sep 17 00:00:00 2001
From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com>
Date: Fri, 15 May 2026 04:00:26 +0100
Subject: [PATCH] feat(verify): port `affinescript.ownership` custom-section codec (C2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the parser and encoder for the `affinescript.ownership`
custom-section wire format. The parser is a faithful port of OCaml
`Tw_verify.parse_ownership_section_payload`; the encoder mirrors the
matching emitter on the affinescript side
(`Codegen.build_ownership_section`) so downstream emitters in ephapax
(C6) and elsewhere can reuse it.

Wire format
-----------

Little-endian, byte-aligned:

    u32le  count
    for each entry:
      u32le  func_idx
      u8     n_params
      u8[n]  param_kinds  (0=Unrestricted, 1=Linear, 2=SharedBorrow, 3=ExclBorrow)
      u8     ret_kind

Cross-impl parity decision
--------------------------

The OCaml parser is lenient: reading past the end of the payload yields
zero bytes (interpreted as `Unrestricted` kinds and `func_idx = 0`).
This Rust port preserves that exactly. A correctly-emitted section will
never be truncated, but matching the leniency means the C5 cross-compat
suite sees identical results on every payload the OCaml side accepts.
Stricter validation is a future opt-in (`parse_strict`), not the
default.

The `OwnershipKind::from_byte` fallback (anything outside 0..=3 →
`Unrestricted`) was already in C1 and is reused here.

New API
-------

- `OwnershipEntry { func_idx, param_kinds, ret_kind }` — named struct
  replacing the OCaml 3-tuple for readability.
- `parse_ownership_section_payload(&[u8]) -> Vec<OwnershipEntry>` —
  lenient parser (matches OCaml).
- `build_ownership_section_payload(&[OwnershipEntry]) -> Vec<u8>` —
  inverse encoder; panics if any entry has >255 params (the n_params
  field is u8, but real wasm modules don't hit this limit).
- `OwnershipKind::to_byte(self) -> u8` — encode side of the existing
  `from_byte`.

Tests
-----

11/11 unit tests, covering:

- empty payload
- count=0 with no entries
- single entry with zero params
- single entry with all four kind values (encode + decode)
- multi-entry round-trip
- unknown kind byte falls back to `Unrestricted` (parity guard)
- truncated payload reads zeros past EOF (parity guard)
- empty round-trip (entries → bytes → entries)
- realistic 2-entry round-trip
- exact wire-format byte sequence for a known input

    $ cargo test -p typed-wasm-verify
    running 11 tests
    test section::tests::build_emits_correct_wire_format ... ok
    test section::tests::empty_payload_yields_no_entries ... ok
    test section::tests::roundtrip_empty ... ok
    test section::tests::count_zero_yields_no_entries ... ok
    test section::tests::multiple_entries ... ok
    test section::tests::roundtrip_realistic ... ok
    test section::tests::single_entry_no_params ... ok
    test section::tests::single_entry_with_all_kinds ... ok
    test section::tests::truncated_payload_reads_zeros_past_end ... ok
    test section::tests::unknown_kind_byte_decodes_to_unrestricted ... ok
    test tests::ownership_kind_byte_roundtrip ... ok
    test result: ok. 11 passed; 0 failed; 0 ignored

Stacked on top of #19 (C1 scaffold). Next: C3 — port the per-path
use-range analysis and intra-function verifier.

Co-Authored-By: Claude Opus 4.7
---
 crates/typed-wasm-verify/src/lib.rs     |   8 +
 crates/typed-wasm-verify/src/section.rs | 222 ++++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 crates/typed-wasm-verify/src/section.rs

diff --git a/crates/typed-wasm-verify/src/lib.rs b/crates/typed-wasm-verify/src/lib.rs
index 4b3707e..2e3f62d 100644
--- a/crates/typed-wasm-verify/src/lib.rs
+++ b/crates/typed-wasm-verify/src/lib.rs
@@ -16,6 +16,9 @@
 
 use thiserror::Error;
 
+pub mod section;
+pub use section::{build_ownership_section_payload, parse_ownership_section_payload, OwnershipEntry};
+
 /// Ownership kinds matching the OCaml `Codegen.ownership_kind` enum.
 /// Wire encoding in the `affinescript.ownership` custom section: a single
 /// u8 per kind, values 0/1/2/3 as below.
@@ -38,6 +41,11 @@ impl OwnershipKind {
             _ => OwnershipKind::Unrestricted,
         }
     }
+
+    /// Encode to the single-byte wire value.
+    pub fn to_byte(self) -> u8 {
+        self as u8
+    }
 }
 
 /// An ownership violation found in a wasm function body.
diff --git a/crates/typed-wasm-verify/src/section.rs b/crates/typed-wasm-verify/src/section.rs
new file mode 100644
index 0000000..3f7760c
--- /dev/null
+++ b/crates/typed-wasm-verify/src/section.rs
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: PMPL-1.0-or-later
+//
+// `affinescript.ownership` custom-section codec.
+//
+// Wire format (little-endian, byte-aligned):
+//
+//   u32le  count
+//   for each entry:
+//     u32le  func_idx
+//     u8     n_params
+//     u8[n]  param_kinds  (0=Unrestricted, 1=Linear, 2=SharedBorrow, 3=ExclBorrow)
+//     u8     ret_kind
+//
+// Rust port of `Tw_verify.parse_ownership_section_payload` plus the
+// inverse encoder mirroring `Codegen.build_ownership_section`. The OCaml
+// parser is lenient on truncation — reading past the buffer end yields
+// 0 — and this port matches that behaviour so the cross-compat suite
+// (C5) sees identical results on every payload the OCaml side accepts.
+
+use crate::OwnershipKind;
+
+/// One entry in the ownership section: a function's index plus its
+/// ownership-annotated signature. Mirrors the 3-tuple
+/// `(int * ownership_kind list * ownership_kind)` returned by the OCaml
+/// parser, but as a named struct for readability.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct OwnershipEntry {
+    pub func_idx: u32,
+    pub param_kinds: Vec<OwnershipKind>,
+    pub ret_kind: OwnershipKind,
+}
+
+/// Parse the `affinescript.ownership` custom-section payload into
+/// structured entries.
+///
+/// Matches OCaml `Tw_verify.parse_ownership_section_payload` exactly,
+/// including the leniency: a truncated payload yields zeros for the
+/// missing bytes (interpreted as `Unrestricted` kinds and `func_idx = 0`).
+/// A properly-emitted section will never be truncated; this leniency is
+/// a defence-in-depth choice that preserves cross-impl parity.
+pub fn parse_ownership_section_payload(payload: &[u8]) -> Vec<OwnershipEntry> {
+    let mut r = LenientReader::new(payload);
+    let count = r.read_u32_le();
+    (0..count)
+        .map(|_| {
+            let func_idx = r.read_u32_le();
+            let n_params = r.read_u8();
+            let param_kinds = (0..n_params)
+                .map(|_| OwnershipKind::from_byte(r.read_u8()))
+                .collect();
+            let ret_kind = OwnershipKind::from_byte(r.read_u8());
+            OwnershipEntry { func_idx, param_kinds, ret_kind }
+        })
+        .collect()
+}
+
+/// Encode entries to the `affinescript.ownership` custom-section
+/// payload format. The inverse of `parse_ownership_section_payload` for
+/// any input that doesn't truncate.
+///
+/// Mirrors OCaml `Codegen.build_ownership_section` (which lives in the
+/// affinescript repo and isn't visible here, but the wire format is the
+/// authoritative spec).
+///
+/// # Panics
+///
+/// Panics if any entry has more than 255 params (the n_params field is
+/// a single byte). Real wasm modules don't have functions with more
+/// than 255 params (the engine limit is far lower), so this is
+/// unreachable in practice.
+pub fn build_ownership_section_payload(entries: &[OwnershipEntry]) -> Vec<u8> {
+    let count: u32 = entries.len().try_into().expect("entry count must fit in u32");
+    let mut out = Vec::with_capacity(4 + entries.len() * 8);
+    out.extend_from_slice(&count.to_le_bytes());
+    for entry in entries {
+        out.extend_from_slice(&entry.func_idx.to_le_bytes());
+        let n_params: u8 = entry.param_kinds.len().try_into().expect("param count must fit in u8");
+        out.push(n_params);
+        for k in &entry.param_kinds {
+            out.push(k.to_byte());
+        }
+        out.push(entry.ret_kind.to_byte());
+    }
+    out
+}
+
+/// Cursor that reads u32le / u8 from a byte slice, returning 0 past EOF.
+/// Mirrors the OCaml `read_u32_le` / `read_u8` helpers.
+struct LenientReader<'a> {
+    buf: &'a [u8],
+    pos: usize,
+}
+
+impl<'a> LenientReader<'a> {
+    fn new(buf: &'a [u8]) -> Self {
+        Self { buf, pos: 0 }
+    }
+
+    fn read_u32_le(&mut self) -> u32 {
+        if self.pos + 4 > self.buf.len() {
+            return 0;
+        }
+        let b = &self.buf[self.pos..self.pos + 4];
+        self.pos += 4;
+        u32::from_le_bytes([b[0], b[1], b[2], b[3]])
+    }
+
+    fn read_u8(&mut self) -> u8 {
+        if self.pos >= self.buf.len() {
+            return 0;
+        }
+        let v = self.buf[self.pos];
+        self.pos += 1;
+        v
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use OwnershipKind::*;
+
+    fn entry(func_idx: u32, params: Vec<OwnershipKind>, ret: OwnershipKind) -> OwnershipEntry {
+        OwnershipEntry { func_idx, param_kinds: params, ret_kind: ret }
+    }
+
+    #[test]
+    fn empty_payload_yields_no_entries() {
+        assert_eq!(parse_ownership_section_payload(&[]), vec![]);
+    }
+
+    #[test]
+    fn count_zero_yields_no_entries() {
+        assert_eq!(parse_ownership_section_payload(&[0, 0, 0, 0]), vec![]);
+    }
+
+    #[test]
+    fn single_entry_no_params() {
+        // count=1, func_idx=7, n_params=0, ret_kind=0
+        let payload = [1, 0, 0, 0, 7, 0, 0, 0, 0, 0];
+        let parsed = parse_ownership_section_payload(&payload);
+        assert_eq!(parsed, vec![entry(7, vec![], Unrestricted)]);
+    }
+
+    #[test]
+    fn single_entry_with_all_kinds() {
+        // count=1, func_idx=42, n_params=4, params=[Linear, Unrestricted, ExclBorrow, SharedBorrow], ret=Linear
+        let payload = [1, 0, 0, 0, 42, 0, 0, 0, 4, 1, 0, 3, 2, 1];
+        let parsed = parse_ownership_section_payload(&payload);
+        assert_eq!(
+            parsed,
+            vec![entry(42, vec![Linear, Unrestricted, ExclBorrow, SharedBorrow], Linear)]
+        );
+    }
+
+    #[test]
+    fn multiple_entries() {
+        let entries = vec![
+            entry(1, vec![Linear], Unrestricted),
+            entry(2, vec![ExclBorrow, ExclBorrow], Linear),
+            entry(99, vec![], SharedBorrow),
+        ];
+        let bytes = build_ownership_section_payload(&entries);
+        assert_eq!(parse_ownership_section_payload(&bytes), entries);
+    }
+
+    #[test]
+    fn unknown_kind_byte_decodes_to_unrestricted() {
+        // Matches OCaml `kind_of_byte` fallback for cross-impl parity.
+        // count=1, func_idx=0, n_params=1, param=99, ret=200
+        let payload = [1, 0, 0, 0, 0, 0, 0, 0, 1, 99, 200];
+        let parsed = parse_ownership_section_payload(&payload);
+        assert_eq!(parsed, vec![entry(0, vec![Unrestricted], Unrestricted)]);
+    }
+
+    #[test]
+    fn truncated_payload_reads_zeros_past_end() {
+        // count=2, but only one entry's worth of bytes follows.
+        // Matches OCaml leniency (returns 0 for short reads).
+        // count=2, then func_idx=5, n_params=1, param=1 (Linear), ret=2 (SharedBorrow)
+        // ... then nothing — second entry should read all zeros.
+        let payload = [2, 0, 0, 0, 5, 0, 0, 0, 1, 1, 2];
+        let parsed = parse_ownership_section_payload(&payload);
+        assert_eq!(
+            parsed,
+            vec![
+                entry(5, vec![Linear], SharedBorrow),
+                entry(0, vec![], Unrestricted), // zero-filled
+            ]
+        );
+    }
+
+    #[test]
+    fn roundtrip_empty() {
+        let entries: Vec<OwnershipEntry> = vec![];
+        let bytes = build_ownership_section_payload(&entries);
+        assert_eq!(bytes, vec![0, 0, 0, 0]);
+        assert_eq!(parse_ownership_section_payload(&bytes), entries);
+    }
+
+    #[test]
+    fn roundtrip_realistic() {
+        // Realistic shape: an exported `consume_string(s: own String) -> ()`
+        // and a `borrow_string(s: ref String) -> i32`, both at indices the
+        // affinescript codegen would produce after the host imports.
+        let entries = vec![
+            entry(2, vec![Linear], Unrestricted),
+            entry(3, vec![SharedBorrow], Unrestricted),
+        ];
+        let bytes = build_ownership_section_payload(&entries);
+        let parsed = parse_ownership_section_payload(&bytes);
+        assert_eq!(parsed, entries);
+    }
+
+    #[test]
+    fn build_emits_correct_wire_format() {
+        let entries = vec![entry(7, vec![Linear, ExclBorrow], SharedBorrow)];
+        let bytes = build_ownership_section_payload(&entries);
+        // count=1, func_idx=7, n_params=2, params=[1,3], ret=2
+        assert_eq!(bytes, vec![1, 0, 0, 0, 7, 0, 0, 0, 2, 1, 3, 2]);
+    }
+}