diff --git a/docs/ECOSYSTEM.adoc b/docs/ECOSYSTEM.adoc index bd70c715..a0c29f69 100644 --- a/docs/ECOSYSTEM.adoc +++ b/docs/ECOSYSTEM.adoc @@ -219,11 +219,14 @@ builtins lower to on-demand `wasi_snapshot_preview1.*` imports `ctx.wasi_func_indices`; zero impact on units that don't use them; verified with a multi-import combo regression). Component path bridges to `wasi:clocks`/`wasi:cli`. Real-host main-invoke deferred -to S6 (WIT export-lifting / wasi:cli/run command shape). String -accessors (env_at/arg_at) gated on a byte-level wasm-IR extension -(I32Load8U/I32Store8 absent today) — tracked as the next slice -before/with S5 filesystem.** WIT world of -record: `wit/affinescript.wit` +to S6 (WIT export-lifting / wasi:cli/run command shape). +**S5 string accessors (env_at/arg_at) DONE: the wasm IR gained +the byte-level load/store family (I32Load8U/I32Store8 + the full +WebAssembly 1.0 §5.4.6 row, opcodes 0x2C..0x35 / 0x3A..0x3E); +accessors lower to on-demand `environ_get`/`args_get` paired with +the existing `*_sizes_get` import (dedup keeps each WASI import +once even when both `*_count` and `*_at` are used).** WIT world +of record: `wit/affinescript.wit` |INT-04 |Publish compiler + runtime to JSR (then npm) |#181 |runtime packaging READY (affine-js + affinescript-tea JSR dry-run green; manual-only `publish-jsr.yml`; docs/PACKAGING.adoc). INT-01 dep diff --git a/docs/TECH-DEBT.adoc b/docs/TECH-DEBT.adoc index eac7b010..66c5820e 100644 --- a/docs/TECH-DEBT.adoc +++ b/docs/TECH-DEBT.adoc @@ -230,11 +230,14 @@ Component-Model re-target, S1..S6); S2 toolchain #251 closed; S3 componentize done; **S4a (clock) + S4b (env_count, arg_count) DONE — on-demand preview1 imports via Effect_sites pre-scan, canonical-order indexing through `ctx.wasi_func_indices`; combo -regression proves no collision. String accessors (env_at/arg_at) -gated on byte-level wasm IR (I32Load8U/I32Store8 absent today) — -tracked next slice. Real-host main-invoke = S6 (WIT export -lifting). Next S5 -(native clocks/env/argv)** +regression proves no collision. **S5 (env_at/arg_at) DONE — wasm +IR extended with the byte-level load/store family +(I32Load8U/I32Store8 and siblings); accessors lower to on-demand +`environ_get`/`args_get` imports paired with the existing +`*_sizes_get` (dedup keeps each WASI import exactly once even when +both `*_count` and `*_at` are used in the same unit); guest +allocates a length-prefixed AS string and byte-copies from the +WASI buffer.** Real-host main-invoke = S6 (WIT export lifting) |INT-04 |Publish to JSR/npm |S2 |#181 packaging READY (dry-run green, manual workflow); compiler-binary distribution decided = **ADR-019** (#260, Releases + thin Deno/JSR shim, staged S1..S4) — S1/S2/S3 diff --git a/lib/codegen.ml b/lib/codegen.ml index 35f6959d..c5b95459 100644 --- a/lib/codegen.ml +++ b/lib/codegen.ml @@ -873,8 +873,9 @@ let rec gen_expr (ctx : context) (expr : expr) : (context * instr list) result = (added on-demand at module assembly; idx looked up in [ctx.wasi_func_indices]). The Unit arg satisfies the zero-param-fn collapse wart; it is evaluated but its value - is unused. String accessors (env_at/arg_at) need byte-level - wasm IR ops (currently absent) and are a tracked follow-up. *) + is unused. The companion string accessors `env_at`/`arg_at` + landed alongside the byte-level wasm IR extension — see + the case below. *) let wasi_name = if id.name = "env_count" then "environ_sizes_get" else "args_sizes_get" @@ -894,6 +895,47 @@ let rec gen_expr (ctx : context) (expr : expr) : (context * instr list) result = in Ok (ctx_with_heap, code) + | ExprVar id when (id.name = "env_at" || id.name = "arg_at") + && List.length args = 1 -> + (* ADR-015 S5 (#180): env_at(i: Int) / arg_at(i: Int) -> String. + Allocates a length-prefixed AS string and byte-copies the + i-th null-terminated entry from the WASI environ/argv + buffer. Uses [I32Load8U]/[I32Store8] (the byte-level wasm + IR extension landed alongside this slice). Pairs the + existing on-demand `*_sizes_get` import with the matching + `environ_get`/`args_get` import (registered above in the + `optional_wasi` table; deduped by wasi name). *) + let sizes_name, get_name = + if id.name = "env_at" then "environ_sizes_get", "environ_get" + else "args_sizes_get", "args_get" + in + let sizes_func_idx = + try List.assoc sizes_name ctx.wasi_func_indices + with Not_found -> 1 + in + let get_func_idx = + try List.assoc get_name ctx.wasi_func_indices + with Not_found -> 2 + in + let* (ctx0, arg_code) = gen_expr ctx (List.hd args) in + let (c1, n_local) = alloc_local ctx0 ("__" ^ id.name ^ "_n") in + let (c2, scratch_local) = alloc_local c1 ("__" ^ id.name ^ "_scratch") in + let (c3, count_local) = alloc_local c2 ("__" ^ id.name ^ "_count") in + let (c4, bufsize_local) = alloc_local c3 ("__" ^ id.name ^ "_bufsize") in + let (c5, ptrvec_local) = alloc_local c4 ("__" ^ id.name ^ "_ptrvec") in + let (c6, src_local) = alloc_local c5 ("__" ^ id.name ^ "_src") in + let (c7, dst_local) = alloc_local c6 ("__" ^ id.name ^ "_dst") in + let (c8, result_local) = alloc_local c7 ("__" ^ id.name ^ "_result") in + let (ctx_with_heap, heap_idx) = ensure_heap_ptr c8 in + let code = + arg_code @ + Wasi_runtime.gen_str_at_via_get + heap_idx n_local scratch_local count_local bufsize_local + ptrvec_local src_local dst_local result_local + sizes_func_idx get_func_idx + in + Ok (ctx_with_heap, code) + | ExprVar id when List.mem_assoc id.name ctx.variant_tags -> (* Enum constructor called as a function: Circle(5), Rect({x:1,y:2}), etc. Layout: [tag: i32][field1: i32][field2: i32]... @@ -2549,13 +2591,25 @@ let generate_module ?loader (prog : program) : wasm_module result = false prog in let optional_wasi = - (* (guest_builtin_name, wasi_import_name, factory) — canonical order. *) + (* (guest_builtin_name, wasi_import_name, factory) — canonical order. + Multiple builtins MAY require the same WASI import (e.g. both + `env_count` and `env_at` need `environ_sizes_get`); the dedup + pass below keeps the first occurrence so each wasm import shows + up exactly once with a stable index. *) [ ("clock_now_ms", "clock_time_get", Wasi_runtime.create_clock_time_get_import); ("env_count", "environ_sizes_get", Wasi_runtime.create_environ_sizes_get_import); ("arg_count", "args_sizes_get", Wasi_runtime.create_args_sizes_get_import); + ("env_at", "environ_sizes_get", Wasi_runtime.create_environ_sizes_get_import); + ("env_at", "environ_get", Wasi_runtime.create_environ_get_import); + ("arg_at", "args_sizes_get", Wasi_runtime.create_args_sizes_get_import); + ("arg_at", "args_get", Wasi_runtime.create_args_get_import); ] - |> List.filter_map - (fun (b, w, f) -> if uses b then Some (w, f ()) else None) + |> List.filter (fun (b, _, _) -> uses b) + |> List.fold_left + (fun acc (_, w, f) -> + if List.exists (fun (w', _) -> w' = w) acc then acc + else acc @ [(w, f ())]) + [] |> List.mapi (fun i (w, (imp, ty)) -> (i + 1, w, imp, ty)) in let opt_types = List.map (fun (_, _, _, ty) -> ty) optional_wasi in diff --git a/lib/typecheck.ml b/lib/typecheck.ml index 529b338c..177843b5 100644 --- a/lib/typecheck.ml +++ b/lib/typecheck.ml @@ -1324,11 +1324,18 @@ let register_builtins (ctx : context) : unit = (TArrow (ty_int, QOmega, ty_int, ESingleton "Time")); (* ADR-015 S4b (#180): WASI environment / argv COUNTS. The Unit arg satisfies the zero-param-fn collapse wart (`fn()->T` lowers to - bare `T`; callable zero-arg builtins take `Unit -> R`). String - accessors (env_at/arg_at) need byte-level wasm IR ops — tracked - follow-up. Effect row `Time` (reserved). *) + bare `T`; callable zero-arg builtins take `Unit -> R`). + Effect row `Time` (reserved). *) bind_var ctx "env_count" (TArrow (ty_unit, QOmega, ty_int, ESingleton "Time")); bind_var ctx "arg_count" (TArrow (ty_unit, QOmega, ty_int, ESingleton "Time")); + (* ADR-015 S5 (#180): WASI environment / argv STRING ACCESSORS. Returns + the i-th entry as a length-prefixed AS string. Lowered via + `environ_get`/`args_get` + a byte-level scan + byte-copy, which + became expressible once `I32Load8U`/`I32Store8` joined the wasm IR. + Index out-of-bounds is UB at this layer — the guest is expected to + bound-check against `env_count(())`/`arg_count(())`. *) + bind_var ctx "env_at" (TArrow (ty_int, QOmega, ty_string, ESingleton "Time")); + bind_var ctx "arg_at" (TArrow (ty_int, QOmega, ty_string, ESingleton "Time")); bind_var ctx "eprint" (TArrow (ty_string, QOmega, ty_unit, ESingleton "IO")); bind_var ctx "eprintln" (TArrow (ty_string, QOmega, ty_unit, ESingleton "IO")); bind_var ctx "read_line" diff --git a/lib/wasi_runtime.ml b/lib/wasi_runtime.ml index 3e189483..800a6dea 100644 --- a/lib/wasi_runtime.ml +++ b/lib/wasi_runtime.ml @@ -359,8 +359,8 @@ let gen_print_str (heap_ptr_global : int) (str_ptr_local : int) (fd_write_idx : Signature: `(envc_out: i32, envbuf_size_out: i32) -> errno: i32`. Writes the env-var count and the total byte size of the null-terminated `KEY=VAL\0…` buffer the next call would need. - String accessor (`env_at`) is gated on byte-level wasm IR ops, - deferred to a follow-up slice. *) + Paired with `environ_get` (created by + {!create_environ_get_import}) for the `env_at` string accessor. *) let create_environ_sizes_get_import () : import * func_type = let func_type = { ft_params = [I32; I32]; (* envc_out_ptr, envbuf_size_out_ptr *) @@ -413,3 +413,159 @@ let gen_count_via_sizes_get LocalGet scratch_local; I32Load (2, 0); ] + +(** Create the WASI `environ_get` import (ADR-015 S5, #180). + Signature: `(environ_ptr_ptr: i32, environ_buf_ptr: i32) -> errno: i32`. + Fills two regions: a vector of pointers (one per env-var, written + at `environ_ptr_ptr`) and a contiguous buffer of null-terminated + `KEY=VAL` strings (written at `environ_buf_ptr`). The sizes that + must be allocated are reported by `environ_sizes_get`. *) +let create_environ_get_import () : import * func_type = + let func_type = { + ft_params = [I32; I32]; (* environ_ptr_ptr, environ_buf_ptr *) + ft_results = [I32]; (* errno *) + } in + let import = { + i_module = "wasi_snapshot_preview1"; + i_name = "environ_get"; + i_desc = ImportFunc 0; + } in + (import, func_type) + +(** Create the WASI `args_get` import (ADR-015 S5, #180). + Signature: `(argv_ptr_ptr: i32, argv_buf_ptr: i32) -> errno: i32`. + Same shape as `environ_get`. *) +let create_args_get_import () : import * func_type = + let func_type = { + ft_params = [I32; I32]; + ft_results = [I32]; + } in + let import = { + i_module = "wasi_snapshot_preview1"; + i_name = "args_get"; + i_desc = ImportFunc 0; + } in + (import, func_type) + +(** Emit `env_at(i)` / `arg_at(i)`: fetch the i-th entry from the WASI + environ/argv vector and return it as a length-prefixed AffineScript + string. Sequence: + 1. `*_sizes_get(&count, &bufsize)` + 2. allocate `count*4` bytes for the pointer vector + `bufsize` + bytes for the string buffer + 3. `*_get(ptrvec, ptrvec + count*4)` + 4. resolve `src = ptrvec[i]` + 5. scan `src` for the null terminator to compute length + 6. allocate `(4 + length)` bytes for the result string, + store length at +0, byte-copy `src..src+length` to `result+4` + 7. leave the result pointer on the stack + + The byte loops use `I32Load8U`/`I32Store8` (added with the + byte-level wasm IR extension). The caller has placed the index `i` + on the stack; this helper consumes it via [LocalSet n_local]. + + All locals must be pre-allocated by the caller (8 in total). The + helper itself does not modify the type or scope context — it only + emits instructions. *) +let gen_str_at_via_get + (heap_ptr_global : int) + (n_local : int) + (scratch_local : int) + (count_local : int) + (bufsize_local : int) + (ptrvec_local : int) + (src_local : int) + (dst_local : int) + (result_local : int) + (sizes_func_idx : int) + (get_func_idx : int) + : instr list = + [ + (* Index `i` is on the stack from the caller's arg_code. *) + LocalSet n_local; + + (* --- Phase 1: sizes_get -> count, bufsize --- *) + GlobalGet heap_ptr_global; + I32Const 8l; I32Add; + GlobalSet heap_ptr_global; + GlobalGet heap_ptr_global; + I32Const 8l; I32Sub; + LocalSet scratch_local; + LocalGet scratch_local; (* count_ptr *) + LocalGet scratch_local; I32Const 4l; I32Add; (* bufsize_ptr *) + Call sizes_func_idx; + Drop; + LocalGet scratch_local; I32Load (2, 0); LocalSet count_local; + LocalGet scratch_local; I32Load (2, 4); LocalSet bufsize_local; + + (* --- Phase 2: allocate ptrvec (count*4) + bytebuf (bufsize) --- *) + GlobalGet heap_ptr_global; + LocalSet ptrvec_local; + GlobalGet heap_ptr_global; + LocalGet count_local; I32Const 4l; I32Mul; + LocalGet bufsize_local; I32Add; + I32Add; + GlobalSet heap_ptr_global; + + (* --- Phase 3: get(ptrvec, ptrvec + count*4) --- *) + LocalGet ptrvec_local; + LocalGet ptrvec_local; LocalGet count_local; I32Const 4l; I32Mul; I32Add; + Call get_func_idx; + Drop; + + (* --- Phase 4: src = *(ptrvec + i*4) --- *) + LocalGet ptrvec_local; + LocalGet n_local; I32Const 4l; I32Mul; I32Add; + I32Load (2, 0); + LocalSet src_local; + + (* --- Phase 5: scan for null terminator. Use scratch as cursor. --- *) + LocalGet src_local; LocalSet scratch_local; + Block (BtEmpty, [ + Loop (BtEmpty, [ + LocalGet scratch_local; + I32Load8U (0, 0); + I32Eqz; BrIf 1; (* exit on 0 byte *) + LocalGet scratch_local; I32Const 1l; I32Add; + LocalSet scratch_local; + Br 0 + ]) + ]); + (* length = cursor - src (excludes the null terminator). + Stash it back into count_local, which we are done with. *) + LocalGet scratch_local; LocalGet src_local; I32Sub; + LocalSet count_local; + + (* --- Phase 6: allocate (4 + length) for the AS string --- *) + GlobalGet heap_ptr_global; + LocalSet result_local; + GlobalGet heap_ptr_global; + I32Const 4l; LocalGet count_local; I32Add; + I32Add; + GlobalSet heap_ptr_global; + + (* Store length at result+0. *) + LocalGet result_local; + LocalGet count_local; + I32Store (2, 0); + + (* --- Phase 7: byte-copy src..src+length -> result+4 --- + Reuses scratch as src cursor and count_local as the loop count. *) + LocalGet src_local; LocalSet scratch_local; + LocalGet result_local; I32Const 4l; I32Add; LocalSet dst_local; + Block (BtEmpty, [ + Loop (BtEmpty, [ + LocalGet count_local; I32Eqz; BrIf 1; + LocalGet dst_local; + LocalGet scratch_local; I32Load8U (0, 0); + I32Store8 (0, 0); + LocalGet scratch_local; I32Const 1l; I32Add; LocalSet scratch_local; + LocalGet dst_local; I32Const 1l; I32Add; LocalSet dst_local; + LocalGet count_local; I32Const 1l; I32Sub; LocalSet count_local; + Br 0 + ]) + ]); + + (* --- Result: leave the string pointer on the stack. --- *) + LocalGet result_local; + ] diff --git a/lib/wasm.ml b/lib/wasm.ml index d15903c2..afcd9c4e 100644 --- a/lib/wasm.ml +++ b/lib/wasm.ml @@ -51,10 +51,25 @@ type instr = | I64Load of int * int | F32Load of int * int | F64Load of int * int + | I32Load8S of int * int (** load 1 byte, sign-extend to i32 *) + | I32Load8U of int * int (** load 1 byte, zero-extend to i32 *) + | I32Load16S of int * int (** load 2 bytes, sign-extend to i32 *) + | I32Load16U of int * int (** load 2 bytes, zero-extend to i32 *) + | I64Load8S of int * int + | I64Load8U of int * int + | I64Load16S of int * int + | I64Load16U of int * int + | I64Load32S of int * int + | I64Load32U of int * int | I32Store of int * int | I64Store of int * int | F32Store of int * int | F64Store of int * int + | I32Store8 of int * int (** store low 1 byte of i32 *) + | I32Store16 of int * int (** store low 2 bytes of i32 *) + | I64Store8 of int * int + | I64Store16 of int * int + | I64Store32 of int * int | MemorySize | MemoryGrow diff --git a/lib/wasm_encode.ml b/lib/wasm_encode.ml index 64dd0e52..c0956802 100644 --- a/lib/wasm_encode.ml +++ b/lib/wasm_encode.ml @@ -123,10 +123,25 @@ let rec add_instr buf = function | I64Load (align, offset) -> add_u8 buf 0x29; add_memarg buf align offset | F32Load (align, offset) -> add_u8 buf 0x2A; add_memarg buf align offset | F64Load (align, offset) -> add_u8 buf 0x2B; add_memarg buf align offset + | I32Load8S (align, offset) -> add_u8 buf 0x2C; add_memarg buf align offset + | I32Load8U (align, offset) -> add_u8 buf 0x2D; add_memarg buf align offset + | I32Load16S (align, offset) -> add_u8 buf 0x2E; add_memarg buf align offset + | I32Load16U (align, offset) -> add_u8 buf 0x2F; add_memarg buf align offset + | I64Load8S (align, offset) -> add_u8 buf 0x30; add_memarg buf align offset + | I64Load8U (align, offset) -> add_u8 buf 0x31; add_memarg buf align offset + | I64Load16S (align, offset) -> add_u8 buf 0x32; add_memarg buf align offset + | I64Load16U (align, offset) -> add_u8 buf 0x33; add_memarg buf align offset + | I64Load32S (align, offset) -> add_u8 buf 0x34; add_memarg buf align offset + | I64Load32U (align, offset) -> add_u8 buf 0x35; add_memarg buf align offset | I32Store (align, offset) -> add_u8 buf 0x36; add_memarg buf align offset | I64Store (align, offset) -> add_u8 buf 0x37; add_memarg buf align offset | F32Store (align, offset) -> add_u8 buf 0x38; add_memarg buf align offset | F64Store (align, offset) -> add_u8 buf 0x39; add_memarg buf align offset + | I32Store8 (align, offset) -> add_u8 buf 0x3A; add_memarg buf align offset + | I32Store16 (align, offset) -> add_u8 buf 0x3B; add_memarg buf align offset + | I64Store8 (align, offset) -> add_u8 buf 0x3C; add_memarg buf align offset + | I64Store16 (align, offset) -> add_u8 buf 0x3D; add_memarg buf align offset + | I64Store32 (align, offset) -> add_u8 buf 0x3E; add_memarg buf align offset | MemorySize -> add_u8 buf 0x3F; add_u8 buf 0x00 | MemoryGrow -> add_u8 buf 0x40; add_u8 buf 0x00 | I32Const v -> add_u8 buf 0x41; add_sleb32 buf v diff --git a/tests/codegen/arg_at.affine b/tests/codegen/arg_at.affine new file mode 100644 index 00000000..c40f5843 --- /dev/null +++ b/tests/codegen/arg_at.affine @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: MPL-2.0 +// ADR-015 S5 (#180): arg_at(i) smoke. Same shape as env_at; differs +// only in the underlying WASI import (`args_sizes_get` + `args_get`). +// Indexing 1 (not 0) exercises the `n_local * 4` pointer-vector +// offset path that env_at(0) would short-circuit. +pub fn main() -> Int / { Time } { + string_length(arg_at(1)) +} diff --git a/tests/codegen/env_at.affine b/tests/codegen/env_at.affine new file mode 100644 index 00000000..c97ae466 --- /dev/null +++ b/tests/codegen/env_at.affine @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MPL-2.0 +// ADR-015 S5 (#180): env_at(i) smoke. Lowers to a `*_sizes_get` + +// `environ_get` pair, then byte-scans the i-th entry to compute +// length and byte-copies it into a length-prefixed AS string. +// Returning `string_length` proves the scan terminated at the +// host-written null terminator at the expected offset. +pub fn main() -> Int / { Time } { + string_length(env_at(0)) +} diff --git a/tests/codegen/env_count_and_at.affine b/tests/codegen/env_count_and_at.affine new file mode 100644 index 00000000..c4a2d8da --- /dev/null +++ b/tests/codegen/env_count_and_at.affine @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: MPL-2.0 +// ADR-015 S5 dedup regression: a unit that uses BOTH `env_count` +// and `env_at`. Both lower to a WASI `environ_sizes_get` call — +// if the optional_wasi table emitted that import twice, the +// resulting wasm would have two competing imports under the same +// name and instantiation would fail. The dedup pass (keep first +// occurrence by wasi import name) must collapse them to one +// shared idx that both gen_expr cases resolve to. +pub fn main() -> Int / { Time } { + env_count(()) + string_length(env_at(0)) +} diff --git a/tests/codegen/test_arg_at.mjs b/tests/codegen/test_arg_at.mjs new file mode 100644 index 00000000..b49abeed --- /dev/null +++ b/tests/codegen/test_arg_at.mjs @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MPL-2.0 +// ADR-015 S5 (#180) — arg_at via WASI preview1 args_get. +// Index 1 (the SECOND argv entry) — verifies the `ptrvec[n]` lookup +// at `n*4` doesn't silently always return the first pointer. +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; + +const buf = await readFile('./tests/codegen/arg_at.wasm'); +let inst = null; +const called = []; + +// argv: ["./prog", "--flag=value"]; arg_at(1) -> "--flag=value" (len 12). +const argv = ['./prog', '--flag=value']; +const bufSize = argv.reduce((n, s) => n + s.length + 1, 0); + +const imports = { + wasi_snapshot_preview1: { + fd_write: () => 0, + args_sizes_get: (argc_ptr, argv_buf_ptr) => { + called.push('sizes'); + const dv = new DataView(inst.exports.memory.buffer); + dv.setUint32(argc_ptr, argv.length, true); + dv.setUint32(argv_buf_ptr, bufSize, true); + return 0; + }, + args_get: (ptrvec_ptr, buf_ptr) => { + called.push('get'); + const dv = new DataView(inst.exports.memory.buffer); + const mem = new Uint8Array(inst.exports.memory.buffer); + let writePtr = buf_ptr; + argv.forEach((s, i) => { + dv.setUint32(ptrvec_ptr + i * 4, writePtr, true); + for (let k = 0; k < s.length; k++) mem[writePtr + k] = s.charCodeAt(k); + mem[writePtr + s.length] = 0; + writePtr += s.length + 1; + }); + return 0; + }, + }, +}; + +inst = (await WebAssembly.instantiate(buf, imports)).instance; +const result = inst.exports.main(); + +assert.deepEqual( + called.sort(), + ['get', 'sizes'], + 'guest called both args_sizes_get and args_get', +); +assert.equal( + result, + argv[1].length, + `arg_at(1) length should be ${argv[1].length} ("${argv[1]}")`, +); +console.log('test_arg_at.mjs OK'); diff --git a/tests/codegen/test_env_at.mjs b/tests/codegen/test_env_at.mjs new file mode 100644 index 00000000..6efd7d1e --- /dev/null +++ b/tests/codegen/test_env_at.mjs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: MPL-2.0 +// ADR-015 S5 (#180) — env_at via WASI preview1 environ_get. +// Host writes two known env-var strings into the guest's buffer; +// the guest scans for the null terminator on entry 0 and returns +// its length. A passing run proves the byte-scan loop terminates +// at the right byte (i.e. I32Load8U / the new byte-level wasm IR +// is wired through end-to-end). +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; + +const buf = await readFile('./tests/codegen/env_at.wasm'); +let inst = null; +const called = []; + +// Two env vars: "FOO=bar\0" (8 bytes incl. NUL), "PI=3.14\0" (8 bytes). +const entries = ['FOO=bar', 'PI=3.14']; +const totalBufSize = entries.reduce((n, s) => n + s.length + 1, 0); + +const imports = { + wasi_snapshot_preview1: { + fd_write: () => 0, + environ_sizes_get: (envc_ptr, envbuf_ptr) => { + called.push('sizes'); + const dv = new DataView(inst.exports.memory.buffer); + dv.setUint32(envc_ptr, entries.length, true); + dv.setUint32(envbuf_ptr, totalBufSize, true); + return 0; + }, + environ_get: (ptrvec_ptr, buf_ptr) => { + called.push('get'); + const dv = new DataView(inst.exports.memory.buffer); + const mem = new Uint8Array(inst.exports.memory.buffer); + let writePtr = buf_ptr; + entries.forEach((s, i) => { + dv.setUint32(ptrvec_ptr + i * 4, writePtr, true); + for (let k = 0; k < s.length; k++) mem[writePtr + k] = s.charCodeAt(k); + mem[writePtr + s.length] = 0; // NUL terminator + writePtr += s.length + 1; + }); + return 0; + }, + }, +}; + +inst = (await WebAssembly.instantiate(buf, imports)).instance; +const result = inst.exports.main(); + +assert.deepEqual( + called.sort(), + ['get', 'sizes'], + 'guest called both environ_sizes_get and environ_get', +); +assert.equal( + result, + entries[0].length, + `env_at(0) length should be ${entries[0].length} ("${entries[0]}")`, +); +console.log('test_env_at.mjs OK'); diff --git a/tests/codegen/test_env_count_and_at.mjs b/tests/codegen/test_env_count_and_at.mjs new file mode 100644 index 00000000..e3a76608 --- /dev/null +++ b/tests/codegen/test_env_count_and_at.mjs @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: MPL-2.0 +// ADR-015 S5 dedup regression — a unit that uses both `env_count` +// and `env_at`. Both lower to `environ_sizes_get`; the dedup pass +// in codegen's optional_wasi table must register that import +// EXACTLY ONCE. A passing instantiation proves the import set is +// well-formed; the matching numeric result proves both builtins +// resolved to the deduped index correctly. +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; + +const buf = await readFile('./tests/codegen/env_count_and_at.wasm'); +let inst = null; +let sizesCalls = 0; + +const entries = ['HOME=/root', 'PATH=/usr/bin']; +const bufSize = entries.reduce((n, s) => n + s.length + 1, 0); + +const imports = { + wasi_snapshot_preview1: { + fd_write: () => 0, + environ_sizes_get: (envc_ptr, envbuf_ptr) => { + sizesCalls++; + const dv = new DataView(inst.exports.memory.buffer); + dv.setUint32(envc_ptr, entries.length, true); + dv.setUint32(envbuf_ptr, bufSize, true); + return 0; + }, + environ_get: (ptrvec_ptr, buf_ptr) => { + const dv = new DataView(inst.exports.memory.buffer); + const mem = new Uint8Array(inst.exports.memory.buffer); + let writePtr = buf_ptr; + entries.forEach((s, i) => { + dv.setUint32(ptrvec_ptr + i * 4, writePtr, true); + for (let k = 0; k < s.length; k++) mem[writePtr + k] = s.charCodeAt(k); + mem[writePtr + s.length] = 0; + writePtr += s.length + 1; + }); + return 0; + }, + }, +}; + +// Will throw at instantiation if environ_sizes_get is imported twice. +inst = (await WebAssembly.instantiate(buf, imports)).instance; +const result = inst.exports.main(); + +// env_count(()) returns 2; string_length(env_at(0)) returns len("HOME=/root") = 10. +assert.equal(result, entries.length + entries[0].length, 'sum across both builtins'); +// env_count() makes one call; env_at() makes another. Both come through the +// same import slot — proving the dedup didn't accidentally drop the second +// call site's wiring. +assert.equal(sizesCalls, 2, 'each builtin invocation hit environ_sizes_get'); +console.log('test_env_count_and_at.mjs OK');