Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ Prose references a version as `v0.X.Y`; headings stay bare `[0.X.Y]`.

### Added

- Codegen for `if` / `else` / `else if` chains (`compiler/src/codegen.c`). The v0.3 codegen rejected `if` statements; this lifts the restriction. Cleave-compiled WASM modules can now do branching. New `emit_if` lowers to WASM's native `if 0x40 ... else ... end` instruction sequence with an empty block type (if-as-expression remains gated on the parser supporting it). `else if` chains nest naturally as `if ... else (if ...)`. Closes #49.
- Cond coercion to i32. WASM's `if` instruction consumes an i32, but Cleave-compiled expressions usually leave i64 on the stack (literals, identifier reads, calls, arithmetic). Codegen now emits `i32.wrap_i64` (0xA7) before the `if` byte when needed. Comparisons (`==`, `!=`, `<`, `>`, `<=`, `>=`) already produce i32 per the WASM spec, so no wrap is emitted in that case; a new `leaves_i32_on_stack` helper distinguishes the two.
- New `leaves_value_on_stack` helper that returns false for assignment expressions. Used in both the if-branch and fn-body code paths so a block whose trailing expression is `x = expr` does not get a spurious `drop` or miss its default-zero. Fixes a latent bug that existed before this PR: a fn whose body ended in an assignment produced invalid WASM. The pre-existing tests did not catch it because every existing example ended in a non-assignment trailing expression.
- 6 codegen tests covering: if without else, if with else, else-if chain, comparison-cond (no wrap), bool-literal cond (wrap), and the assignment-branch-no-drop regression that surfaced during this PR's development.
- `codegen_if_heavy` bench case: 5-way else-if chain.
- New opcodes registered in `wasm.h`: `WASM_OP_BLOCK 0x02`, `WASM_OP_LOOP 0x03`, `WASM_OP_IF 0x04`, `WASM_OP_ELSE 0x05`, `WASM_OP_I32_WRAP_I64 0xA7`. New constant `WASM_BLOCKTYPE_EMPTY 0x40`.
- CI gains four lower-cost gates:
- **`wasm-validate` on emitted modules.** The compiler job installs wabt, compiles every example marked "yes" in `examples/README.md`, and runs `wasm-validate` on the output. Catches structurally invalid modules that pass our byte-level codegen unit tests.
- **Determinism check.** The compiler job compiles `examples/counter-mvp.cv` twice and compares the two outputs with `cmp -s`, exiting non-zero on any difference. Guards against nondeterminism from HashMap iteration order, pointer addresses, and build timestamps.
Expand Down
28 changes: 28 additions & 0 deletions compiler/bench/codegen_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ static const char *ARITHMETIC_HEAVY =
" }\n"
"}";

/* Benchmark input for the if/else codegen path (issue #49).
 *
 * A single function `classify` holds one if / else-if chain: five `==`
 * tests on the parameter `n` followed by a final `else`. Every branch
 * is a block whose only content is an assignment to the state variable
 * `x`, and the function's trailing expression reads `x` back.
 *
 * NOTE(review): every condition here is an `==` comparison. If the
 * codegen treats comparison results as already-i32 (as the issue #49
 * change describes), this input exercises the nested else-if lowering
 * but probably not the i32.wrap_i64 cond-coercion path -- confirm
 * whether a non-comparison condition should be added for coverage. */
static const char *IF_HEAVY =
"module M {\n"
" state x: u64\n"
" fn classify(n: u64) -> u64 {\n"
" if n == 0 { x = 1 }\n"
" else if n == 1 { x = 2 }\n"
" else if n == 2 { x = 3 }\n"
" else if n == 3 { x = 4 }\n"
" else if n == 4 { x = 5 }\n"
" else { x = 6 }\n"
" x\n"
" }\n"
"}";

/* Exercises the let-binding codegen path (issue #44): eight local
* variables, the trailing expression reads several of them. The
* pre-scan walks 8 stmts; the locals declaration is one i64 group
Expand Down Expand Up @@ -92,6 +108,18 @@ int main(void) {
wasm_free(&bin);
});

/* Bench case: run IF_HEAVY through lexer_init, parser_parse_program,
 * typecheck_program and cg_compile_program on every iteration, then
 * release the emitted buffer with wasm_free. DEV_NULL is handed to both
 * typecheck_init and cg_init (presumably as the diagnostics sink, so
 * reporting does not pollute the bench output -- confirm). */
BENCH("codegen_if_heavy", {
Lexer lex; lexer_init(&lex, IF_HEAVY);
Parser p; parser_init(&p, &lex);
AstNode *prog = parser_parse_program(&p);
TypeChecker tc; typecheck_init(&tc, DEV_NULL);
/* Result deliberately ignored: only the time spent is of interest. */
(void)typecheck_program(&tc, prog);
Codegen cg; cg_init(&cg, DEV_NULL);
WasmBuf bin;
/* Result deliberately ignored, as above.
 * NOTE(review): `bin` is freed unconditionally below; this assumes
 * cg_compile_program always leaves it in a state wasm_free accepts,
 * even on failure -- confirm. */
(void)cg_compile_program(&cg, prog, &bin);
wasm_free(&bin);
/* NOTE(review): `prog` is not released anywhere in this body. Confirm
 * whether the AST is owned/freed elsewhere (e.g. an arena); otherwise
 * each iteration leaks the parse tree. */
});

fclose(DEV_NULL);
return 0;
}
16 changes: 15 additions & 1 deletion compiler/include/wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ enum {

/* Opcodes used by Cleave codegen. Names mirror the spec's text format. */
enum {
WASM_OP_BLOCK = 0x02,
WASM_OP_LOOP = 0x03,
WASM_OP_IF = 0x04,
WASM_OP_ELSE = 0x05,
WASM_OP_END = 0x0B,
WASM_OP_RETURN = 0x0F,
WASM_OP_CALL = 0x10,
Expand Down Expand Up @@ -124,7 +128,17 @@ enum {
WASM_OP_I64_REM_S = 0x81,
WASM_OP_I64_REM_U = 0x82,
WASM_OP_I64_AND = 0x83,
WASM_OP_I64_OR = 0x84
WASM_OP_I64_OR = 0x84,
WASM_OP_I32_WRAP_I64 = 0xA7 /* drops upper 32 bits of an i64; used to
coerce a bool-shaped i64 (0/1) into an
i32 for the if instruction */
};

/* Block-type immediates: the single byte written directly after a
 * block / loop / if opcode.
 *
 * Only the `empty` encoding is defined here. It declares that the
 * structured instruction leaves no result on the operand stack, which is
 * all the v0 codegen needs while if-as-expression remains unsupported. */
enum { WASM_BLOCKTYPE_EMPTY = 0x40 };

/* Export descriptor kinds. */
Expand Down
103 changes: 100 additions & 3 deletions compiler/src/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@
* - state = expr -> i32.const <slot>; <expr>; call state_set
* - local = expr -> <expr>; local.set <idx>
* - module-fn call -> args...; call <fn index>
* - if / else / else-if -> <cond>; if; <then>; else; <else>; end (issue #49)
* - block trailing expr -> becomes the function's return value
*
* What we deliberately reject
* ---------------------------
* - control flow (if/match/loops) -- see issues #43, #49
* - match (issue #43); loops (no issue yet, deliberate)
* - if-as-expression (`let x = if c { 1 } else { 2 }`): grammar does
* not parse if at expression position today; lift when the parser
* does
* - sum types (`Result`, `Option`) -- see issue #48
* - identifiers we cannot resolve
*
Expand Down Expand Up @@ -333,6 +337,93 @@ static void emit_call(CodegenCtx *ctx, WasmBuf *body, AstNode *node) {
wasm_write_leb_u32(body, f->fn_index);
}

/* Stack-effect predicate for a single expression.
 *
 * Returns 0 when `expr` is NULL or is an assignment, i.e. a binary node
 * whose operator text is exactly the one character `=`. Returns 1 for
 * every other expression.
 *
 * Assignments are singled out because they lower to state_set (a void
 * hostcall) or local.set, both of which consume the value they are
 * given. Callers rely on this answer to decide whether an if-branch
 * needs a trailing `drop`, and whether a function body still needs a
 * synthetic default-zero result. */
static int leaves_value_on_stack(const AstNode *expr) {
    if (expr == NULL) {
        return 0;
    }

    const int is_assignment =
        expr->kind == AST_EXPR_BINARY
        && expr->as.binary.op.length == 1
        && expr->as.binary.op.start[0] == '=';

    return !is_assignment;
}

/* Result-width predicate used when lowering an `if` condition.
 *
 * Returns 1 when `node` is a binary expression whose operator is one of
 * the six comparisons (`<`, `>`, `==`, `!=`, `<=`, `>=`). The i64
 * comparison instructions produce an i32 per the WASM spec, so such a
 * condition can feed `if` directly.
 *
 * Returns 0 for NULL, for non-binary nodes, and for any other binary
 * operator. Those are treated as leaving an i64 behind, and the caller
 * is expected to narrow with i32.wrap_i64 first. */
static int leaves_i32_on_stack(const AstNode *node) {
    static const char *const two_char_comparisons[] = { "==", "!=", "<=", ">=" };

    if (node == NULL || node->kind != AST_EXPR_BINARY) {
        return 0;
    }

    const StrRef op = node->as.binary.op;
    switch (op.length) {
    case 1:
        return op.start[0] == '<' || op.start[0] == '>';
    case 2:
        for (size_t k = 0;
             k < sizeof two_char_comparisons / sizeof two_char_comparisons[0];
             ++k) {
            if (memcmp(op.start, two_char_comparisons[k], 2) == 0) {
                return 1;
            }
        }
        return 0;
    default:
        return 0;
    }
}

static void emit_if(CodegenCtx *ctx, WasmBuf *body, AstNode *node);
static void emit_block(CodegenCtx *ctx, WasmBuf *body, AstNode *node);

/* Lower one arm (then or else) of an if statement into `body`.
 *
 * The enclosing `if` is emitted with an empty block type, so an arm must
 * finish with nothing extra on the operand stack:
 *   - A block arm is emitted via emit_block; if its trailing result
 *     expression pushes a value, a `drop` follows. Assignment results
 *     push nothing, so no `drop` is written for them (it would
 *     underflow the stack).
 *   - An if-statement arm is the `else if` case and is lowered as a
 *     nested if/else/end via emit_if.
 *   - A NULL arm, or any other node kind, emits nothing.
 *
 * Once if-as-expression is supported, this is the layer that would pass
 * the value through instead of discarding it. */
static void emit_if_branch(CodegenCtx *ctx, WasmBuf *body, AstNode *branch) {
    if (branch == NULL) {
        return;
    }

    switch (branch->kind) {
    case AST_EXPR_BLOCK:
        emit_block(ctx, body, branch);
        if (leaves_value_on_stack(branch->as.block.result)) {
            wasm_write_byte(body, WASM_OP_DROP);
        }
        break;
    case AST_STMT_IF:
        emit_if(ctx, body, branch);
        break;
    default:
        break;
    }
}

/* Lower an if statement to WASM structured control flow.
 *
 * Byte layout written to `body`:
 *   <cond> [i32.wrap_i64] if <empty blocktype> <then> [else <else>] end
 *
 * The wrap is written only when the condition is not already an i32
 * (see leaves_i32_on_stack); the `else` opcode and arm are written only
 * when the statement has an else branch.
 *
 * NOTE(review): i32.wrap_i64 keeps only the low 32 bits, so it is a
 * faithful truthiness test only while the condition value is 0 or 1.
 * That is presumably guaranteed by the type checker requiring a bool
 * condition -- confirm. */
static void emit_if(CodegenCtx *ctx, WasmBuf *body, AstNode *node) {
    StmtIf *stmt = &node->as.if_stmt;

    /* Condition first; narrow it when it was left as an i64. */
    emit_expr(ctx, body, stmt->cond);
    const int cond_is_i64 = !leaves_i32_on_stack(stmt->cond);
    if (cond_is_i64) {
        wasm_write_byte(body, WASM_OP_I32_WRAP_I64);
    }

    /* Open the structured block; it yields no value. */
    wasm_write_byte(body, WASM_OP_IF);
    wasm_write_byte(body, WASM_BLOCKTYPE_EMPTY);
    emit_if_branch(ctx, body, stmt->then_branch);

    if (stmt->else_branch != NULL) {
        wasm_write_byte(body, WASM_OP_ELSE);
        emit_if_branch(ctx, body, stmt->else_branch);
    }

    wasm_write_byte(body, WASM_OP_END);
}

static void emit_block(CodegenCtx *ctx, WasmBuf *body, AstNode *node) {
ExprBlock *b = &node->as.block;
for (size_t i = 0; i < b->n_stmts; ++i) {
Expand Down Expand Up @@ -374,6 +465,8 @@ static void emit_block(CodegenCtx *ctx, WasmBuf *body, AstNode *node) {
emit_expr(ctx, body, l->value);
wasm_write_byte(body, WASM_OP_LOCAL_SET);
wasm_write_leb_u32(body, bind->local_index);
} else if (s->kind == AST_STMT_IF) {
emit_if(ctx, body, s);
} else {
cg_report(ctx, &s->span,
"statement kind %s not yet supported in v0 codegen",
Expand Down Expand Up @@ -512,10 +605,14 @@ static void emit_fn_body(CodegenCtx *ctx, AstNode *fn_decl,
} else {
emit_block(ctx, body_out, f->body);
/* If the block has no trailing expression and no return, push a
* default 0 so the function's i64 return type is satisfied. */
* default 0 so the function's i64 return type is satisfied.
* `leaves_value_on_stack` treats an assignment-as-result as
* not-a-value, which is the right call here: a fn that ends
* with `x = expr` needs the synthetic 0 just like one with no
* trailing expression at all. */
ExprBlock *bb = &f->body->as.block;
int needs_default = 1;
if (bb->result) needs_default = 0;
if (leaves_value_on_stack(bb->result)) needs_default = 0;
for (size_t i = 0; i < bb->n_stmts; ++i) {
if (bb->stmts[i]->kind == AST_STMT_RETURN) {
needs_default = 0;
Expand Down
133 changes: 133 additions & 0 deletions compiler/tests/codegen_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,131 @@ TEST(test_let_assignment_writes_back_to_local) {
wasm_free(&bin);
}

/* ============== if / else / else-if (issue #49) ============== */

TEST(test_if_without_else_emits_if_blocktype_end) {
    /* An if with no else must lower to
     *   if 0x40          (0x04 0x40, empty block type)
     *   ... then body ...
     *   end              (0x0B)
     * with no else opcode (0x05) in between. The condition is a
     * comparison, so no i32.wrap_i64 sits between cond and `if`.
     *
     * Checking only for `0x04 0x40` would also pass if a stray else arm
     * were emitted, so the whole run is pinned: the then-body `x = 1`
     * is
     *   i32.const 0 (slot)     0x41 0x00
     *   i64.const 1            0x42 0x01
     *   call 1 (state_set)     0x10 0x01
     * and, being an assignment, is followed directly by `end` with no
     * `drop` and no `else`. */
    const char *src =
        "module M {\n"
        " state x: u64\n"
        " fn f() -> u64 { if x == 0 { x = 1 }; x }\n"
        "}";
    WasmBuf bin;
    ASSERT(compile_source(src, &bin));

    const uint8_t if_header[] = { 0x04, 0x40 };
    ASSERT(contains_bytes(&bin, if_header, sizeof(if_header)));

    const uint8_t if_then_end[] = {
        0x04, 0x40,                         /* if empty */
        0x41, 0x00, 0x42, 0x01, 0x10, 0x01, /* then: x = 1 */
        0x0B,                               /* end, with no else before it */
    };
    ASSERT(contains_bytes(&bin, if_then_end, sizeof(if_then_end)));

    wasm_free(&bin);
}

TEST(test_if_with_else_emits_if_else_end) {
    /* With an else arm present, the then-body must be followed directly
     * by the else opcode. The expected run is:
     *   if, empty block type        0x04 0x40
     *   i32.const 0 (state slot)    0x41 0x00
     *   i64.const 1                 0x42 0x01
     *   call 1 (state_set)          0x10 0x01
     *   else                        0x05 */
    static const uint8_t if_then_else_run[] = {
        0x04, 0x40,
        0x41, 0x00, 0x42, 0x01, 0x10, 0x01,
        0x05,
    };
    const char *src =
        "module M {\n"
        " state x: u64\n"
        " fn f() -> u64 { if x == 0 { x = 1 } else { x = 2 }; x }\n"
        "}";

    WasmBuf bin;
    ASSERT(compile_source(src, &bin));
    ASSERT(contains_bytes(&bin, if_then_else_run, sizeof if_then_else_run));
    wasm_free(&bin);
}

TEST(test_else_if_chain_nests_inner_if) {
    /* `if a { ... } else if b { ... } else { ... }` lowers to an outer
     * if whose else arm contains a nested if. Both carry the empty
     * block type, so the `if 0x40` byte pair (0x04 0x40) must occur at
     * least twice in the module. Only the `if` headers are counted
     * here; `end` bytes are not checked. */
    const char *src =
        "module M {\n"
        " state x: u64\n"
        " fn f() -> u64 { if x == 0 { x = 1 } else if x == 1 { x = 2 } else { x = 3 }; x }\n"
        "}";
    WasmBuf bin;
    ASSERT(compile_source(src, &bin));

    const uint8_t needle[] = { 0x04, 0x40 };
    int hits = 0;
    for (size_t i = 0; i + sizeof(needle) <= bin.len; ++i) {
        if (memcmp(bin.data + i, needle, sizeof(needle)) == 0) {
            hits++;
        }
    }
    ASSERT(hits >= 2);

    wasm_free(&bin);
}

TEST(test_comparison_cond_does_not_need_i32_wrap) {
    /* Comparison opcodes (i64.eq etc.) return i32 in WASM, so the cond
     * value is already the right shape for `if`. The codegen should NOT
     * emit i32.wrap_i64 (0xA7) immediately before the `if 0x40` bytes. */
    const char *src =
        "module M {\n"
        " fn f(a: u64) -> u64 { if a == 1 { } else { }; 0 }\n"
        "}";
    WasmBuf bin;
    ASSERT(compile_source(src, &bin));

    /* Guard against a vacuous pass: the `if` itself must be present,
     * otherwise the absence check below proves nothing. */
    const uint8_t if_header[] = { 0x04, 0x40 };
    ASSERT(contains_bytes(&bin, if_header, sizeof(if_header)));

    /* The sequence `0xA7 0x04 0x40` would indicate a redundant wrap.
     * Confirm it does NOT appear. */
    const uint8_t bad[] = { 0xA7, 0x04, 0x40 };
    ASSERT(!contains_bytes(&bin, bad, sizeof(bad)));

    wasm_free(&bin);
}

TEST(test_bool_literal_cond_emits_i32_wrap_i64) {
    /* A non-comparison condition such as the literal `true` is left on
     * the stack as an i64, so it has to be narrowed before `if` consumes
     * it. The expected run is:
     *   i64.const 1                 0x42 0x01
     *   i32.wrap_i64                0xA7
     *   if, empty block type        0x04 0x40 */
    static const uint8_t const_wrap_if[] = { 0x42, 0x01, 0xA7, 0x04, 0x40 };
    const char *src =
        "module M {\n"
        " fn f() -> u64 { if true { } else { }; 0 }\n"
        "}";

    WasmBuf bin;
    ASSERT(compile_source(src, &bin));
    ASSERT(contains_bytes(&bin, const_wrap_if, sizeof const_wrap_if));
    wasm_free(&bin);
}

TEST(test_if_with_assignment_branch_does_not_emit_drop) {
    /* Regression guard. In a branch written `{ x = 5 }` the assignment
     * has no trailing semicolon, so the parser stores it as the block's
     * result expression. state_set is void, so the branch leaves the
     * operand stack empty and a `drop` (0x1A) there would underflow.
     *
     * If the bug comes back, the drop shows up right after the
     * state_set call (`call 1`, 0x10 0x01) and before `end` (0x0B). */
    static const uint8_t call_then_drop[] = { 0x10, 0x01, 0x1A };
    const char *src =
        "module M {\n"
        " state x: u64\n"
        " fn f() -> u64 { if 1 == 1 { x = 5 }; x }\n"
        "}";

    WasmBuf bin;
    ASSERT(compile_source(src, &bin));
    ASSERT(!contains_bytes(&bin, call_then_drop, sizeof call_then_drop));
    wasm_free(&bin);
}

TEST(test_no_let_bindings_emits_zero_local_groups) {
/* Functions with no let bindings should still emit a leading 0
* (zero local groups) in their code section. Regression guard for
Expand Down Expand Up @@ -416,5 +541,13 @@ int main(void) {
RUN(test_let_assignment_writes_back_to_local);
RUN(test_no_let_bindings_emits_zero_local_groups);

/* if / else / else-if (issue #49) */
RUN(test_if_without_else_emits_if_blocktype_end);
RUN(test_if_with_else_emits_if_else_end);
RUN(test_else_if_chain_nests_inner_if);
RUN(test_comparison_cond_does_not_need_i32_wrap);
RUN(test_bool_literal_cond_emits_i32_wrap_i64);
RUN(test_if_with_assignment_branch_does_not_emit_drop);

REPORT();
}
Loading