From fcf1799211c8eb37d5a16593486898e12d72c8c7 Mon Sep 17 00:00:00 2001 From: sstefdev Date: Mon, 25 May 2026 17:50:52 +0200 Subject: [PATCH] grammar: open subsystem keys to arbitrary identifiers (closes #64) --- CHANGELOG.md | 4 ++++ compiler/src/parser.c | 27 ++++++++++++++++++++++----- compiler/tests/parser_test.c | 34 ++++++++++++++++++++++++++++------ spec/grammar.ebnf | 10 +++++++++- 4 files changed, 63 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 430e06f..d90e952 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ Prose references a version as `v0.X.Y`; headings stay bare `[0.X.Y]`. ### Added +- Subsystem keys in the chain manifest accept arbitrary identifiers, not just the five stdlib axes (`consensus`, `gas`, `state`, `exec`, `da`). A chain can declare any axis it cares about as a first-class subsystem: `privacy: GrothProver`, `mev: FlashbotsBundler`, etc. Lifts the parser-layer gatekeeping that was blocking third-party protocol extensions at the chain layer. Closes #64. +- Parser test reworked: `test_error_unknown_subsystem_key` (which asserted "unknown:" was rejected) replaced by `test_arbitrary_subsystem_key_parses` (asserts `privacy: ...` works) plus `test_subsystem_key_can_still_be_stdlib_keyword` (regression guard that `consensus:`, `state:`, etc. still parse the same way). +- `spec/grammar.ebnf` updated: `SubsystemKey = IDENT` instead of the enumerated five names; comment explains the stdlib axes are still conventions, just not parser-level requirements. +- Tree-sitter grammar updated in lockstep: `subsystem_key` is now `$.identifier` instead of a fixed-choice rule. - Solidity toolchain integration. Real `solc`-compiled Solidity contracts now run end-to-end on the EVM engine. 
`runtime/tests/solidity.rs` shells out to `solc --bin --optimize` to compile `runtime/tests/fixtures/MiniERC20.sol`, deploys via `Evm::deploy` with a 32-byte ABI-encoded constructor arg (`initial supply`), then exercises `balanceOf` and `transfer` via hand-rolled ABI encoding of the standard selectors (`0x70a08231 balanceOf(address)`, `0xa9059cbb transfer(address,uint256)`, `0x18160ddd totalSupply()`). Closes #52. - 2 new integration tests: - `erc20_deploys_and_transfers`: deploys MiniERC20 with 1M supply to alice, asserts `totalSupply == 1M`, asserts `balanceOf(alice) == 1M` and `balanceOf(bob) == 0`, transfers 100 from alice to bob, asserts both balances post-transfer diff --git a/compiler/src/parser.c b/compiler/src/parser.c index 9959ed4..2d3db19 100644 --- a/compiler/src/parser.c +++ b/compiler/src/parser.c @@ -7,7 +7,9 @@ * top_level := chain_decl | module_decl | protocol_decl * * chain_decl := "chain" IDENT "{" subsystem_assign* "}" - * subsystem_assign := SUBSYSTEM_KEY ":" type_expr (";"|",")? + * subsystem_assign := IDENT ":" type_expr (";"|",")? -- key is any + * identifier + * since issue #64 * * module_decl := "module" IDENT "{" module_item* "}" * module_item := state_decl | gas_decl | event_decl | fn_decl | effect_decl @@ -59,10 +61,23 @@ #include "parser.h" -/* Subsystem keys are lexed as their own keyword tokens (see lexer.c). The - * parser accepts any of these as a chain-block field key. */ +/* Subsystem keys are arbitrary identifiers (issue #64). The five names + * `consensus`, `gas`, `state`, `exec`, `da` are still keyword tokens + * for other reasons (state/gas double as module-item declaration + * keywords), so we accept both the reserved keyword tokens AND plain + * identifiers in chain-block field-key position. 
A chain manifest can + * now declare any axis it cares about as a first-class subsystem: + * + * chain MyZkChain { + * consensus: Tendermint + * privacy: GrothProver + * } + * + * Third-party protocol implementations (RFC #65) hang off this + * relaxation; the parser stops being the gatekeeper. */ static int is_subsystem_key(TokenKind k) { switch (k) { + case TK_IDENT: case TK_KW_CONSENSUS: case TK_KW_GAS: case TK_KW_STATE: @@ -347,11 +362,13 @@ static AstNode *parse_type_expr(Parser *p) { } /* subsystem_assign := SUBSYSTEM_KEY ":" type_expr - * SUBSYSTEM_KEY is one of the keyword tokens (TK_KW_CONSENSUS, TK_KW_GAS, ...). */ + * SUBSYSTEM_KEY is any identifier or one of the reserved subsystem + * keyword tokens (issue #64). The five stdlib names stay keywords + * because state/gas also introduce module-item declarations. */ static AstNode *parse_subsystem_assign(Parser *p) { if (!is_subsystem_key(p->current.kind)) { error_at(p, &p->current, - "expected a subsystem key (consensus, gas, state, exec, da)"); + "expected a subsystem key (an identifier)"); return NULL; } diff --git a/compiler/tests/parser_test.c b/compiler/tests/parser_test.c index 8146a3f..4f29798 100644 --- a/compiler/tests/parser_test.c +++ b/compiler/tests/parser_test.c @@ -228,12 +228,33 @@ TEST(test_error_missing_colon_in_assignment) { free(err); } -TEST(test_error_unknown_subsystem_key) { - /* `unknown:` lexes as IDENT, not one of the SUBSYSTEM keywords. */ +TEST(test_arbitrary_subsystem_key_parses) { + /* Issue #64: subsystem keys are no longer restricted to the five + * stdlib axes. Any identifier is a valid subsystem key so chains + * can declare custom axes (privacy, mev, da_variant, etc.).
*/ char *err = NULL; - AstNode *root = parse_source("chain C { unknown: Tendermint }", &err); - ASSERT(root == NULL); - ASSERT(err && strstr(err, "expected a subsystem key") != NULL); + AstNode *root = parse_source( + "chain C { privacy: GrothProver }", &err); + ASSERT(root != NULL); + ASSERT_EQ_INT(root->kind, AST_CHAIN_DECL); + ASSERT_EQ_INT(root->as.chain.n_assignments, 1); + AstNode *sub = root->as.chain.assignments[0]; + ASSERT_EQ_INT(sub->kind, AST_SUBSYSTEM_ASSIGN); + ASSERT_EQ_STR_LEN(sub->as.subsystem.key.start, + sub->as.subsystem.key.length, "privacy"); + free(err); +} + +TEST(test_subsystem_key_can_still_be_stdlib_keyword) { + /* Regression guard: a reserved keyword token (`consensus` here) + * still works as a subsystem key; the stdlib names stay keywords + * because state/gas also start module-item declarations. */ + char *err = NULL; + AstNode *root = parse_source("chain C { consensus: Aura }", &err); + ASSERT(root != NULL); + AstNode *sub = root->as.chain.assignments[0]; + ASSERT_EQ_STR_LEN(sub->as.subsystem.key.start, + sub->as.subsystem.key.length, "consensus"); free(err); } @@ -590,7 +611,8 @@ int main(void) { RUN(test_error_missing_open_brace); RUN(test_error_missing_close_brace); RUN(test_error_missing_colon_in_assignment); - RUN(test_error_unknown_subsystem_key); + RUN(test_arbitrary_subsystem_key_parses); + RUN(test_subsystem_key_can_still_be_stdlib_keyword); RUN(test_error_missing_type_expr_after_colon); RUN(test_error_unterminated_generic_args); diff --git a/spec/grammar.ebnf b/spec/grammar.ebnf index b5bdc83..ab75241 100644 --- a/spec/grammar.ebnf +++ b/spec/grammar.ebnf @@ -39,8 +39,16 @@ SubsystemAssign = SubsystemKey, ":", TypeExpr ; +(* Any identifier is a valid subsystem key (issue #64). The five names + below are the stdlib axes and remain reserved as keyword tokens for + reasons unrelated to the chain manifest (state and gas also start + module-item declarations).
Chains can declare additional axes by + inventing new identifier names, which makes third-party protocol + extensions a first-class language feature. *) + SubsystemKey - = "consensus" | "gas" | "state" | "exec" | "da" + = IDENT (* the keyword-token stdlib axes are accepted too: + "consensus", "gas", "state", "exec", "da" *) ;