Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 62 additions & 53 deletions Micro_Rust_Parsing_Frontend/Micro_Rust_Syntax.thy
Original file line number Diff line number Diff line change
Expand Up @@ -282,16 +282,14 @@ syntax
("if let _ = (_) { (_) } else { (_) }" [100,20,0,0]11)

\<comment> \<open>We distinguish two types of matches. The first is the usual \<^verbatim>\<open>case\<close> on datatypes.
The second is more of a C-style \<^verbatim>\<open>switch\<close> statement via a match. It is hard to distinguish
these at first parsing time. Instead, we distinguish them via a \<^emph>\<open>parse AST translation\<close>. The
distinguishing property (on the AST level) is that the \<^verbatim>\<open>switch\<close>-style matches must contain
only numeral, other and \<^verbatim>\<open>id\<close> clauses, while the \<^verbatim>\<open>case\<close> style matches cannot contain numeral
clauses.
We add two syntax clauses which this AST translation will insert, which will then be used
as a handle for further translations of the syntax tree later on.\<close>
The second is more of a C-style \<^verbatim>\<open>switch\<close> statement via a match. These are now unified
under a single \<^verbatim>\<open>match\<close> syntax that handles both algebraic patterns (constructors) and
numeric patterns (0, 1, numerals, named constants). The implementation automatically
routes to the appropriate backend (bcase for algebraic, ncase for numeric).\<close>
"_urust_match_case" :: "[urust, urust_match_branches] \<Rightarrow> urust" ("match'_case (_) {/ _/ }" [20, 10]20)
"_urust_match_switch" :: "[urust, urust_match_branches] \<Rightarrow> urust" ("match'_switch (_) {/ _/ }" [20, 10]20)
\<comment> \<open>This is \<^verbatim>\<open>temporary\<close> since we will disambiguate between two styles of matches\<close>
\<comment> \<open>Internal syntax for numeric matches - not user-facing, used by AST translation\<close>
"_urust_match_switch" :: "[urust, urust_match_branches] \<Rightarrow> urust"
\<comment> \<open>This is \<^verbatim>\<open>temporary\<close> since we convert it to \<^verbatim>\<open>_urust_match_case\<close> or \<^verbatim>\<open>_urust_match_switch\<close> via AST translation\<close>
"_urust_temporary_match" :: "[urust, urust_match_branches] \<Rightarrow> urust" ("match (_) {/ _/ }" [20, 10]20)
"_urust_match1" :: "[urust_pattern, urust] \<Rightarrow> urust_match_branches" ("(2_ \<Rightarrow>/ _)" [100, 20] 21)
"_urust_match1_guard" :: "[urust_pattern, urust, urust] \<Rightarrow> urust_match_branches"
Expand Down Expand Up @@ -710,22 +708,27 @@ parse_ast_translation\<open>
] end
\<close>

text\<open>Now we add an AST translation that converts \<^verbatim>\<open>_urust_temporary_match\<close> to the appropriate type
of match, i.e. \<^verbatim>\<open>match_case\<close> or \<^verbatim>\<open>match_select\<close>s.\<close>
text\<open>Now we add an AST translation that converts \<^verbatim>\<open>_urust_temporary_match\<close> to the appropriate
internal representation. Users always write \<^verbatim>\<open>match\<close>, but internally we dispatch to either
\<^verbatim>\<open>_urust_match_case\<close> (for algebraic patterns) or \<^verbatim>\<open>_urust_match_switch\<close> (for numeric patterns)
based on pattern analysis.\<close>
parse_ast_translation\<open>
let
\<comment> \<open>Get the head constants of an AST node\<close>
fun pattern_ast_to_head_const (Ast.Appl (Ast.Constant c :: tl)) = c
| pattern_ast_to_head_const (Ast.Constant c) = c
| pattern_ast_to_head_const _ = \<^syntax_const>\<open>_urust_match_pattern_other\<close>

\<comment> \<open>Get the list of patterns from a \<^verbatim>\<open>_urust_match2\<close> node in the AST\<close>
\<comment> \<open>Get all head constants from a pattern, handling disjunctions recursively.
For \<^verbatim>\<open>1 | 2 | 3\<close>, returns the head constants of 1, 2, and 3.\<close>
fun pattern_ast_to_head_consts (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_urust_match_pattern_disjunction\<close>, left, right]) =
pattern_ast_to_head_consts left @ pattern_ast_to_head_consts right
| pattern_ast_to_head_consts (Ast.Appl (Ast.Constant c :: _)) = [c]
| pattern_ast_to_head_consts (Ast.Constant c) = [c]
| pattern_ast_to_head_consts _ = [\<^syntax_const>\<open>_urust_match_pattern_other\<close>]

\<comment> \<open>Get the list of all pattern head constants from branches, flattening disjunctions\<close>
fun branches_ast_to_pattern_list (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_urust_match2\<close>, left, right]) =
branches_ast_to_pattern_list left @ branches_ast_to_pattern_list right
| branches_ast_to_pattern_list (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_urust_match1\<close>, clause, _]) =
[pattern_ast_to_head_const clause]
pattern_ast_to_head_consts clause
| branches_ast_to_pattern_list (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_urust_match1_guard\<close>, clause, _, _]) =
[pattern_ast_to_head_const clause]
pattern_ast_to_head_consts clause
| branches_ast_to_pattern_list _ = []

\<comment> \<open>Detect guards in match branches\<close>
Expand All @@ -734,44 +737,50 @@ parse_ast_translation\<open>
| branches_ast_has_guard (Ast.Appl [Ast.Constant \<^syntax_const>\<open>_urust_match1_guard\<close>, _, _, _]) = true
| branches_ast_has_guard _ = false

\<comment> \<open>Is this pattern valid in a \<^verbatim>\<open>match_case\<close>?\<close>
fun pat_is_match_case pat =
pat <> \<^syntax_const>\<open>_urust_match_pattern_num_const\<close> andalso
pat <> \<^syntax_const>\<open>_urust_match_pattern_zero\<close> andalso
pat <> \<^syntax_const>\<open>_urust_match_pattern_one\<close>

\<comment> \<open>Is this pattern valid in a \<^verbatim>\<open>match_switch\<close>?\<close>
fun pat_is_match_switch pat =
(pat = \<^syntax_const>\<open>_urust_match_pattern_num_const\<close>)
orelse (pat = \<^syntax_const>\<open>_urust_match_pattern_constr_no_args\<close>)
orelse (pat = \<^syntax_const>\<open>_urust_match_pattern_other\<close>)
orelse (pat = \<^syntax_const>\<open>_urust_match_pattern_zero\<close>)
orelse (pat = \<^syntax_const>\<open>_urust_match_pattern_one\<close>)

\<comment> \<open>Replace a \<^verbatim>\<open>_urust_temporary_match\<close> AST node with arguments \<^verbatim>\<open>[arg, branches]\<close> with the
appropriate match AST node\<close>
\<comment> \<open>Does this pattern REQUIRE the algebraic case path?
- Constructor patterns with arguments: Some(x), Ok(v), etc.
- Tuple patterns: (a, b), ((x, y), z), etc.
These patterns need HOL's case expression for structural matching.\<close>
fun pat_requires_algebraic pat =
pat = \<^syntax_const>\<open>_urust_match_pattern_constr_with_args\<close> orelse
pat = \<^syntax_const>\<open>_urust_let_pattern_tuple\<close>

\<comment> \<open>Does this pattern REQUIRE the switch path?
- Numeric literals: 0, 1, 42, etc.
These patterns need ncase for numeric comparison.\<close>
fun pat_requires_switch pat =
pat = \<^syntax_const>\<open>_urust_match_pattern_num_const\<close> orelse
pat = \<^syntax_const>\<open>_urust_match_pattern_zero\<close> orelse
pat = \<^syntax_const>\<open>_urust_match_pattern_one\<close>

\<comment> \<open>Is this pattern compatible with the switch path?
Includes: numerics, nullary constructors/constants, wildcards.
Excludes: constructor patterns with args, tuple patterns.\<close>
fun pat_is_switch_compatible pat =
pat = \<^syntax_const>\<open>_urust_match_pattern_num_const\<close> orelse
pat = \<^syntax_const>\<open>_urust_match_pattern_constr_no_args\<close> orelse
pat = \<^syntax_const>\<open>_urust_match_pattern_other\<close> orelse
pat = \<^syntax_const>\<open>_urust_match_pattern_zero\<close> orelse
pat = \<^syntax_const>\<open>_urust_match_pattern_one\<close>

\<comment> \<open>Replace a \<^verbatim>\<open>_urust_temporary_match\<close> AST node with the appropriate internal match form.
Logic:
- If guards present: must use algebraic case (for conditional fallthrough)
- If any pattern requires algebraic (constructor with args, tuple): use algebraic case
- If any pattern requires switch (numerics) AND all patterns are switch-compatible: use switch
- Otherwise: default to algebraic case\<close>
fun match_selector ctx [arg, branches] =
let
val patterns = branches_ast_to_pattern_list branches
val has_guard = branches_ast_has_guard branches
val is_match_case = has_guard orelse (patterns |> List.all pat_is_match_case)
val is_match_select = (not has_guard) andalso (patterns |> List.all pat_is_match_switch)
val new_hd = (
\<comment> \<open>Note that we default to \<^verbatim>\<open>is_match_case\<close>! If you explicitly want your match to be
parsed as a switch statement, use \<^verbatim>\<open>match_switch {...}\<close>\<close>
if is_match_case then \<^syntax_const>\<open>_urust_match_case\<close>
else (if is_match_select then \<^syntax_const>\<open>_urust_match_switch\<close>
else
\<comment> \<open>User wrote down a mixed 'illegal' match. We thus do not know how to change the
AST in a meaningful way, and keep it as is.
The problem is now that this will give a very poor error message, so add some logging\<close>
let
val _ = writeln "Error: detected match with mixed numeral and constructors"
in
\<^syntax_const>\<open>_urust_temporary_match\<close>
end
)
)
val has_algebraic = List.exists pat_requires_algebraic patterns
val has_switch = List.exists pat_requires_switch patterns
val all_switch_compatible = List.all pat_is_switch_compatible patterns
val use_switch = (not has_guard) andalso (not has_algebraic) andalso
has_switch andalso all_switch_compatible
val new_hd = if use_switch
then \<^syntax_const>\<open>_urust_match_switch\<close>
else \<^syntax_const>\<open>_urust_match_case\<close>
in
Ast.mk_appl (Ast.Constant new_hd) [arg, branches]
end
Expand Down
25 changes: 4 additions & 21 deletions Shallow_Micro_Rust/Core_Syntax.thy
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ nonterminal urust_shallow_match_pattern_args

syntax
"_urust_shallow_match" :: "[('s, 'v, 'r, 'abort, 'i, 'o) expression, urust_shallow_match_branches] \<Rightarrow> ('sp, 'vp, 'rp, 'abort, 'i, 'o) expression" ("match (_) \<lbrace>/ _/ \<rbrace>" [20, 20]20)
"_urust_shallow_switch" :: "[('s, 'v, 'r, 'abort, 'i, 'o) expression, urust_shallow_match_branches] \<Rightarrow> ('sp, 'vp, 'rp, 'abort, 'i, 'o) expression" ("match'_switch (_) \<lbrace>/ _/ \<rbrace>" [20, 20]20)
\<comment>\<open>Internal syntax for numeric switch - used by translation, not user-facing\<close>
"_urust_shallow_switch" :: "[('s, 'v, 'r, 'abort, 'i, 'o) expression, urust_shallow_match_branches] \<Rightarrow> ('sp, 'vp, 'rp, 'abort, 'i, 'o) expression"
\<comment>\<open>Basic case branches\<close>
"_urust_shallow_match1" :: "[urust_shallow_match_pattern, 'b] \<Rightarrow> urust_shallow_match_branches" ("(2_ \<Rightarrow>/ _)" [100, 20] 21)
"_urust_shallow_match1_guard"
Expand Down Expand Up @@ -738,15 +739,10 @@ let
Const (name, _) =>
if name = "_urust_shallow_match_pattern_other" then
Syntax.const \<^syntax_const>\<open>_case_basic_pattern_other\<close>
else if name = "_urust_shallow_match_pattern_zero" orelse
name = "_urust_shallow_match_pattern_one" then
case_error ("numeric pattern in match_case: " ^ Syntax.string_of_term ctxt pat)
else case_error ("invalid match pattern: " ^ Syntax.string_of_term ctxt pat)
| Const (name, _) $ id =>
if name = "_urust_shallow_match_pattern_constr_no_args" then
Syntax.const \<^syntax_const>\<open>_case_basic_pattern_constr_no_args\<close> $ id
else if name = "_urust_shallow_match_pattern_num_const" then
case_error ("numeric pattern in match_case: " ^ Syntax.string_of_term ctxt pat)
else case_error ("invalid match pattern: " ^ Syntax.string_of_term ctxt pat)
| Const (name, _) $ id $ args =>
if name = "_urust_shallow_match_pattern_constr_with_args" then
Expand Down Expand Up @@ -1182,21 +1178,8 @@ term \<open>
\<up>x +=\<^sub>\<mu> \<up>12
\<close>

term\<open>let y = x; match_switch y \<lbrace>
3 \<Rightarrow> \<up>True,
5 \<Rightarrow> \<up>False
\<rbrace>\<close>

term\<open>
match_switch x \<lbrace>
3 \<Rightarrow> \<up>True,
5 \<Rightarrow> \<up>True,
\<guillemotleft>twentyfive\<guillemotright> \<Rightarrow> \<up>True,
0 \<Rightarrow> \<up>True,
1 \<Rightarrow> \<up>True,
_ \<Rightarrow> \<up>False
\<rbrace>
\<close>
\<comment>\<open>Numeric match tests are in Micro_Rust_Shallow_Embedding_Tests.thy using the full uRust syntax,
which properly dispatches to the switch path. The shallow syntax here bypasses that dispatch.\<close>

(*>*)
end
Expand Down
1 change: 0 additions & 1 deletion Shallow_Micro_Rust/Micro_Rust_Shallow_Embedding.thy
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,6 @@ translations
"_shallow_match_args (_urust_match_pattern_args_app a bs)"
\<rightharpoonup> "_urust_shallow_match_pattern_args_app (_shallow_match_arg a) (_shallow_match_args bs)"


"_shallow (_urust_match_switch exp branches)"
\<rightharpoonup> "_urust_shallow_switch (_shallow exp) (_shallow_match_branches branches)"

Expand Down
Loading