@@ -38,6 +38,24 @@ macro_rules! bail {
3838
3939type Result < T > = std:: result:: Result < T , Error > ;
4040
/// Whether a character can start a grammar rule name.
///
/// Accepts alphabetic characters, underscores, and non-ASCII Unicode
/// symbols such as `⊥` (bottom) and `⊤` (top). ASCII symbols are
/// excluded because characters such as `+`, `|`, `~`, and `^` are
/// grammar syntax. Non-ASCII whitespace and control characters (for
/// example U+00A0 NO-BREAK SPACE) are also excluded so that they
/// cannot silently become part of a name.
fn is_name_start(ch: char) -> bool {
    ch == '_' || ch.is_alphabetic() || is_non_ascii_name_char(ch)
}

/// Whether a character can continue a grammar rule name.
///
/// Accepts everything `is_name_start` accepts, plus numeric
/// characters. Every valid start character is therefore also a valid
/// continuation character, so a name that has a valid first character
/// is never empty.
fn is_name_continue(ch: char) -> bool {
    ch == '_' || ch.is_alphanumeric() || is_non_ascii_name_char(ch)
}

/// Whether `ch` is a non-ASCII character that may appear in a name.
///
/// Unicode whitespace and control characters are rejected: they are
/// separators rather than symbols.
fn is_non_ascii_name_char(ch: char) -> bool {
    !ch.is_ascii() && !ch.is_whitespace() && !ch.is_control()
}
58+
4159pub fn parse_grammar (
4260 input : & str ,
4361 grammar : & mut Grammar ,
@@ -152,18 +170,11 @@ impl Parser<'_> {
152170 }
153171
154172 fn parse_name ( & mut self ) -> Option < String > {
155- // Names must start with an alphabetic character or
156- // underscore.
157173 let first = self . input [ self . index ..] . chars ( ) . next ( ) ?;
158- if !first . is_alphabetic ( ) && first != '_' {
174+ if !is_name_start ( first) {
159175 return None ;
160176 }
161- let name = self . take_while ( & |c : char | c. is_alphanumeric ( ) || c == '_' ) ;
162- if name. is_empty ( ) {
163- None
164- } else {
165- Some ( name. to_string ( ) )
166- }
177+ Some ( self . take_while ( & |c| is_name_continue ( c) ) . to_string ( ) )
167178 }
168179
169180 fn parse_expression ( & mut self ) -> Result < Option < Expression > > {
@@ -231,7 +242,7 @@ impl Parser<'_> {
231242 } else if self . input [ self . index ..]
232243 . chars ( )
233244 . next ( )
234- . map ( |ch| ch . is_alphanumeric ( ) )
245+ . map ( |ch| is_name_start ( ch ) )
235246 . unwrap_or ( false )
236247 {
237248 self . parse_nonterminal ( )
0 commit comments