66(* * Exception for parse errors *)
77exception Parse_error of string * Span. t
88
9+ module I = Parser. MenhirInterpreter
10+
911(* * Buffered token stream that provides Menhir-compatible interface *)
1012type token_buffer = {
1113 mutable current_token : Token .t ;
1214 mutable current_span : Span .t ;
1315 mutable next_token : unit -> Token .t * Span .t ;
1416}
1517
16- (* * Create a Menhir-compatible lexer function from our token stream *)
17- let lexer_of_token_stream (next : unit -> Token.t * Span.t ) : Lexing.lexbuf -> Parser.token =
18- (* We need to track position for Menhir *)
19- let buf = ref None in
20- let get_next () =
21- match ! buf with
22- | Some (tok , span ) ->
23- buf := None ;
24- (tok, span)
25- | None -> next ()
26- in
27- fun lexbuf ->
28- let (tok, span) = get_next () in
29- (* Update lexbuf positions for Menhir *)
30- lexbuf.Lexing. lex_start_p < - {
31- Lexing. pos_fname = span.Span. file;
32- pos_lnum = span.start_pos.line;
33- pos_bol = span.start_pos.offset - span.start_pos.col + 1 ;
34- pos_cnum = span.start_pos.offset;
35- };
36- lexbuf.Lexing. lex_curr_p < - {
37- Lexing. pos_fname = span.Span. file;
38- pos_lnum = span.end_pos.line;
39- pos_bol = span.end_pos.offset - span.end_pos.col + 1 ;
40- pos_cnum = span.end_pos.offset;
41- };
42- (* Convert our token type to Menhir's *)
18+ (* * Convert our token type to Menhir's. *)
19+ let to_menhir_token (tok : Token.t ) : Parser.token =
4320 match tok with
4421 | Token. INT n -> Parser. INT n
4522 | Token. FLOAT f -> Parser. FLOAT f
@@ -146,27 +123,82 @@ let lexer_of_token_stream (next : unit -> Token.t * Span.t) : Lexing.lexbuf -> P
146123 | Token. ROW_VAR s -> Parser. ROW_VAR s
147124 | Token. EOF -> Parser. EOF
148125
149- (* * Parse a program from a string *)
150- let parse_string ~file content =
151- let token_stream = Lexer. from_string ~file content in
152- let lexbuf = Lexing. from_string content in
153- lexbuf.Lexing. lex_curr_p < - { lexbuf.Lexing. lex_curr_p with pos_fname = file };
154- let lexer = lexer_of_token_stream token_stream in
155- try
156- Parser. program lexer lexbuf
126+ (* * Convert a Span position to a Menhir/Lexing position. *)
127+ let lexing_pos ~file (p : Span.pos ) : Lexing.position =
128+ { Lexing. pos_fname = file;
129+ pos_lnum = p.Span. line;
130+ pos_bol = p.offset - p.col + 1 ;
131+ pos_cnum = p.offset }
132+
133+ (* * Create a Menhir-compatible lexer function from our token stream.
134+ Retained for any monolithic-API caller; the file/expr entry points
135+ below use the incremental driver instead. *)
136+ let lexer_of_token_stream (next : unit -> Token.t * Span.t ) : Lexing.lexbuf -> Parser.token =
137+ let buf = ref None in
138+ let get_next () =
139+ match ! buf with
140+ | Some (tok , span ) -> buf := None ; (tok, span)
141+ | None -> next ()
142+ in
143+ fun lexbuf ->
144+ let (tok, span) = get_next () in
145+ lexbuf.Lexing. lex_start_p < - lexing_pos ~file: span.Span. file span.start_pos;
146+ lexbuf.Lexing. lex_curr_p < - lexing_pos ~file: span.Span. file span.end_pos;
147+ to_menhir_token tok
148+
149+ (* The lexer emits ">>" as a single GTGT token (the right-shift operator).
150+ In nested applied generics — `Option<Result<T, E>>` — the inner and outer
151+ closing '>' fuse into that one GTGT, and the LR grammar, which expects a
152+ single GT to close each type-argument list, cannot proceed (issue #131).
153+
154+ Real LR compilers (rustc, Roslyn) solve this by splitting the token at
155+ parse time, *driven by the parser's own state* rather than a lexical
156+ guess: a GTGT is re-read as two GT tokens only when, in the current state,
157+ the parser would accept GT but not GTGT — i.e. we are closing a type-
158+ argument list, where the shift operator is not grammatical. Expression
159+ `a >> b` is untouched: there GTGT is acceptable, so no split happens.
160+ `>>>` lexes as GTGT then GT and is handled by one split plus the trailing
161+ GT; deeper nestings compose the same way. This requires the incremental
162+ API so we can interrogate the parser via [I.acceptable]. *)
163+ let drive (type a ) ~file
164+ (start : Lexing.position -> a I.checkpoint )
165+ (next : unit -> Token.t * Span.t ) : a =
166+ let pending : (Parser.token * Lexing.position * Lexing.position) option ref =
167+ ref None in
168+ let last_span = ref Span. dummy in
169+ let next_triple () =
170+ match ! pending with
171+ | Some t -> pending := None ; t
172+ | None ->
173+ let (tok, span) = next () in
174+ last_span := span;
175+ let sp = lexing_pos ~file: span.Span. file span.start_pos in
176+ let ep = lexing_pos ~file: span.Span. file span.end_pos in
177+ (to_menhir_token tok, sp, ep)
178+ in
179+ let rec run (cp : a I.checkpoint ) =
180+ match cp with
181+ | I. InputNeeded _ ->
182+ let (ptok, sp, ep) = next_triple () in
183+ let triple =
184+ match ptok with
185+ | Parser. GTGT
186+ when (not (I. acceptable cp Parser. GTGT sp))
187+ && I. acceptable cp Parser. GT sp ->
188+ let mid = { sp with Lexing. pos_cnum = sp.Lexing. pos_cnum + 1 } in
189+ pending := Some (Parser. GT , mid, ep);
190+ (Parser. GT , sp, mid)
191+ | _ -> (ptok, sp, ep)
192+ in
193+ run (I. offer cp triple)
194+ | I. Shifting _ | I. AboutToReduce _ -> run (I. resume cp)
195+ | I. HandlingError _ | I. Rejected -> raise Parser. Error
196+ | I. Accepted v -> v
197+ in
198+ try run (start (lexing_pos ~file { Span. line = 1 ; col = 1 ; offset = 0 }))
157199 with
158200 | Parser. Error ->
159- let pos = lexbuf.Lexing. lex_curr_p in
160- let span = Span. make
161- ~file
162- ~start_pos: { Span. line = pos.pos_lnum;
163- col = pos.pos_cnum - pos.pos_bol + 1 ;
164- offset = pos.pos_cnum }
165- ~end_pos: { Span. line = pos.pos_lnum;
166- col = pos.pos_cnum - pos.pos_bol + 1 ;
167- offset = pos.pos_cnum }
168- in
169- raise (Parse_error (" Syntax error" , span))
201+ raise (Parse_error (" Syntax error" , ! last_span))
170202 | Parser_errors. Parse_action_error (msg , startpos , endpos ) ->
171203 let span = Span. make
172204 ~file
@@ -179,6 +211,10 @@ let parse_string ~file content =
179211 in
180212 raise (Parse_error (msg, span))
181213
214+ (* * Parse a program from a string *)
215+ let parse_string ~file content =
216+ drive ~file Parser.Incremental. program (Lexer. from_string ~file content)
217+
182218(* * Parse a program from a file *)
183219let parse_file filename =
184220 let chan = open_in_bin filename in
@@ -190,33 +226,4 @@ let parse_file filename =
190226
191227(* * Parse a single expression from a string *)
192228let parse_expr ~file content =
193- let token_stream = Lexer. from_string ~file content in
194- let lexbuf = Lexing. from_string content in
195- lexbuf.Lexing. lex_curr_p < - { lexbuf.Lexing. lex_curr_p with pos_fname = file };
196- let lexer = lexer_of_token_stream token_stream in
197- try
198- Parser. expr_only lexer lexbuf
199- with
200- | Parser. Error ->
201- let pos = lexbuf.Lexing. lex_curr_p in
202- let span = Span. make
203- ~file
204- ~start_pos: { Span. line = pos.pos_lnum;
205- col = pos.pos_cnum - pos.pos_bol + 1 ;
206- offset = pos.pos_cnum }
207- ~end_pos: { Span. line = pos.pos_lnum;
208- col = pos.pos_cnum - pos.pos_bol + 1 ;
209- offset = pos.pos_cnum }
210- in
211- raise (Parse_error (" Syntax error" , span))
212- | Parser_errors. Parse_action_error (msg , startpos , endpos ) ->
213- let span = Span. make
214- ~file
215- ~start_pos: { Span. line = startpos.Lexing. pos_lnum;
216- col = startpos.pos_cnum - startpos.pos_bol + 1 ;
217- offset = startpos.pos_cnum }
218- ~end_pos: { Span. line = endpos.Lexing. pos_lnum;
219- col = endpos.pos_cnum - endpos.pos_bol + 1 ;
220- offset = endpos.pos_cnum }
221- in
222- raise (Parse_error (msg, span))
229+ drive ~file Parser.Incremental. expr_only (Lexer. from_string ~file content)
0 commit comments