11use anyhow:: { Context , Result } ;
2+ use bumpalo:: Bump ;
23use ruby_prism:: { parse, ParseResult } ;
34use std:: fs;
45use std:: path:: Path ;
56
6- /// Parse Ruby source code and return ruby-prism AST
7+ /// Parse session - manages source bytes for multiple files using arena allocation
78///
8- /// Note: Uses Box::leak internally to ensure 'static lifetime
9- pub fn parse_ruby_file ( file_path : & Path ) -> Result < ParseResult < ' static > > {
10- let source = fs:: read_to_string ( file_path)
11- . with_context ( || format ! ( "Failed to read file: {}" , file_path. display( ) ) ) ?;
12-
13- parse_ruby_source ( & source, file_path. to_string_lossy ( ) . to_string ( ) )
9+ /// Uses an arena allocator to efficiently manage source bytes during parsing.
10+ /// When the session is dropped, all memory is released at once.
11+ pub struct ParseSession {
12+ arena : Bump ,
1413}
1514
16- /// Parse Ruby source code string
17- pub fn parse_ruby_source ( source : & str , file_name : String ) -> Result < ParseResult < ' static > > {
18- // ruby-prism accepts &[u8]
19- // Use Box::leak to ensure 'static lifetime (memory leak is acceptable for analysis tools)
20- let source_bytes: & ' static [ u8 ] = Box :: leak ( source. as_bytes ( ) . to_vec ( ) . into_boxed_slice ( ) ) ;
21- let parse_result = parse ( source_bytes) ;
22-
23- // Check parse errors
24- let error_messages: Vec < String > = parse_result
25- . errors ( )
26- . map ( |e| {
27- format ! (
28- "Parse error at offset {}: {}" ,
29- e. location( ) . start_offset( ) ,
30- e. message( )
31- )
32- } )
33- . collect ( ) ;
34-
35- if !error_messages. is_empty ( ) {
36- anyhow:: bail!(
37- "Failed to parse Ruby source in {}:\n {}" ,
38- file_name,
39- error_messages. join( "\n " )
40- ) ;
15+ impl ParseSession {
16+ pub fn new ( ) -> Self {
17+ Self { arena : Bump :: new ( ) }
18+ }
19+
20+ /// Create with pre-allocated capacity (recommended for batch file processing)
21+ pub fn with_capacity ( capacity : usize ) -> Self {
22+ Self {
23+ arena : Bump :: with_capacity ( capacity) ,
24+ }
25+ }
26+
27+ /// Allocate source in arena and parse
28+ pub fn parse_source < ' a > ( & ' a self , source : & str , file_name : & str ) -> Result < ParseResult < ' a > > {
29+ // Copy bytes to arena
30+ let source_bytes = self . arena . alloc_slice_copy ( source. as_bytes ( ) ) ;
31+ let parse_result = parse ( source_bytes) ;
32+
33+ // Check for parse errors
34+ let error_messages: Vec < String > = parse_result
35+ . errors ( )
36+ . map ( |e| {
37+ format ! (
38+ "Parse error at offset {}: {}" ,
39+ e. location( ) . start_offset( ) ,
40+ e. message( )
41+ )
42+ } )
43+ . collect ( ) ;
44+
45+ if !error_messages. is_empty ( ) {
46+ anyhow:: bail!(
47+ "Failed to parse Ruby source in {}:\n {}" ,
48+ file_name,
49+ error_messages. join( "\n " )
50+ ) ;
51+ }
52+
53+ Ok ( parse_result)
54+ }
55+
56+ /// Read file and parse
57+ pub fn parse_file < ' a > ( & ' a self , file_path : & Path ) -> Result < ParseResult < ' a > > {
58+ let source = fs:: read_to_string ( file_path)
59+ . with_context ( || format ! ( "Failed to read file: {}" , file_path. display( ) ) ) ?;
60+
61+ self . parse_source ( & source, & file_path. to_string_lossy ( ) )
4162 }
4263
43- Ok ( parse_result)
64+ /// Get allocated memory size (for debugging)
65+ pub fn allocated_bytes ( & self ) -> usize {
66+ self . arena . allocated_bytes ( )
67+ }
68+
69+ /// Reset arena (for memory control during batch file processing)
70+ pub fn reset ( & mut self ) {
71+ self . arena . reset ( ) ;
72+ }
73+ }
74+
75+ impl Default for ParseSession {
76+ fn default ( ) -> Self {
77+ Self :: new ( )
78+ }
4479}
4580
4681#[ cfg( test) ]
@@ -51,21 +86,24 @@ mod tests {
5186 fn test_parse_simple_ruby ( ) {
5287 let source = r#"x = 1
5388puts x"# ;
54- let result = parse_ruby_source ( source, "test.rb" . to_string ( ) ) ;
89+ let session = ParseSession :: new ( ) ;
90+ let result = session. parse_source ( source, "test.rb" ) ;
5591 assert ! ( result. is_ok( ) ) ;
5692 }
5793
#[test]
fn test_parse_string_literal() {
    // A method call on a string literal must parse cleanly.
    let session = ParseSession::new();
    let outcome = session.parse_source(r#""hello".upcase"#, "test.rb");
    assert!(outcome.is_ok());
}
64101
#[test]
fn test_parse_array_literal() {
    // An array literal with a block-taking method call must parse cleanly.
    let session = ParseSession::new();
    let outcome = session.parse_source("[1, 2, 3].map { |x| x * 2 }", "test.rb");
    assert!(outcome.is_ok());
}
71109
@@ -75,22 +113,44 @@ puts x"#;
75113 x = "hello"
76114 x.upcase
77115end"# ;
78- let result = parse_ruby_source ( source, "test.rb" . to_string ( ) ) ;
116+ let session = ParseSession :: new ( ) ;
117+ let result = session. parse_source ( source, "test.rb" ) ;
79118 assert ! ( result. is_ok( ) ) ;
80119 }
81120
#[test]
fn test_parse_invalid_ruby() {
    // Malformed input must surface as an `Err`, not as a successful parse.
    let session = ParseSession::new();
    let outcome = session.parse_source("def\n end end", "test.rb");
    assert!(outcome.is_err());
}
88128
#[test]
fn test_parse_method_call() {
    // Constant receiver call followed by a call on the resulting local.
    let session = ParseSession::new();
    let outcome = session.parse_source("user = User.new\nuser.save", "test.rb");
    assert!(outcome.is_ok());
}
137+
#[test]
fn test_parse_session_memory_tracking() {
    // Parsing copies the source into the arena, so the reported size must be non-zero.
    let session = ParseSession::new();
    drop(session.parse_source("x = 1", "test.rb").unwrap());
    assert!(session.allocated_bytes() > 0);
}
145+
#[test]
fn test_parse_session_reset() {
    let mut session = ParseSession::new();
    let source = "x = 1";

    // Drop the parse result right away: `reset` needs `&mut self`, so nothing may
    // still borrow from the session when it is called.
    drop(session.parse_source(source, "test.rb").unwrap());
    let before_reset = session.allocated_bytes();
    assert!(before_reset > 0);

    session.reset();

    // After reset, allocated_bytes may still report retained chunks, so the size is
    // not asserted here. What must hold is that the session is usable again.
    assert!(session.parse_source(source, "test.rb").is_ok());
}
96156}
0 commit comments