From 920734c295b8b632546ca10a95d51444a6caeb40 Mon Sep 17 00:00:00 2001 From: dak2 Date: Wed, 4 Feb 2026 23:14:15 +0900 Subject: [PATCH] Fix memory leak Add arena allocator to fix memory leak. Before the fix, `Box::leak` was causing a memory leak because it utilized a `'static` lifetime, abandoning ownership of heap memory, which would then not be freed until program termination. The problem may become more serious when a large number of files are inspected. Therefore, I utilized the arena allocator mechanism to free up memory. --- ext/src/lib.rs | 13 +- rust/Cargo.toml | 1 + rust/src/analyzer/definitions.rs | 17 ++- rust/src/analyzer/install.rs | 17 ++- rust/src/analyzer/tests/integration_test.rs | 5 +- rust/src/checker.rs | 10 +- rust/src/parser.rs | 138 ++++++++++++++------ 7 files changed, 139 insertions(+), 62 deletions(-) diff --git a/ext/src/lib.rs b/ext/src/lib.rs index 334439d..09fd988 100644 --- a/ext/src/lib.rs +++ b/ext/src/lib.rs @@ -6,7 +6,8 @@ use magnus::{function, method, prelude::*, Error, Ruby}; use methodray_core::{ analyzer::AstInstaller, env::{GlobalEnv, LocalEnv}, - parser, rbs, + parser::ParseSession, + rbs, }; #[magnus::wrap(class = "MethodRay::Analyzer")] @@ -27,11 +28,11 @@ impl Analyzer { /// Execute type inference fn infer_types(&self, source: String) -> Result { // Parse - let parse_result = - parser::parse_ruby_source(&source, "source.rb".to_string()).map_err(|e| { - let ruby = unsafe { Ruby::get_unchecked() }; - Error::new(ruby.exception_runtime_error(), e.to_string()) - })?; + let session = ParseSession::new(); + let parse_result = session.parse_source(&source, "source.rb").map_err(|e| { + let ruby = unsafe { Ruby::get_unchecked() }; + Error::new(ruby.exception_runtime_error(), e.to_string()) + })?; // Build graph let mut genv = GlobalEnv::new(); diff --git a/rust/Cargo.toml b/rust/Cargo.toml index c0bd610..ea14ef7 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -18,6 +18,7 @@ path = "src/lsp/main.rs" required-features = ["lsp"] [dependencies] +bumpalo = "3.14" smallvec = "1.13" rayon = "1.10" walkdir = "2.5" diff --git a/rust/src/analyzer/definitions.rs b/rust/src/analyzer/definitions.rs index 517b0d7..7df5138 100644 --- a/rust/src/analyzer/definitions.rs +++ b/rust/src/analyzer/definitions.rs @@ -79,7 +79,7 @@ fn extract_constant_path(node: &Node) -> Option { #[cfg(test)] mod tests { use super::*; - use crate::parser::parse_ruby_source; + use crate::parser::ParseSession; #[test] fn test_enter_exit_class_scope() { @@ -150,7 +150,8 @@ mod tests { #[test] fn test_extract_simple_class_name() { let source = "class User; end"; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let root = parse_result.node(); let program = root.as_program_node().unwrap(); let stmt = program.statements().body().first().unwrap(); @@ -163,7 +164,8 @@ mod tests { #[test] fn test_extract_qualified_class_name() { let source = "class Api::User; end"; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let root = parse_result.node(); let program = root.as_program_node().unwrap(); let stmt = program.statements().body().first().unwrap(); @@ -176,7 +178,8 @@ mod tests { #[test] fn test_extract_deeply_qualified_class_name() { let source = "class Api::V1::Admin::User; end"; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let root = parse_result.node(); let program = root.as_program_node().unwrap(); let stmt = program.statements().body().first().unwrap(); @@ -189,7 +192,8 @@ mod tests { #[test] fn test_extract_simple_module_name() { let source = "module Utils; end"; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let root = parse_result.node(); let program = root.as_program_node().unwrap(); let stmt = program.statements().body().first().unwrap(); @@ -202,7 +206,8 @@ mod tests { #[test] fn test_extract_qualified_module_name() { let source = "module Api::V1; end"; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let root = parse_result.node(); let program = root.as_program_node().unwrap(); let stmt = program.statements().body().first().unwrap(); diff --git a/rust/src/analyzer/install.rs b/rust/src/analyzer/install.rs index fa37d7e..5d480c0 100644 --- a/rust/src/analyzer/install.rs +++ b/rust/src/analyzer/install.rs @@ -536,13 +536,14 @@ impl<'a> AstInstaller<'a> { #[cfg(test)] mod tests { use super::*; - use crate::parser::parse_ruby_source; + use crate::parser::ParseSession; use crate::types::Type; #[test] fn test_install_literal() { let source = r#"x = "hello""#; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let mut genv = GlobalEnv::new(); let mut lenv = LocalEnv::new(); @@ -568,7 +569,8 @@ mod tests { x = "hello" y = 42 "#; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let mut genv = GlobalEnv::new(); let mut lenv = LocalEnv::new(); @@ -597,7 +599,8 @@ y = 42 x = "hello" y = x.upcase "#; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let mut genv = GlobalEnv::new(); genv.register_builtin_method(Type::string(), "upcase", Type::string()); @@ -631,7 +634,8 @@ module Utils end end "#; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let mut genv = GlobalEnv::new(); let mut lenv = LocalEnv::new(); @@ -663,7 +667,8 @@ module Api end end "#; - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let mut genv = GlobalEnv::new(); let mut lenv = LocalEnv::new(); diff --git a/rust/src/analyzer/tests/integration_test.rs b/rust/src/analyzer/tests/integration_test.rs index f415f8d..4041a27 100644 --- a/rust/src/analyzer/tests/integration_test.rs +++ b/rust/src/analyzer/tests/integration_test.rs @@ -8,12 +8,13 @@ use crate::analyzer::AstInstaller; use crate::env::{GlobalEnv, LocalEnv}; -use crate::parser::parse_ruby_source; +use crate::parser::ParseSession; use crate::types::Type; /// Helper to run analysis on Ruby source code fn analyze(source: &str) -> (GlobalEnv, LocalEnv) { - let parse_result = parse_ruby_source(source, "test.rb".to_string()).unwrap(); + let session = ParseSession::new(); + let parse_result = session.parse_source(source, "test.rb").unwrap(); let mut genv = GlobalEnv::new(); diff --git a/rust/src/checker.rs b/rust/src/checker.rs index e5150f9..3693da9 100644 --- a/rust/src/checker.rs +++ b/rust/src/checker.rs @@ -1,7 +1,7 @@ use crate::analyzer::AstInstaller; use crate::diagnostics::Diagnostic; use crate::env::{GlobalEnv, LocalEnv}; -use crate::parser; +use crate::parser::ParseSession; use anyhow::{Context, Result}; use std::path::Path; @@ -30,8 +30,12 @@ impl FileChecker { let source = std::fs::read_to_string(file_path) .with_context(|| format!("Failed to read {}", file_path.display()))?; + // Create ParseSession (arena allocator) for this check + let session = ParseSession::new(); + // Parse file - let parse_result = parser::parse_ruby_file(file_path) + let parse_result = session + .parse_source(&source, &file_path.to_string_lossy()) .with_context(|| format!("Failed to parse {}", file_path.display()))?; // Create fresh GlobalEnv for this analysis @@ -56,7 +60,7 @@ impl FileChecker { let diagnostics = collect_diagnostics(&genv, file_path); Ok(diagnostics) - } + } // session drops here, freeing arena memory } /// Load RBS methods from cache (CLI mode without Ruby runtime) diff --git a/rust/src/parser.rs b/rust/src/parser.rs index 82e8d23..1146a99 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -1,46 +1,81 @@ use anyhow::{Context, Result}; +use bumpalo::Bump; use ruby_prism::{parse, ParseResult}; use std::fs; use std::path::Path; -/// Parse Ruby source code and return ruby-prism AST +/// Parse session - manages source bytes for multiple files using arena allocation /// -/// Note: Uses Box::leak internally to ensure 'static lifetime -pub fn parse_ruby_file(file_path: &Path) -> Result> { - let source = fs::read_to_string(file_path) - .with_context(|| format!("Failed to read file: {}", file_path.display()))?; - - parse_ruby_source(&source, file_path.to_string_lossy().to_string()) +/// Uses an arena allocator to efficiently manage source bytes during parsing. +/// When the session is dropped, all memory is released at once. +pub struct ParseSession { + arena: Bump, } -/// Parse Ruby source code string -pub fn parse_ruby_source(source: &str, file_name: String) -> Result> { - // ruby-prism accepts &[u8] - // Use Box::leak to ensure 'static lifetime (memory leak is acceptable for analysis tools) - let source_bytes: &'static [u8] = Box::leak(source.as_bytes().to_vec().into_boxed_slice()); - let parse_result = parse(source_bytes); - - // Check parse errors - let error_messages: Vec = parse_result - .errors() - .map(|e| { - format!( - "Parse error at offset {}: {}", - e.location().start_offset(), - e.message() - ) - }) - .collect(); - - if !error_messages.is_empty() { - anyhow::bail!( - "Failed to parse Ruby source in {}:\n{}", - file_name, - error_messages.join("\n") - ); +impl ParseSession { + pub fn new() -> Self { + Self { arena: Bump::new() } + } + + /// Create with pre-allocated capacity (recommended for batch file processing) + pub fn with_capacity(capacity: usize) -> Self { + Self { + arena: Bump::with_capacity(capacity), + } + } + + /// Allocate source in arena and parse + pub fn parse_source<'a>(&'a self, source: &str, file_name: &str) -> Result> { + // Copy bytes to arena + let source_bytes = self.arena.alloc_slice_copy(source.as_bytes()); + let parse_result = parse(source_bytes); + + // Check for parse errors + let error_messages: Vec = parse_result + .errors() + .map(|e| { + format!( + "Parse error at offset {}: {}", + e.location().start_offset(), + e.message() + ) + }) + .collect(); + + if !error_messages.is_empty() { + anyhow::bail!( + "Failed to parse Ruby source in {}:\n{}", + file_name, + error_messages.join("\n") + ); + } + + Ok(parse_result) + } + + /// Read file and parse + pub fn parse_file<'a>(&'a self, file_path: &Path) -> Result> { + let source = fs::read_to_string(file_path) + .with_context(|| format!("Failed to read file: {}", file_path.display()))?; + + self.parse_source(&source, &file_path.to_string_lossy()) } - Ok(parse_result) + /// Get allocated memory size (for debugging) + pub fn allocated_bytes(&self) -> usize { + self.arena.allocated_bytes() + } + + /// Reset arena (for memory control during batch file processing) + pub fn reset(&mut self) { + self.arena.reset(); + } +} + +impl Default for ParseSession { + fn default() -> Self { + Self::new() + } } #[cfg(test)] @@ -51,21 +86,24 @@ mod tests { fn test_parse_simple_ruby() { let source = r#"x = 1 puts x"#; - let result = parse_ruby_source(source, "test.rb".to_string()); + let session = ParseSession::new(); + let result = session.parse_source(source, "test.rb"); assert!(result.is_ok()); } #[test] fn test_parse_string_literal() { let source = r#""hello".upcase"#; - let result = parse_ruby_source(source, "test.rb".to_string()); + let session = ParseSession::new(); + let result = session.parse_source(source, "test.rb"); assert!(result.is_ok()); } #[test] fn test_parse_array_literal() { let source = r#"[1, 2, 3].map { |x| x * 2 }"#; - let result = parse_ruby_source(source, "test.rb".to_string()); + let session = ParseSession::new(); + let result = session.parse_source(source, "test.rb"); assert!(result.is_ok()); } @@ -75,14 +113,16 @@ puts x"#; x = "hello" x.upcase end"#; - let result = parse_ruby_source(source, "test.rb".to_string()); + let session = ParseSession::new(); + let result = session.parse_source(source, "test.rb"); assert!(result.is_ok()); } #[test] fn test_parse_invalid_ruby() { let source = "def\nend end"; - let result = parse_ruby_source(source, "test.rb".to_string()); + let session = ParseSession::new(); + let result = session.parse_source(source, "test.rb"); assert!(result.is_err()); } @@ -90,7 +130,27 @@ end"#; fn test_parse_method_call() { let source = r#"user = User.new user.save"#; - let result = parse_ruby_source(source, "test.rb".to_string()); + let session = ParseSession::new(); + let result = session.parse_source(source, "test.rb"); assert!(result.is_ok()); } + + #[test] + fn test_parse_session_memory_tracking() { + let session = ParseSession::new(); + let source = "x = 1"; + let _ = session.parse_source(source, "test.rb").unwrap(); + assert!(session.allocated_bytes() > 0); + } + + #[test] + fn test_parse_session_reset() { + let mut session = ParseSession::new(); + let source = "x = 1"; + let _ = session.parse_source(source, "test.rb").unwrap(); + let before_reset = session.allocated_bytes(); + session.reset(); + // After reset, allocated_bytes may still report used chunks but internal data is cleared + assert!(before_reset > 0); + } }