diff --git a/.gitignore b/.gitignore index f6742631..9fc5c7d4 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,5 @@ Thumbs.db /test.purs *.purs.bak -/tests/oa \ No newline at end of file +/tests/oa +/tests/sources.txt \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index f3957ef5..54200bf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ swc_ecma_parser = "34.0.0" swc_ecma_ast = "20.0.1" swc_common = "18.0.1" ntest_timeout = "0.9.5" +rayon = "1.10" +mimalloc = { version = "0.1", default-features = false } [build-dependencies] lalrpop = "0.22" diff --git a/src/ast.rs b/src/ast.rs index 6fe4279a..3608949c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -629,8 +629,9 @@ impl DoStatement { // ===== CST → AST Conversion ===== -pub fn convert(module: cst::Module, registry: &ModuleRegistry) -> (Module, Vec) { - let mut conv = Converter::from_module(&module, registry); +pub fn convert(module: impl std::borrow::Borrow, registry: &ModuleRegistry) -> (Module, Vec) { + let module = module.borrow(); + let mut conv = Converter::from_module(module, registry); let decls = module.decls.iter().map(|d| conv.convert_decl(d)).collect(); let ast = Module { span: module.span, @@ -714,26 +715,19 @@ impl Default for Converter { } fn module_name_to_symbol(name: &ModuleName) -> Symbol { - let parts: Vec = name - .parts - .iter() - .map(|p| interner::resolve(*p).unwrap_or_default()) - .collect(); - intern(&parts.join(".")) + interner::intern_module_name(&name.parts) } fn is_prim_module(name: &ModuleName) -> bool { - name.parts.len() == 1 && interner::resolve(name.parts[0]).unwrap_or_default() == "Prim" + name.parts.len() == 1 && interner::symbol_eq(name.parts[0], "Prim") } fn is_prim_submodule(name: &ModuleName) -> bool { - name.parts.len() >= 2 && interner::resolve(name.parts[0]).unwrap_or_default() == "Prim" + name.parts.len() >= 2 && interner::symbol_eq(name.parts[0], "Prim") } fn qualified_symbol(module: Symbol, name: Symbol) -> Symbol { - let m = interner::resolve(module).unwrap_or_default(); - let n = interner::resolve(name).unwrap_or_default(); - intern(&format!("{}.{}", m, n)) + interner::intern_qualified(module, name) } impl Converter { diff --git a/src/build/mod.rs b/src/build/mod.rs index 560d39f0..d39874e9 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -12,6 +12,8 @@ use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; +use rayon::prelude::*; + use crate::cst::{Decl, Module}; use crate::interner::{self, Symbol}; use crate::js_ffi; @@ -65,15 +67,11 @@ struct ParsedModule { // ===== Helpers ===== fn module_name_string(parts: &[Symbol]) -> String { - parts - .iter() - .map(|s| interner::resolve(*s).unwrap_or_default()) - .collect::>() - .join(".") + interner::resolve_module_name(parts) } fn is_prim_import(parts: &[Symbol]) -> bool { - !parts.is_empty() && interner::resolve(parts[0]).unwrap_or_default() == "Prim" + !parts.is_empty() && interner::symbol_eq(parts[0], "Prim") } /// Extract the names of all `foreign import` declarations from a module. @@ -99,30 +97,30 @@ pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { let mut build_errors = Vec::new(); // Phase 1: Glob resolution - log::debug!("Phase 1: Resolving glob patterns: {:?}", globs); + log::info!("Phase 1: Resolving glob patterns: {:?}", globs); let phase_start = Instant::now(); let paths = resolve_globs(globs, &mut build_errors); - log::debug!( + log::info!( "Phase 1 complete: found {} files in {:.2?}", paths.len(), phase_start.elapsed() ); for path in &paths { - log::debug!(" discovered: {}", path.display()); + log::info!(" discovered: {}", path.display()); } // Phase 2: Read and parse - log::debug!("Phase 2a: Reading source files"); + log::info!("Phase 2a: Reading source files"); let phase_start = Instant::now(); let mut sources = Vec::new(); for path in &paths { match std::fs::read_to_string(path) { Ok(source) => { - log::debug!(" read {} ({} bytes)", path.display(), source.len()); + log::info!(" read {} ({} bytes)", path.display(), source.len()); sources.push((path.to_string_lossy().into_owned(), source)); } Err(e) => { - log::debug!(" failed to read {}: {}", path.display(), e); + log::info!(" failed to read {}: {}", path.display(), e); build_errors.push(BuildError::FileReadError { path: path.clone(), error: e.to_string(), @@ -130,25 +128,25 @@ pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { } } } - log::debug!( + log::info!( "Phase 2a complete: read {} source files in {:.2?}", sources.len(), phase_start.elapsed() ); - log::debug!("Phase 2b: Scanning for FFI companion .js files"); + log::info!("Phase 2b: Scanning for FFI companion .js files"); let mut js_sources: HashMap = HashMap::new(); for (path_str, _) in &sources { let purs_path = PathBuf::from(path_str); let js_path = purs_path.with_extension("js"); if js_path.exists() { if let Ok(js_source) = std::fs::read_to_string(&js_path) { - log::debug!(" found FFI companion: {}", js_path.display()); + log::info!(" found FFI companion: {}", js_path.display()); js_sources.insert(path_str.clone(), js_source); } } } - log::debug!( + log::info!( "Phase 2b complete: found {} FFI companion files", js_sources.len() ); @@ -173,7 +171,7 @@ pub fn build(globs: &[&str], output_dir: Option) -> BuildResult { build_errors.append(&mut result.build_errors); result.build_errors = build_errors; - log::debug!("Build finished in {:.2?}", build_start.elapsed()); + log::info!("Build finished in {:.2?}", build_start.elapsed()); result } @@ -211,41 +209,44 @@ pub fn build_from_sources_with_options( let pipeline_start = Instant::now(); let mut build_errors = Vec::new(); - // Phase 2: Parse all sources - log::debug!("Phase 2c: Parsing {} source files", sources.len()); + // Phase 2: Parse all sources (parallel) + log::info!("Phase 2c: Parsing {} source files", sources.len()); let phase_start = Instant::now(); + + // Parse all sources in parallel + let parse_results: Vec<_> = sources + .par_iter() + .map(|&(path_str, source)| { + let path = PathBuf::from(path_str); + match crate::parser::parse(source) { + Ok(module) => Ok((path, module)), + Err(e) => Err(BuildError::CompileError { path, error: e }), + } + }) + .collect(); + + // Sequential validation (Prim check, dup check, etc.) let mut parsed: Vec = Vec::new(); let mut seen_modules: HashMap, PathBuf> = HashMap::new(); - for &(path_str, source) in sources { - let path = PathBuf::from(path_str); - let parse_start = Instant::now(); - let module = match crate::parser::parse(source) { - Ok(m) => { - log::debug!( - " parsed {} ({} decls, {} imports) in {:.2?}", - path_str, - m.decls.len(), - m.imports.len(), - parse_start.elapsed() - ); - m - } + for (i, result) in parse_results.into_iter().enumerate() { + let (path, module) = match result { + Ok(pair) => pair, Err(e) => { - log::debug!(" parse error in {}: {}", path_str, e); - build_errors.push(BuildError::CompileError { path, error: e }); + build_errors.push(e); continue; } }; + let path_str = sources[i].0; let module_parts: Vec = module.name.value.parts.clone(); let module_name = module_name_string(&module_parts); // Check for reserved Prim namespace if !module_parts.is_empty() { - let first = interner::resolve(module_parts[0]).unwrap_or_default(); - if first == "Prim" { - log::debug!(" rejected {}: Prim namespace is reserved", module_name); + let is_prim = interner::with_resolved(module_parts[0], |s| s == "Prim").unwrap_or(false); + if is_prim { + log::info!(" rejected {}: Prim namespace is reserved", module_name); build_errors.push(BuildError::CannotDefinePrimModules { module_name, path }); continue; } @@ -254,9 +255,11 @@ pub fn build_from_sources_with_options( // Check for invalid characters in module name segments (no apostrophes or underscores) let mut invalid_module = false; for part in &module_parts { - let part_str = interner::resolve(*part).unwrap_or_default(); - if let Some(c) = part_str.chars().find(|&c| c == '\'' || c == '_') { - log::debug!( + let invalid_char = interner::with_resolved(*part, |s| { + s.chars().find(|&c| c == '\'' || c == '_') + }).flatten(); + if let Some(c) = invalid_char { + log::info!( " rejected {}: invalid character '{}' in module name", module_name, c @@ -276,7 +279,7 @@ pub fn build_from_sources_with_options( // Check for duplicate module names if let Some(existing_path) = seen_modules.get(&module_parts) { - log::debug!( + log::info!( " rejected {}: duplicate (already at {})", module_name, existing_path.display() @@ -312,7 +315,7 @@ pub fn build_from_sources_with_options( js_source, }); } - log::debug!( + log::info!( "Phase 2c complete: parsed {} modules (rejected {}) in {:.2?}", parsed.len(), sources.len() - parsed.len(), @@ -320,14 +323,14 @@ pub fn build_from_sources_with_options( ); // Phase 3: Build dependency graph and check for unknown imports - log::debug!("Phase 3: Building dependency graph"); + log::info!("Phase 3: Building dependency graph"); let phase_start = Instant::now(); let known_modules: HashSet> = parsed.iter().map(|p| p.module_parts.clone()).collect(); let mut registry = match start_registry { Some(base) => { - log::debug!(" using base registry from support packages"); + log::info!(" using base registry from support packages"); ModuleRegistry::with_base(base) } None => ModuleRegistry::default(), @@ -337,7 +340,7 @@ pub fn build_from_sources_with_options( for imp_parts in &pm.import_parts { let imp_name = module_name_string(imp_parts); if !known_modules.contains(imp_parts) && !registry.contains(imp_parts) { - log::debug!( + log::info!( " missing import: {} imports {} (not found)", pm.module_name, imp_name @@ -349,7 +352,7 @@ pub fn build_from_sources_with_options( span: pm.module.span, }); } else { - log::debug!(" resolved import: {} -> {}", pm.module_name, imp_name); + log::info!(" resolved import: {} -> {}", pm.module_name, imp_name); } } } @@ -362,11 +365,11 @@ pub fn build_from_sources_with_options( .collect(); // Topological sort (Kahn's algorithm) - log::debug!("Phase 3b: Topological sort of {} modules", parsed.len()); + log::info!("Phase 3b: Topological sort of {} modules", parsed.len()); let levels: Vec> = match topological_sort_levels(&parsed, &module_index) { Ok(levels) => { - log::debug!(" {} dependency levels for parallel build", levels.len()); + log::info!(" {} dependency levels for parallel build", levels.len()); levels } Err(cycle_indices) => { @@ -374,7 +377,7 @@ pub fn build_from_sources_with_options( .iter() .map(|&i| (parsed[i].module_name.clone(), parsed[i].path.clone())) .collect(); - log::debug!( + log::info!( " cycle detected among: {:?}", cycle_names .iter() @@ -390,119 +393,123 @@ pub fn build_from_sources_with_options( } } }; - log::debug!( + log::info!( "Phase 3 complete: dependency graph built in {:.2?}", phase_start.elapsed() ); - // Phase 4: Typecheck in dependency order (sequential, on a large-stack thread) + // Phase 4: Typecheck in dependency order (parallel within each level) let total_modules: usize = levels.iter().map(|l| l.len()).sum(); - log::debug!( - "Phase 4: Typechecking {} modules (sequential)", + log::info!( + "Phase 4: Typechecking {} modules ({} levels, parallel within levels)", total_modules, + levels.len(), ); let phase_start = Instant::now(); let timeout = options.module_timeout; let mut module_results = Vec::new(); - std::thread::scope(|s| { - let handle = std::thread::Builder::new() - .stack_size(16 * 1024 * 1024) - .spawn_scoped(s, || { - let mut done = 0usize; - let mut results = Vec::new(); - let mut errs = Vec::new(); - - for idx in levels.iter().flatten().copied() { - let pm = &parsed[idx]; - let tc_start = Instant::now(); - let deadline = timeout.map(|t| tc_start + t); - let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { - let mod_sym = crate::interner::intern(&pm.module_name); - let path_str = pm.path.to_string_lossy(); - crate::typechecker::set_deadline(deadline, mod_sym, &path_str); - log::debug!(" typechecking {}", pm.module_name); - let (ast_module, convert_errors) = crate::ast::convert(pm.module.clone(), ®istry); - let mut result = check::check_module(&ast_module, ®istry); - // Prepend AST conversion errors (name resolution failures, overlapping bindings, etc.) - // These are combined with typechecker errors so both are visible. - if !convert_errors.is_empty() { - let mut all_errors = convert_errors; - all_errors.extend(result.errors); - result.errors = all_errors; - } - log::debug!( - " finished {} ({} type errors) in {:.2?}", + // Build a rayon thread pool with large stacks for deep recursion in the typechecker. + let num_threads = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1); + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .stack_size(16 * 1024 * 1024) + .build() + .expect("failed to build rayon thread pool"); + // Scale wall-clock deadline to account for resource contention under parallel + // execution (interner mutex, CPU cache pressure, memory bandwidth). + let parallel_timeout = timeout.map(|t| t * 3); + log::info!(" using {} worker threads (deadline {}s)", num_threads, + parallel_timeout.map(|t| t.as_secs()).unwrap_or(0)); + + let mut done = 0usize; + + for level in &levels { + // Typecheck all modules in this level in parallel + let level_results: Vec<_> = pool.install(|| { + level.par_iter().map(|&idx| { + let pm = &parsed[idx]; + let tc_start = Instant::now(); + let deadline = parallel_timeout.map(|t| tc_start + t); + let check_result = std::panic::catch_unwind(AssertUnwindSafe(|| { + let mod_sym = crate::interner::intern(&pm.module_name); + let path_str = pm.path.to_string_lossy(); + crate::typechecker::set_deadline(deadline, mod_sym, &path_str); + let (ast_module, convert_errors) = crate::ast::convert(&pm.module, ®istry); + let mut result = check::check_module(&ast_module, ®istry); + if !convert_errors.is_empty() { + let mut all_errors = convert_errors; + all_errors.extend(result.errors); + result.errors = all_errors; + } + crate::typechecker::set_deadline(None, mod_sym, ""); + result + })); + (idx, check_result, tc_start.elapsed()) + }).collect() + }); + + // Register results sequentially (registry needs &mut) + for (idx, check_result, elapsed) in level_results { + let pm = &parsed[idx]; + done += 1; + match check_result { + Ok(result) => { + log::info!( + " [{}/{}] ok: {} ({:.2?})", + done, + total_modules, + pm.module_name, + elapsed + ); + registry.register(&pm.module_parts, result.exports); + module_results.push(ModuleResult { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + types: result.types, + type_errors: result.errors, + }); + } + Err(payload) => { + let is_deadline = payload + .downcast_ref::<&str>() + .map_or(false, |s| s.starts_with("typechecking deadline exceeded")) + || payload.downcast_ref::().map_or(false, |s| { + s.starts_with("typechecking deadline exceeded") + }); + if is_deadline { + log::info!( + " [{}/{}] timeout: {} ({:.2?})", + done, + total_modules, pm.module_name, - result.errors.len(), - tc_start.elapsed() + elapsed ); - crate::typechecker::set_deadline(None, mod_sym, ""); - result - })); - let elapsed = tc_start.elapsed(); - done += 1; - match check_result { - Ok(result) => { - log::debug!( - " [{}/{}] ok: {} ({:.2?})", - done, - total_modules, - pm.module_name, - elapsed - ); - registry.register(&pm.module_parts, result.exports); - results.push(ModuleResult { - path: pm.path.clone(), - module_name: pm.module_name.clone(), - types: result.types, - type_errors: result.errors, - }); - } - Err(payload) => { - let is_deadline = payload - .downcast_ref::<&str>() - .map_or(false, |s| s.starts_with("typechecking deadline exceeded")) - || payload.downcast_ref::().map_or(false, |s| { - s.starts_with("typechecking deadline exceeded") - }); - if is_deadline { - log::debug!( - " [{}/{}] timeout: {} ({:.2?})", - done, - total_modules, - pm.module_name, - elapsed - ); - errs.push(BuildError::TypecheckTimeout { - path: pm.path.clone(), - module_name: pm.module_name.clone(), - timeout_secs: timeout.unwrap().as_secs(), - }); - } else { - log::debug!( - " [{}/{}] panic: {} ({:.2?})", - done, - total_modules, - pm.module_name, - elapsed - ); - errs.push(BuildError::TypecheckPanic { - path: pm.path.clone(), - module_name: pm.module_name.clone(), - }); - } - } + build_errors.push(BuildError::TypecheckTimeout { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + timeout_secs: timeout.unwrap().as_secs(), + }); + } else { + log::info!( + " [{}/{}] panic: {} ({:.2?})", + done, + total_modules, + pm.module_name, + elapsed + ); + build_errors.push(BuildError::TypecheckPanic { + path: pm.path.clone(), + module_name: pm.module_name.clone(), + }); } } - (results, errs) - }) - .expect("failed to spawn typecheck thread"); - let (results, errs) = handle.join().expect("typecheck thread panicked"); - module_results = results; - build_errors.extend(errs); - }); - log::debug!( + } + } + } + log::info!( "Phase 4 complete: typechecked {} modules in {:.2?}", module_results.len(), phase_start.elapsed() @@ -510,7 +517,7 @@ pub fn build_from_sources_with_options( // Phase 5: FFI validation (only when JS sources were provided) if js_sources.is_some() { - log::debug!("Phase 5: FFI validation"); + log::info!("Phase 5: FFI validation"); let phase_start = Instant::now(); let mut ffi_checked = 0; for pm in &parsed { @@ -519,7 +526,7 @@ pub fn build_from_sources_with_options( match (&pm.js_source, has_foreign) { (Some(js_src), _) => { - log::debug!( + log::info!( " validating FFI for {} ({} foreign imports)", pm.module_name, foreign_names.len() @@ -529,12 +536,12 @@ pub fn build_from_sources_with_options( Ok(info) => { let ffi_errors = js_ffi::validate_foreign_module(&foreign_names, &info); if ffi_errors.is_empty() { - log::debug!(" FFI OK for {}", pm.module_name); + log::info!(" FFI OK for {}", pm.module_name); } for err in ffi_errors { match err { js_ffi::FfiError::DeprecatedFFICommonJSModule => { - log::debug!( + log::info!( " FFI error in {}: deprecated CommonJS module", pm.module_name ); @@ -546,7 +553,7 @@ pub fn build_from_sources_with_options( ); } js_ffi::FfiError::MissingFFIImplementations { missing } => { - log::debug!( + log::info!( " FFI error in {}: missing implementations: {:?}", pm.module_name, missing @@ -558,7 +565,7 @@ pub fn build_from_sources_with_options( }); } js_ffi::FfiError::UnusedFFIImplementations { unused } => { - log::debug!( + log::info!( " FFI error in {}: unused implementations: {:?}", pm.module_name, unused @@ -588,7 +595,7 @@ pub fn build_from_sources_with_options( ); } js_ffi::FfiError::ParseError { message } => { - log::debug!( + log::info!( " FFI parse error in {}: {}", pm.module_name, message @@ -603,7 +610,7 @@ pub fn build_from_sources_with_options( } } Err(msg) => { - log::debug!(" FFI parse error in {}: {}", pm.module_name, msg); + log::info!(" FFI parse error in {}: {}", pm.module_name, msg); build_errors.push(BuildError::FFIParseError { module_name: pm.module_name.clone(), path: pm.path.clone(), @@ -613,7 +620,7 @@ pub fn build_from_sources_with_options( } } (None, true) => { - log::debug!( + log::info!( " missing FFI companion for {} ({} foreign imports)", pm.module_name, foreign_names.len() @@ -626,7 +633,7 @@ pub fn build_from_sources_with_options( (None, false) => {} } } - log::debug!( + log::info!( "Phase 5 complete: validated {} FFI modules in {:.2?}", ffi_checked, phase_start.elapsed() @@ -635,7 +642,7 @@ pub fn build_from_sources_with_options( // Phase 6: Code generation (only when output_dir is specified) if let Some(ref output_dir) = options.output_dir { - log::debug!("Phase 6: JavaScript code generation to {}", output_dir.display()); + log::info!("Phase 6: JavaScript code generation to {}", output_dir.display()); let phase_start = Instant::now(); let mut codegen_count = 0; @@ -648,7 +655,7 @@ pub fn build_from_sources_with_options( for pm in &parsed { if !ok_modules.contains(&pm.module_name) { - log::debug!(" skipping {} (has type errors)", pm.module_name); + log::info!(" skipping {} (has type errors)", pm.module_name); continue; } @@ -656,14 +663,14 @@ pub fn build_from_sources_with_options( let module_exports = match registry.lookup(&pm.module_parts) { Some(exports) => exports, None => { - log::debug!(" skipping {} (no exports in registry)", pm.module_name); + log::info!(" skipping {} (no exports in registry)", pm.module_name); continue; } }; let has_ffi = pm.js_source.is_some(); - log::debug!(" generating JS for {}", pm.module_name); + log::info!(" generating JS for {}", pm.module_name); let js_module = crate::codegen::js::module_to_js( &pm.module, &pm.module_name, @@ -678,7 +685,7 @@ pub fn build_from_sources_with_options( // Write output//index.js let module_dir = output_dir.join(&pm.module_name); if let Err(e) = std::fs::create_dir_all(&module_dir) { - log::debug!(" failed to create dir {}: {}", module_dir.display(), e); + log::info!(" failed to create dir {}: {}", module_dir.display(), e); build_errors.push(BuildError::FileReadError { path: module_dir.clone(), error: format!("Failed to create output directory: {e}"), @@ -688,40 +695,40 @@ pub fn build_from_sources_with_options( let index_path = module_dir.join("index.js"); if let Err(e) = std::fs::write(&index_path, &js_text) { - log::debug!(" failed to write {}: {}", index_path.display(), e); + log::info!(" failed to write {}: {}", index_path.display(), e); build_errors.push(BuildError::FileReadError { path: index_path, error: format!("Failed to write JS output: {e}"), }); continue; } - log::debug!(" wrote {} ({} bytes)", index_path.display(), js_text.len()); + log::info!(" wrote {} ({} bytes)", index_path.display(), js_text.len()); // Copy FFI companion file if let Some(ref js_src) = pm.js_source { let foreign_path = module_dir.join("foreign.js"); if let Err(e) = std::fs::write(&foreign_path, js_src) { - log::debug!(" failed to write {}: {}", foreign_path.display(), e); + log::info!(" failed to write {}: {}", foreign_path.display(), e); build_errors.push(BuildError::FileReadError { path: foreign_path, error: format!("Failed to write foreign JS: {e}"), }); continue; } - log::debug!(" copied foreign.js for {}", pm.module_name); + log::info!(" copied foreign.js for {}", pm.module_name); } codegen_count += 1; } - log::debug!( + log::info!( "Phase 6 complete: generated JS for {} modules in {:.2?}", codegen_count, phase_start.elapsed() ); } - log::debug!( + log::info!( "Build pipeline finished in {:.2?} ({} modules, {} errors)", pipeline_start.elapsed(), module_results.len(), diff --git a/src/interner.rs b/src/interner.rs index 408f209b..68188ac7 100644 --- a/src/interner.rs +++ b/src/interner.rs @@ -25,6 +25,61 @@ pub fn resolve(sym: Symbol) -> Option { with_interner(|interner| interner.resolve(sym).map(|s| s.to_string())) } +/// Resolve a symbol and pass the &str to a closure, avoiding String allocation. +pub fn with_resolved(sym: Symbol, f: impl FnOnce(&str) -> R) -> Option { + with_interner(|interner| interner.resolve(sym).map(f)) +} + +/// Intern a qualified name "module.name" in a single lock acquisition. +pub fn intern_qualified(module: Symbol, name: Symbol) -> Symbol { + with_interner(|interner| { + let m = interner.resolve(module).unwrap_or(""); + let n = interner.resolve(name).unwrap_or(""); + let mut buf = String::with_capacity(m.len() + 1 + n.len()); + buf.push_str(m); + buf.push('.'); + buf.push_str(n); + interner.get_or_intern(&buf) + }) +} + +/// Intern a dot-joined module name from symbol parts in a single lock acquisition. +pub fn intern_module_name(parts: &[Symbol]) -> Symbol { + with_interner(|interner| { + let mut buf = String::new(); + for (i, &part) in parts.iter().enumerate() { + if i > 0 { + buf.push('.'); + } + if let Some(s) = interner.resolve(part) { + buf.push_str(s); + } + } + interner.get_or_intern(&buf) + }) +} + +/// Resolve a module name (dot-joined parts) to a String. +pub fn resolve_module_name(parts: &[Symbol]) -> String { + with_interner(|interner| { + let mut buf = String::new(); + for (i, &part) in parts.iter().enumerate() { + if i > 0 { + buf.push('.'); + } + if let Some(s) = interner.resolve(part) { + buf.push_str(s); + } + } + buf + }) +} + +/// Check if a symbol equals a string, without allocating. +pub fn symbol_eq(sym: Symbol, s: &str) -> bool { + with_interner(|interner| interner.resolve(sym).map_or(false, |r| r == s)) +} + /// Clear the interner (useful for testing) #[cfg(test)] pub fn clear() { diff --git a/src/lib.rs b/src/lib.rs index 8afaed6a..67c53580 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,9 @@ //! 2. Layout processor for handling indentation-sensitive syntax //! 3. LALRPOP-based parser with declarative grammar +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + pub mod span; pub mod lexer; pub mod cst; diff --git a/src/typechecker/check.rs b/src/typechecker/check.rs index c100ebac..480bb4a7 100644 --- a/src/typechecker/check.rs +++ b/src/typechecker/check.rs @@ -18,7 +18,7 @@ use crate::typechecker::infer::{ unwrap_binder, InferCtx, }; use crate::typechecker::registry::{ModuleExports, ModuleRegistry}; -use crate::typechecker::types::{Role, Scheme, Type}; +use crate::typechecker::types::{Role, Scheme, TyVarId, Type}; /// Wrap a bare Symbol as an unqualified QualifiedIdent. Only for local identifier, not for imports #[inline] @@ -1239,13 +1239,13 @@ fn prim_exports_inner() -> ModuleExports { /// Check if a CST ModuleName matches "Prim". pub(super) fn is_prim_module(module_name: &crate::cst::ModuleName) -> bool { module_name.parts.len() == 1 - && crate::interner::resolve(module_name.parts[0]).unwrap_or_default() == "Prim" + && crate::interner::symbol_eq(module_name.parts[0], "Prim") } /// Check if a CST ModuleName is a Prim submodule (e.g. Prim.Coerce, Prim.Row). pub(super) fn is_prim_submodule(module_name: &crate::cst::ModuleName) -> bool { module_name.parts.len() >= 2 - && crate::interner::resolve(module_name.parts[0]).unwrap_or_default() == "Prim" + && crate::interner::symbol_eq(module_name.parts[0], "Prim") } /// Build exports for Prim submodules (Prim.Coerce, Prim.Row, Prim.RowList, etc.). @@ -7387,6 +7387,21 @@ pub fn check_module(module: &Module, registry: &ModuleRegistry) -> CheckResult { 0, &mut expanding, ); + // Replace any remaining unsolved Unif vars with fresh named type variables. + // These can occur for unsolved row tails in open records (e.g. `{ x :: Int | ?331 }`) + // that weren't generalized because they were also free in the environment. + // If left as Unif, they cause panics in importing modules whose UnifyState + // has fewer entries. + let mut unif_to_var: HashMap = HashMap::new(); + collect_unif_var_ids(&scheme.ty, &mut unif_to_var); + if !unif_to_var.is_empty() { + scheme.ty = replace_unif_with_vars(&scheme.ty, &unif_to_var); + for var_name in unif_to_var.values() { + if !scheme.forall_vars.contains(var_name) { + scheme.forall_vars.push(*var_name); + } + } + } } // Build origin maps: all locally-defined names have origin = this module @@ -7488,7 +7503,20 @@ pub fn check_module(module: &Module, registry: &ModuleRegistry) -> CheckResult { for (_op, target) in &module_exports.value_operator_targets.clone() { if !module_exports.values.contains_key(target) { if let Some(scheme) = env.lookup(target.name) { - module_exports.values.insert(*target, scheme.clone()); + let mut scheme = scheme.clone(); + scheme.ty = ctx.state.zonk(scheme.ty); + // Replace any remaining Unif vars + let mut unif_to_var: HashMap = HashMap::new(); + collect_unif_var_ids(&scheme.ty, &mut unif_to_var); + if !unif_to_var.is_empty() { + scheme.ty = replace_unif_with_vars(&scheme.ty, &unif_to_var); + for var_name in unif_to_var.values() { + if !scheme.forall_vars.contains(var_name) { + scheme.forall_vars.push(*var_name); + } + } + } + module_exports.values.insert(*target, scheme); } } if !module_exports.ctor_details.contains_key(target) { @@ -7613,6 +7641,63 @@ pub fn check_module(module: &Module, registry: &ModuleRegistry) -> CheckResult { } } +/// Collect all Type::Unif IDs in a type, assigning each a fresh named type variable. +fn collect_unif_var_ids(ty: &Type, map: &mut HashMap) { + match ty { + Type::Unif(id) => { + map.entry(*id).or_insert_with(|| { + crate::interner::intern(&format!("$r{}", id.0)) + }); + } + Type::Fun(a, b) => { + collect_unif_var_ids(a, map); + collect_unif_var_ids(b, map); + } + Type::App(f, a) => { + collect_unif_var_ids(f, map); + collect_unif_var_ids(a, map); + } + Type::Forall(_, body) => collect_unif_var_ids(body, map), + Type::Record(fields, tail) => { + for (_, t) in fields { + collect_unif_var_ids(t, map); + } + if let Some(t) = tail { + collect_unif_var_ids(t, map); + } + } + _ => {} + } +} + +/// Replace all Type::Unif with Type::Var according to the given mapping. +fn replace_unif_with_vars(ty: &Type, map: &HashMap) -> Type { + match ty { + Type::Unif(id) => { + if let Some(&name) = map.get(id) { + Type::Var(name) + } else { + ty.clone() + } + } + Type::Fun(a, b) => { + Type::fun(replace_unif_with_vars(a, map), replace_unif_with_vars(b, map)) + } + Type::App(f, a) => { + Type::app(replace_unif_with_vars(f, map), replace_unif_with_vars(a, map)) + } + Type::Forall(vars, body) => { + Type::Forall(vars.clone(), Box::new(replace_unif_with_vars(body, map))) + } + Type::Record(fields, tail) => { + let fields = fields.iter().map(|(l, t)| (*l, replace_unif_with_vars(t, map))).collect(); + let tail = tail.as_ref().map(|t| Box::new(replace_unif_with_vars(t, map))); + Type::Record(fields, tail) + } + _ => ty.clone(), + } +} + /// Check if a constructor field type is directly a partially applied type synonym. /// Only checks the outermost type expression (counts args at the top-level App chain). /// Nested partial applications (e.g. `F ((~>) Array)`) are left for the kind checker @@ -7662,9 +7747,7 @@ fn check_field_partially_applied_synonym( /// Create a qualified symbol by combining a module alias with a name. fn qualified_symbol(module: Symbol, name: Symbol) -> Symbol { - let mod_str = crate::interner::resolve(module).unwrap_or_default(); - let name_str = crate::interner::resolve(name).unwrap_or_default(); - crate::interner::intern(&format!("{}.{}", mod_str, name_str)) + crate::interner::intern_qualified(module, name) } /// Generalize unresolved Unif vars in a kind type into forall bindings. @@ -7790,12 +7873,7 @@ fn strip_kind_qualifiers(kind: &Type) -> Type { /// Convert a ModuleName to a single symbol (joining parts with '.'). fn module_name_to_symbol(module_name: &crate::cst::ModuleName) -> Symbol { - let parts: Vec = module_name - .parts - .iter() - .map(|p| crate::interner::resolve(*p).unwrap_or_default()) - .collect(); - crate::interner::intern(&parts.join(".")) + crate::interner::intern_module_name(&module_name.parts) } /// Optionally qualify a name: if qualifier is Some, prefix with "Q.", otherwise return as-is. diff --git a/src/typechecker/infer.rs b/src/typechecker/infer.rs index 6fc415da..8f3f298d 100644 --- a/src/typechecker/infer.rs +++ b/src/typechecker/infer.rs @@ -177,9 +177,7 @@ impl InferCtx { /// Create a qualified symbol by combining a module alias with a name. fn qualified_symbol(module: Symbol, name: Symbol) -> Symbol { - let mod_str = crate::interner::resolve(module).unwrap_or_default(); - let name_str = crate::interner::resolve(name).unwrap_or_default(); - crate::interner::intern(&format!("{}.{}", mod_str, name_str)) + crate::interner::intern_qualified(module, name) } /// Find the first occurrence of `Unif(target_id)` as the head of an App chain diff --git a/src/typechecker/mod.rs b/src/typechecker/mod.rs index 293f175d..89f9e9ee 100644 --- a/src/typechecker/mod.rs +++ b/src/typechecker/mod.rs @@ -93,7 +93,7 @@ pub fn check_module(module: &crate::cst::Module) -> CheckResult { /// Typecheck a full CST module with a registry, returning partial results and accumulated errors. /// Performs CST→AST conversion internally; returns conversion errors if any. pub fn check_module_with_registry(module: &crate::cst::Module, registry: &ModuleRegistry) -> CheckResult { - let (ast_module, convert_errors) = crate::ast::convert(module.clone(), registry); + let (ast_module, convert_errors) = crate::ast::convert(module, registry); if !convert_errors.is_empty() { return CheckResult { types: HashMap::new(), diff --git a/src/typechecker/resolve.rs b/src/typechecker/resolve.rs index d32fb326..ff904dd1 100644 --- a/src/typechecker/resolve.rs +++ b/src/typechecker/resolve.rs @@ -258,26 +258,19 @@ struct Resolver<'a> { // ===== Helpers ===== fn qualified_symbol(module: Symbol, name: Symbol) -> Symbol { - let mod_str = interner::resolve(module).unwrap_or_default(); - let name_str = interner::resolve(name).unwrap_or_default(); - interner::intern(&format!("{}.{}", mod_str, name_str)) + interner::intern_qualified(module, name) } fn is_prim_module(module: &crate::cst::ModuleName) -> bool { - module.parts.len() == 1 && interner::resolve(module.parts[0]).unwrap_or_default() == "Prim" + module.parts.len() == 1 && interner::symbol_eq(module.parts[0], "Prim") } fn is_prim_submodule(module: &crate::cst::ModuleName) -> bool { - module.parts.len() > 1 && interner::resolve(module.parts[0]).unwrap_or_default() == "Prim" + module.parts.len() > 1 && interner::symbol_eq(module.parts[0], "Prim") } fn module_name_to_symbol(module: &crate::cst::ModuleName) -> Symbol { - let parts: Vec = module - .parts - .iter() - .map(|s| interner::resolve(*s).unwrap_or_default().to_string()) - .collect(); - interner::intern(&parts.join(".")) + interner::intern_module_name(&module.parts) } fn maybe_qualify(name: Symbol, qualifier: Option) -> Symbol { diff --git a/src/typechecker/unify.rs b/src/typechecker/unify.rs index e6513021..4da077ae 100644 --- a/src/typechecker/unify.rs +++ b/src/typechecker/unify.rs @@ -228,6 +228,10 @@ impl UnifyState { fn zonk_ref(&mut self, ty: &Type) -> Option { match ty { Type::Unif(v) => { + // Guard against stale TyVarIds from another module's UnifyState + if (v.0 as usize) >= self.entries.len() { + return None; + } match self.probe(*v) { Some(solved) => Some(self.zonk(solved)), None => { @@ -1116,6 +1120,10 @@ impl UnifyState { fn collect_free_unif_vars(&mut self, ty: &Type, vars: &mut Vec) { match ty { Type::Unif(v) => { + // Guard against stale TyVarIds from another module's UnifyState + if (v.0 as usize) >= self.entries.len() { + return; + } match self.probe(*v) { Some(solved) => self.collect_free_unif_vars(&solved, vars), None => { diff --git a/tests/build.rs b/tests/build.rs index eee3f2e7..1729fbb0 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -4,6 +4,7 @@ //! build successfully through the full pipeline (parse + typecheck). use ntest_timeout::timeout; +use rayon::prelude::*; use purescript_fast_compiler::build::{ build_from_sources_with_js, build_from_sources_with_options, build_from_sources_with_registry, BuildError, BuildOptions, BuildResult, @@ -702,513 +703,6 @@ fn build_fixture_original_compiler_failing() { } -const MARIONETTE_REACT_BASIC_HOOKS_EXTRA_PACKAGES: &[&str] = &[ - "lists", - "ordered-collections", - "nullable", - "exceptions", - "parallel", - "transformers", - "datetime", - "aff", - "now", - "unsafe-reference", - "web-events", - "web-dom", - "web-file", - "web-storage", - "media-types", - "js-date", - "web-html", - "js-promise", - "aff-promise", - "react-basic", - "indexed-monad", - "react-basic-hooks", - "marionette", - "marionette-react-basic-hooks", -]; - -#[test] -#[timeout(20000)] -fn build_marionette_react_basic_hooks() { - let packages_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/packages"); - let registry = Arc::clone(&get_support_build().registry); - - let mut sources: Vec<(String, String)> = Vec::new(); - for &pkg in MARIONETTE_REACT_BASIC_HOOKS_EXTRA_PACKAGES { - let pkg_src = packages_dir.join(pkg).join("src"); - assert!(pkg_src.exists(), "Package '{}' not found at: {}", pkg, pkg_src.display()); - let mut files = Vec::new(); - collect_purs_files(&pkg_src, &mut files); - for f in files { - if let Ok(source) = std::fs::read_to_string(&f) { - sources.push((f.to_string_lossy().into_owned(), source)); - } - } - } - - eprintln!("Building marionette-react-basic-hooks ({} modules from {} extra packages)...", sources.len(), MARIONETTE_REACT_BASIC_HOOKS_EXTRA_PACKAGES.len()); - - let source_refs: Vec<(&str, &str)> = sources.iter().map(|(p, s)| (p.as_str(), s.as_str())).collect(); - let options = BuildOptions { module_timeout: Some(std::time::Duration::from_secs(3)), output_dir: None }; - let (result, _) = build_from_sources_with_options(&source_refs, &None, Some(registry), &options); - - let mut timeouts: Vec = Vec::new(); - let mut panics: Vec = Vec::new(); - let mut other_errors: Vec = Vec::new(); - for e in &result.build_errors { - match e { - BuildError::TypecheckTimeout { .. } => timeouts.push(format!(" {}", e)), - BuildError::TypecheckPanic { .. } => panics.push(format!(" {}", e)), - _ => other_errors.push(format!(" {}", e)), - } - } - - assert!(timeouts.is_empty(), "marionette-react-basic-hooks: {} modules timed out:\n{}", timeouts.len(), timeouts.join("\n")); - assert!(panics.is_empty(), "marionette-react-basic-hooks: modules panicked:\n{}", panics.join("\n")); - assert!(other_errors.is_empty(), "marionette-react-basic-hooks: build errors:\n{}", other_errors.join("\n")); - - let mut type_errors: Vec<(String, PathBuf, String)> = Vec::new(); - for m in &result.modules { - if !m.type_errors.is_empty() { - for e in &m.type_errors { - type_errors.push((m.module_name.clone(), m.path.clone(), e.to_string())); - } - } - } - - assert!( - type_errors.is_empty(), - "marionette-react-basic-hooks: {} modules have type errors:\n{}", - type_errors.len(), - type_errors.iter().map(|(m, p, e)| format!("{} ({}): {}", m, p.to_string_lossy(), e)).collect::>().join("\n") - ); -} - -const LITERALS_EXTRA_PACKAGES: &[&str] = &[ - "literals", -]; - -#[test] -#[timeout(20000)] -fn build_literals() { - let packages_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/packages"); - let registry = Arc::clone(&get_support_build().registry); - - let mut sources: Vec<(String, String)> = Vec::new(); - for &pkg in LITERALS_EXTRA_PACKAGES { - let pkg_src = packages_dir.join(pkg).join("src"); - assert!(pkg_src.exists(), "Package '{}' not found at: {}", pkg, pkg_src.display()); - let mut files = Vec::new(); - collect_purs_files(&pkg_src, &mut files); - for f in files { - if let Ok(source) = std::fs::read_to_string(&f) { - sources.push((f.to_string_lossy().into_owned(), source)); - } - } - } - - eprintln!("Building literals ({} modules from {} extra packages)...", sources.len(), LITERALS_EXTRA_PACKAGES.len()); - - let source_refs: Vec<(&str, &str)> = sources.iter().map(|(p, s)| (p.as_str(), s.as_str())).collect(); - let options = BuildOptions { module_timeout: Some(std::time::Duration::from_secs(3)), output_dir: None }; - let (result, _) = build_from_sources_with_options(&source_refs, &None, Some(registry), &options); - - let mut timeouts: Vec = Vec::new(); - let mut panics: Vec = Vec::new(); - let mut other_errors: Vec = Vec::new(); - for e in &result.build_errors { - match e { - BuildError::TypecheckTimeout { .. } => timeouts.push(format!(" {}", e)), - BuildError::TypecheckPanic { .. } => panics.push(format!(" {}", e)), - _ => other_errors.push(format!(" {}", e)), - } - } - - assert!(timeouts.is_empty(), "literals: {} modules timed out:\n{}", timeouts.len(), timeouts.join("\n")); - assert!(panics.is_empty(), "literals: modules panicked:\n{}", panics.join("\n")); - assert!(other_errors.is_empty(), "literals: build errors:\n{}", other_errors.join("\n")); - - let mut type_errors: Vec<(String, PathBuf, String)> = Vec::new(); - for m in &result.modules { - if !m.type_errors.is_empty() { - for e in &m.type_errors { - type_errors.push((m.module_name.clone(), m.path.clone(), e.to_string())); - } - } - } - - assert!( - type_errors.is_empty(), - "literals: {} modules have type errors:\n{}", - type_errors.len(), - type_errors.iter().map(|(m, p, e)| format!("{} ({}): {}", m, p.to_string_lossy(), e)).collect::>().join("\n") - ); -} - -/// Additional packages needed to build codec-json on top of SUPPORT_PACKAGES. -const CODEC_JSON_EXTRA_PACKAGES: &[&str] = &["codec", "variant", "codec-json"]; - -#[test] -#[timeout(10000)] -fn build_codec_json() { - let packages_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/packages"); - - // Build on top of the shared support registry - let registry = Arc::clone(&get_support_build().registry); - - // Collect sources from the extra packages needed for codec-json - let mut sources: Vec<(String, String)> = Vec::new(); - for &pkg in CODEC_JSON_EXTRA_PACKAGES { - let pkg_src = packages_dir.join(pkg).join("src"); - assert!( - pkg_src.exists(), - "Package '{}' not found at: {}", - pkg, - pkg_src.display() - ); - let mut files = Vec::new(); - collect_purs_files(&pkg_src, &mut files); - for f in files { - if let Ok(source) = std::fs::read_to_string(&f) { - sources.push((f.to_string_lossy().into_owned(), source)); - } - } - } - - eprintln!( - "Building codec-json ({} modules from {} extra packages)...", - sources.len(), - CODEC_JSON_EXTRA_PACKAGES.len() - ); - - let source_refs: Vec<(&str, &str)> = sources - .iter() - .map(|(p, s)| (p.as_str(), s.as_str())) - .collect(); - - let options = BuildOptions { - module_timeout: None, - output_dir: None, - }; - let (result, _) = - build_from_sources_with_options(&source_refs, &None, Some(registry), &options); - - // Separate timeouts from other build errors - let mut timeouts: Vec = Vec::new(); - let mut other_errors: Vec = Vec::new(); - for e in &result.build_errors { - match e { - BuildError::TypecheckTimeout { .. } => timeouts.push(format!(" {}", e)), - _ => other_errors.push(format!(" {}", e)), - } - } - - assert!( - timeouts.is_empty(), - "Modules timed out:\n{}", - timeouts.join("\n") - ); - - assert!( - other_errors.is_empty(), - "Build errors in codec-json:\n{}", - other_errors.join("\n") - ); - - let mut type_errors: Vec<(String, PathBuf, String)> = Vec::new(); - let mut fails = 0; - - for m in &result.modules { - if !m.type_errors.is_empty() { - fails += 1; - for e in &m.type_errors { - type_errors.push((m.module_name.clone(), m.path.clone(), e.to_string())); - } - } - } - - let type_errors_str: String = type_errors - .iter() - .map(|(m, p, e)| format!("{} ({}): {}", m, p.to_string_lossy(), e)) - .collect::>() - .join("\n"); - - assert!( - type_errors.is_empty(), - "codec-json: {}/{} modules have type errors:\n{}", - fails, - result.modules.len(), - type_errors_str - ); - - eprintln!( - "codec-json: {} modules typechecked, {} with errors", - result.modules.len(), - fails - ); -} - -/// Additional packages needed to build webb-aff-list on top of SUPPORT_PACKAGES. -const WEBB_AFF_LIST_EXTRA_PACKAGES: &[&str] = &[ - "aff", - "tailrec", - "monad-loops", - "debug", - "profunctor-lenses", - "webb-monad", - "webb-refer", - "webb-array", - "webb-mutex", - "webb-channel", - "webb-slot", - "webb-stateful", - "webb-thread", - "webb-aff-list", - "parallel", -]; - -#[test] -#[timeout(30000)] -fn build_webb_aff_list() { - let packages_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/packages"); - - // Build on top of the shared support registry - let registry = Arc::clone(&get_support_build().registry); - - // Collect sources from the extra packages needed for webb-aff-list - let mut sources: Vec<(String, String)> = Vec::new(); - for &pkg in WEBB_AFF_LIST_EXTRA_PACKAGES { - let pkg_src = packages_dir.join(pkg).join("src"); - assert!( - pkg_src.exists(), - "Package '{}' not found at: {}", - pkg, - pkg_src.display() - ); - let mut files = Vec::new(); - collect_purs_files(&pkg_src, &mut files); - for f in files { - if let Ok(source) = std::fs::read_to_string(&f) { - sources.push((f.to_string_lossy().into_owned(), source)); - } - } - } - - eprintln!( - "Building webb-aff-list ({} modules from {} extra packages)...", - sources.len(), - WEBB_AFF_LIST_EXTRA_PACKAGES.len() - ); - - let source_refs: Vec<(&str, &str)> = sources - .iter() - .map(|(p, s)| (p.as_str(), s.as_str())) - .collect(); - - let options = BuildOptions { - module_timeout: Some(std::time::Duration::from_secs(10)), - output_dir: None, - }; - let (result, _) = - build_from_sources_with_options(&source_refs, &None, Some(registry), &options); - - // Separate timeouts/panics from other build errors - let mut timeouts: Vec = Vec::new(); - let mut panics: Vec = Vec::new(); - let mut other_errors: Vec = Vec::new(); - for e in &result.build_errors { - match e { - BuildError::TypecheckTimeout { .. } => timeouts.push(format!(" {}", e)), - BuildError::TypecheckPanic { .. } => panics.push(format!(" {}", e)), - _ => other_errors.push(format!(" {}", e)), - } - } - - assert!( - timeouts.is_empty(), - "Modules exceeded typecheck timeout:\n{}", - timeouts.join("\n") - ); - - assert!( - panics.is_empty(), - "Modules panicked:\n{}", - panics.join("\n") - ); - - assert!( - other_errors.is_empty(), - "Build errors:\n{}", - other_errors.join("\n") - ); - - // Only check type errors for Webb.AffList.* modules (the target package) - let mut type_errors: Vec<(String, PathBuf, String)> = Vec::new(); - let mut fails = 0; - - for m in &result.modules { - if !m.type_errors.is_empty() { - fails += 1; - for e in &m.type_errors { - type_errors.push((m.module_name.clone(), m.path.clone(), e.to_string())); - } - } - } - - let type_errors_str: String = type_errors - .iter() - .map(|(m, p, e)| format!("{} ({}): {}", m, p.to_string_lossy(), e)) - .collect::>() - .join("\n"); - - assert!( - type_errors.is_empty(), - "type errors found. {}/{} modules have type errors:\n{}", - fails, - result.modules.len(), - type_errors_str - ); - - assert!( - type_errors.is_empty(), - "webb-aff-list: {}/{} modules have type errors:\n{}", - fails, - result.modules.len(), - type_errors_str - ); -} - -/// Additional packages needed to build halogen on top of SUPPORT_PACKAGES. -const HALOGEN_EXTRA_PACKAGES: &[&str] = &[ - "aff", - "media-types", - "js-date", - "js-promise", - "unsafe-reference", - "web-events", - "web-dom", - "web-storage", - "web-file", - "web-html", - "web-uievents", - "web-touchevents", - "web-pointerevents", - "web-clipboard", - "dom-indexed", - "nullable", - "parallel", - "freeap", - "fork", - "halogen-vdom", - "halogen-subscriptions", - "halogen", -]; - -#[test] -#[ignore] // 6/228 modules have type errors (ExportConflict, PartiallyAppliedSynonym, UnificationError) -#[timeout(30000)] -fn build_halogen() { - let packages_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/packages"); - - // Build on top of the shared support registry - let registry = Arc::clone(&get_support_build().registry); - - // Collect sources from the extra packages needed for halogen - let mut sources: Vec<(String, String)> = Vec::new(); - for &pkg in HALOGEN_EXTRA_PACKAGES { - let pkg_src = packages_dir.join(pkg).join("src"); - assert!( - pkg_src.exists(), - "Package '{}' not found at: {}", - pkg, - pkg_src.display() - ); - let mut files = Vec::new(); - collect_purs_files(&pkg_src, &mut files); - for f in files { - if let Ok(source) = std::fs::read_to_string(&f) { - sources.push((f.to_string_lossy().into_owned(), source)); - } - } - } - - eprintln!( - "Building halogen ({} modules from {} extra packages)...", - sources.len(), - HALOGEN_EXTRA_PACKAGES.len() - ); - - let source_refs: Vec<(&str, &str)> = sources - .iter() - .map(|(p, s)| (p.as_str(), s.as_str())) - .collect(); - - let options = BuildOptions { - module_timeout: Some(std::time::Duration::from_secs(5)), - output_dir: None, - }; - let (result, _) = - build_from_sources_with_options(&source_refs, &None, Some(registry), &options); - - // Separate timeouts/panics from other build errors - let mut timeouts: Vec = Vec::new(); - let mut panics: Vec = Vec::new(); - let mut other_errors: Vec = Vec::new(); - for e in &result.build_errors { - match e { - BuildError::TypecheckTimeout { .. } => timeouts.push(format!(" {}", e)), - BuildError::TypecheckPanic { .. } => panics.push(format!(" {}", e)), - _ => other_errors.push(format!(" {}", e)), - } - } - - assert!( - timeouts.is_empty(), - "Modules exceeded typecheck timeout:\n{}", - timeouts.join("\n") - ); - - assert!( - panics.is_empty(), - "Modules panicked:\n{}", - panics.join("\n") - ); - - assert!( - other_errors.is_empty(), - "Build errors:\n{}", - other_errors.join("\n") - ); - - let mut type_errors: Vec<(String, PathBuf, String)> = Vec::new(); - let mut fails = 0; - - for m in &result.modules { - if !m.type_errors.is_empty() { - fails += 1; - for e in &m.type_errors { - type_errors.push((m.module_name.clone(), m.path.clone(), e.to_string())); - } - } - } - - let type_errors_str: String = type_errors - .iter() - .map(|(m, p, e)| format!("{} ({}): {}", m, p.to_string_lossy(), e)) - .collect::>() - .join("\n"); - - assert!( - type_errors.is_empty(), - "halogen: {}/{} modules have type errors:\n{}", - fails, - result.modules.len(), - type_errors_str - ); -} - #[test] #[ignore] @@ -1395,3 +889,164 @@ fn build_all_packages() { result.modules.len(), ); } + + +// run with: RUST_LOG=debug cargo test --test build build_from_sources -- --exact --ignored +// for release (RECOMMENDED): cargo test --release --test build build_from_sources -- --exact --ignored +#[test] +#[ignore] // This is for manually invocation with +#[timeout(600000)] // 10 min timeout +fn build_from_sources() { + + let _ = env_logger::try_init(); + let started = std::time::Instant::now(); + + let application_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("CARGO_MANIFEST_DIR has no parent") + .join("application"); + assert!( + application_dir.exists(), + "OA application directory not found at: {}", + application_dir.display() + ); + + let sources_txt = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/sources.txt"); + let patterns = std::fs::read_to_string(&sources_txt).expect("Failed to read sources.txt"); + + let timeout_secs: u64 = std::env::var("MODULE_TIMEOUT_SECS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(30); + + let options = BuildOptions { + module_timeout: Some(std::time::Duration::from_secs(timeout_secs)), + output_dir: None + }; + + // Step 1: Glob all patterns to collect file paths + let step = std::time::Instant::now(); + let mut all_paths: Vec = Vec::new(); + for line in patterns.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + let full_pattern = application_dir.join(line); + let pattern_str = full_pattern.to_string_lossy(); + + let matches: Vec<_> = glob::glob(&pattern_str) + .unwrap_or_else(|e| panic!("Invalid glob pattern '{}': {}", pattern_str, e)) + .filter_map(|entry| entry.ok()) + .collect(); + + all_paths.extend(matches); + } + eprintln!( + " glob: {} files in {:.2?}", + all_paths.len(), + step.elapsed() + ); + + // Step 2: Read all files in parallel + let step = std::time::Instant::now(); + let all_sources: Vec<(String, String)> = all_paths + .into_par_iter() + .filter_map(|path| { + std::fs::read_to_string(&path) + .ok() + .map(|source| (path.to_string_lossy().into_owned(), source)) + }) + .collect(); + eprintln!( + " read: {} files in {:.2?}", + all_sources.len(), + step.elapsed() + ); + + eprintln!( + "Discovered {} modules from sources.txt in {:.2?}", + all_sources.len(), + started.elapsed() + ); + + let source_refs: Vec<(&str, &str)> = all_sources + .iter() + .map(|(p, s)| (p.as_str(), s.as_str())) + .collect(); + + let (result, _) = build_from_sources_with_options(&source_refs, &None, None, &options); + + eprintln!("Build completed in {:.2?}", started.elapsed()); + + let mut timeouts: Vec = Vec::new(); + let mut panics: Vec = Vec::new(); + let mut other_errors: Vec = Vec::new(); + for e in &result.build_errors { + match e { + BuildError::TypecheckTimeout { .. } => { + timeouts.push(format!(" {}", e)); + } + BuildError::TypecheckPanic { .. } => { + panics.push(format!(" {}", e)); + } + _ => { + other_errors.push(format!(" {}", e)); + } + } + } + + let mut type_errors: Vec<(String, PathBuf, String)> = Vec::new(); + let mut fails = 0; + + for m in &result.modules { + if !m.type_errors.is_empty() { + eprintln!("Errors in {}, {}", m.path.to_string_lossy(), m.module_name); + fails += 1; + for e in &m.type_errors { + eprintln!(" {}", e); + type_errors.push((m.module_name.clone(), m.path.clone(), e.to_string())); + } + } + } + + let clean = result.modules.len() - fails; + eprintln!( + "Results: {} clean, {} with type errors, {} timeouts, {} panics out of {} modules", + clean, + fails, + timeouts.len(), + panics.len(), + result.modules.len() + ); + + // Error distribution + let mut error_counts: std::collections::HashMap = + std::collections::HashMap::new(); + for m in &result.modules { + for e in &m.type_errors { + *error_counts.entry(e.code()).or_default() += 1; + } + } + if fails > 0 { + let mut sorted_counts: Vec<_> = error_counts.iter().collect(); + sorted_counts.sort_by(|a, b| b.1.cmp(a.1)); + eprintln!("\nError distribution ({} modules with errors):", fails); + for (code, count) in &sorted_counts { + eprintln!(" {:>4} {}", count, code); + } + } + + assert!( + timeouts.is_empty(), + "Modules exceeded deadline:\n{}", + timeouts.join("\n") + ); + + assert!( + panics.is_empty(), + "Modules panicked during typechecking:\n{}", + panics.join("\n") + ); +}