From 865b55692a8172a496ca2fc232b06f76b1432248 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 26 Mar 2026 12:27:55 -0500 Subject: [PATCH 1/2] refactor - Implemented garbage detection in the AST to identify `Expr::Garbage` nodes. - Introduced a new module for garbage detection with functions to check for garbage in blocks, pipelines, redirections, and expressions. - Added parse-error repair utilities to handle common issues like compact `if/else` patterns and missing commas in records. - Enhanced the formatting module to utilize garbage detection and repair strategies during formatting. - Updated tests to cover new functionality and ensure idempotency in formatting. --- Cargo.toml | 2 +- src/formatting.rs | 2461 --------------------------------- src/formatting/blocks.rs | 293 ++++ src/formatting/calls.rs | 451 ++++++ src/formatting/collections.rs | 330 +++++ src/formatting/comments.rs | 132 ++ src/formatting/engine.rs | 65 + src/formatting/expressions.rs | 316 +++++ src/formatting/garbage.rs | 192 +++ src/formatting/mod.rs | 455 ++++++ src/formatting/repair.rs | 449 ++++++ tests/ground_truth.rs | 727 ++-------- 12 files changed, 2780 insertions(+), 3093 deletions(-) delete mode 100644 src/formatting.rs create mode 100644 src/formatting/blocks.rs create mode 100644 src/formatting/calls.rs create mode 100644 src/formatting/collections.rs create mode 100644 src/formatting/comments.rs create mode 100644 src/formatting/engine.rs create mode 100644 src/formatting/expressions.rs create mode 100644 src/formatting/garbage.rs create mode 100644 src/formatting/mod.rs create mode 100644 src/formatting/repair.rs diff --git a/Cargo.toml b/Cargo.toml index 88bfe7c..c7b9581 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "nufmt" version = "0.1.0" -edition = "2018" +edition = "2021" authors = ["The NuShell Contributors"] license = "MIT" description = "Formats nushell extremely fast" diff --git a/src/formatting.rs b/src/formatting.rs deleted file mode 100644 index 5ccb61f..0000000 --- a/src/formatting.rs +++ /dev/null @@ -1,2461 +0,0 @@ -//! In this module occurs most of the magic in `nufmt`. -//! -//! It walks the Nushell AST and emits properly formatted code. - -use crate::config::Config; -use crate::format_error::FormatError; -use log::{debug, trace}; -use nu_parser::parse; -use nu_protocol::{ - ast::{ - Argument, Block, CellPath, Expr, Expression, ExternalArgument, FullCellPath, ListItem, - MatchPattern, PathMember, Pattern, Pipeline, PipelineElement, PipelineRedirection, - RecordItem, RedirectionTarget, - }, - engine::{Call, Command, CommandType as NuCommandType, EngineState, Stack, StateWorkingSet}, - Category, Completion, ParseError, PipelineData, ShellError, Signature, Span, SyntaxShape, -}; -use nu_utils::NuCow; - -/// Commands that format their block arguments in a special way -const BLOCK_COMMANDS: &[&str] = &["for", "while", "loop", "module"]; -const CONDITIONAL_COMMANDS: &[&str] = &["if", "try"]; -const DEF_COMMANDS: &[&str] = &["def", "def-env", "export def"]; -const EXTERN_COMMANDS: &[&str] = &["extern", "export extern"]; -const LET_COMMANDS: &[&str] = &["let", "let-env", "mut", "const"]; - -#[derive(Clone)] -struct WhereKeyword; - -impl Command for WhereKeyword { - fn name(&self) -> &str { - "where" - } - - fn signature(&self) -> Signature { - Signature::build("where") - .required( - "condition", - SyntaxShape::RowCondition, - "filter row condition or closure", - ) - .category(Category::Filters) - } - - fn description(&self) -> &str { - "filter values of an input list based on a condition" - } - - fn command_type(&self) -> NuCommandType { - NuCommandType::Keyword - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - _call: &Call, - input: PipelineData, - ) -> Result { - Ok(input) - } -} - -/// Get the default engine state with built-in commands -fn get_engine_state() -> EngineState { - let mut engine_state = nu_cmd_lang::create_default_context(); - let delta = { - let mut working_set = StateWorkingSet::new(&engine_state); - working_set.add_decl(Box::new(WhereKeyword)); - working_set.render() - }; - - if let Err(err) = engine_state.merge_delta(delta) { - debug!("failed to merge formatter context: {err:?}"); - } - - engine_state -} - -/// The main formatter context that tracks indentation and other state -struct Formatter<'a> { - /// The original source bytes - source: &'a [u8], - /// The working set for looking up blocks and other data - working_set: &'a StateWorkingSet<'a>, - /// Configuration options - config: &'a Config, - /// Current indentation level - indent_level: usize, - /// Output buffer - output: Vec, - /// Track if we're at the start of a line (for indentation) - at_line_start: bool, - /// Comments extracted from source, indexed by their end position - comments: Vec<(Span, Vec)>, - /// Track which comments have been written - written_comments: Vec, - /// Current position in source being processed - last_pos: usize, - /// Track nested conditional argument formatting to preserve explicit parens - conditional_context_depth: usize, - /// Force preserving explicit parens for subexpressions inside precedence-sensitive contexts - preserve_subexpr_parens_depth: usize, - /// Allow compact inline record style used for repaired malformed records - allow_compact_recovered_record_style: bool, -} - -impl<'a> Formatter<'a> { - fn new( - source: &'a [u8], - working_set: &'a StateWorkingSet<'a>, - config: &'a Config, - allow_compact_recovered_record_style: bool, - ) -> Self { - let comments = extract_comments(source); - let written_comments = vec![false; comments.len()]; - Self { - source, - working_set, - config, - indent_level: 0, - output: Vec::new(), - at_line_start: true, - comments, - written_comments, - last_pos: 0, - conditional_context_depth: 0, - preserve_subexpr_parens_depth: 0, - allow_compact_recovered_record_style, - } - } - - // ───────────────────────────────────────────────────────────────────────────── - // Basic output methods - // ───────────────────────────────────────────────────────────────────────────── - - /// Write indentation if at start of line - fn write_indent(&mut self) { - if self.at_line_start { - let indent = " ".repeat(self.config.indent * self.indent_level); - self.output.extend(indent.as_bytes()); - self.at_line_start = false; - } - } - - /// Write a string to output - fn write(&mut self, s: &str) { - self.write_indent(); - self.output.extend(s.as_bytes()); - } - - /// Write bytes to output - fn write_bytes(&mut self, bytes: &[u8]) { - self.write_indent(); - self.output.extend(bytes); - } - - /// Write a newline - fn newline(&mut self) { - self.output.push(b'\n'); - self.at_line_start = true; - } - - /// Write a space if not at line start and not already following whitespace/opener - fn space(&mut self) { - if !self.at_line_start && !self.output.is_empty() { - if let Some(&last) = self.output.last() { - if !matches!(last, b' ' | b'\n' | b'\t' | b'(' | b'[') { - self.output.push(b' '); - } - } - } - } - - // ───────────────────────────────────────────────────────────────────────────── - // Span and source helpers - // ───────────────────────────────────────────────────────────────────────────── - - /// Get the source content for a span (returns owned Vec to avoid borrow issues) - fn get_span_content(&self, span: Span) -> Vec { - self.source[span.start..span.end].to_vec() - } - - /// Write the original source content for a span - fn write_span(&mut self, span: Span) { - let content = self.source[span.start..span.end].to_vec(); - self.write_bytes(&content); - } - - /// Write the original source content for an expression's span - fn write_expr_span(&mut self, expr: &Expression) { - self.write_span(expr.span); - } - - /// Write an attribute expression while preserving a leading '@' sigil. - fn write_attribute_span(&mut self, expr: &Expression) { - let mut start = expr.span.start; - - if start > 0 && self.source[start - 1] == b'@' { - start -= 1; - } - - self.write_span(Span { - start, - end: expr.span.end, - }); - } - - // ───────────────────────────────────────────────────────────────────────────── - // Comment handling - // ───────────────────────────────────────────────────────────────────────────── - - /// Check if there are any comments between `last_pos` and the given position - fn write_comments_before(&mut self, pos: usize) { - let mut comments_to_write: Vec<_> = self - .comments - .iter() - .enumerate() - .filter(|(i, (span, _))| { - !self.written_comments[*i] && span.start >= self.last_pos && span.end <= pos - }) - .map(|(i, (span, content))| (i, span.start, content.clone())) - .collect(); - - comments_to_write.sort_by_key(|(_, start, _)| *start); - - for (idx, _, content) in comments_to_write { - self.written_comments[idx] = true; - if !self.at_line_start { - if let Some(&last) = self.output.last() { - if last != b'\n' { - self.newline(); - } - } - } - self.write_indent(); - self.output.extend(&content); - self.newline(); - } - } - - /// Check for inline comment after a position (on the same line) - fn write_inline_comment(&mut self, after_pos: usize) { - let line_end = self.source[after_pos..] - .iter() - .position(|&b| b == b'\n') - .map_or(self.source.len(), |p| after_pos + p); - - let found = self - .comments - .iter() - .enumerate() - .find(|(i, (span, _))| { - !self.written_comments[*i] && span.start >= after_pos && span.start < line_end - }) - .map(|(i, (span, content))| (i, *span, content.clone())); - - if let Some((idx, span, content)) = found { - self.written_comments[idx] = true; - self.write(" "); - self.output.extend(&content); - self.last_pos = span.end; - } - } - - // ───────────────────────────────────────────────────────────────────────────── - // Block and pipeline formatting - // ───────────────────────────────────────────────────────────────────────────── - - /// Format a block - fn format_block(&mut self, block: &Block) { - let num_pipelines = block.pipelines.len(); - for (i, pipeline) in block.pipelines.iter().enumerate() { - if let Some(first_elem) = pipeline.elements.first() { - self.write_comments_before(first_elem.expr.span.start); - } - - self.format_pipeline(pipeline); - - if let Some(last_elem) = pipeline.elements.last() { - let end_pos = self.get_element_end_pos(last_elem); - self.write_inline_comment(end_pos); - self.last_pos = end_pos; - } - - if i < num_pipelines - 1 { - let separator_newlines = if self.indent_level == 0 && self.config.margin > 1 { - self.config.margin.saturating_add(1) - } else { - 1 - }; - - for _ in 0..separator_newlines { - self.newline(); - } - } - } - } - - /// Get the end position of a pipeline element, including any redirections - fn get_element_end_pos(&self, element: &PipelineElement) -> usize { - element - .redirection - .as_ref() - .map_or(element.expr.span.end, |redir| match redir { - PipelineRedirection::Single { target, .. } => target.span().end, - PipelineRedirection::Separate { out, err } => out.span().end.max(err.span().end), - }) - } - - /// Format a pipeline - fn format_pipeline(&mut self, pipeline: &Pipeline) { - for (i, element) in pipeline.elements.iter().enumerate() { - if i > 0 { - self.write(" | "); - } - self.format_pipeline_element(element); - } - } - - /// Format a pipeline element - fn format_pipeline_element(&mut self, element: &PipelineElement) { - self.format_expression(&element.expr); - if let Some(ref redirection) = element.redirection { - self.format_redirection(redirection); - } - } - - /// Format a redirection - fn format_redirection(&mut self, redir: &PipelineRedirection) { - match redir { - PipelineRedirection::Single { target, .. } => { - self.space(); - self.format_redirection_target(target); - } - PipelineRedirection::Separate { out, err } => { - self.space(); - self.format_redirection_target(out); - self.space(); - self.format_redirection_target(err); - } - } - } - - /// Format a redirection target - fn format_redirection_target(&mut self, target: &RedirectionTarget) { - match target { - RedirectionTarget::File { expr, span, .. } => { - self.write_span(*span); - self.space(); - self.format_expression(expr); - } - RedirectionTarget::Pipe { span } => { - self.write_span(*span); - } - } - } - - // ───────────────────────────────────────────────────────────────────────────── - // Expression formatting - // ───────────────────────────────────────────────────────────────────────────── - - /// Format an expression - fn format_expression(&mut self, expr: &Expression) { - match &expr.expr { - // Literals and simple values - preserve original - Expr::Int(_) - | Expr::Float(_) - | Expr::Bool(_) - | Expr::Nothing - | Expr::DateTime(_) - | Expr::String(_) - | Expr::RawString(_) - | Expr::Binary(_) - | Expr::Filepath(_, _) - | Expr::Directory(_, _) - | Expr::GlobPattern(_, _) - | Expr::Var(_) - | Expr::VarDecl(_) - | Expr::Operator(_) - | Expr::StringInterpolation(_) - | Expr::GlobInterpolation(_, _) - | Expr::ImportPattern(_) - | Expr::Overlay(_) - | Expr::Garbage => { - self.write_expr_span(expr); - } - - Expr::Signature(sig) => { - let has_comments = self - .comments - .iter() - .any(|(span, _)| span.start >= expr.span.start && span.end <= expr.span.end); - if has_comments { - self.write_expr_span(expr); - self.last_pos = expr.span.end; - // Mark comments within the span as written - for (i, (span, _)) in self.comments.iter().enumerate() { - if span.start >= expr.span.start && span.end <= expr.span.end { - self.written_comments[i] = true; - } - } - } else { - self.format_signature(sig); - } - } - - Expr::Call(call) => self.format_call(call), - Expr::ExternalCall(head, args) => self.format_external_call(head, args), - Expr::BinaryOp(lhs, op, rhs) => self.format_binary_op(lhs, op, rhs), - Expr::UnaryNot(inner) => { - self.write("not "); - self.format_expression(inner); - } - - Expr::Block(block_id) => { - self.format_block_expression(*block_id, expr.span, false); - } - Expr::Closure(block_id) => { - self.format_closure_expression(*block_id, expr.span); - } - Expr::Subexpression(block_id) => { - self.format_subexpression(*block_id, expr.span); - } - - Expr::List(items) => self.format_list(items), - Expr::Record(items) => self.format_record(items, expr.span), - Expr::Table(table) => self.format_table(&table.columns, &table.rows), - - Expr::Range(range) => self.format_range(range), - Expr::CellPath(cell_path) => self.format_cell_path(cell_path), - Expr::FullCellPath(full_path) => { - self.format_full_cell_path(full_path); - } - - Expr::RowCondition(_) => self.write_expr_span(expr), - - Expr::Keyword(keyword) => { - self.write_span(keyword.span); - self.space(); - self.format_block_or_expr(&keyword.expr); - } - - Expr::ValueWithUnit(_) => { - // Preserve original span since the parser normalizes units - // (e.g., 1kb becomes 1000b internally) - self.write_expr_span(expr); - } - - Expr::MatchBlock(matches) => self.format_match_block(matches), - - Expr::Collect(_, inner) => self.format_expression(inner), - - Expr::AttributeBlock(attr_block) => { - for attr in &attr_block.attributes { - self.write_attribute_span(&attr.expr); - self.newline(); - } - self.format_expression(&attr_block.item); - } - } - } - - /// Format a call expression - fn format_call(&mut self, call: &nu_protocol::ast::Call) { - let decl = self.working_set.get_decl(call.decl_id); - let decl_name = decl.name(); - - // Determine command type - let cmd_type = Self::classify_command(decl_name); - - // Write command name - if call.head.end != 0 { - self.write_span(call.head); - } - - if matches!(cmd_type, CommandType::Let) { - self.format_let_call(call); - return; - } - - // Format arguments based on command type - for arg in &call.arguments { - self.format_call_argument(arg, &cmd_type); - } - } - - /// Format let/mut/const calls while preserving explicit type annotations. - fn format_let_call(&mut self, call: &nu_protocol::ast::Call) { - let positional: Vec<&Expression> = call - .arguments - .iter() - .filter_map(|arg| match arg { - Argument::Positional(expr) | Argument::Unknown(expr) => Some(expr), - _ => None, - }) - .collect(); - - if positional.is_empty() { - for arg in &call.arguments { - self.format_call_argument(arg, &CommandType::Let); - } - return; - } - - self.space(); - self.format_expression(positional[0]); - - if let Some(rhs) = positional.get(1) { - let lhs = positional[0]; - let between = if lhs.span.end <= rhs.span.start { - &self.source[lhs.span.end..rhs.span.start] - } else { - &[] - }; - - if let Some(eq_pos) = between.iter().position(|b| *b == b'=') { - let annotation = between[..eq_pos].trim_ascii(); - if !annotation.is_empty() { - if !annotation.starts_with(b":") { - self.space(); - } - self.write_bytes(annotation); - } - } - - self.write(" = "); - - match &rhs.expr { - Expr::Block(block_id) | Expr::Subexpression(block_id) => { - let block = self.working_set.get_block(*block_id); - self.format_block(block); - } - _ => self.format_expression(rhs), - } - - for extra in positional.iter().skip(2) { - self.space(); - self.format_expression(extra); - } - } - - for arg in &call.arguments { - if !matches!(arg, Argument::Positional(_) | Argument::Unknown(_)) { - self.format_call_argument(arg, &CommandType::Let); - } - } - } - - /// Classify a command by its formatting requirements - fn classify_command(name: &str) -> CommandType { - if DEF_COMMANDS.contains(&name) { - CommandType::Def - } else if EXTERN_COMMANDS.contains(&name) { - CommandType::Extern - } else if CONDITIONAL_COMMANDS.contains(&name) { - CommandType::Conditional - } else if LET_COMMANDS.contains(&name) { - CommandType::Let - } else if BLOCK_COMMANDS.contains(&name) { - CommandType::Block - } else { - CommandType::Regular - } - } - - /// Format a call argument based on command type - fn format_call_argument(&mut self, arg: &Argument, cmd_type: &CommandType) { - match arg { - Argument::Positional(positional) | Argument::Unknown(positional) => { - self.format_positional_argument(positional, cmd_type); - } - Argument::Named(named) => { - self.space(); - if named.0.span.end != 0 { - self.write_span(named.0.span); - } - if let Some(short) = &named.1 { - self.write_span(short.span); - } - if let Some(value) = &named.2 { - let separator_start = named - .1 - .as_ref() - .map_or(named.0.span.end, |short| short.span.end); - let has_equals = separator_start <= value.span.start - && self.source[separator_start..value.span.start].contains(&b'='); - - if has_equals { - self.write("="); - } else { - self.space(); - } - self.format_expression(value); - } - } - Argument::Spread(spread_expr) => { - self.space(); - self.write("..."); - self.format_expression(spread_expr); - } - } - } - - /// Format a positional argument based on command type - fn format_positional_argument(&mut self, positional: &Expression, cmd_type: &CommandType) { - self.space(); - match cmd_type { - CommandType::Def => self.format_def_argument(positional), - CommandType::Extern => self.format_extern_argument(positional), - CommandType::Conditional => { - self.conditional_context_depth += 1; - self.format_block_or_expr(positional); - self.conditional_context_depth -= 1; - } - CommandType::Block => { - self.format_block_or_expr(positional); - } - CommandType::Let => self.format_let_argument(positional), - CommandType::Regular => self.format_expression(positional), - } - } - - /// Format an argument for def commands - fn format_def_argument(&mut self, positional: &Expression) { - match &positional.expr { - Expr::String(_) => self.format_expression(positional), - Expr::Signature(sig) => { - let has_comments = self.comments.iter().any(|(span, _)| { - span.start >= positional.span.start && span.end <= positional.span.end - }); - if has_comments { - self.write_expr_span(positional); - // Mark comments within the span as written - for (i, (span, _)) in self.comments.iter().enumerate() { - if span.start >= positional.span.start && span.end <= positional.span.end { - self.written_comments[i] = true; - } - } - } else { - self.format_signature(sig); - } - } - Expr::Closure(block_id) | Expr::Block(block_id) => { - self.format_block_expression(*block_id, positional.span, true); - } - _ => self.format_expression(positional), - } - } - - /// Format an argument for extern commands (preserve original signature) - fn format_extern_argument(&mut self, positional: &Expression) { - match &positional.expr { - // For extern, preserve the signature span to maintain parameter order - Expr::Signature(_) => self.write_expr_span(positional), - _ => self.format_expression(positional), - } - } - - /// Format an argument for let/mut/const commands - fn format_let_argument(&mut self, positional: &Expression) { - match &positional.expr { - Expr::VarDecl(_) => self.format_expression(positional), - Expr::Subexpression(block_id) => { - self.write("= "); - self.format_subexpression(*block_id, positional.span); - } - Expr::Block(block_id) => { - self.write("= "); - let block = self.working_set.get_block(*block_id); - self.format_block(block); - } - _ => { - self.write("= "); - self.format_expression(positional); - } - } - } - - /// Format an expression that could be a block or a regular expression - fn format_block_or_expr(&mut self, expr: &Expression) { - match &expr.expr { - Expr::Block(block_id) => { - self.format_block_expression(*block_id, expr.span, true); - } - Expr::Closure(block_id) => { - self.format_closure_expression(*block_id, expr.span); - } - _ => self.format_expression(expr), - } - } - - /// Format an external call - fn format_external_call(&mut self, head: &Expression, args: &[ExternalArgument]) { - // Check if the original source had an explicit ^ prefix - // by looking at the byte before the head span - if head.span.start > 0 && self.source.get(head.span.start - 1) == Some(&b'^') { - self.write("^"); - } - self.format_expression(head); - for arg in args { - self.space(); - match arg { - ExternalArgument::Regular(arg_expr) => self.format_expression(arg_expr), - ExternalArgument::Spread(spread_expr) => { - self.write("..."); - self.format_expression(spread_expr); - } - } - } - } - - /// Format a binary operation - fn format_binary_op(&mut self, lhs: &Expression, op: &Expression, rhs: &Expression) { - self.preserve_subexpr_parens_depth += 1; - self.format_expression(lhs); - self.preserve_subexpr_parens_depth -= 1; - // Always add space around binary operators for valid nushell syntax - self.write(" "); - self.format_expression(op); - self.write(" "); - - // For assignment operators, unwrap Subexpression on RHS to avoid double parens - if let Expr::Operator(nu_protocol::ast::Operator::Assignment(_)) = &op.expr { - if let Expr::Subexpression(block_id) = &rhs.expr { - let block = self.working_set.get_block(*block_id); - self.format_block(block); - return; - } - } - self.preserve_subexpr_parens_depth += 1; - self.format_expression(rhs); - self.preserve_subexpr_parens_depth -= 1; - } - - /// Format a range expression - fn format_range(&mut self, range: &nu_protocol::ast::Range) { - let op = range.operator.to_string(); - if let Some(from) = &range.from { - self.format_expression(from); - } - self.write(&op); - if let Some(next) = &range.next { - self.format_expression(next); - // For step ranges (start..step..end), write the operator again before end - self.write(&op); - } - if let Some(to) = &range.to { - self.format_expression(to); - } - } - - /// Write custom completion if present (e.g., @`completion_name` or @[list items]) - fn write_custom_completion(&mut self, completion: &Option) { - match completion { - Some(Completion::Command(decl_id)) => { - let decl = self.working_set.get_decl(*decl_id); - let name = decl.name(); - self.write("@"); - // Quote command names that contain special characters (like spaces) - if name.contains(' ') - || name.contains('-') - || name.contains('[') - || name.contains(']') - { - self.write("\""); - self.write(name); - self.write("\""); - } else { - self.write(name); - } - } - Some(Completion::List(list)) => { - self.write("@["); - match list { - NuCow::Borrowed(items) => { - for (i, item) in items.iter().enumerate() { - if i > 0 { - self.write(" "); - } - self.write(item); - } - } - NuCow::Owned(items) => { - for (i, item) in items.iter().enumerate() { - if i > 0 { - self.write(" "); - } - self.write(item); - } - } - } - self.write("]"); - } - None => {} - } - } - - /// Write a `SyntaxShape` with any necessary prettifying - fn write_shape(&mut self, shape: &SyntaxShape) { - match shape { - SyntaxShape::Closure(Option::None) => self.write("closure"), - SyntaxShape::Closure(_) => { - let rendered = shape.to_string(); - if rendered == "closure()" { - self.write("closure"); - } else { - self.write(&rendered); - } - } - _ => self.write(&format!("{}", shape)), - } - } - - /// Format a signature (for def commands) - fn format_signature(&mut self, sig: &Signature) { - self.write("["); - - let param_count = sig.required_positional.len() - + sig.optional_positional.len() - + sig.named.iter().filter(|f| f.long != "help").count() - + if sig.rest_positional.is_some() { 1 } else { 0 }; - let has_multiline = param_count > 3; - - if has_multiline { - self.newline(); - self.indent_level += 1; - } - - let mut first = true; - - // Helper to write separator - let write_sep = |formatter: &mut Formatter, first: &mut bool, has_multiline: bool| { - if !*first { - if has_multiline { - formatter.newline(); - formatter.write_indent(); - } else { - formatter.write(", "); - } - } - *first = false; - }; - - // Required positional - for param in &sig.required_positional { - write_sep(self, &mut first, has_multiline); - self.write(¶m.name); - if param.shape != SyntaxShape::Any { - self.write(": "); - self.write_shape(¶m.shape); - self.write_custom_completion(¶m.completion); - } - } - - // Optional positional - for param in &sig.optional_positional { - write_sep(self, &mut first, has_multiline); - self.write(¶m.name); - // If there's a default value, don't use ? syntax, use = syntax - if param.default_value.is_none() { - self.write("?"); - } - if param.shape != SyntaxShape::Any { - self.write(": "); - self.write_shape(¶m.shape); - self.write_custom_completion(¶m.completion); - } - if let Some(default) = ¶m.default_value { - self.write(" = "); - self.write(&default.to_expanded_string(" ", &nu_protocol::Config::default())); - } - } - - // Named flags (before rest positional to match common convention) - for flag in &sig.named { - // Skip help flag as it's auto-added - if flag.long == "help" { - continue; - } - write_sep(self, &mut first, has_multiline); - - // Handle short-only flags (empty long name) - if flag.long.is_empty() { - if let Some(short) = flag.short { - self.write("-"); - self.write(&short.to_string()); - } - } else { - self.write("--"); - self.write(&flag.long); - if let Some(short) = flag.short { - self.write("(-"); - self.write(&short.to_string()); - self.write(")"); - } - } - if let Some(shape) = &flag.arg { - self.write(": "); - self.write_shape(shape); - self.write_custom_completion(&flag.completion); - } - if let Some(default) = &flag.default_value { - self.write(" = "); - self.write(&default.to_expanded_string(" ", &nu_protocol::Config::default())); - } - } - - // Rest positional (comes last) - if let Some(rest) = &sig.rest_positional { - write_sep(self, &mut first, has_multiline); - self.write("..."); - self.write(&rest.name); - if rest.shape != SyntaxShape::Any { - self.write(": "); - self.write_shape(&rest.shape); - self.write_custom_completion(&rest.completion); - } - } - - if has_multiline { - self.newline(); - self.indent_level -= 1; - self.write_indent(); - } - self.write("]"); - - if !sig.input_output_types.is_empty() { - self.write(": "); - for (i, (input, output)) in sig.input_output_types.iter().enumerate() { - if i > 0 { - self.write(", "); - } - self.write(&input.to_string()); - self.write(" -> "); - self.write(&output.to_string()); - } - } - } - - /// Format a `CellPath` - fn format_cell_path(&mut self, cell_path: &CellPath) { - for (i, member) in cell_path.members.iter().enumerate() { - if i > 0 { - self.write("."); - } - self.format_cell_path_member(member); - } - } - - /// Format a `FullCellPath` - fn format_full_cell_path(&mut self, cell_path: &FullCellPath) { - self.format_expression(&cell_path.head); - - for member in &cell_path.tail { - self.write("."); - self.format_cell_path_member(member); - } - } - - /// Format cell path member (shared between `CellPath` and `FullCellPath`) - fn format_cell_path_member(&mut self, member: &PathMember) { - match member { - PathMember::String { val, optional, .. } => { - self.write(val); - if *optional { - self.write("?"); - } - } - PathMember::Int { val, optional, .. } => { - self.write(&val.to_string()); - if *optional { - self.write("?"); - } - } - } - } - - /// Format a subexpression - fn format_subexpression(&mut self, block_id: nu_protocol::BlockId, span: Span) { - let block = self.working_set.get_block(block_id); - let source_has_newline = - span.end > span.start && self.source[span.start..span.end].contains(&b'\n'); - // Check if the subexpression has explicit parentheses in the source - let has_explicit_parens = self.source.get(span.start) == Some(&b'(') - && self.source.get(span.end - 1) == Some(&b')'); - if !has_explicit_parens { - self.format_block(block); - return; - } - - // Preserve explicit multiline parenthesized expressions to avoid collapsing - // author-intended layout (for example multiline call arguments). - if source_has_newline { - self.write_span(span); - return; - } - - if self.conditional_context_depth > 0 { - let can_drop_parens = - block.pipelines.len() == 1 && block.pipelines[0].elements.len() == 1; - if can_drop_parens { - self.format_block(block); - return; - } - self.write("("); - self.format_block(block); - self.write(")"); - return; - } - - // Special case: subexpressions containing only a string interpolation don't need parentheses - if block.pipelines.len() == 1 && block.pipelines[0].elements.len() == 1 { - if let Expr::StringInterpolation(_) = &block.pipelines[0].elements[0].expr.expr { - self.format_block(block); - return; - } - } - // Preserve explicit parens when formatting subexpressions in precedence-sensitive contexts. - if self.preserve_subexpr_parens_depth == 0 - && block.pipelines.len() == 1 - && !block.pipelines[0].elements.is_empty() - { - let first_element = &block.pipelines[0].elements[0]; - let element_text = String::from_utf8_lossy( - &self.source[first_element.expr.span.start..first_element.expr.span.end], - ); - if element_text == "$in" { - self.format_block(block); - return; - } - } - - self.write("("); - let is_simple = !source_has_newline - && block.pipelines.len() == 1 - && block.pipelines[0].elements.len() <= 3; - - if is_simple { - self.format_block(block); - } else { - self.newline(); - self.indent_level += 1; - self.format_block(block); - self.newline(); - self.indent_level -= 1; - self.write_indent(); - } - self.write(")"); - } - - // ───────────────────────────────────────────────────────────────────────────── - // Block expression formatting - // ───────────────────────────────────────────────────────────────────────────── - - /// Format a block expression with optional braces - fn format_block_expression( - &mut self, - block_id: nu_protocol::BlockId, - span: Span, - with_braces: bool, - ) { - let block = self.working_set.get_block(block_id); - - let source_has_newline = with_braces - && span.end > span.start - && self.source[span.start..span.end].contains(&b'\n'); - - if with_braces { - self.write("{"); - } - - let is_simple = block.pipelines.len() == 1 - && block.pipelines[0].elements.len() == 1 - && !self.block_has_nested_structures(block) - && !source_has_newline; - let preserve_compact_record_like = - with_braces && is_simple && self.block_expression_looks_like_compact_record(span); - - if is_simple && with_braces { - if !preserve_compact_record_like { - self.write(" "); - } - self.format_block(block); - if !preserve_compact_record_like { - self.write(" "); - } - } else if block.pipelines.is_empty() { - if with_braces { - self.write(" "); - } - } else { - self.newline(); - self.indent_level += 1; - self.format_block(block); - self.newline(); - self.indent_level -= 1; - self.write_indent(); - } - - if with_braces { - self.write("}"); - } - } - - fn block_expression_looks_like_compact_record(&self, span: Span) -> bool { - if span.end <= span.start + 1 || span.end > self.source.len() { - return false; - } - - let slice = &self.source[span.start..span.end]; - if !slice.starts_with(b"{") || !slice.ends_with(b"}") { - return false; - } - - if slice.starts_with(b"{ ") || slice.ends_with(b" }") { - return false; - } - - slice[1..slice.len() - 1].contains(&b':') - } - - /// Check if a block has nested structures that require multiline formatting - fn block_has_nested_structures(&self, block: &Block) -> bool { - block - .pipelines - .iter() - .flat_map(|p| &p.elements) - .any(|e| self.expr_is_complex(&e.expr)) - } - - /// Check if an expression is complex enough to warrant multiline formatting - fn expr_is_complex(&self, expr: &Expression) -> bool { - match &expr.expr { - Expr::Block(_) | Expr::Closure(_) => true, - Expr::List(items) => items.len() > 3, - Expr::Record(items) => items.len() > 2, - Expr::Call(call) => call.arguments.iter().any(|arg| match arg { - Argument::Positional(e) | Argument::Unknown(e) | Argument::Spread(e) => { - self.expr_is_complex(e) - } - Argument::Named(n) => n.2.as_ref().is_some_and(|e| self.expr_is_complex(e)), - }), - _ => false, - } - } - - /// Format a closure expression - fn format_closure_expression(&mut self, block_id: nu_protocol::BlockId, span: Span) { - let content = self.get_span_content(span); - let has_params = content - .iter() - .skip(1) // Skip '{' - .find(|b| !b.is_ascii_whitespace()) - .is_some_and(|char| char.eq(&b'|')); - - if !has_params { - self.format_block_expression(block_id, span, true); - return; - } - - // Find the start of the parameter section (first |) - let Some(first_pipe_index) = content.iter().position(|&b| b == b'|') else { - self.write_bytes(&content); - return; - }; - - // Find the end of the parameter section (second |) - let Some(second_pipe_index) = content[first_pipe_index + 1..] - .iter() - .position(|&b| b == b'|') - .map(|p| first_pipe_index + 1 + p) - else { - self.write_bytes(&content); - return; - }; - - self.write("{|"); - // Extract and trim parameter content - let params = &content[first_pipe_index + 1..second_pipe_index]; - let mut params_iter = params.split(|&b| b == b',').peekable(); - - while let Some(param) = params_iter.next() { - let mut sub_parts = param.splitn(2, |&b| b == b':'); - - if let (Some(param_name), Some(type_hint)) = (sub_parts.next(), sub_parts.next()) { - self.write_bytes(param_name.trim_ascii()); - self.write_bytes(b": "); - self.write_bytes(type_hint.trim_ascii()); - } else { - self.write_bytes(param.trim_ascii()); - } - - if params_iter.peek().is_some() { - self.write_bytes(b", "); - } - } - - self.write("|"); - - let block = self.working_set.get_block(block_id); - let is_simple = block.pipelines.len() == 1 - && block.pipelines[0].elements.len() == 1 - && !self.block_has_nested_structures(block); - - if is_simple { - self.space(); - self.format_block(block); - self.write(" }"); - } else { - self.newline(); - self.indent_level += 1; - self.format_block(block); - self.newline(); - self.indent_level -= 1; - self.write_indent(); - self.write("}"); - } - } - - // ───────────────────────────────────────────────────────────────────────────── - // Collection formatting (lists, records, tables) - // ───────────────────────────────────────────────────────────────────────────── - - /// Format a list - fn format_list(&mut self, items: &[ListItem]) { - if items.is_empty() { - self.write("[]"); - return; - } - - let uses_commas = self.list_uses_commas(items); - - // Check if all items are simple (primitives) - let all_simple = items.iter().all(|item| match item { - ListItem::Item(expr) => self.is_simple_expr(expr), - ListItem::Spread(_, expr) => self.is_simple_expr(expr), - }); - - if all_simple && items.len() <= 5 { - // Inline format - self.write("["); - for (i, item) in items.iter().enumerate() { - if i > 0 { - if uses_commas { - self.write(", "); - } else { - self.write(" "); - } - } - self.format_list_item(item); - } - self.write("]"); - } else { - // Multiline format - self.write("["); - self.newline(); - self.indent_level += 1; - for item in items { - self.write_indent(); - self.format_list_item(item); - self.newline(); - } - self.indent_level -= 1; - self.write_indent(); - self.write("]"); - } - } - - /// Detect whether list items are separated with commas in the source. - fn list_uses_commas(&self, items: &[ListItem]) -> bool { - if items.len() < 2 { - return false; - } - - let item_bounds = |item: &ListItem| match item { - ListItem::Item(expr) | ListItem::Spread(_, expr) => (expr.span.start, expr.span.end), - }; - - let (_, mut prev_end) = item_bounds(&items[0]); - for item in items.iter().skip(1) { - let (start, end) = item_bounds(item); - if start > prev_end && self.source[prev_end..start].contains(&b',') { - return true; - } - prev_end = end; - } - - false - } - - /// Format a single list item - fn format_list_item(&mut self, item: &ListItem) { - match item { - ListItem::Item(expr) => self.format_expression(expr), - ListItem::Spread(_, expr) => { - self.write("..."); - self.format_expression(expr); - } - } - } - - /// Format a record - fn format_record(&mut self, items: &[RecordItem], span: Span) { - if items.is_empty() { - self.write("{}"); - return; - } - - let preserve_compact_issue122_style = self.record_preserve_compact_style(span); - - // Check if all items are simple - let all_simple = items.iter().all(|item| match item { - RecordItem::Pair(k, v) => self.is_simple_expr(k) && self.is_simple_expr(v), - RecordItem::Spread(_, expr) => self.is_simple_expr(expr), - }); - - // Check if any value contains nested structures (records, lists, closures) or variables - let has_nested_complex = items.iter().any(|item| match item { - RecordItem::Pair(_, v) => matches!( - &v.expr, - Expr::Record(_) - | Expr::List(_) - | Expr::Closure(_) - | Expr::Block(_) - | Expr::Var(_) - | Expr::FullCellPath(_) - ), - RecordItem::Spread(_, _) => false, - }); - - // When nested with complex values or variables, records with 2+ items should be multiline - let nested_multiline = self.indent_level > 0 && items.len() >= 2 && has_nested_complex; - - if all_simple && items.len() <= 3 && !nested_multiline { - // Inline format - let record_start = self.output.len(); - self.write("{"); - if preserve_compact_issue122_style { - self.write(" "); - } - for (i, item) in items.iter().enumerate() { - if i > 0 { - self.write(", "); - } - self.format_record_item(item, preserve_compact_issue122_style); - } - if !preserve_compact_issue122_style - && self.output.len() > record_start + 1 - && self.output[record_start + 1] == b' ' - { - self.output.remove(record_start + 1); - } - if preserve_compact_issue122_style { - self.write(" "); - } - self.write("}"); - } else { - // Multiline format - self.write("{"); - self.newline(); - self.indent_level += 1; - for item in items { - self.write_indent(); - self.format_record_item(item, preserve_compact_issue122_style); - self.newline(); - } - self.indent_level -= 1; - self.write_indent(); - self.write("}"); - } - } - - fn record_preserve_compact_style(&self, span: Span) -> bool { - if !self.allow_compact_recovered_record_style { - return false; - } - - if span.end <= span.start || span.end > self.source.len() { - return false; - } - - let slice = &self.source[span.start..span.end]; - slice.starts_with(b"{ ") - && slice.ends_with(b" }") - && slice.contains(&b',') - && !slice.windows(2).any(|window| window == b": ") - } - - /// Format a single record item - fn format_record_item(&mut self, item: &RecordItem, compact_colon: bool) { - match item { - RecordItem::Pair(key, value) => { - self.format_record_key(key); - if compact_colon { - self.write(":"); - } else { - self.write(": "); - } - self.format_expression(value); - } - RecordItem::Spread(_, expr) => { - self.write("..."); - self.format_expression(expr); - } - } - } - - fn format_record_key(&mut self, key: &Expression) { - let span_contents = self.get_span_content(key.span); - - let start = span_contents - .iter() - .position(|byte| !byte.is_ascii_whitespace()) - .unwrap_or(span_contents.len()); - let end = span_contents - .iter() - .rposition(|byte| !byte.is_ascii_whitespace()) - .map(|idx| idx + 1) - .unwrap_or(0); - - if start < end { - self.write_bytes(&span_contents[start..end]); - } else { - self.format_expression(key); - } - } - - /// Format a table - fn format_table(&mut self, columns: &[Expression], rows: &[Box<[Expression]>]) { - self.write("["); - - // Format header row - self.write("["); - for (i, col) in columns.iter().enumerate() { - if i > 0 { - self.write(", "); - } - self.format_expression(col); - } - self.write("]"); - - // Format data rows - if !rows.is_empty() { - self.write("; "); - for (i, row) in rows.iter().enumerate() { - if i > 0 { - self.write(", "); - } - self.write("["); - for (j, cell) in row.iter().enumerate() { - if j > 0 { - self.write(", "); - } - self.format_expression(cell); - } - self.write("]"); - } - } - - self.write("]"); - } - - // ───────────────────────────────────────────────────────────────────────────── - // Match block formatting - // ───────────────────────────────────────────────────────────────────────────── - - /// Format a match block - fn format_match_block(&mut self, matches: &[(MatchPattern, Expression)]) { - self.write("{"); - self.newline(); - self.indent_level += 1; - - for (pattern, expr) in matches { - self.write_indent(); - self.format_match_pattern(pattern); - self.write(" => "); - self.format_block_or_expr(expr); - self.newline(); - } - - self.indent_level -= 1; - self.write_indent(); - self.write("}"); - } - - /// Format a match pattern - fn format_match_pattern(&mut self, pattern: &MatchPattern) { - match &pattern.pattern { - Pattern::Expression(expr) => self.format_expression(expr), - Pattern::Value(_) | Pattern::Variable(_) | Pattern::Rest(_) | Pattern::Garbage => { - self.write_span(pattern.span); - } - Pattern::Or(patterns) => { - for (i, p) in patterns.iter().enumerate() { - if i > 0 { - self.write(" | "); - } - self.format_match_pattern(p); - } - } - Pattern::List(patterns) => { - self.write("["); - for (i, p) in patterns.iter().enumerate() { - if i > 0 { - self.write(", "); - } - self.format_match_pattern(p); - } - self.write("]"); - } - Pattern::Record(entries) => { - self.write("{"); - for (i, (key, pat)) in entries.iter().enumerate() { - if i > 0 { - self.write(", "); - } - self.write(key); - self.write(": "); - self.format_match_pattern(pat); - } - self.write("}"); - } - Pattern::IgnoreRest => self.write(".."), - Pattern::IgnoreValue => self.write("_"), - } - } - - // ───────────────────────────────────────────────────────────────────────────── - // Helpers - // ───────────────────────────────────────────────────────────────────────────── - - /// Check if an expression is simple (primitive type) - fn is_simple_expr(&self, expr: &Expression) -> bool { - match &expr.expr { - Expr::Int(_) - | Expr::Float(_) - | Expr::Bool(_) - | Expr::String(_) - | Expr::RawString(_) - | Expr::Nothing - | Expr::Var(_) - | Expr::Filepath(_, _) - | Expr::Directory(_, _) - | Expr::GlobPattern(_, _) - | Expr::DateTime(_) => true, - // FullCellPath with empty tail is simple (e.g., $var or undefined $var parsed as Garbage) - Expr::FullCellPath(full_path) => { - full_path.tail.is_empty() - && matches!( - &full_path.head.expr, - Expr::Var(_) | Expr::Garbage | Expr::Int(_) | Expr::String(_) - ) - } - _ => false, - } - } - - /// Get the final output - fn finish(self) -> Vec { - self.output - } -} - -/// Command types for formatting purposes -enum CommandType { - Def, - Extern, - Conditional, - Let, - Block, - Regular, -} - -/// Extract comments from source code -fn extract_comments(source: &[u8]) -> Vec<(Span, Vec)> { - let mut comments = Vec::new(); - let mut i = 0; - let mut in_string = false; - let mut string_char = b'"'; - - while i < source.len() { - let c = source[i]; - - // Track string state to avoid matching # inside strings - if !in_string && (c == b'"' || c == b'\'') { - in_string = true; - string_char = c; - i += 1; - continue; - } - - if in_string { - if c == b'\\' && i + 1 < source.len() { - i += 2; // Skip escaped character - continue; - } - if c == string_char { - in_string = false; - } - i += 1; - continue; - } - - // Found a comment - if c == b'#' { - let start = i; - while i < source.len() && source[i] != b'\n' { - i += 1; - } - comments.push((Span::new(start, i), source[start..i].to_vec())); - } - - i += 1; - } - - comments -} - -fn is_fatal_parse_error(error: &ParseError) -> bool { - matches!( - error, - ParseError::ExtraTokens(_) - | ParseError::ExtraTokensAfterClosingDelimiter(_) - | ParseError::UnexpectedEof(_, _) - | ParseError::Unclosed(_, _) - | ParseError::Unbalanced(_, _, _) - | ParseError::IncompleteMathExpression(_) - | ParseError::UnknownCommand(_) - | ParseError::Expected(_, _) - | ParseError::ExpectedWithStringMsg(_, _) - | ParseError::ExpectedWithDidYouMean(_, _, _) - ) -} - -enum ParseRepairOutcome { - Reformat(Vec), -} - -fn try_repair_compact_if_else(source: &str) -> (String, bool) { - transform_outside_string_literals(source, repair_compact_if_else_segment) -} - -fn find_string_literal_ranges(source: &str) -> Vec<(usize, usize)> { - let bytes = source.as_bytes(); - let mut ranges = Vec::new(); - let mut in_string: Option = None; - let mut escaped = false; - let mut start = 0; - - for (idx, byte) in bytes.iter().copied().enumerate() { - if let Some(quote) = in_string { - if escaped { - escaped = false; - continue; - } - - if byte == b'\\' { - escaped = true; - continue; - } - - if byte == quote { - ranges.push((start, idx + 1)); - in_string = None; - } - - continue; - } - - if byte == b'"' || byte == b'\'' { - start = idx; - in_string = Some(byte); - escaped = false; - } - } - - if in_string.is_some() { - ranges.push((start, source.len())); - } - - ranges -} - -fn transform_outside_string_literals( - source: &str, - mut transform: impl FnMut(&str) -> (String, bool), -) -> (String, bool) { - let string_ranges = find_string_literal_ranges(source); - - if string_ranges.is_empty() { - return transform(source); - } - - let mut output = String::with_capacity(source.len()); - let mut changed = false; - let mut cursor = 0; - - for (start, end) in string_ranges { - if cursor < start { - let (transformed, seg_changed) = transform(&source[cursor..start]); - output.push_str(&transformed); - changed |= seg_changed; - } - - output.push_str(&source[start..end]); - cursor = end; - } - - if cursor < source.len() { - let (transformed, seg_changed) = transform(&source[cursor..]); - output.push_str(&transformed); - changed |= seg_changed; - } - - (output, changed) -} - -fn non_string_ranges(source: &str) -> Vec<(usize, usize)> { - let string_ranges = find_string_literal_ranges(source); - - if string_ranges.is_empty() { - return vec![(0, source.len())]; - } - - let mut ranges = Vec::new(); - let mut cursor = 0; - - for (start, end) in string_ranges { - if cursor < start { - ranges.push((cursor, start)); - } - cursor = end; - } - - if cursor < source.len() { - ranges.push((cursor, source.len())); - } - - ranges -} - -fn detect_compact_if_else_spans(source: &str) -> Vec { - let mut spans = Vec::new(); - let patterns = ["if(", "}else{", "} else{", "}else {"]; - - for (range_start, range_end) in non_string_ranges(source) { - let segment = &source[range_start..range_end]; - for pattern in patterns { - for (offset, _) in segment.match_indices(pattern) { - let idx = range_start + offset; - spans.push(Span { - start: idx.saturating_sub(32), - end: (idx + pattern.len() + 32).min(source.len()), - }); - } - } - } - - spans -} - -fn detect_missing_record_comma_positions(source: &str) -> Vec { - let bytes = source.as_bytes(); - let mut in_string = false; - let mut escaped = false; - let mut insert_positions: Vec = Vec::new(); - - for (idx, byte) in bytes.iter().copied().enumerate() { - if in_string { - if escaped { - escaped = false; - continue; - } - - if byte == b'\\' { - escaped = true; - continue; - } - - if byte == b'"' { - in_string = false; - - let mut lookahead = idx + 1; - while lookahead < bytes.len() && bytes[lookahead].is_ascii_whitespace() { - lookahead += 1; - } - - if lookahead < bytes.len() - && (bytes[lookahead].is_ascii_alphabetic() || bytes[lookahead] == b'_') - { - let mut key_end = lookahead; - while key_end < bytes.len() - && (bytes[key_end].is_ascii_alphanumeric() - || bytes[key_end] == b'_' - || bytes[key_end] == b'-') - { - key_end += 1; - } - - if key_end < bytes.len() && bytes[key_end] == b':' { - let between = &bytes[idx + 1..lookahead]; - if !between.contains(&b',') { - insert_positions.push(idx + 1); - } - } - } - } - - continue; - } - - if byte == b'"' { - in_string = true; - escaped = false; - } - } - - insert_positions -} - -fn detect_missing_record_comma_spans(source: &str) -> Vec { - detect_missing_record_comma_positions(source) - .into_iter() - .map(|idx| Span { - start: idx.saturating_sub(32), - end: (idx + 32).min(source.len()), - }) - .collect() -} - -fn repair_compact_if_else_segment(segment: &str) -> (String, bool) { - let mut repaired = segment.to_string(); - let mut changed = false; - let replacements = [ - ("if(", "if ("), - ("){", ") {"), - ("}else{", "} else {"), - ("} else{", "} else {"), - ("}else {", "} else {"), - ]; - - for (from, to) in replacements { - if repaired.contains(from) { - repaired = repaired.replace(from, to); - changed = true; - } - } - - (repaired, changed) -} - -fn add_brace_padding_segment(segment: &str) -> (String, bool) { - let bytes = segment.as_bytes(); - let mut output: Vec = Vec::with_capacity(bytes.len() + 8); - let mut changed = false; - - for (idx, byte) in bytes.iter().copied().enumerate() { - if byte == b'{' { - output.push(byte); - if let Some(next) = bytes.get(idx + 1) { - if !next.is_ascii_whitespace() && *next != b'}' { - output.push(b' '); - changed = true; - } - } - continue; - } - - if byte == b'}' { - if let Some(last) = output.last() { - if !last.is_ascii_whitespace() && *last != b'{' { - output.push(b' '); - changed = true; - } - } - - output.push(byte); - continue; - } - - output.push(byte); - } - - let output = String::from_utf8(output).unwrap_or_else(|_| segment.to_string()); - (output, changed) -} - -fn add_brace_padding_outside_strings(source: &str) -> (String, bool) { - transform_outside_string_literals(source, add_brace_padding_segment) -} - -fn try_repair_missing_record_commas(source: &str) -> (String, bool) { - let insert_positions = detect_missing_record_comma_positions(source); - - if insert_positions.is_empty() { - return (source.to_string(), false); - } - - let mut repaired = String::with_capacity(source.len() + insert_positions.len() * 2); - let mut next_insert_idx = 0; - - for (idx, ch) in source.char_indices() { - while next_insert_idx < insert_positions.len() && insert_positions[next_insert_idx] == idx { - repaired.push(','); - repaired.push(' '); - next_insert_idx += 1; - } - repaired.push(ch); - } - - (repaired, true) -} - -fn merge_spans(spans: &[Span], len: usize) -> Vec { - let mut normalized: Vec = spans - .iter() - .filter_map(|span| { - if span.start >= len || span.end <= span.start { - return None; - } - - Some(Span { - start: span.start, - end: span.end.min(len), - }) - }) - .collect(); - - normalized.sort_by_key(|span| (span.start, span.end)); - - let mut merged: Vec = Vec::new(); - for span in normalized { - if let Some(last) = merged.last_mut() { - if span.start <= last.end { - last.end = last.end.max(span.end); - continue; - } - } - - merged.push(span); - } - - merged -} - -fn align_to_char_boundary(source: &str, index: usize, forward: bool) -> usize { - let mut adjusted = index.min(source.len()); - - if forward { - while adjusted < source.len() && !source.is_char_boundary(adjusted) { - adjusted += 1; - } - } else { - while adjusted > 0 && !source.is_char_boundary(adjusted) { - adjusted -= 1; - } - } - - adjusted -} - -fn repair_region(source: &str) -> (String, bool) { - let (repaired_if_else, if_else_changed) = try_repair_compact_if_else(source); - let (mut repaired_record, record_changed) = try_repair_missing_record_commas(&repaired_if_else); - - let mut brace_spacing_changed = false; - if record_changed { - let (padded, padded_changed) = add_brace_padding_outside_strings(&repaired_record); - repaired_record = padded; - brace_spacing_changed = padded_changed; - } - - ( - repaired_record, - if_else_changed || record_changed || brace_spacing_changed, - ) -} - -fn try_repair_parse_errors( - contents: &[u8], - malformed_spans: &[Span], -) -> Option { - if malformed_spans.is_empty() { - return None; - } - - let source = String::from_utf8_lossy(contents).into_owned(); - let spans = merge_spans(malformed_spans, source.len()); - - if spans.is_empty() { - return None; - } - - let mut cursor = 0; - let mut output = String::with_capacity(source.len()); - let mut changed = false; - - for span in spans { - let start = align_to_char_boundary(&source, span.start, false); - let end = align_to_char_boundary(&source, span.end, true); - - if start >= end || start < cursor { - continue; - } - - output.push_str(&source[cursor..start]); - - let (repaired_region, region_changed) = repair_region(&source[start..end]); - output.push_str(&repaired_region); - changed |= region_changed; - - cursor = end; - } - - output.push_str(&source[cursor..]); - - if changed { - Some(ParseRepairOutcome::Reformat(output.into_bytes())) - } else { - None - } -} - -fn block_contains_garbage(working_set: &StateWorkingSet<'_>, block: &Block) -> bool { - block - .pipelines - .iter() - .any(|pipeline| pipeline_contains_garbage(working_set, pipeline)) -} - -fn pipeline_contains_garbage(working_set: &StateWorkingSet<'_>, pipeline: &Pipeline) -> bool { - pipeline.elements.iter().any(|element| { - expr_contains_garbage(working_set, &element.expr) - || element - .redirection - .as_ref() - .is_some_and(|redir| redirection_contains_garbage(working_set, redir)) - }) -} - -fn redirection_contains_garbage( - working_set: &StateWorkingSet<'_>, - redirection: &PipelineRedirection, -) -> bool { - match redirection { - PipelineRedirection::Single { target, .. } => { - redirection_target_contains_garbage(working_set, target) - } - PipelineRedirection::Separate { out, err } => { - redirection_target_contains_garbage(working_set, out) - || redirection_target_contains_garbage(working_set, err) - } - } -} - -fn redirection_target_contains_garbage( - working_set: &StateWorkingSet<'_>, - target: &RedirectionTarget, -) -> bool { - match target { - RedirectionTarget::File { expr, .. } => expr_contains_garbage(working_set, expr), - RedirectionTarget::Pipe { .. } => false, - } -} - -fn argument_contains_garbage(working_set: &StateWorkingSet<'_>, arg: &Argument) -> bool { - match arg { - Argument::Positional(expr) | Argument::Unknown(expr) | Argument::Spread(expr) => { - expr_contains_garbage(working_set, expr) - } - Argument::Named(named) => named - .2 - .as_ref() - .is_some_and(|expr| expr_contains_garbage(working_set, expr)), - } -} - -fn expr_contains_garbage(working_set: &StateWorkingSet<'_>, expr: &Expression) -> bool { - match &expr.expr { - Expr::Garbage => true, - Expr::Call(call) => call - .arguments - .iter() - .any(|arg| argument_contains_garbage(working_set, arg)), - Expr::ExternalCall(head, args) => { - expr_contains_garbage(working_set, head) - || args.iter().any(|arg| match arg { - ExternalArgument::Regular(expr) | ExternalArgument::Spread(expr) => { - expr_contains_garbage(working_set, expr) - } - }) - } - Expr::BinaryOp(lhs, op, rhs) => { - expr_contains_garbage(working_set, lhs) - || expr_contains_garbage(working_set, op) - || expr_contains_garbage(working_set, rhs) - } - Expr::UnaryNot(inner) => expr_contains_garbage(working_set, inner), - Expr::Block(block_id) | Expr::Closure(block_id) | Expr::Subexpression(block_id) => { - block_contains_garbage(working_set, working_set.get_block(*block_id)) - } - Expr::List(items) => items.iter().any(|item| match item { - ListItem::Item(expr) | ListItem::Spread(_, expr) => { - expr_contains_garbage(working_set, expr) - } - }), - Expr::Record(items) => items.iter().any(|item| match item { - RecordItem::Pair(key, value) => { - expr_contains_garbage(working_set, key) || expr_contains_garbage(working_set, value) - } - RecordItem::Spread(_, expr) => expr_contains_garbage(working_set, expr), - }), - Expr::Table(table) => { - table - .columns - .iter() - .any(|col| expr_contains_garbage(working_set, col)) - || table - .rows - .iter() - .flat_map(|row| row.iter()) - .any(|cell| expr_contains_garbage(working_set, cell)) - } - Expr::Range(range) => { - range - .from - .as_ref() - .is_some_and(|expr| expr_contains_garbage(working_set, expr)) - || range - .next - .as_ref() - .is_some_and(|expr| expr_contains_garbage(working_set, expr)) - || range - .to - .as_ref() - .is_some_and(|expr| expr_contains_garbage(working_set, expr)) - } - Expr::CellPath(_) => false, - Expr::FullCellPath(full_path) => expr_contains_garbage(working_set, &full_path.head), - Expr::RowCondition(block_id) => { - block_contains_garbage(working_set, working_set.get_block(*block_id)) - } - Expr::Keyword(keyword) => expr_contains_garbage(working_set, &keyword.expr), - Expr::MatchBlock(matches) => matches.iter().any(|(pattern, arm_expr)| { - pattern_contains_garbage(working_set, pattern) - || expr_contains_garbage(working_set, arm_expr) - }), - Expr::Collect(_, inner) => expr_contains_garbage(working_set, inner), - Expr::AttributeBlock(attr_block) => { - attr_block - .attributes - .iter() - .any(|attr| expr_contains_garbage(working_set, &attr.expr)) - || expr_contains_garbage(working_set, &attr_block.item) - } - Expr::Int(_) - | Expr::Float(_) - | Expr::Bool(_) - | Expr::Nothing - | Expr::DateTime(_) - | Expr::String(_) - | Expr::RawString(_) - | Expr::Binary(_) - | Expr::Filepath(_, _) - | Expr::Directory(_, _) - | Expr::GlobPattern(_, _) - | Expr::Var(_) - | Expr::VarDecl(_) - | Expr::Operator(_) - | Expr::StringInterpolation(_) - | Expr::GlobInterpolation(_, _) - | Expr::ImportPattern(_) - | Expr::Overlay(_) - | Expr::Signature(_) - | Expr::ValueWithUnit(_) => false, - } -} - -fn pattern_contains_garbage(working_set: &StateWorkingSet<'_>, pattern: &MatchPattern) -> bool { - match &pattern.pattern { - Pattern::Garbage => true, - Pattern::Expression(expr) => expr_contains_garbage(working_set, expr), - Pattern::Or(patterns) | Pattern::List(patterns) => patterns - .iter() - .any(|pattern| pattern_contains_garbage(working_set, pattern)), - Pattern::Record(entries) => entries - .iter() - .any(|(_, pattern)| pattern_contains_garbage(working_set, pattern)), - Pattern::Value(_) - | Pattern::Variable(_) - | Pattern::Rest(_) - | Pattern::IgnoreRest - | Pattern::IgnoreValue => false, - } -} - -/// Format an array of bytes -/// -/// Reading the file gives you a list of bytes -pub(crate) fn format_inner(contents: &[u8], config: &Config) -> Result, FormatError> { - format_inner_with_options(contents, config) -} - -fn format_inner_with_options(contents: &[u8], config: &Config) -> Result, FormatError> { - let engine_state = get_engine_state(); - let mut working_set = StateWorkingSet::new(&engine_state); - - let parsed_block = parse(&mut working_set, None, contents, false); - trace!("parsed block:\n{:?}", &parsed_block); - - let source_text = String::from_utf8_lossy(contents); - let mut malformed_spans: Vec = working_set - .parse_errors - .iter() - .map(ParseError::span) - .collect(); - malformed_spans.extend(detect_compact_if_else_spans(&source_text)); - malformed_spans.extend(detect_missing_record_comma_spans(&source_text)); - - let has_garbage = block_contains_garbage(&working_set, &parsed_block); - let has_fatal_parse_error = working_set.parse_errors.iter().any(is_fatal_parse_error); - if !malformed_spans.is_empty() || has_garbage { - if let Some(repaired) = try_repair_parse_errors(contents, &malformed_spans) { - debug!( - "retrying formatting after targeted parse-error repair ({} parse errors)", - working_set.parse_errors.len() - ); - - return match repaired { - ParseRepairOutcome::Reformat(repaired_source) => { - format_inner_with_options(&repaired_source, config) - } - }; - } - } - - if has_fatal_parse_error && has_garbage { - debug!( - "skipping formatting due to fatal parse errors with garbage AST nodes ({} found)", - working_set.parse_errors.len() - ); - return Ok(contents.to_vec()); - } - - // Note: We don't reject files with "garbage" nodes because the parser - // produces garbage for commands it doesn't know about (e.g., `where`, `each`) - // when using only nu-cmd-lang context. Instead, we output original span - // content for expressions we can't format. - - if parsed_block.pipelines.is_empty() { - trace!("block has no pipelines!"); - debug!("File has no code to format."); - let comments = extract_comments(contents); - if comments.is_empty() { - return Ok(contents.to_vec()); - } - } - - let mut formatter = Formatter::new(contents, &working_set, config, true); - - // Write leading comments - if let Some(first_pipeline) = parsed_block.pipelines.first() { - if let Some(first_elem) = first_pipeline.elements.first() { - formatter.write_comments_before(first_elem.expr.span.start); - } - } - - formatter.format_block(&parsed_block); - - // Write trailing comments - let end_pos = parsed_block - .pipelines - .last() - .and_then(|p| p.elements.last()) - .map(|e| e.expr.span.end) - .unwrap_or(0); - - if end_pos > 0 { - formatter.last_pos = end_pos; - formatter.write_comments_before(contents.len()); - } - - Ok(formatter.finish()) -} - -/// Make sure there is a newline at the end of a buffer -pub(crate) fn add_newline_at_end_of_file(out: Vec) -> Vec { - if out.last() == Some(&b'\n') { - out - } else { - let mut result = out; - result.push(b'\n'); - result - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn format(input: &str) -> String { - let config = Config::default(); - let result = format_inner(input.as_bytes(), &config).expect("formatting failed"); - String::from_utf8(result).expect("invalid utf8") - } - - #[test] - fn repair_patterns_do_not_mutate_double_quoted_strings() { - let input = "let s = \"if(true){1}else{2}\""; - let output = format(input); - assert!(output.contains("\"if(true){1}else{2}\"")); - } - - #[test] - fn repair_patterns_do_not_mutate_record_like_strings() { - let input = "let s = \"{ name: Alice }\""; - let output = format(input); - assert!(output.contains("\"{ name: Alice }\"")); - } - - #[test] - fn test_simple_let() { - let input = "let x = 1"; - let output = format(input); - assert_eq!(output, "let x = 1"); - } - - #[test] - fn test_let_with_spaces() { - let input = "let x = 1"; - let output = format(input); - assert_eq!(output, "let x = 1"); - } - - #[test] - fn test_simple_def() { - let input = "def foo [] { echo hello }"; - let output = format(input); - assert!(output.contains("def foo")); - } - - #[test] - fn test_pipeline() { - let input = "ls | get name"; - let output = format(input); - assert!(output.contains("| get")); - } - - #[test] - fn test_if_else() { - let input = "if true { echo yes } else { echo no }"; - let output = format(input); - assert!(output.contains("if true")); - assert!(output.contains("else")); - } - - #[test] - fn test_for_loop() { - let input = "for x in [1, 2, 3] { print $x }"; - let output = format(input); - assert!(output.contains("for x in")); - assert!(output.contains("{ print")); - } - - #[test] - fn test_while_loop() { - let input = "while true { break }"; - let output = format(input); - assert!(output.contains("while true")); - assert!(output.contains("{ break }")); - } - - #[test] - fn test_closure() { - let input = "{|x| $x * 2 }"; - let output = format(input); - assert!(output.contains("{|x|")); - } - - #[test] - fn test_multiline() { - let input = "let x = 1\nlet y = 2"; - let output = format(input); - assert!(output.contains("let x = 1")); - assert!(output.contains("let y = 2")); - assert!(output.contains("\n")); - } - - #[test] - fn test_list_simple() { - let input = "[1, 2, 3]"; - let output = format(input); - assert_eq!(output, "[1, 2, 3]"); - } - - #[test] - fn test_record_simple() { - let input = "{a: 1, b: 2}"; - let output = format(input); - assert!(output.contains("a: 1")); - } - - #[test] - fn test_comment_preservation() { - let input = "# this is a comment\nlet x = 1"; - let output = format(input); - assert!(output.contains("# this is a comment")); - } - - #[test] - fn test_idempotency_let() { - let input = "let x = 1"; - let first = format(input); - let second = format(&first); - assert_eq!(first, second, "Formatting should be idempotent"); - } - - #[test] - fn test_idempotency_def() { - let input = "def foo [x: int] { $x + 1 }"; - let first = format(input); - let second = format(&first); - assert_eq!(first, second, "Formatting should be idempotent"); - } - - #[test] - fn test_idempotency_if_else() { - let input = "if true { echo yes } else { echo no }"; - let first = format(input); - let second = format(&first); - assert_eq!(first, second, "Formatting should be idempotent"); - } - - #[test] - fn test_idempotency_for_loop() { - let input = "for x in [1, 2, 3] { print $x }"; - let first = format(input); - let second = format(&first); - assert_eq!(first, second, "Formatting should be idempotent"); - } - - #[test] - fn test_idempotency_complex() { - let input = "# comment\nlet x = 1\ndef foo [] { $x }"; - let first = format(input); - let second = format(&first); - assert_eq!(first, second, "Formatting should be idempotent"); - } - - #[test] - fn issue98_margin_has_effect() { - let input = "def foo [] {\n let out = 1\n out\n}\n\ndef bar [] {\n let out = 1\n out\n}"; - let config = Config::new(4, 80, 2); - let result = format_inner(input.as_bytes(), &config).expect("formatting failed"); - let output = String::from_utf8(result).expect("invalid utf8"); - - let expected = "def foo [] {\n let out = 1\n out\n}\n\n\ndef bar [] {\n let out = 1\n out\n}"; - assert_eq!(output, expected); - } -} diff --git a/src/formatting/blocks.rs b/src/formatting/blocks.rs new file mode 100644 index 0000000..a19c8a2 --- /dev/null +++ b/src/formatting/blocks.rs @@ -0,0 +1,293 @@ +//! Block, pipeline, and closure formatting. +//! +//! Handles formatting of blocks (top-level and nested), pipelines, +//! pipeline elements, redirections, block expressions, and closures. + +use super::Formatter; +use nu_protocol::{ + ast::{ + Argument, Block, Expr, Expression, PipelineElement, PipelineRedirection, RedirectionTarget, + }, + Span, +}; + +impl<'a> Formatter<'a> { + // ───────────────────────────────────────────────────────────────────────── + // Block and pipeline formatting + // ───────────────────────────────────────────────────────────────────────── + + /// Format a block (a sequence of pipelines). + pub(super) fn format_block(&mut self, block: &Block) { + let num_pipelines = block.pipelines.len(); + for (i, pipeline) in block.pipelines.iter().enumerate() { + if let Some(first_elem) = pipeline.elements.first() { + self.write_comments_before(first_elem.expr.span.start); + } + + self.format_pipeline(pipeline); + + if let Some(last_elem) = pipeline.elements.last() { + let end_pos = self.get_element_end_pos(last_elem); + self.write_inline_comment(end_pos); + self.last_pos = end_pos; + } + + if i < num_pipelines - 1 { + let separator_newlines = if self.indent_level == 0 && self.config.margin > 1 { + self.config.margin.saturating_add(1) + } else { + 1 + }; + + for _ in 0..separator_newlines { + self.newline(); + } + } + } + } + + /// Get the end position of a pipeline element, including redirections. + fn get_element_end_pos(&self, element: &PipelineElement) -> usize { + element + .redirection + .as_ref() + .map_or(element.expr.span.end, |redir| match redir { + PipelineRedirection::Single { target, .. } => target.span().end, + PipelineRedirection::Separate { out, err } => out.span().end.max(err.span().end), + }) + } + + /// Format a pipeline (elements joined by `|`). + pub(super) fn format_pipeline(&mut self, pipeline: &nu_protocol::ast::Pipeline) { + for (i, element) in pipeline.elements.iter().enumerate() { + if i > 0 { + self.write(" | "); + } + self.format_pipeline_element(element); + } + } + + /// Format a single pipeline element (expression + optional redirection). + fn format_pipeline_element(&mut self, element: &PipelineElement) { + self.format_expression(&element.expr); + if let Some(ref redirection) = element.redirection { + self.format_redirection(redirection); + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Redirections + // ───────────────────────────────────────────────────────────────────────── + + /// Format a redirection. + fn format_redirection(&mut self, redir: &PipelineRedirection) { + match redir { + PipelineRedirection::Single { target, .. } => { + self.space(); + self.format_redirection_target(target); + } + PipelineRedirection::Separate { out, err } => { + self.space(); + self.format_redirection_target(out); + self.space(); + self.format_redirection_target(err); + } + } + } + + /// Format a redirection target (file or pipe). + fn format_redirection_target(&mut self, target: &RedirectionTarget) { + match target { + RedirectionTarget::File { expr, span, .. } => { + self.write_span(*span); + self.space(); + self.format_expression(expr); + } + RedirectionTarget::Pipe { span } => { + self.write_span(*span); + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Block expressions + // ───────────────────────────────────────────────────────────────────────── + + /// Format a block expression with optional braces. + /// + /// Simple single-pipeline blocks are kept inline; complex or multiline + /// blocks are indented on new lines. + pub(super) fn format_block_expression( + &mut self, + block_id: nu_protocol::BlockId, + span: Span, + with_braces: bool, + ) { + let block = self.working_set.get_block(block_id); + + let source_has_newline = with_braces + && span.end > span.start + && self.source[span.start..span.end].contains(&b'\n'); + + if with_braces { + self.write("{"); + } + + let is_simple = block.pipelines.len() == 1 + && block.pipelines[0].elements.len() == 1 + && !self.block_has_nested_structures(block) + && !source_has_newline; + let preserve_compact_record_like = + with_braces && is_simple && self.block_expression_looks_like_compact_record(span); + + if is_simple && with_braces { + if !preserve_compact_record_like { + self.write(" "); + } + self.format_block(block); + if !preserve_compact_record_like { + self.write(" "); + } + } else if block.pipelines.is_empty() { + if with_braces { + self.write(" "); + } + } else { + self.newline(); + self.indent_level += 1; + self.format_block(block); + self.newline(); + self.indent_level -= 1; + self.write_indent(); + } + + if with_braces { + self.write("}"); + } + } + + /// Detect whether a braced block looks like a compact record literal + /// (e.g. `{name:"Alice"}`), which should preserve its tight formatting. + fn block_expression_looks_like_compact_record(&self, span: Span) -> bool { + if span.end <= span.start + 1 || span.end > self.source.len() { + return false; + } + + let slice = &self.source[span.start..span.end]; + if !slice.starts_with(b"{") || !slice.ends_with(b"}") { + return false; + } + + if slice.starts_with(b"{ ") || slice.ends_with(b" }") { + return false; + } + + slice[1..slice.len() - 1].contains(&b':') + } + + /// Check if a block has nested structures that require multiline formatting. + pub(super) fn block_has_nested_structures(&self, block: &Block) -> bool { + block + .pipelines + .iter() + .flat_map(|p| &p.elements) + .any(|e| self.expr_is_complex(&e.expr)) + } + + /// Check if an expression is complex enough to warrant multiline formatting. + pub(super) fn expr_is_complex(&self, expr: &Expression) -> bool { + match &expr.expr { + Expr::Block(_) | Expr::Closure(_) => true, + Expr::List(items) => items.len() > 3, + Expr::Record(items) => items.len() > 2, + Expr::Call(call) => call.arguments.iter().any(|arg| match arg { + Argument::Positional(e) | Argument::Unknown(e) | Argument::Spread(e) => { + self.expr_is_complex(e) + } + Argument::Named(n) => n.2.as_ref().is_some_and(|e| self.expr_is_complex(e)), + }), + _ => false, + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Closure expressions + // ───────────────────────────────────────────────────────────────────────── + + /// Format a closure expression (`{|params| body}`), extracting and + /// normalising parameters from the raw source. + pub(super) fn format_closure_expression( + &mut self, + block_id: nu_protocol::BlockId, + span: Span, + ) { + let content = self.get_span_content(span); + let has_params = content + .iter() + .skip(1) // Skip '{' + .find(|b| !b.is_ascii_whitespace()) + .is_some_and(|ch| *ch == b'|'); + + if !has_params { + self.format_block_expression(block_id, span, true); + return; + } + + let Some(first_pipe) = content.iter().position(|&b| b == b'|') else { + self.write_bytes(&content); + return; + }; + + let Some(second_pipe) = content[first_pipe + 1..] + .iter() + .position(|&b| b == b'|') + .map(|p| first_pipe + 1 + p) + else { + self.write_bytes(&content); + return; + }; + + self.write("{|"); + + // Normalise parameter whitespace + let params = &content[first_pipe + 1..second_pipe]; + let mut params_iter = params.split(|&b| b == b',').peekable(); + + while let Some(param) = params_iter.next() { + let mut sub_parts = param.splitn(2, |&b| b == b':'); + + if let (Some(param_name), Some(type_hint)) = (sub_parts.next(), sub_parts.next()) { + self.write_bytes(param_name.trim_ascii()); + self.write_bytes(b": "); + self.write_bytes(type_hint.trim_ascii()); + } else { + self.write_bytes(param.trim_ascii()); + } + + if params_iter.peek().is_some() { + self.write_bytes(b", "); + } + } + + self.write("|"); + + let block = self.working_set.get_block(block_id); + let is_simple = block.pipelines.len() == 1 + && block.pipelines[0].elements.len() == 1 + && !self.block_has_nested_structures(block); + + if is_simple { + self.space(); + self.format_block(block); + self.write(" }"); + } else { + self.newline(); + self.indent_level += 1; + self.format_block(block); + self.newline(); + self.indent_level -= 1; + self.write_indent(); + self.write("}"); + } + } +} diff --git a/src/formatting/calls.rs b/src/formatting/calls.rs new file mode 100644 index 0000000..8ba5e3c --- /dev/null +++ b/src/formatting/calls.rs @@ -0,0 +1,451 @@ +//! Call and argument formatting. +//! +//! Handles `def`, `let`/`mut`/`const`, `extern`, conditional, and regular +//! command calls, including signature rendering and custom completions. + +use super::{CommandType, Formatter}; +use nu_protocol::{ + ast::{Argument, Expr, Expression, ExternalArgument}, + Completion, Signature, SyntaxShape, +}; +use nu_utils::NuCow; + +// ───────────────────────────────────────────────────────────────────────────── +// Constants +// ───────────────────────────────────────────────────────────────────────────── + +/// Commands whose block arguments are formatted specially. +pub(super) const BLOCK_COMMANDS: &[&str] = &["for", "while", "loop", "module"]; +pub(super) const CONDITIONAL_COMMANDS: &[&str] = &["if", "try"]; +pub(super) const DEF_COMMANDS: &[&str] = &["def", "def-env", "export def"]; +pub(super) const EXTERN_COMMANDS: &[&str] = &["extern", "export extern"]; +pub(super) const LET_COMMANDS: &[&str] = &["let", "let-env", "mut", "const"]; + +impl<'a> Formatter<'a> { + // ───────────────────────────────────────────────────────────────────────── + // Call formatting + // ───────────────────────────────────────────────────────────────────────── + + /// Format a call expression. + pub(super) fn format_call(&mut self, call: &nu_protocol::ast::Call) { + let decl = self.working_set.get_decl(call.decl_id); + let decl_name = decl.name(); + let cmd_type = Self::classify_command(decl_name); + + // Write command name + if call.head.end != 0 { + self.write_span(call.head); + } + + if matches!(cmd_type, CommandType::Let) { + self.format_let_call(call); + return; + } + + for arg in &call.arguments { + self.format_call_argument(arg, &cmd_type); + } + } + + /// Format `let`/`mut`/`const` calls while preserving explicit type annotations. + pub(super) fn format_let_call(&mut self, call: &nu_protocol::ast::Call) { + let positional: Vec<&Expression> = call + .arguments + .iter() + .filter_map(|arg| match arg { + Argument::Positional(expr) | Argument::Unknown(expr) => Some(expr), + _ => None, + }) + .collect(); + + if positional.is_empty() { + for arg in &call.arguments { + self.format_call_argument(arg, &CommandType::Let); + } + return; + } + + self.space(); + self.format_expression(positional[0]); + + if let Some(rhs) = positional.get(1) { + let lhs = positional[0]; + let between = if lhs.span.end <= rhs.span.start { + &self.source[lhs.span.end..rhs.span.start] + } else { + &[] + }; + + if let Some(eq_pos) = between.iter().position(|b| *b == b'=') { + let annotation = between[..eq_pos].trim_ascii(); + if !annotation.is_empty() { + if !annotation.starts_with(b":") { + self.space(); + } + self.write_bytes(annotation); + } + } + + self.write(" = "); + + match &rhs.expr { + Expr::Block(block_id) | Expr::Subexpression(block_id) => { + let block = self.working_set.get_block(*block_id); + self.format_block(block); + } + _ => self.format_expression(rhs), + } + + for extra in positional.iter().skip(2) { + self.space(); + self.format_expression(extra); + } + } + + // Emit any non-positional arguments (e.g. named flags) + for arg in &call.arguments { + if !matches!(arg, Argument::Positional(_) | Argument::Unknown(_)) { + self.format_call_argument(arg, &CommandType::Let); + } + } + } + + /// Classify a command name into a [`CommandType`] for formatting purposes. + pub(super) fn classify_command(name: &str) -> CommandType { + if DEF_COMMANDS.contains(&name) { + CommandType::Def + } else if EXTERN_COMMANDS.contains(&name) { + CommandType::Extern + } else if CONDITIONAL_COMMANDS.contains(&name) { + CommandType::Conditional + } else if LET_COMMANDS.contains(&name) { + CommandType::Let + } else if BLOCK_COMMANDS.contains(&name) { + CommandType::Block + } else { + CommandType::Regular + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Argument formatting + // ───────────────────────────────────────────────────────────────────────── + + /// Format a single call argument, dispatching by [`CommandType`]. + pub(super) fn format_call_argument(&mut self, arg: &Argument, cmd_type: &CommandType) { + match arg { + Argument::Positional(positional) | Argument::Unknown(positional) => { + self.format_positional_argument(positional, cmd_type); + } + Argument::Named(named) => { + self.space(); + if named.0.span.end != 0 { + self.write_span(named.0.span); + } + if let Some(short) = &named.1 { + self.write_span(short.span); + } + if let Some(value) = &named.2 { + let separator_start = named + .1 + .as_ref() + .map_or(named.0.span.end, |short| short.span.end); + let has_equals = separator_start <= value.span.start + && self.source[separator_start..value.span.start].contains(&b'='); + + if has_equals { + self.write("="); + } else { + self.space(); + } + self.format_expression(value); + } + } + Argument::Spread(spread_expr) => { + self.space(); + self.write("..."); + self.format_expression(spread_expr); + } + } + } + + /// Format a positional argument, using the command type to pick the + /// right strategy. + fn format_positional_argument(&mut self, positional: &Expression, cmd_type: &CommandType) { + self.space(); + match cmd_type { + CommandType::Def => self.format_def_argument(positional), + CommandType::Extern => self.format_extern_argument(positional), + CommandType::Conditional => { + self.conditional_context_depth += 1; + self.format_block_or_expr(positional); + self.conditional_context_depth -= 1; + } + CommandType::Block => { + self.format_block_or_expr(positional); + } + CommandType::Let => self.format_let_argument(positional), + CommandType::Regular => self.format_expression(positional), + } + } + + /// Format an argument for `def` commands (name, signature, body). + fn format_def_argument(&mut self, positional: &Expression) { + match &positional.expr { + Expr::String(_) => self.format_expression(positional), + Expr::Signature(sig) => { + if self.has_comments_in_span(positional.span.start, positional.span.end) { + self.write_expr_span(positional); + self.mark_comments_written_in_span(positional.span.start, positional.span.end); + } else { + self.format_signature(sig); + } + } + Expr::Closure(block_id) | Expr::Block(block_id) => { + self.format_block_expression(*block_id, positional.span, true); + } + _ => self.format_expression(positional), + } + } + + /// Format an argument for `extern` commands (preserve original signature). + fn format_extern_argument(&mut self, positional: &Expression) { + match &positional.expr { + Expr::Signature(_) => self.write_expr_span(positional), + _ => self.format_expression(positional), + } + } + + /// Format an argument for `let`/`mut`/`const` commands. + fn format_let_argument(&mut self, positional: &Expression) { + match &positional.expr { + Expr::VarDecl(_) => self.format_expression(positional), + Expr::Subexpression(block_id) => { + self.write("= "); + self.format_subexpression(*block_id, positional.span); + } + Expr::Block(block_id) => { + self.write("= "); + let block = self.working_set.get_block(*block_id); + self.format_block(block); + } + _ => { + self.write("= "); + self.format_expression(positional); + } + } + } + + /// Format an external call (e.g. `^git status`). + pub(super) fn format_external_call(&mut self, head: &Expression, args: &[ExternalArgument]) { + // Preserve explicit `^` prefix + if head.span.start > 0 && self.source.get(head.span.start - 1) == Some(&b'^') { + self.write("^"); + } + self.format_expression(head); + for arg in args { + self.space(); + match arg { + ExternalArgument::Regular(arg_expr) => self.format_expression(arg_expr), + ExternalArgument::Spread(spread_expr) => { + self.write("..."); + self.format_expression(spread_expr); + } + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Signature formatting + // ───────────────────────────────────────────────────────────────────────── + + /// Format a parameter signature (`[x: int, --flag(-f)]`). + pub(super) fn format_signature(&mut self, sig: &Signature) { + self.write("["); + + let param_count = sig.required_positional.len() + + sig.optional_positional.len() + + sig.named.iter().filter(|f| f.long != "help").count() + + usize::from(sig.rest_positional.is_some()); + let has_multiline = param_count > 3; + + if has_multiline { + self.newline(); + self.indent_level += 1; + } + + let mut first = true; + + // Helper closure for separators + let write_sep = |f: &mut Formatter, first: &mut bool, multiline: bool| { + if !*first { + if multiline { + f.newline(); + f.write_indent(); + } else { + f.write(", "); + } + } + *first = false; + }; + + // Required positional parameters + for param in &sig.required_positional { + write_sep(self, &mut first, has_multiline); + self.write(¶m.name); + if param.shape != SyntaxShape::Any { + self.write(": "); + self.write_shape(¶m.shape); + self.write_custom_completion(¶m.completion); + } + } + + // Optional positional parameters + for param in &sig.optional_positional { + write_sep(self, &mut first, has_multiline); + self.write(¶m.name); + if param.default_value.is_none() { + self.write("?"); + } + if param.shape != SyntaxShape::Any { + self.write(": "); + self.write_shape(¶m.shape); + self.write_custom_completion(¶m.completion); + } + if let Some(default) = ¶m.default_value { + self.write(" = "); + self.write(&default.to_expanded_string(" ", &nu_protocol::Config::default())); + } + } + + // Named flags (skip auto-added --help) + for flag in &sig.named { + if flag.long == "help" { + continue; + } + write_sep(self, &mut first, has_multiline); + + if flag.long.is_empty() { + if let Some(short) = flag.short { + self.write("-"); + self.write(&short.to_string()); + } + } else { + self.write("--"); + self.write(&flag.long); + if let Some(short) = flag.short { + self.write("(-"); + self.write(&short.to_string()); + self.write(")"); + } + } + if let Some(shape) = &flag.arg { + self.write(": "); + self.write_shape(shape); + self.write_custom_completion(&flag.completion); + } + if let Some(default) = &flag.default_value { + self.write(" = "); + self.write(&default.to_expanded_string(" ", &nu_protocol::Config::default())); + } + } + + // Rest positional (last) + if let Some(rest) = &sig.rest_positional { + write_sep(self, &mut first, has_multiline); + self.write("..."); + self.write(&rest.name); + if rest.shape != SyntaxShape::Any { + self.write(": "); + self.write_shape(&rest.shape); + self.write_custom_completion(&rest.completion); + } + } + + if has_multiline { + self.newline(); + self.indent_level -= 1; + self.write_indent(); + } + self.write("]"); + + // Input/output type annotations + if !sig.input_output_types.is_empty() { + self.write(": "); + for (i, (input, output)) in sig.input_output_types.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(&input.to_string()); + self.write(" -> "); + self.write(&output.to_string()); + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Custom completions and shapes + // ───────────────────────────────────────────────────────────────────────── + + /// Write a custom completion annotation (`@cmd` or `@[items]`). + pub(super) fn write_custom_completion(&mut self, completion: &Option) { + match completion { + Some(Completion::Command(decl_id)) => { + let decl = self.working_set.get_decl(*decl_id); + let name = decl.name(); + self.write("@"); + if name.contains(' ') + || name.contains('-') + || name.contains('[') + || name.contains(']') + { + self.write("\""); + self.write(name); + self.write("\""); + } else { + self.write(name); + } + } + Some(Completion::List(list)) => { + self.write("@["); + match list { + NuCow::Borrowed(items) => { + for (i, item) in items.iter().enumerate() { + if i > 0 { + self.write(" "); + } + self.write(item); + } + } + NuCow::Owned(items) => { + for (i, item) in items.iter().enumerate() { + if i > 0 { + self.write(" "); + } + self.write(item); + } + } + } + self.write("]"); + } + None => {} + } + } + + /// Write a [`SyntaxShape`], normalising special cases (e.g. `closure()` + /// → `closure`). + pub(super) fn write_shape(&mut self, shape: &SyntaxShape) { + match shape { + SyntaxShape::Closure(None) => self.write("closure"), + SyntaxShape::Closure(_) => { + let rendered = shape.to_string(); + if rendered == "closure()" { + self.write("closure"); + } else { + self.write(&rendered); + } + } + other => self.write(&other.to_string()), + } + } +} diff --git a/src/formatting/collections.rs b/src/formatting/collections.rs new file mode 100644 index 0000000..0f66054 --- /dev/null +++ b/src/formatting/collections.rs @@ -0,0 +1,330 @@ +//! Collection formatting: lists, records, tables, and match blocks. +//! +//! Handles inline-vs-multiline layout decisions and +//! comma / whitespace normalisation for each collection type. + +use super::Formatter; +use nu_protocol::{ + ast::{Expr, Expression, ListItem, MatchPattern, Pattern, RecordItem}, + Span, +}; + +impl<'a> Formatter<'a> { + // ───────────────────────────────────────────────────────────────────────── + // Lists + // ───────────────────────────────────────────────────────────────────────── + + /// Format a list expression, choosing inline or multiline layout. + pub(super) fn format_list(&mut self, items: &[ListItem]) { + if items.is_empty() { + self.write("[]"); + return; + } + + let uses_commas = self.list_uses_commas(items); + + let all_simple = items.iter().all(|item| match item { + ListItem::Item(expr) | ListItem::Spread(_, expr) => self.is_simple_expr(expr), + }); + + if all_simple && items.len() <= 5 { + self.write("["); + for (i, item) in items.iter().enumerate() { + if i > 0 { + if uses_commas { + self.write(", "); + } else { + self.write(" "); + } + } + self.format_list_item(item); + } + self.write("]"); + } else { + self.write("["); + self.newline(); + self.indent_level += 1; + for item in items { + self.write_indent(); + self.format_list_item(item); + self.newline(); + } + self.indent_level -= 1; + self.write_indent(); + self.write("]"); + } + } + + /// Detect whether items are comma-separated in the original source. + fn list_uses_commas(&self, items: &[ListItem]) -> bool { + if items.len() < 2 { + return false; + } + + let item_bounds = |item: &ListItem| match item { + ListItem::Item(expr) | ListItem::Spread(_, expr) => (expr.span.start, expr.span.end), + }; + + let (_, mut prev_end) = item_bounds(&items[0]); + for item in items.iter().skip(1) { + let (start, end) = item_bounds(item); + if start > prev_end && self.source[prev_end..start].contains(&b',') { + return true; + } + prev_end = end; + } + + false + } + + /// Format a single list item (regular or spread). + fn format_list_item(&mut self, item: &ListItem) { + match item { + ListItem::Item(expr) => self.format_expression(expr), + ListItem::Spread(_, expr) => { + self.write("..."); + self.format_expression(expr); + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Records + // ───────────────────────────────────────────────────────────────────────── + + /// Format a record expression, choosing inline or multiline layout. + pub(super) fn format_record(&mut self, items: &[RecordItem], span: Span) { + if items.is_empty() { + self.write("{}"); + return; + } + + let preserve_compact = self.record_preserve_compact_style(span); + + let all_simple = items.iter().all(|item| match item { + RecordItem::Pair(k, v) => self.is_simple_expr(k) && self.is_simple_expr(v), + RecordItem::Spread(_, expr) => self.is_simple_expr(expr), + }); + + let has_nested_complex = items.iter().any(|item| match item { + RecordItem::Pair(_, v) => matches!( + &v.expr, + Expr::Record(_) + | Expr::List(_) + | Expr::Closure(_) + | Expr::Block(_) + | Expr::Var(_) + | Expr::FullCellPath(_) + ), + RecordItem::Spread(_, _) => false, + }); + + // Records with 2+ items and complex values should be multiline when nested + let nested_multiline = self.indent_level > 0 && items.len() >= 2 && has_nested_complex; + + if all_simple && items.len() <= 3 && !nested_multiline { + // Inline format + let record_start = self.output.len(); + self.write("{"); + if preserve_compact { + self.write(" "); + } + for (i, item) in items.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.format_record_item(item, preserve_compact); + } + if !preserve_compact + && self.output.len() > record_start + 1 + && self.output[record_start + 1] == b' ' + { + self.output.remove(record_start + 1); + } + if preserve_compact { + self.write(" "); + } + self.write("}"); + } else { + // Multiline format + self.write("{"); + self.newline(); + self.indent_level += 1; + for item in items { + self.write_indent(); + self.format_record_item(item, preserve_compact); + self.newline(); + } + self.indent_level -= 1; + self.write_indent(); + self.write("}"); + } + } + + /// Determine whether a record should preserve compact colon style + /// (used for repaired malformed records like `{ name:Alice, age:30 }`). + fn record_preserve_compact_style(&self, span: Span) -> bool { + if !self.allow_compact_recovered_record_style { + return false; + } + + if span.end <= span.start || span.end > self.source.len() { + return false; + } + + let slice = &self.source[span.start..span.end]; + slice.starts_with(b"{ ") + && slice.ends_with(b" }") + && slice.contains(&b',') + && !slice.windows(2).any(|window| window == b": ") + } + + /// Format a single record item (key-value pair or spread). + fn format_record_item(&mut self, item: &RecordItem, compact_colon: bool) { + match item { + RecordItem::Pair(key, value) => { + self.format_record_key(key); + if compact_colon { + self.write(":"); + } else { + self.write(": "); + } + self.format_expression(value); + } + RecordItem::Spread(_, expr) => { + self.write("..."); + self.format_expression(expr); + } + } + } + + /// Format a record key, trimming any surrounding whitespace from the + /// source span. + fn format_record_key(&mut self, key: &Expression) { + let span_contents = self.get_span_content(key.span); + + let start = span_contents + .iter() + .position(|byte| !byte.is_ascii_whitespace()) + .unwrap_or(span_contents.len()); + let end = span_contents + .iter() + .rposition(|byte| !byte.is_ascii_whitespace()) + .map(|idx| idx + 1) + .unwrap_or(0); + + if start < end { + self.write_bytes(&span_contents[start..end]); + } else { + self.format_expression(key); + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Tables + // ───────────────────────────────────────────────────────────────────────── + + /// Format a table expression (`[[col1, col2]; [val1, val2]]`). + pub(super) fn format_table( + &mut self, + columns: &[Expression], + rows: &[Box<[Expression]>], + ) { + self.write("["); + + // Header row + self.write("["); + for (i, col) in columns.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.format_expression(col); + } + self.write("]"); + + // Data rows + if !rows.is_empty() { + self.write("; "); + for (i, row) in rows.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write("["); + for (j, cell) in row.iter().enumerate() { + if j > 0 { + self.write(", "); + } + self.format_expression(cell); + } + self.write("]"); + } + } + + self.write("]"); + } + + // ───────────────────────────────────────────────────────────────────────── + // Match blocks + // ───────────────────────────────────────────────────────────────────────── + + /// Format a match block (`match $val { pattern => expr }`). + pub(super) fn format_match_block(&mut self, matches: &[(MatchPattern, Expression)]) { + self.write("{"); + self.newline(); + self.indent_level += 1; + + for (pattern, expr) in matches { + self.write_indent(); + self.format_match_pattern(pattern); + self.write(" => "); + self.format_block_or_expr(expr); + self.newline(); + } + + self.indent_level -= 1; + self.write_indent(); + self.write("}"); + } + + /// Format a match pattern (value, variable, list, record, or, etc.). + pub(super) fn format_match_pattern(&mut self, pattern: &MatchPattern) { + match &pattern.pattern { + Pattern::Expression(expr) => self.format_expression(expr), + Pattern::Value(_) | Pattern::Variable(_) | Pattern::Rest(_) | Pattern::Garbage => { + self.write_span(pattern.span); + } + Pattern::Or(patterns) => { + for (i, p) in patterns.iter().enumerate() { + if i > 0 { + self.write(" | "); + } + self.format_match_pattern(p); + } + } + Pattern::List(patterns) => { + self.write("["); + for (i, p) in patterns.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.format_match_pattern(p); + } + self.write("]"); + } + Pattern::Record(entries) => { + self.write("{"); + for (i, (key, pat)) in entries.iter().enumerate() { + if i > 0 { + self.write(", "); + } + self.write(key); + self.write(": "); + self.format_match_pattern(pat); + } + self.write("}"); + } + Pattern::IgnoreRest => self.write(".."), + Pattern::IgnoreValue => self.write("_"), + } + } +} diff --git a/src/formatting/comments.rs b/src/formatting/comments.rs new file mode 100644 index 0000000..e1bfbe1 --- /dev/null +++ b/src/formatting/comments.rs @@ -0,0 +1,132 @@ +//! Comment extraction and formatting. +//! +//! Extracts `#`-prefixed comments from Nushell source while respecting string +//! boundaries, and provides methods on [`Formatter`] to emit them at the +//! correct locations in the output. + +use super::Formatter; +use nu_protocol::Span; + +/// Extract all comments from source code, returning their spans and content. +/// +/// Tracks string state so that `#` characters inside quoted strings are not +/// treated as comment starts. +pub(super) fn extract_comments(source: &[u8]) -> Vec<(Span, Vec)> { + let mut comments = Vec::new(); + let mut i = 0; + let mut in_string = false; + let mut string_char = b'"'; + + while i < source.len() { + let c = source[i]; + + // Track string state to avoid matching # inside strings + if !in_string && (c == b'"' || c == b'\'') { + in_string = true; + string_char = c; + i += 1; + continue; + } + + if in_string { + if c == b'\\' && i + 1 < source.len() { + i += 2; // Skip escaped character + continue; + } + if c == string_char { + in_string = false; + } + i += 1; + continue; + } + + // Found a comment + if c == b'#' { + let start = i; + while i < source.len() && source[i] != b'\n' { + i += 1; + } + comments.push((Span::new(start, i), source[start..i].to_vec())); + } + + i += 1; + } + + comments +} + +// ───────────────────────────────────────────────────────────────────────────── +// Formatter comment methods +// ───────────────────────────────────────────────────────────────────────────── + +impl<'a> Formatter<'a> { + /// Emit all comments that fall between `last_pos` and `pos`, each on its + /// own line with the current indentation. + pub(super) fn write_comments_before(&mut self, pos: usize) { + let mut comments_to_write: Vec<_> = self + .comments + .iter() + .enumerate() + .filter(|(i, (span, _))| { + !self.written_comments[*i] && span.start >= self.last_pos && span.end <= pos + }) + .map(|(i, (span, content))| (i, span.start, content.clone())) + .collect(); + + comments_to_write.sort_by_key(|(_, start, _)| *start); + + for (idx, _, content) in comments_to_write { + self.written_comments[idx] = true; + if !self.at_line_start { + if let Some(&last) = self.output.last() { + if last != b'\n' { + self.newline(); + } + } + } + self.write_indent(); + self.output.extend(&content); + self.newline(); + } + } + + /// Emit an inline comment (on the same line) that appears after `after_pos`. + pub(super) fn write_inline_comment(&mut self, after_pos: usize) { + let line_end = self.source[after_pos..] + .iter() + .position(|&b| b == b'\n') + .map_or(self.source.len(), |p| after_pos + p); + + let found = self + .comments + .iter() + .enumerate() + .find(|(i, (span, _))| { + !self.written_comments[*i] && span.start >= after_pos && span.start < line_end + }) + .map(|(i, (span, content))| (i, *span, content.clone())); + + if let Some((idx, span, content)) = found { + self.written_comments[idx] = true; + self.write(" "); + self.output.extend(&content); + self.last_pos = span.end; + } + } + + /// Check whether the given span range contains any comments. + pub(super) fn has_comments_in_span(&self, start: usize, end: usize) -> bool { + self.comments + .iter() + .any(|(span, _)| span.start >= start && span.end <= end) + } + + /// Mark all comments within the given span range as already written. + pub(super) fn mark_comments_written_in_span(&mut self, start: usize, end: usize) { + for (i, (span, _)) in self.comments.iter().enumerate() { + if span.start >= start && span.end <= end { + self.written_comments[i] = true; + } + } + } +} diff --git a/src/formatting/engine.rs b/src/formatting/engine.rs new file mode 100644 index 0000000..61af78e --- /dev/null +++ b/src/formatting/engine.rs @@ -0,0 +1,65 @@ +//! Engine state initialization and command registration. +//! +//! Sets up the Nushell engine state with built-in commands so the parser +//! can resolve syntax for formatting. + +use log::debug; +use nu_protocol::{ + engine::{Call, Command, CommandType as NuCommandType, EngineState, Stack, StateWorkingSet}, + Category, PipelineData, ShellError, Signature, SyntaxShape, +}; + +/// Stub implementation of the `where` keyword so the parser can resolve it. +#[derive(Clone)] +pub(super) struct WhereKeyword; + +impl Command for WhereKeyword { + fn name(&self) -> &str { + "where" + } + + fn signature(&self) -> Signature { + Signature::build("where") + .required( + "condition", + SyntaxShape::RowCondition, + "filter row condition or closure", + ) + .category(Category::Filters) + } + + fn description(&self) -> &str { + "filter values of an input list based on a condition" + } + + fn command_type(&self) -> NuCommandType { + NuCommandType::Keyword + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + _call: &Call, + input: PipelineData, + ) -> Result { + Ok(input) + } +} + +/// Build the default engine state with `nu-cmd-lang` built-ins plus the +/// formatter's own keyword stubs. +pub(super) fn get_engine_state() -> EngineState { + let mut engine_state = nu_cmd_lang::create_default_context(); + let delta = { + let mut working_set = StateWorkingSet::new(&engine_state); + working_set.add_decl(Box::new(WhereKeyword)); + working_set.render() + }; + + if let Err(err) = engine_state.merge_delta(delta) { + debug!("failed to merge formatter context: {err:?}"); + } + + engine_state +} diff --git a/src/formatting/expressions.rs b/src/formatting/expressions.rs new file mode 100644 index 0000000..1a75f68 --- /dev/null +++ b/src/formatting/expressions.rs @@ -0,0 +1,316 @@ +//! Expression formatting. +//! +//! Dispatches each [`Expr`] variant to the appropriate formatting logic, +//! and handles cell paths, subexpressions, binary operations, and ranges. + +use super::Formatter; +use nu_protocol::{ + ast::{CellPath, Expr, Expression, FullCellPath, PathMember}, + Span, +}; + +impl<'a> Formatter<'a> { + /// Format an expression by dispatching on its variant. + pub(super) fn format_expression(&mut self, expr: &Expression) { + match &expr.expr { + // Literals and simple values — preserve original source + Expr::Int(_) + | Expr::Float(_) + | Expr::Bool(_) + | Expr::Nothing + | Expr::DateTime(_) + | Expr::String(_) + | Expr::RawString(_) + | Expr::Binary(_) + | Expr::Filepath(_, _) + | Expr::Directory(_, _) + | Expr::GlobPattern(_, _) + | Expr::Var(_) + | Expr::VarDecl(_) + | Expr::Operator(_) + | Expr::StringInterpolation(_) + | Expr::GlobInterpolation(_, _) + | Expr::ImportPattern(_) + | Expr::Overlay(_) + | Expr::Garbage => { + self.write_expr_span(expr); + } + + Expr::Signature(sig) => { + if self.has_comments_in_span(expr.span.start, expr.span.end) { + self.write_expr_span(expr); + self.last_pos = expr.span.end; + self.mark_comments_written_in_span(expr.span.start, expr.span.end); + } else { + self.format_signature(sig); + } + } + + Expr::Call(call) => self.format_call(call), + Expr::ExternalCall(head, args) => self.format_external_call(head, args), + Expr::BinaryOp(lhs, op, rhs) => self.format_binary_op(lhs, op, rhs), + Expr::UnaryNot(inner) => { + self.write("not "); + self.format_expression(inner); + } + + Expr::Block(block_id) => { + self.format_block_expression(*block_id, expr.span, false); + } + Expr::Closure(block_id) => { + self.format_closure_expression(*block_id, expr.span); + } + Expr::Subexpression(block_id) => { + self.format_subexpression(*block_id, expr.span); + } + + Expr::List(items) => self.format_list(items), + Expr::Record(items) => self.format_record(items, expr.span), + Expr::Table(table) => self.format_table(&table.columns, &table.rows), + + Expr::Range(range) => self.format_range(range), + Expr::CellPath(cell_path) => self.format_cell_path(cell_path), + Expr::FullCellPath(full_path) => { + self.format_full_cell_path(full_path); + } + + Expr::RowCondition(_) => self.write_expr_span(expr), + + Expr::Keyword(keyword) => { + self.write_span(keyword.span); + self.space(); + self.format_block_or_expr(&keyword.expr); + } + + Expr::ValueWithUnit(_) => { + // Preserve original span — the parser normalises units + // (e.g. 1kb → 1000b internally). + self.write_expr_span(expr); + } + + Expr::MatchBlock(matches) => self.format_match_block(matches), + + Expr::Collect(_, inner) => self.format_expression(inner), + + Expr::AttributeBlock(attr_block) => { + for attr in &attr_block.attributes { + self.write_attribute_span(&attr.expr); + self.newline(); + } + self.format_expression(&attr_block.item); + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Binary operations and ranges + // ───────────────────────────────────────────────────────────────────────── + + /// Format a binary operation (`lhs op rhs`). + pub(super) fn format_binary_op(&mut self, lhs: &Expression, op: &Expression, rhs: &Expression) { + self.preserve_subexpr_parens_depth += 1; + self.format_expression(lhs); + self.preserve_subexpr_parens_depth -= 1; + + // Always add space around binary operators for valid Nushell syntax + self.write(" "); + self.format_expression(op); + self.write(" "); + + // For assignment operators, unwrap Subexpression RHS to avoid double parens + if let Expr::Operator(nu_protocol::ast::Operator::Assignment(_)) = &op.expr { + if let Expr::Subexpression(block_id) = &rhs.expr { + let block = self.working_set.get_block(*block_id); + self.format_block(block); + return; + } + } + + self.preserve_subexpr_parens_depth += 1; + self.format_expression(rhs); + self.preserve_subexpr_parens_depth -= 1; + } + + /// Format a range expression (e.g. `1..5`, `1..2..10`). + pub(super) fn format_range(&mut self, range: &nu_protocol::ast::Range) { + let op = range.operator.to_string(); + if let Some(from) = &range.from { + self.format_expression(from); + } + self.write(&op); + if let Some(next) = &range.next { + self.format_expression(next); + // For step ranges (start..step..end), write the operator again before end + self.write(&op); + } + if let Some(to) = &range.to { + self.format_expression(to); + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Cell paths + // ───────────────────────────────────────────────────────────────────────── + + /// Format a bare cell-path literal (e.g. `name.0`). + pub(super) fn format_cell_path(&mut self, cell_path: &CellPath) { + for (i, member) in cell_path.members.iter().enumerate() { + if i > 0 { + self.write("."); + } + self.format_cell_path_member(member); + } + } + + /// Format a full cell-path (expression + tail, e.g. `$record.name`). + pub(super) fn format_full_cell_path(&mut self, cell_path: &FullCellPath) { + self.format_expression(&cell_path.head); + for member in &cell_path.tail { + self.write("."); + self.format_cell_path_member(member); + } + } + + /// Format a single cell-path member (string key or integer index, with + /// optional `?` suffix). + fn format_cell_path_member(&mut self, member: &PathMember) { + match member { + PathMember::String { val, optional, .. } => { + self.write(val); + if *optional { + self.write("?"); + } + } + PathMember::Int { val, optional, .. } => { + self.write(&val.to_string()); + if *optional { + self.write("?"); + } + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Subexpressions + // ───────────────────────────────────────────────────────────────────────── + + /// Format a subexpression (`(…)`). + /// + /// Decides whether to keep or strip the parentheses based on context + /// (conditional, precedence, multiline, `$in` shorthand, etc.). + pub(super) fn format_subexpression(&mut self, block_id: nu_protocol::BlockId, span: Span) { + let block = self.working_set.get_block(block_id); + let source_has_newline = + span.end > span.start && self.source[span.start..span.end].contains(&b'\n'); + + let has_explicit_parens = self.source.get(span.start) == Some(&b'(') + && self.source.get(span.end - 1) == Some(&b')'); + + if !has_explicit_parens { + self.format_block(block); + return; + } + + // Preserve explicit multiline parenthesised expressions (author intent). + if source_has_newline { + self.write_span(span); + return; + } + + if self.conditional_context_depth > 0 { + let can_drop_parens = + block.pipelines.len() == 1 && block.pipelines[0].elements.len() == 1; + if can_drop_parens { + self.format_block(block); + return; + } + self.write("("); + self.format_block(block); + self.write(")"); + return; + } + + // String interpolations inside subexpressions don't need parentheses + if block.pipelines.len() == 1 && block.pipelines[0].elements.len() == 1 { + if let Expr::StringInterpolation(_) = &block.pipelines[0].elements[0].expr.expr { + self.format_block(block); + return; + } + } + + // Preserve explicit parens in precedence-sensitive contexts + if self.preserve_subexpr_parens_depth == 0 + && block.pipelines.len() == 1 + && !block.pipelines[0].elements.is_empty() + { + let first_element = &block.pipelines[0].elements[0]; + let element_text = String::from_utf8_lossy( + &self.source[first_element.expr.span.start..first_element.expr.span.end], + ); + if element_text == "$in" { + self.format_block(block); + return; + } + } + + self.write("("); + let is_simple = !source_has_newline + && block.pipelines.len() == 1 + && block.pipelines[0].elements.len() <= 3; + + if is_simple { + self.format_block(block); + } else { + self.newline(); + self.indent_level += 1; + self.format_block(block); + self.newline(); + self.indent_level -= 1; + self.write_indent(); + } + self.write(")"); + } + + /// Format an expression that could be a block or a regular expression. + pub(super) fn format_block_or_expr(&mut self, expr: &Expression) { + match &expr.expr { + Expr::Block(block_id) => { + self.format_block_expression(*block_id, expr.span, true); + } + Expr::Closure(block_id) => { + self.format_closure_expression(*block_id, expr.span); + } + _ => self.format_expression(expr), + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Helpers + // ───────────────────────────────────────────────────────────────────────── + + /// Check if an expression is a simple primitive (used by collection + /// formatting to decide inline vs. multiline layout). + pub(super) fn is_simple_expr(&self, expr: &Expression) -> bool { + match &expr.expr { + Expr::Int(_) + | Expr::Float(_) + | Expr::Bool(_) + | Expr::String(_) + | Expr::RawString(_) + | Expr::Nothing + | Expr::Var(_) + | Expr::Filepath(_, _) + | Expr::Directory(_, _) + | Expr::GlobPattern(_, _) + | Expr::DateTime(_) => true, + Expr::FullCellPath(full_path) => { + full_path.tail.is_empty() + && matches!( + &full_path.head.expr, + Expr::Var(_) | Expr::Garbage | Expr::Int(_) | Expr::String(_) + ) + } + _ => false, + } + } +} diff --git a/src/formatting/garbage.rs b/src/formatting/garbage.rs new file mode 100644 index 0000000..8d5a252 --- /dev/null +++ b/src/formatting/garbage.rs @@ -0,0 +1,192 @@ +//! Garbage / parse-failure detection. +//! +//! These helpers walk the AST searching for `Expr::Garbage` nodes, +//! which indicate regions the parser could not understand. The +//! formatter uses this information to decide whether a block can be +//! safely reformatted or must be emitted verbatim. + +use nu_protocol::ast::{ + Argument, Block, Expr, Expression, ExternalArgument, ListItem, MatchPattern, Pattern, Pipeline, + PipelineRedirection, RecordItem, RedirectionTarget, +}; +use nu_protocol::engine::StateWorkingSet; + +/// Check if a block contains any garbage expressions. +pub(super) fn block_contains_garbage(working_set: &StateWorkingSet<'_>, block: &Block) -> bool { + block + .pipelines + .iter() + .any(|pipeline| pipeline_contains_garbage(working_set, pipeline)) +} + +/// Check if a pipeline contains garbage expressions. +fn pipeline_contains_garbage(working_set: &StateWorkingSet<'_>, pipeline: &Pipeline) -> bool { + pipeline.elements.iter().any(|element| { + expr_contains_garbage(working_set, &element.expr) + || element + .redirection + .as_ref() + .is_some_and(|redir| redirection_contains_garbage(working_set, redir)) + }) +} + +/// Check if a redirection contains garbage expressions. +fn redirection_contains_garbage( + working_set: &StateWorkingSet<'_>, + redir: &PipelineRedirection, +) -> bool { + match redir { + PipelineRedirection::Single { target, .. } => { + redirection_target_contains_garbage(working_set, target) + } + PipelineRedirection::Separate { out, err } => { + redirection_target_contains_garbage(working_set, out) + || redirection_target_contains_garbage(working_set, err) + } + } +} + +/// Check if a redirection target contains garbage. +fn redirection_target_contains_garbage( + working_set: &StateWorkingSet<'_>, + target: &RedirectionTarget, +) -> bool { + match target { + RedirectionTarget::File { expr, .. } => expr_contains_garbage(working_set, expr), + RedirectionTarget::Pipe { .. } => false, + } +} + +/// Check if a call argument contains garbage expressions. +fn argument_contains_garbage(working_set: &StateWorkingSet<'_>, arg: &Argument) -> bool { + match arg { + Argument::Positional(expr) | Argument::Unknown(expr) | Argument::Spread(expr) => { + expr_contains_garbage(working_set, expr) + } + Argument::Named(named) => named + .2 + .as_ref() + .is_some_and(|expr| expr_contains_garbage(working_set, expr)), + } +} + +/// Check if any expression in the tree contains garbage nodes. +pub(super) fn expr_contains_garbage(working_set: &StateWorkingSet<'_>, expr: &Expression) -> bool { + match &expr.expr { + Expr::Garbage => true, + Expr::Call(call) => call + .arguments + .iter() + .any(|arg| argument_contains_garbage(working_set, arg)), + Expr::ExternalCall(head, args) => { + expr_contains_garbage(working_set, head) + || args.iter().any(|arg| match arg { + ExternalArgument::Regular(expr) | ExternalArgument::Spread(expr) => { + expr_contains_garbage(working_set, expr) + } + }) + } + Expr::BinaryOp(lhs, op, rhs) => { + expr_contains_garbage(working_set, lhs) + || expr_contains_garbage(working_set, op) + || expr_contains_garbage(working_set, rhs) + } + Expr::UnaryNot(inner) => expr_contains_garbage(working_set, inner), + Expr::Block(block_id) | Expr::Closure(block_id) | Expr::Subexpression(block_id) => { + block_contains_garbage(working_set, working_set.get_block(*block_id)) + } + Expr::Range(range) => { + range + .from + .as_ref() + .is_some_and(|e| expr_contains_garbage(working_set, e)) + || range + .next + .as_ref() + .is_some_and(|e| expr_contains_garbage(working_set, e)) + || range + .to + .as_ref() + .is_some_and(|e| expr_contains_garbage(working_set, e)) + } + Expr::List(items) => items.iter().any(|item| match item { + ListItem::Item(expr) | ListItem::Spread(_, expr) => { + expr_contains_garbage(working_set, expr) + } + }), + Expr::Record(items) => items.iter().any(|item| match item { + RecordItem::Pair(k, v) => { + expr_contains_garbage(working_set, k) || expr_contains_garbage(working_set, v) + } + RecordItem::Spread(_, expr) => expr_contains_garbage(working_set, expr), + }), + Expr::Table(table) => { + table + .columns + .iter() + .any(|col| expr_contains_garbage(working_set, col)) + || table + .rows + .iter() + .flat_map(|row| row.iter()) + .any(|cell| expr_contains_garbage(working_set, cell)) + } + Expr::CellPath(_) => false, + Expr::FullCellPath(full_path) => expr_contains_garbage(working_set, &full_path.head), + Expr::RowCondition(block_id) => { + block_contains_garbage(working_set, working_set.get_block(*block_id)) + } + Expr::Keyword(keyword) => expr_contains_garbage(working_set, &keyword.expr), + Expr::MatchBlock(matches) => matches.iter().any(|(pattern, arm_expr)| { + pattern_contains_garbage(working_set, pattern) + || expr_contains_garbage(working_set, arm_expr) + }), + Expr::Collect(_, inner) => expr_contains_garbage(working_set, inner), + Expr::AttributeBlock(attr_block) => { + attr_block + .attributes + .iter() + .any(|attr| expr_contains_garbage(working_set, &attr.expr)) + || expr_contains_garbage(working_set, &attr_block.item) + } + Expr::Int(_) + | Expr::Float(_) + | Expr::Bool(_) + | Expr::Nothing + | Expr::DateTime(_) + | Expr::String(_) + | Expr::RawString(_) + | Expr::Binary(_) + | Expr::Filepath(_, _) + | Expr::Directory(_, _) + | Expr::GlobPattern(_, _) + | Expr::Var(_) + | Expr::VarDecl(_) + | Expr::Operator(_) + | Expr::StringInterpolation(_) + | Expr::GlobInterpolation(_, _) + | Expr::ImportPattern(_) + | Expr::Overlay(_) + | Expr::Signature(_) + | Expr::ValueWithUnit(_) => false, + } +} + +/// Check if a match pattern contains garbage. +fn pattern_contains_garbage(working_set: &StateWorkingSet<'_>, pattern: &MatchPattern) -> bool { + match &pattern.pattern { + Pattern::Garbage => true, + Pattern::Expression(expr) => expr_contains_garbage(working_set, expr), + Pattern::Or(patterns) | Pattern::List(patterns) => patterns + .iter() + .any(|pattern| pattern_contains_garbage(working_set, pattern)), + Pattern::Record(entries) => entries + .iter() + .any(|(_, pattern)| pattern_contains_garbage(working_set, pattern)), + Pattern::Value(_) + | Pattern::Variable(_) + | Pattern::Rest(_) + | Pattern::IgnoreRest + | Pattern::IgnoreValue => false, + } +} diff --git a/src/formatting/mod.rs b/src/formatting/mod.rs new file mode 100644 index 0000000..ec96c67 --- /dev/null +++ b/src/formatting/mod.rs @@ -0,0 +1,455 @@ +//! Core formatting module for nufmt. +//! +//! Walks the Nushell AST and emits properly formatted code. The heavy +//! lifting is split across focused submodules: +//! +//! - [`engine`] — Engine state setup and command stubs +//! - [`comments`] — Comment extraction and writing +//! - [`expressions`] — Expression formatting dispatch +//! - [`calls`] — Call and argument formatting +//! - [`blocks`] — Block, pipeline, and closure formatting +//! - [`collections`] — List, record, table, and match formatting +//! - [`repair`] — Parse-error repair utilities +//! - [`garbage`] — Garbage / parse-failure detection + +mod blocks; +mod calls; +mod collections; +mod comments; +mod engine; +mod expressions; +mod garbage; +mod repair; + +use crate::config::Config; +use crate::format_error::FormatError; +use log::{debug, trace}; +use nu_parser::parse; +use nu_protocol::{engine::StateWorkingSet, ParseError, Span}; + +use comments::extract_comments; +use engine::get_engine_state; +use garbage::block_contains_garbage; +use repair::{ + detect_compact_if_else_spans, detect_missing_record_comma_spans, is_fatal_parse_error, + try_repair_parse_errors, ParseRepairOutcome, +}; + +// ───────────────────────────────────────────────────────────────────────────── +// Formatter struct +// ───────────────────────────────────────────────────────────────────────────── + +/// The main formatter context that tracks indentation and other state. +pub(crate) struct Formatter<'a> { + /// The original source bytes. + pub(crate) source: &'a [u8], + /// The working set for looking up blocks and other data. + pub(crate) working_set: &'a StateWorkingSet<'a>, + /// Configuration options. + pub(crate) config: &'a Config, + /// Current indentation level. + pub(crate) indent_level: usize, + /// Output buffer. + pub(crate) output: Vec, + /// Track if we're at the start of a line (for indentation). + pub(crate) at_line_start: bool, + /// Comments extracted from source, indexed by their end position. + pub(crate) comments: Vec<(Span, Vec)>, + /// Track which comments have been written. + pub(crate) written_comments: Vec, + /// Current position in source being processed. + pub(crate) last_pos: usize, + /// Track nested conditional argument formatting to preserve explicit parens. + pub(crate) conditional_context_depth: usize, + /// Force preserving explicit parens for subexpressions inside + /// precedence-sensitive contexts. + pub(crate) preserve_subexpr_parens_depth: usize, + /// Allow compact inline record style used for repaired malformed records. + pub(crate) allow_compact_recovered_record_style: bool, +} + +/// Command types for formatting purposes. +#[derive(Debug, Clone)] +pub(crate) enum CommandType { + Def, + Extern, + Conditional, + Let, + Block, + Regular, +} + +impl<'a> Formatter<'a> { + fn new( + source: &'a [u8], + working_set: &'a StateWorkingSet<'a>, + config: &'a Config, + allow_compact_recovered_record_style: bool, + ) -> Self { + let comments = extract_comments(source); + let written_comments = vec![false; comments.len()]; + Self { + source, + working_set, + config, + indent_level: 0, + output: Vec::new(), + at_line_start: true, + comments, + written_comments, + last_pos: 0, + conditional_context_depth: 0, + preserve_subexpr_parens_depth: 0, + allow_compact_recovered_record_style, + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Basic output methods + // ───────────────────────────────────────────────────────────────────────── + + /// Write indentation if at the start of a line. + pub(crate) fn write_indent(&mut self) { + if self.at_line_start { + let indent = " ".repeat(self.config.indent * self.indent_level); + self.output.extend(indent.as_bytes()); + self.at_line_start = false; + } + } + + /// Write a string to output. + pub(crate) fn write(&mut self, s: &str) { + self.write_indent(); + self.output.extend(s.as_bytes()); + } + + /// Write bytes to output. + pub(crate) fn write_bytes(&mut self, bytes: &[u8]) { + self.write_indent(); + self.output.extend(bytes); + } + + /// Write a newline. + pub(crate) fn newline(&mut self) { + self.output.push(b'\n'); + self.at_line_start = true; + } + + /// Write a space if not at line start and not already following whitespace + /// or an opener. + pub(crate) fn space(&mut self) { + if !self.at_line_start && !self.output.is_empty() { + if let Some(&last) = self.output.last() { + if !matches!(last, b' ' | b'\n' | b'\t' | b'(' | b'[') { + self.output.push(b' '); + } + } + } + } + + // ───────────────────────────────────────────────────────────────────────── + // Span and source helpers + // ───────────────────────────────────────────────────────────────────────── + + /// Get the source content for a span. + pub(crate) fn get_span_content(&self, span: Span) -> Vec { + self.source[span.start..span.end].to_vec() + } + + /// Write the original source content for a span. + pub(crate) fn write_span(&mut self, span: Span) { + let content = self.source[span.start..span.end].to_vec(); + self.write_bytes(&content); + } + + /// Write the original source content for an expression's span. + pub(crate) fn write_expr_span(&mut self, expr: &nu_protocol::ast::Expression) { + self.write_span(expr.span); + } + + /// Write an attribute expression while preserving a leading `@` sigil. + pub(crate) fn write_attribute_span(&mut self, expr: &nu_protocol::ast::Expression) { + let mut start = expr.span.start; + if start > 0 && self.source[start - 1] == b'@' { + start -= 1; + } + self.write_span(Span { + start, + end: expr.span.end, + }); + } + + /// Get the final output. + fn finish(self) -> Vec { + self.output + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Public API +// ───────────────────────────────────────────────────────────────────────────── + +/// Format an array of bytes. +/// +/// Reading the file gives you a list of bytes. +pub(crate) fn format_inner(contents: &[u8], config: &Config) -> Result, FormatError> { + format_inner_with_options(contents, config) +} + +fn format_inner_with_options(contents: &[u8], config: &Config) -> Result, FormatError> { + let engine_state = get_engine_state(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let parsed_block = parse(&mut working_set, None, contents, false); + trace!("parsed block:\n{:?}", &parsed_block); + + let source_text = String::from_utf8_lossy(contents); + let mut malformed_spans: Vec = working_set + .parse_errors + .iter() + .map(ParseError::span) + .collect(); + malformed_spans.extend(detect_compact_if_else_spans(&source_text)); + malformed_spans.extend(detect_missing_record_comma_spans(&source_text)); + + let has_garbage = block_contains_garbage(&working_set, &parsed_block); + let has_fatal_parse_error = working_set.parse_errors.iter().any(is_fatal_parse_error); + + if !malformed_spans.is_empty() || has_garbage { + if let Some(repaired) = try_repair_parse_errors(contents, &malformed_spans) { + debug!( + "retrying formatting after targeted parse-error repair ({} parse errors)", + working_set.parse_errors.len() + ); + return match repaired { + ParseRepairOutcome::Reformat(repaired_source) => { + format_inner_with_options(&repaired_source, config) + } + }; + } + } + + if has_fatal_parse_error && has_garbage { + debug!( + "skipping formatting due to fatal parse errors with garbage AST nodes ({} found)", + working_set.parse_errors.len() + ); + return Ok(contents.to_vec()); + } + + // Note: We don't reject files with "garbage" nodes because the parser + // produces garbage for commands it doesn't know about (e.g., `where`, `each`) + // when using only nu-cmd-lang context. Instead, we output original span + // content for expressions we can't format. + + if parsed_block.pipelines.is_empty() { + trace!("block has no pipelines!"); + debug!("File has no code to format."); + let comments = extract_comments(contents); + if comments.is_empty() { + return Ok(contents.to_vec()); + } + } + + let mut formatter = Formatter::new(contents, &working_set, config, true); + + // Write leading comments + if let Some(first_pipeline) = parsed_block.pipelines.first() { + if let Some(first_elem) = first_pipeline.elements.first() { + formatter.write_comments_before(first_elem.expr.span.start); + } + } + + formatter.format_block(&parsed_block); + + // Write trailing comments + let end_pos = parsed_block + .pipelines + .last() + .and_then(|p| p.elements.last()) + .map(|e| e.expr.span.end) + .unwrap_or(0); + + if end_pos > 0 { + formatter.last_pos = end_pos; + formatter.write_comments_before(contents.len()); + } + + Ok(formatter.finish()) +} + +/// Make sure there is a newline at the end of a buffer. +pub(crate) fn add_newline_at_end_of_file(out: Vec) -> Vec { + if out.last() == Some(&b'\n') { + out + } else { + let mut result = out; + result.push(b'\n'); + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn format(input: &str) -> String { + let config = Config::default(); + let result = format_inner(input.as_bytes(), &config).expect("formatting failed"); + String::from_utf8(result).expect("invalid utf8") + } + + #[test] + fn repair_patterns_do_not_mutate_double_quoted_strings() { + let input = "let s = \"if(true){1}else{2}\""; + let output = format(input); + assert!(output.contains("\"if(true){1}else{2}\"")); + } + + #[test] + fn repair_patterns_do_not_mutate_record_like_strings() { + let input = "let s = \"{ name: Alice }\""; + let output = format(input); + assert!(output.contains("\"{ name: Alice }\"")); + } + + #[test] + fn test_simple_let() { + let input = "let x = 1"; + let output = format(input); + assert_eq!(output, "let x = 1"); + } + + #[test] + fn test_let_with_spaces() { + let input = "let x = 1"; + let output = format(input); + assert_eq!(output, "let x = 1"); + } + + #[test] + fn test_simple_def() { + let input = "def foo [] { echo hello }"; + let output = format(input); + assert!(output.contains("def foo")); + } + + #[test] + fn test_pipeline() { + let input = "ls | get name"; + let output = format(input); + assert!(output.contains("| get")); + } + + #[test] + fn test_if_else() { + let input = "if true { echo yes } else { echo no }"; + let output = format(input); + assert!(output.contains("if true")); + assert!(output.contains("else")); + } + + #[test] + fn test_for_loop() { + let input = "for x in [1, 2, 3] { print $x }"; + let output = format(input); + assert!(output.contains("for x in")); + assert!(output.contains("{ print")); + } + + #[test] + fn test_while_loop() { + let input = "while true { break }"; + let output = format(input); + assert!(output.contains("while true")); + assert!(output.contains("{ break }")); + } + + #[test] + fn test_closure() { + let input = "{|x| $x * 2 }"; + let output = format(input); + assert!(output.contains("{|x|")); + } + + #[test] + fn test_multiline() { + let input = "let x = 1\nlet y = 2"; + let output = format(input); + assert!(output.contains("let x = 1")); + assert!(output.contains("let y = 2")); + assert!(output.contains("\n")); + } + + #[test] + fn test_list_simple() { + let input = "[1, 2, 3]"; + let output = format(input); + assert_eq!(output, "[1, 2, 3]"); + } + + #[test] + fn test_record_simple() { + let input = "{a: 1, b: 2}"; + let output = format(input); + assert!(output.contains("a: 1")); + } + + #[test] + fn test_comment_preservation() { + let input = "# this is a comment\nlet x = 1"; + let output = format(input); + assert!(output.contains("# this is a comment")); + } + + #[test] + fn test_idempotency_let() { + let input = "let x = 1"; + let first = format(input); + let second = format(&first); + assert_eq!(first, second, "Formatting should be idempotent"); + } + + #[test] + fn test_idempotency_def() { + let input = "def foo [x: int] { $x + 1 }"; + let first = format(input); + let second = format(&first); + assert_eq!(first, second, "Formatting should be idempotent"); + } + + #[test] + fn test_idempotency_if_else() { + let input = "if true { echo yes } else { echo no }"; + let first = format(input); + let second = format(&first); + assert_eq!(first, second, "Formatting should be idempotent"); + } + + #[test] + fn test_idempotency_for_loop() { + let input = "for x in [1, 2, 3] { print $x }"; + let first = format(input); + let second = format(&first); + assert_eq!(first, second, "Formatting should be idempotent"); + } + + #[test] + fn test_idempotency_complex() { + let input = "# comment\nlet x = 1\ndef foo [] { $x }"; + let first = format(input); + let second = format(&first); + assert_eq!(first, second, "Formatting should be idempotent"); + } + + #[test] + fn issue98_margin_has_effect() { + let input = "def foo [] {\n let out = 1\n out\n}\n\ndef bar [] {\n let out = 1\n out\n}"; + let config = Config::new(4, 80, 2); + let result = format_inner(input.as_bytes(), &config).expect("formatting failed"); + let output = String::from_utf8(result).expect("invalid utf8"); + + let expected = "def foo [] {\n let out = 1\n out\n}\n\n\ndef bar [] {\n let out = 1\n out\n}"; + assert_eq!(output, expected); + } +} diff --git a/src/formatting/repair.rs b/src/formatting/repair.rs new file mode 100644 index 0000000..1ea41bb --- /dev/null +++ b/src/formatting/repair.rs @@ -0,0 +1,449 @@ +//! Parse-error repair utilities. +//! +//! When the Nushell parser emits recoverable errors (compact `if/else`, +//! missing record commas, etc.), these routines attempt to patch the +//! source text so a second parse succeeds cleanly. Repairs are +//! region-scoped and never mutate string-literal contents. + +use nu_protocol::{ParseError, Span}; + +/// Whether a parse error is fatal enough that formatting should be skipped. +pub(super) fn is_fatal_parse_error(error: &ParseError) -> bool { + matches!( + error, + ParseError::ExtraTokens(_) + | ParseError::ExtraTokensAfterClosingDelimiter(_) + | ParseError::UnexpectedEof(_, _) + | ParseError::Unclosed(_, _) + | ParseError::Unbalanced(_, _, _) + | ParseError::IncompleteMathExpression(_) + | ParseError::UnknownCommand(_) + | ParseError::Expected(_, _) + | ParseError::ExpectedWithStringMsg(_, _) + | ParseError::ExpectedWithDidYouMean(_, _, _) + ) +} + +/// The outcome of a successful repair pass. +pub(super) enum ParseRepairOutcome { + /// Repaired source bytes ready for re-parsing and re-formatting. + Reformat(Vec), +} + +// ───────────────────────────────────────────────────────────────────────────── +// String-literal safeguards +// ───────────────────────────────────────────────────────────────────────────── + +/// Find byte ranges of string literals (single- and double-quoted) so that +/// transformations can skip their contents. +fn find_string_literal_ranges(source: &str) -> Vec<(usize, usize)> { + let bytes = source.as_bytes(); + let mut ranges = Vec::new(); + let mut in_string: Option = None; + let mut escaped = false; + let mut start = 0; + + for (idx, &byte) in bytes.iter().enumerate() { + if let Some(quote) = in_string { + if escaped { + escaped = false; + continue; + } + if byte == b'\\' { + escaped = true; + continue; + } + if byte == quote { + ranges.push((start, idx + 1)); + in_string = None; + } + continue; + } + + if byte == b'"' || byte == b'\'' { + start = idx; + in_string = Some(byte); + escaped = false; + } + } + + // Unclosed string — extend to EOF + if in_string.is_some() { + ranges.push((start, source.len())); + } + + ranges +} + +/// Apply a transformation only to the non-string-literal portions of `source`. +fn transform_outside_string_literals( + source: &str, + mut transform: impl FnMut(&str) -> (String, bool), +) -> (String, bool) { + let string_ranges = find_string_literal_ranges(source); + + if string_ranges.is_empty() { + return transform(source); + } + + let mut output = String::with_capacity(source.len()); + let mut changed = false; + let mut cursor = 0; + + for (start, end) in string_ranges { + if cursor < start { + let (transformed, seg_changed) = transform(&source[cursor..start]); + output.push_str(&transformed); + changed |= seg_changed; + } + output.push_str(&source[start..end]); + cursor = end; + } + + if cursor < source.len() { + let (transformed, seg_changed) = transform(&source[cursor..]); + output.push_str(&transformed); + changed |= seg_changed; + } + + (output, changed) +} + +/// Return ranges of `source` that are *not* inside string literals. +fn non_string_ranges(source: &str) -> Vec<(usize, usize)> { + let string_ranges = find_string_literal_ranges(source); + + if string_ranges.is_empty() { + return vec![(0, source.len())]; + } + + let mut ranges = Vec::new(); + let mut cursor = 0; + + for (start, end) in string_ranges { + if cursor < start { + ranges.push((cursor, start)); + } + cursor = end; + } + + if cursor < source.len() { + ranges.push((cursor, source.len())); + } + + ranges +} + +// ───────────────────────────────────────────────────────────────────────────── +// Compact if/else repair +// ───────────────────────────────────────────────────────────────────────────── + +/// Detect spans that likely contain compact `if/else` patterns (e.g. +/// `if(cond){body}else{body}`). +pub(super) fn detect_compact_if_else_spans(source: &str) -> Vec { + let mut spans = Vec::new(); + let patterns = ["if(", "}else{", "} else{", "}else {"]; + + for (range_start, range_end) in non_string_ranges(source) { + let segment = &source[range_start..range_end]; + for pattern in patterns { + for (offset, _) in segment.match_indices(pattern) { + let idx = range_start + offset; + spans.push(Span { + start: idx.saturating_sub(32), + end: (idx + pattern.len() + 32).min(source.len()), + }); + } + } + } + + spans +} + +/// Repair compact `if/else` within a single (non-string) segment. +fn repair_compact_if_else_segment(segment: &str) -> (String, bool) { + let mut repaired = segment.to_string(); + let mut changed = false; + let replacements = [ + ("if(", "if ("), + ("){", ") {"), + ("}else{", "} else {"), + ("} else{", "} else {"), + ("}else {", "} else {"), + ]; + + for (from, to) in replacements { + if repaired.contains(from) { + repaired = repaired.replace(from, to); + changed = true; + } + } + + (repaired, changed) +} + +/// Attempt to repair compact `if/else` patterns in `source`. +pub(super) fn try_repair_compact_if_else(source: &str) -> (String, bool) { + transform_outside_string_literals(source, repair_compact_if_else_segment) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Missing record-comma repair +// ───────────────────────────────────────────────────────────────────────────── + +/// Detect byte positions where a comma is likely missing between record fields. +fn detect_missing_record_comma_positions(source: &str) -> Vec { + let bytes = source.as_bytes(); + let mut in_string = false; + let mut escaped = false; + let mut insert_positions: Vec = Vec::new(); + + for (idx, &byte) in bytes.iter().enumerate() { + if in_string { + if escaped { + escaped = false; + continue; + } + if byte == b'\\' { + escaped = true; + continue; + } + if byte == b'"' { + in_string = false; + + // Look ahead for `identifier:` pattern (next record key) + let mut lookahead = idx + 1; + while lookahead < bytes.len() && bytes[lookahead].is_ascii_whitespace() { + lookahead += 1; + } + + if lookahead < bytes.len() + && (bytes[lookahead].is_ascii_alphabetic() || bytes[lookahead] == b'_') + { + let mut key_end = lookahead; + while key_end < bytes.len() + && (bytes[key_end].is_ascii_alphanumeric() + || bytes[key_end] == b'_' + || bytes[key_end] == b'-') + { + key_end += 1; + } + + if key_end < bytes.len() && bytes[key_end] == b':' { + let between = &bytes[idx + 1..lookahead]; + if !between.contains(&b',') { + insert_positions.push(idx + 1); + } + } + } + } + continue; + } + + if byte == b'"' { + in_string = true; + escaped = false; + } + } + + insert_positions +} + +/// Detect spans around positions where record commas are likely missing. +pub(super) fn detect_missing_record_comma_spans(source: &str) -> Vec { + detect_missing_record_comma_positions(source) + .into_iter() + .map(|idx| Span { + start: idx.saturating_sub(32), + end: (idx + 32).min(source.len()), + }) + .collect() +} + +/// Attempt to insert missing commas between record fields. +fn try_repair_missing_record_commas(source: &str) -> (String, bool) { + let insert_positions = detect_missing_record_comma_positions(source); + + if insert_positions.is_empty() { + return (source.to_string(), false); + } + + let mut repaired = String::with_capacity(source.len() + insert_positions.len() * 2); + let mut next_insert_idx = 0; + + for (idx, ch) in source.char_indices() { + while next_insert_idx < insert_positions.len() && insert_positions[next_insert_idx] == idx { + repaired.push(','); + repaired.push(' '); + next_insert_idx += 1; + } + repaired.push(ch); + } + + (repaired, true) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Brace padding +// ───────────────────────────────────────────────────────────────────────────── + +/// Add spaces inside braces that are jammed against content +/// (e.g. `{foo` → `{ foo`, `bar}` → `bar }`). +fn add_brace_padding_segment(segment: &str) -> (String, bool) { + let bytes = segment.as_bytes(); + let mut output: Vec = Vec::with_capacity(bytes.len() + 8); + let mut changed = false; + + for (idx, &byte) in bytes.iter().enumerate() { + if byte == b'{' { + output.push(byte); + if let Some(next) = bytes.get(idx + 1) { + if !next.is_ascii_whitespace() && *next != b'}' { + output.push(b' '); + changed = true; + } + } + continue; + } + + if byte == b'}' { + if let Some(last) = output.last() { + if !last.is_ascii_whitespace() && *last != b'{' { + output.push(b' '); + changed = true; + } + } + output.push(byte); + continue; + } + + output.push(byte); + } + + let output = String::from_utf8(output).unwrap_or_else(|_| segment.to_string()); + (output, changed) +} + +/// Add brace padding, skipping string literal contents. +fn add_brace_padding_outside_strings(source: &str) -> (String, bool) { + transform_outside_string_literals(source, add_brace_padding_segment) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Span merging and region repair orchestration +// ───────────────────────────────────────────────────────────────────────────── + +/// Merge overlapping spans into a minimal set of non-overlapping ranges. +fn merge_spans(spans: &[Span], len: usize) -> Vec { + let mut normalised: Vec = spans + .iter() + .filter_map(|span| { + if span.start >= len || span.end <= span.start { + return None; + } + Some(Span { + start: span.start, + end: span.end.min(len), + }) + }) + .collect(); + + normalised.sort_by_key(|span| (span.start, span.end)); + + let mut merged: Vec = Vec::new(); + for span in normalised { + if let Some(last) = merged.last_mut() { + if span.start <= last.end { + last.end = last.end.max(span.end); + continue; + } + } + merged.push(span); + } + + merged +} + +/// Adjust an index to the nearest valid UTF-8 char boundary. +fn align_to_char_boundary(source: &str, index: usize, forward: bool) -> usize { + let mut adjusted = index.min(source.len()); + + if forward { + while adjusted < source.len() && !source.is_char_boundary(adjusted) { + adjusted += 1; + } + } else { + while adjusted > 0 && !source.is_char_boundary(adjusted) { + adjusted -= 1; + } + } + + adjusted +} + +/// Apply all repair strategies to a single source region. +fn repair_region(source: &str) -> (String, bool) { + let (repaired_if_else, if_else_changed) = try_repair_compact_if_else(source); + let (mut repaired_record, record_changed) = try_repair_missing_record_commas(&repaired_if_else); + + let mut brace_spacing_changed = false; + if record_changed { + let (padded, padded_changed) = add_brace_padding_outside_strings(&repaired_record); + repaired_record = padded; + brace_spacing_changed = padded_changed; + } + + ( + repaired_record, + if_else_changed || record_changed || brace_spacing_changed, + ) +} + +/// Attempt to repair parse errors by patching malformed source regions. +/// +/// Returns `Some(ParseRepairOutcome::Reformat(…))` with the patched source +/// bytes if any repair was applied, or `None` if nothing could be done. +pub(super) fn try_repair_parse_errors( + contents: &[u8], + malformed_spans: &[Span], +) -> Option { + if malformed_spans.is_empty() { + return None; + } + + let source = String::from_utf8_lossy(contents).into_owned(); + let spans = merge_spans(malformed_spans, source.len()); + + if spans.is_empty() { + return None; + } + + let mut cursor = 0; + let mut output = String::with_capacity(source.len()); + let mut changed = false; + + for span in spans { + let start = align_to_char_boundary(&source, span.start, false); + let end = align_to_char_boundary(&source, span.end, true); + + if start >= end || start < cursor { + continue; + } + + output.push_str(&source[cursor..start]); + + let (repaired_region, region_changed) = repair_region(&source[start..end]); + output.push_str(&repaired_region); + changed |= region_changed; + + cursor = end; + } + + output.push_str(&source[cursor..]); + + if changed { + Some(ParseRepairOutcome::Reformat(output.into_bytes())) + } else { + None + } +} diff --git a/tests/ground_truth.rs b/tests/ground_truth.rs index 594d03d..271cb5d 100644 --- a/tests/ground_truth.rs +++ b/tests/ground_truth.rs @@ -153,659 +153,124 @@ fn format_via_stdin(test_binary: &PathBuf, input: &str) -> Result { + $( + #[test] + fn $ground_truth_test() { + let test_binary = get_test_binary(); + run_ground_truth_test(&test_binary, $fixture); + } -#[test] -fn idempotency_overlay() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "overlay"); + #[test] + fn $idempotency_test() { + let test_binary = get_test_binary(); + run_idempotency_test(&test_binary, $fixture); + } + )+ + }; } -// ============================================================================ -// Idempotency Tests - Commands and Definitions -// ============================================================================ +// Core language constructs +fixture_tests!( + ("let_statement", ground_truth_let_statement, idempotency_let_statement), + ("mut_statement", ground_truth_mut_statement, idempotency_mut_statement), + ("const_statement", ground_truth_const_statement, idempotency_const_statement), + ("def_statement", ground_truth_def_statement, idempotency_def_statement), +); -#[test] -fn idempotency_alias() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "alias"); -} +// Control flow +fixture_tests!( + ("if_else", ground_truth_if_else, idempotency_if_else), + ("for_loop", ground_truth_for_loop, idempotency_for_loop), + ("while_loop", ground_truth_while_loop, idempotency_while_loop), + ("loop_statement", ground_truth_loop_statement, idempotency_loop_statement), + ("match_expr", ground_truth_match_expr, idempotency_match_expr), + ("try_catch", ground_truth_try_catch, idempotency_try_catch), + ("break_continue", ground_truth_break_continue, idempotency_break_continue), + ("return_statement", ground_truth_return_statement, idempotency_return_statement), +); -#[test] -fn idempotency_extern() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "extern"); -} +// Data structures +fixture_tests!( + ("list", ground_truth_list, idempotency_list), + ("record", ground_truth_record, idempotency_record), + ("table", ground_truth_table, idempotency_table), + ("nested_structures", ground_truth_nested_structures, idempotency_nested_structures), +); -#[test] -fn idempotency_external_call() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "external_call"); -} +// Pipelines, expressions, and operators +fixture_tests!( + ("pipeline", ground_truth_pipeline, idempotency_pipeline), + ("multiline_pipeline", ground_truth_multiline_pipeline, idempotency_multiline_pipeline), + ("closure", ground_truth_closure, idempotency_closure), + ("subexpression", ground_truth_subexpression, idempotency_subexpression), + ("binary_ops", ground_truth_binary_ops, idempotency_binary_ops), + ("range", ground_truth_range, idempotency_range), + ("cell_path_literals", ground_truth_cell_path_literals, idempotency_cell_path_literals), + ("cell_path", ground_truth_cell_path, idempotency_cell_path), + ("spread", ground_truth_spread, idempotency_spread), +); -// ============================================================================ -// Idempotency Tests - Special Constructs -// ============================================================================ +// Strings, comments, types, and values +fixture_tests!( + ("string_interpolation", ground_truth_string_interpolation, idempotency_string_interpolation), + ("comment", ground_truth_comment, idempotency_comment), + ("value_with_unit", ground_truth_value_with_unit, idempotency_value_with_unit), + ("datetime", ground_truth_datetime, idempotency_datetime), + ("nothing", ground_truth_nothing, idempotency_nothing), + ("glob_pattern", ground_truth_glob_pattern, idempotency_glob_pattern), +); -#[test] -fn idempotency_do_block() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "do_block"); -} +// Modules and imports +fixture_tests!( + ("module", ground_truth_module, idempotency_module), + ("use_statement", ground_truth_use_statement, idempotency_use_statement), + ("export", ground_truth_export, idempotency_export), + ("source", ground_truth_source, idempotency_source), + ("hide", ground_truth_hide, idempotency_hide), + ("overlay", ground_truth_overlay, idempotency_overlay), +); -#[test] -fn idempotency_where_clause() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "where_clause"); -} +// Commands, definitions, and special constructs +fixture_tests!( + ("alias", ground_truth_alias, idempotency_alias), + ("extern", ground_truth_extern, idempotency_extern), + ("external_call", ground_truth_external_call, idempotency_external_call), + ("do_block", ground_truth_do_block, idempotency_do_block), + ("where_clause", ground_truth_where_clause, idempotency_where_clause), + ("error_make", ground_truth_error_make, idempotency_error_make), + ("inline_param_comment", ground_truth_inline_param_comment_issue77, idempotency_inline_param_comment_issue77), +); +// Ground-truth-only tests (no idempotency pair) #[test] -fn idempotency_error_make() { +fn ground_truth_def_with_pipeline() { let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "error_make"); + run_ground_truth_test(&test_binary, "def_with_pipeline_double_parens_issue82"); } -// ============================================================================ -// Issue Tests - -// ============================================================================ #[test] fn issue76_test() { let test_binary = get_test_binary(); run_ground_truth_test(&test_binary, "issue76"); } -#[test] -fn issue81_test() { - let test_binary = get_test_binary(); - run_ground_truth_test(&test_binary, "issue81"); -} - -#[test] -fn idempotency_issue81_test() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "issue81"); -} - -#[test] -fn ground_truth_inline_param_comment_issue77() { - let test_binary = get_test_binary(); - run_ground_truth_test(&test_binary, "inline_param_comment"); -} - -#[test] -fn idempotency_inline_param_comment_issue77() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, "inline_param_comment"); -} - -macro_rules! issue_fixture_tests { - ($(($fixture:literal, $ground_truth_test:ident, $idempotency_test:ident)),+ $(,)?) => { - $( - #[test] - fn $ground_truth_test() { - let test_binary = get_test_binary(); - run_ground_truth_test(&test_binary, $fixture); - } - - #[test] - fn $idempotency_test() { - let test_binary = get_test_binary(); - run_idempotency_test(&test_binary, $fixture); - } - )+ - }; -} - -issue_fixture_tests!( - ("issue85", issue85_test, idempotency_issue85_test), - ("issue86", issue86_test, idempotency_issue86_test), - ("issue87", issue87_test, idempotency_issue87_test), - ("issue92", issue92_test, idempotency_issue92_test), - ("issue93", issue93_test, idempotency_issue93_test), - ("issue94", issue94_test, idempotency_issue94_test), - ("issue95", issue95_test, idempotency_issue95_test), - ("issue97", issue97_test, idempotency_issue97_test), +// Issue regression tests +fixture_tests!( + ("issue81", issue81_test, idempotency_issue81_test), + ("issue85", issue85_test, idempotency_issue85_test), + ("issue86", issue86_test, idempotency_issue86_test), + ("issue87", issue87_test, idempotency_issue87_test), + ("issue92", issue92_test, idempotency_issue92_test), + ("issue93", issue93_test, idempotency_issue93_test), + ("issue94", issue94_test, idempotency_issue94_test), + ("issue95", issue95_test, idempotency_issue95_test), + ("issue97", issue97_test, idempotency_issue97_test), ("issue100", issue100_test, idempotency_issue100_test), ("issue101", issue101_test, idempotency_issue101_test), ("issue108", issue108_test, idempotency_issue108_test), From 940b46cbe4c69c3b74c7f0b8ff1e1f471a782a32 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 26 Mar 2026 12:37:47 -0500 Subject: [PATCH 2/2] fmt --- src/formatting/blocks.rs | 6 +- src/formatting/collections.rs | 6 +- tests/ground_truth.rs | 194 ++++++++++++++++++++++++---------- 3 files changed, 143 insertions(+), 63 deletions(-) diff --git a/src/formatting/blocks.rs b/src/formatting/blocks.rs index a19c8a2..28cd570 100644 --- a/src/formatting/blocks.rs +++ b/src/formatting/blocks.rs @@ -216,11 +216,7 @@ impl<'a> Formatter<'a> { /// Format a closure expression (`{|params| body}`), extracting and /// normalising parameters from the raw source. - pub(super) fn format_closure_expression( - &mut self, - block_id: nu_protocol::BlockId, - span: Span, - ) { + pub(super) fn format_closure_expression(&mut self, block_id: nu_protocol::BlockId, span: Span) { let content = self.get_span_content(span); let has_params = content .iter() diff --git a/src/formatting/collections.rs b/src/formatting/collections.rs index 0f66054..ed8e5e2 100644 --- a/src/formatting/collections.rs +++ b/src/formatting/collections.rs @@ -225,11 +225,7 @@ impl<'a> Formatter<'a> { // ───────────────────────────────────────────────────────────────────────── /// Format a table expression (`[[col1, col2]; [val1, val2]]`). - pub(super) fn format_table( - &mut self, - columns: &[Expression], - rows: &[Box<[Expression]>], - ) { + pub(super) fn format_table(&mut self, columns: &[Expression], rows: &[Box<[Expression]>]) { self.write("["); // Header row diff --git a/tests/ground_truth.rs b/tests/ground_truth.rs index 271cb5d..37e3d5f 100644 --- a/tests/ground_truth.rs +++ b/tests/ground_truth.rs @@ -177,74 +177,162 @@ macro_rules! fixture_tests { // Core language constructs fixture_tests!( - ("let_statement", ground_truth_let_statement, idempotency_let_statement), - ("mut_statement", ground_truth_mut_statement, idempotency_mut_statement), - ("const_statement", ground_truth_const_statement, idempotency_const_statement), - ("def_statement", ground_truth_def_statement, idempotency_def_statement), + ( + "let_statement", + ground_truth_let_statement, + idempotency_let_statement + ), + ( + "mut_statement", + ground_truth_mut_statement, + idempotency_mut_statement + ), + ( + "const_statement", + ground_truth_const_statement, + idempotency_const_statement + ), + ( + "def_statement", + ground_truth_def_statement, + idempotency_def_statement + ), ); // Control flow fixture_tests!( - ("if_else", ground_truth_if_else, idempotency_if_else), - ("for_loop", ground_truth_for_loop, idempotency_for_loop), - ("while_loop", ground_truth_while_loop, idempotency_while_loop), - ("loop_statement", ground_truth_loop_statement, idempotency_loop_statement), - ("match_expr", ground_truth_match_expr, idempotency_match_expr), - ("try_catch", ground_truth_try_catch, idempotency_try_catch), - ("break_continue", ground_truth_break_continue, idempotency_break_continue), - ("return_statement", ground_truth_return_statement, idempotency_return_statement), + ("if_else", ground_truth_if_else, idempotency_if_else), + ("for_loop", ground_truth_for_loop, idempotency_for_loop), + ( + "while_loop", + ground_truth_while_loop, + idempotency_while_loop + ), + ( + "loop_statement", + ground_truth_loop_statement, + idempotency_loop_statement + ), + ( + "match_expr", + ground_truth_match_expr, + idempotency_match_expr + ), + ("try_catch", ground_truth_try_catch, idempotency_try_catch), + ( + "break_continue", + ground_truth_break_continue, + idempotency_break_continue + ), + ( + "return_statement", + ground_truth_return_statement, + idempotency_return_statement + ), ); // Data structures fixture_tests!( - ("list", ground_truth_list, idempotency_list), - ("record", ground_truth_record, idempotency_record), - ("table", ground_truth_table, idempotency_table), - ("nested_structures", ground_truth_nested_structures, idempotency_nested_structures), + ("list", ground_truth_list, idempotency_list), + ("record", ground_truth_record, idempotency_record), + ("table", ground_truth_table, idempotency_table), + ( + "nested_structures", + ground_truth_nested_structures, + idempotency_nested_structures + ), ); // Pipelines, expressions, and operators fixture_tests!( - ("pipeline", ground_truth_pipeline, idempotency_pipeline), - ("multiline_pipeline", ground_truth_multiline_pipeline, idempotency_multiline_pipeline), - ("closure", ground_truth_closure, idempotency_closure), - ("subexpression", ground_truth_subexpression, idempotency_subexpression), - ("binary_ops", ground_truth_binary_ops, idempotency_binary_ops), - ("range", ground_truth_range, idempotency_range), - ("cell_path_literals", ground_truth_cell_path_literals, idempotency_cell_path_literals), - ("cell_path", ground_truth_cell_path, idempotency_cell_path), - ("spread", ground_truth_spread, idempotency_spread), + ("pipeline", ground_truth_pipeline, idempotency_pipeline), + ( + "multiline_pipeline", + ground_truth_multiline_pipeline, + idempotency_multiline_pipeline + ), + ("closure", ground_truth_closure, idempotency_closure), + ( + "subexpression", + ground_truth_subexpression, + idempotency_subexpression + ), + ( + "binary_ops", + ground_truth_binary_ops, + idempotency_binary_ops + ), + ("range", ground_truth_range, idempotency_range), + ( + "cell_path_literals", + ground_truth_cell_path_literals, + idempotency_cell_path_literals + ), + ("cell_path", ground_truth_cell_path, idempotency_cell_path), + ("spread", ground_truth_spread, idempotency_spread), ); // Strings, comments, types, and values fixture_tests!( - ("string_interpolation", ground_truth_string_interpolation, idempotency_string_interpolation), - ("comment", ground_truth_comment, idempotency_comment), - ("value_with_unit", ground_truth_value_with_unit, idempotency_value_with_unit), - ("datetime", ground_truth_datetime, idempotency_datetime), - ("nothing", ground_truth_nothing, idempotency_nothing), - ("glob_pattern", ground_truth_glob_pattern, idempotency_glob_pattern), + ( + "string_interpolation", + ground_truth_string_interpolation, + idempotency_string_interpolation + ), + ("comment", ground_truth_comment, idempotency_comment), + ( + "value_with_unit", + ground_truth_value_with_unit, + idempotency_value_with_unit + ), + ("datetime", ground_truth_datetime, idempotency_datetime), + ("nothing", ground_truth_nothing, idempotency_nothing), + ( + "glob_pattern", + ground_truth_glob_pattern, + idempotency_glob_pattern + ), ); // Modules and imports fixture_tests!( - ("module", ground_truth_module, idempotency_module), - ("use_statement", ground_truth_use_statement, idempotency_use_statement), - ("export", ground_truth_export, idempotency_export), - ("source", ground_truth_source, idempotency_source), - ("hide", ground_truth_hide, idempotency_hide), - ("overlay", ground_truth_overlay, idempotency_overlay), + ("module", ground_truth_module, idempotency_module), + ( + "use_statement", + ground_truth_use_statement, + idempotency_use_statement + ), + ("export", ground_truth_export, idempotency_export), + ("source", ground_truth_source, idempotency_source), + ("hide", ground_truth_hide, idempotency_hide), + ("overlay", ground_truth_overlay, idempotency_overlay), ); // Commands, definitions, and special constructs fixture_tests!( - ("alias", ground_truth_alias, idempotency_alias), - ("extern", ground_truth_extern, idempotency_extern), - ("external_call", ground_truth_external_call, idempotency_external_call), - ("do_block", ground_truth_do_block, idempotency_do_block), - ("where_clause", ground_truth_where_clause, idempotency_where_clause), - ("error_make", ground_truth_error_make, idempotency_error_make), - ("inline_param_comment", ground_truth_inline_param_comment_issue77, idempotency_inline_param_comment_issue77), + ("alias", ground_truth_alias, idempotency_alias), + ("extern", ground_truth_extern, idempotency_extern), + ( + "external_call", + ground_truth_external_call, + idempotency_external_call + ), + ("do_block", ground_truth_do_block, idempotency_do_block), + ( + "where_clause", + ground_truth_where_clause, + idempotency_where_clause + ), + ( + "error_make", + ground_truth_error_make, + idempotency_error_make + ), + ( + "inline_param_comment", + ground_truth_inline_param_comment_issue77, + idempotency_inline_param_comment_issue77 + ), ); // Ground-truth-only tests (no idempotency pair) @@ -262,15 +350,15 @@ fn issue76_test() { // Issue regression tests fixture_tests!( - ("issue81", issue81_test, idempotency_issue81_test), - ("issue85", issue85_test, idempotency_issue85_test), - ("issue86", issue86_test, idempotency_issue86_test), - ("issue87", issue87_test, idempotency_issue87_test), - ("issue92", issue92_test, idempotency_issue92_test), - ("issue93", issue93_test, idempotency_issue93_test), - ("issue94", issue94_test, idempotency_issue94_test), - ("issue95", issue95_test, idempotency_issue95_test), - ("issue97", issue97_test, idempotency_issue97_test), + ("issue81", issue81_test, idempotency_issue81_test), + ("issue85", issue85_test, idempotency_issue85_test), + ("issue86", issue86_test, idempotency_issue86_test), + ("issue87", issue87_test, idempotency_issue87_test), + ("issue92", issue92_test, idempotency_issue92_test), + ("issue93", issue93_test, idempotency_issue93_test), + ("issue94", issue94_test, idempotency_issue94_test), + ("issue95", issue95_test, idempotency_issue95_test), + ("issue97", issue97_test, idempotency_issue97_test), ("issue100", issue100_test, idempotency_issue100_test), ("issue101", issue101_test, idempotency_issue101_test), ("issue108", issue108_test, idempotency_issue108_test),