diff --git a/Cargo.lock b/Cargo.lock index b6adb06d..b410b0dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,18 @@ version = "9.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59051ec02907378a67b0ba1b8631121f5388c8dbbb3cec8c749d8f93c2c3c211" +[[package]] +name = "ast-grep-core" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d203a50bd471340befbf7d0dee18d66699cc11513aafa1fea06b926e74130818" +dependencies = [ + "bit-set", + "regex", + "thiserror 2.0.17", + "tree-sitter-facade-sg", +] + [[package]] name = "attohttpc" version = "0.30.1" @@ -221,6 +233,21 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -2773,6 +2800,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ -3117,6 +3150,49 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree-sitter" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "329a4d48623ac337d42b1df84e81a1c9dbb2946907c102ca72db158c1964a52e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-facade-sg" +version = "0.24.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9195ab85ddd7df7ddac5b2e397ec6264816ae640346013002ceccf0f9b3578f1" +dependencies = [ + "js-sys", + "tree-sitter", + "tree-sitter-language", + "wasm-bindgen", + "web-sys", + "web-tree-sitter-sg", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + [[package]] name = "tui-term" version = "0.2.0" @@ -3279,6 +3355,15 @@ dependencies = [ "vite_str", ] +[[package]] +name = "vite_shell" +version = "0.0.0" +dependencies = [ + "ast-grep-core", + "thiserror 2.0.17", + "tree-sitter-bash", +] + [[package]] name = "vite_str" version = "0.1.0" @@ -3457,6 +3542,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.104" @@ -3504,6 +3602,16 @@ dependencies = [ "walkdir", ] +[[package]] +name = "web-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "web-time" version = "1.1.0" @@ -3514,6 +3622,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-tree-sitter-sg" +version = "0.24.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cf7d34b16550f076d75b4a5d4673f1a9692f79787d040e3ac7ddb04e5c48a0" +dependencies = [ + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", +] + [[package]] name = "which" version = "8.0.0" diff --git a/Cargo.toml b/Cargo.toml index 844946f9..4548b104 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ allocator-api2 = { version = "0.2.21", default-features = false, features = ["al anyhow = "1.0.98" assert2 = "0.3.16" assertables = "9.8.1" +ast-grep-core = "0.32.2" attohttpc = { version = "0.30.1", features = ["tls-native", "tls-native-vendored"], default-features = false } base64 = "0.22.1" bincode = "2.0.1" @@ -107,6 +108,7 @@ toml = "0.9.5" tracing = "0.1.41" tracing-error = "0.2.1" tracing-subscriber = { version = "0.3.19", features = ["env-filter", "serde"] } +tree-sitter-bash = "0.23.1" tui-term = "0.2.0" twox-hash = "2.1.1" uuid = "1.18.1" diff --git a/crates/vite_shell/Cargo.toml b/crates/vite_shell/Cargo.toml new file mode 100644 index 00000000..d3af5dc7 --- /dev/null +++ b/crates/vite_shell/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vite_shell" +version = "0.0.0" +authors.workspace = true +edition.workspace = true +license.workspace = true +publish = false +rust-version.workspace = true + +[dependencies] +ast-grep-core = { workspace = true } +thiserror = { workspace = true } +tree-sitter-bash = { workspace = true } + +[lints] +workspace = true diff --git a/crates/vite_shell/src/lib.rs b/crates/vite_shell/src/lib.rs new file mode 100644 index 00000000..505863db --- /dev/null +++ b/crates/vite_shell/src/lib.rs @@ -0,0 +1,287 @@ +//! Shell script parsing utilities using ast-grep for syntax analysis. +//! +//! This crate provides functionality to parse and split bash scripts by top-level operators. + +use ast_grep_core::{AstGrep, Doc, Language}; +use thiserror::Error; + +/// Errors that can occur during shell script parsing. +#[derive(Debug, Error)] +pub enum ShellParseError { + /// The shell script has invalid syntax. + #[error("Invalid shell syntax: {0}")] + InvalidSyntax(String), + + /// An error occurred during parsing. + #[error("Parse error: {0}")] + ParseError(String), +} + +/// Bash language implementation for ast-grep. +#[derive(Clone)] +struct BashLanguage; + +impl Language for BashLanguage { + fn get_ts_language(&self) -> ast_grep_core::language::TSLanguage { + tree_sitter_bash::LANGUAGE.into() + } +} + +/// Splits a bash script string into multiple command strings by top-level `&&` operators. +/// +/// This function parses the bash script and identifies command lists separated by `&&` at the +/// top level (not nested within subshells, functions, or other constructs). +/// +/// # Arguments +/// +/// * `script` - The bash script string to split +/// +/// # Returns +/// +/// A `Result` containing a vector of command strings, or a `ShellParseError` if parsing fails. +/// +/// # Examples +/// +/// ``` +/// use vite_shell::split_by_and; +/// +/// let script = "npm run build && npm test"; +/// let commands = split_by_and(script).unwrap(); +/// assert_eq!(commands, vec!["npm run build", "npm test"]); +/// ``` +/// +/// ``` +/// use vite_shell::split_by_and; +/// +/// let script = "echo 'hello' && echo 'world' && echo 'rust'"; +/// let commands = split_by_and(script).unwrap(); +/// assert_eq!(commands, vec!["echo 'hello'", "echo 'world'", "echo 'rust'"]); +/// ``` +pub fn split_by_and(script: &str) -> Result, ShellParseError> { + let grep = AstGrep::new(script, BashLanguage); + let root = grep.root(); + + // Split by top-level && operators + let commands = split_list_by_operator(&root, "&&", script); + + if commands.is_empty() { + // If no && operators found, return the entire script as a single command + Ok(vec![script.trim().to_string()]) + } else { + Ok(commands) + } +} + +/// Splits a node by a specific operator at the top level only. +/// +/// This function walks the AST and splits only at the specified operator, +/// but handles nested lists that ALSO have the same operator (continuing the chain). +fn split_list_by_operator( + node: &ast_grep_core::Node, + operator: &str, + script: &str, +) -> Vec { + let kind = node.kind(); + + // Only process "list" nodes which contain operator sequences + if kind.as_ref() != "list" { + // For program nodes, check children + if kind.as_ref() == "program" { + for child in node.children() { + let results = split_list_by_operator(&child, operator, script); + if !results.is_empty() { + return results; + } + } + } + return Vec::new(); + } + + // We have a list node - check if it contains our target operator AT THIS LEVEL + let children: Vec<_> = node.children().collect(); + let has_target_operator = children.iter().any(|c| c.kind().as_ref() == operator); + + if !has_target_operator { + // No target operator at this level + return Vec::new(); + } + + // Found target operator at this level - split by it + // If we encounter a nested list, check if it's ONLY our operator (continue chain) + // or if it has OTHER operators (treat as atomic) + let mut commands = Vec::new(); + let mut current_start: Option = None; + let mut current_end: Option = None; + + for child in &children { + let child_kind = child.kind(); + + if child_kind.as_ref() == operator { + // Hit the operator - save current command if we have one + if let (Some(start), Some(end)) = (current_start, current_end) { + commands.push(script[start..end].trim().to_string()); + } + // Reset for next command + current_start = None; + current_end = None; + } else if child_kind.as_ref() == "list" { + // Nested list - check what operators it contains + let nested_children: Vec<_> = child.children().collect(); + let has_our_operator = nested_children.iter().any(|c| c.kind().as_ref() == operator); + let has_other_operator = nested_children.iter().any(|c| { + let k = c.kind(); + k.as_ref() == "||" || k.as_ref() == ";" || k.as_ref() == "|" || k.as_ref() == "&" + }); + + if has_our_operator && !has_other_operator { + // This nested list ONLY has our operator - it's a continuation of the chain + // Recursively process it and merge + let nested_results = split_list_by_operator(child, operator, script); + if !nested_results.is_empty() { + if let (Some(start), Some(_)) = (current_start, current_end) { + // Merge first result with accumulated parts + let prefix = script[start..child.range().start].trim(); + if !prefix.is_empty() { + commands.push(format!("{} && {}", prefix, nested_results[0])); + commands.extend(nested_results.into_iter().skip(1)); + } else { + commands.extend(nested_results); + } + current_start = None; + current_end = None; + } else { + commands.extend(nested_results); + } + } else { + // Shouldn't happen, but treat as atomic + let range = child.range(); + if current_start.is_none() { + current_start = Some(range.start); + } + current_end = Some(range.end); + } + } else { + // Nested list has other operators or no our operator - treat as atomic + let range = child.range(); + if current_start.is_none() { + current_start = Some(range.start); + } + current_end = Some(range.end); + } + } else { + // Part of a command + let range = child.range(); + if current_start.is_none() { + current_start = Some(range.start); + } + current_end = Some(range.end); + } + } + + // Don't forget the last command + if let (Some(start), Some(end)) = (current_start, current_end) { + commands.push(script[start..end].trim().to_string()); + } + + commands +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_simple_split() { + let script = "cmd1 && cmd2"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["cmd1", "cmd2"]); + } + + #[test] + fn test_or_then_and() { + // || and && have same precedence, left-associative + // So this parses as: (cmd0 || cmd1) && cmd2 + let script = "cmd0 || cmd1 && cmd2"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["cmd0 || cmd1", "cmd2"]); + } + + #[test] + fn test_and_then_or() { + // This parses as: (a && b) || c + // The && is nested in a list inside an || context + // Since there's no && at the top level (only ||), we don't split + let script = "a && b || c"; + let commands = split_by_and(script).unwrap(); + // No top-level &&, so return the whole thing + assert_eq!(commands, vec!["a && b || c"]); + } + + #[test] + fn test_mixed_operators() { + // Parses as: ((a && b) || c) && d + let script = "a && b || c && d"; + let commands = split_by_and(script).unwrap(); + // There IS a top-level && (between "((a && b) || c)" and "d") + // So we split there, treating the left side as atomic + assert_eq!(commands, vec!["a && b || c", "d"]); + } + + #[test] + fn test_only_or() { + // Only || operators, no splitting + let script = "cmd1 || cmd2 || cmd3"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["cmd1 || cmd2 || cmd3"]); + } + + #[test] + fn test_multiple_and() { + let script = "a && b && c"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["a", "b", "c"]); + } + + #[test] + fn test_no_and() { + let script = "single command"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["single command"]); + } + + #[test] + fn test_with_whitespace() { + let script = " cmd1 && cmd2 "; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["cmd1", "cmd2"]); + } + + #[test] + fn test_complex_commands() { + let script = "npm run build && npm test --coverage && echo 'done'"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["npm run build", "npm test --coverage", "echo 'done'"]); + } + + #[test] + fn test_subshell_with_and() { + let script = "(cmd1 && cmd2) && cmd3"; + let commands = split_by_and(script).unwrap(); + // Should split at the top-level &&, keeping the subshell intact + assert_eq!(commands, vec!["(cmd1 && cmd2)", "cmd3"]); + } + + #[test] + fn test_with_pipes() { + let script = "cat file.txt | grep pattern && echo 'found'"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["cat file.txt | grep pattern", "echo 'found'"]); + } + + #[test] + fn test_with_newlines() { + let script = "cmd1 &&\n cmd2 &&\n cmd3"; + let commands = split_by_and(script).unwrap(); + assert_eq!(commands, vec!["cmd1", "cmd2", "cmd3"]); + } +}