diff --git a/src/cmds/system/grep_cmd.rs b/src/cmds/system/grep_cmd.rs index 3f7d9a327..5223a5dc4 100644 --- a/src/cmds/system/grep_cmd.rs +++ b/src/cmds/system/grep_cmd.rs @@ -28,6 +28,15 @@ pub fn run( // Fix: convert BRE alternation \| → | for rg (which uses PCRE-style regex) let rg_pattern = pattern.replace(r"\|", "|"); + let filt = config::filters(); + // Skip exclusions when the user explicitly searches inside an ignored dir + // (e.g. `rtk grep foo node_modules/pkg`) — they asked for it on purpose. + let (exclude_globs, grep_excludes) = if path_targets_ignored_dir(path, &filt.ignore_dirs) { + (Vec::new(), Vec::new()) + } else { + (ignore_globs(&filt), ignore_grep_excludes(&filt)) + }; + let mut rg_cmd = resolved_command("rg"); // --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files). // Without this, rg returns 0 matches for files in .gitignore, causing @@ -36,7 +45,11 @@ pub fn run( // -H: always emit the filename. // -0: NUL-separate filename. Allows the parser to disambiguate filenames or // content containing `:digits:` patterns (issue #1436). - rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]); + rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs"]); + // Re-exclude token-bomb dirs/files (node_modules, *.min.js, …) that + // --no-ignore-vcs would otherwise descend, causing token expansion (#2064). + rg_cmd.args(&exclude_globs); + rg_cmd.args([&rg_pattern, path]); if let Some(ft) = file_type { rg_cmd.arg("--type").arg(ft); @@ -54,7 +67,11 @@ pub fn run( .or_else(|_| { let mut grep_cmd = resolved_command("grep"); // When we fall back to grep, include all args, not just -rnHZ. - grep_cmd.args(["-rnHZ", pattern, path]).args(extra_args); + // Mirror the rg exclusions so the (non-gitignore-aware) system grep + // also skips node_modules and friends (#2064). + grep_cmd.arg("-rnHZ"); + grep_cmd.args(&grep_excludes); + grep_cmd.args([pattern, path]).args(extra_args); exec_capture(&mut grep_cmd) }) .context("grep/rg failed")?; @@ -182,6 +199,52 @@ fn parse_match_line(line: &str) -> Option<(String, usize, &str)> { }) } +/// Build `rg --glob '!PATTERN'` exclusion args from the configured ignore list +/// (`config.filters.ignore_dirs` + `ignore_files`). +/// +/// `rtk grep` passes `--no-ignore-vcs` so it still searches most .gitignore'd +/// files (avoids false negatives, #1436-era behavior). But that also makes rg +/// descend dependency/build dirs like `node_modules`, dumping minified vendored +/// code and causing token *expansion* instead of reduction (#2064). These globs +/// re-exclude only the well-known token-bomb dirs/files. rg still honors an +/// explicitly-given path inside an excluded dir (e.g. `rtk grep foo node_modules/pkg`), +/// so no result the user explicitly asked for is lost. +fn ignore_globs(filt: &config::FilterConfig) -> Vec { + filt.ignore_dirs + .iter() + .chain(filt.ignore_files.iter()) + .flat_map(|p| ["--glob".to_string(), format!("!{p}")]) + .collect() +} + +/// Same exclusion list as [`ignore_globs`] but expressed as `grep` +/// `--exclude-dir=` / `--exclude=` args, for the system-grep fallback path +/// (taken when `rg` is not installed). Without this the fallback execs a +/// non-gitignore-aware grep that descends `node_modules` (#2064). +/// +/// Note: unlike rg's `--glob`, `grep --exclude-dir` suppresses matches even for +/// an explicitly-given path inside the excluded dir. The caller compensates by +/// not applying any exclusions when the search path itself targets an ignored +/// dir (see [`path_targets_ignored_dir`]), keeping both paths consistent. +fn ignore_grep_excludes(filt: &config::FilterConfig) -> Vec { + filt.ignore_dirs + .iter() + .map(|d| format!("--exclude-dir={d}")) + .chain(filt.ignore_files.iter().map(|f| format!("--exclude={f}"))) + .collect() +} + +/// True if the search `path` explicitly points at (or into) one of the ignored +/// dirs, e.g. `node_modules/pkg`. In that case the user clearly wants to search +/// there, so exclusions must be skipped entirely — otherwise `grep --exclude-dir` +/// would return zero hits for a path the user named on purpose (#2064). +fn path_targets_ignored_dir(path: &str, ignore_dirs: &[String]) -> bool { + std::path::Path::new(path).components().any(|c| { + matches!(c, std::path::Component::Normal(name) + if ignore_dirs.iter().any(|d| name == d.as_str())) + }) +} + fn has_format_flag(extra_args: &[String]) -> bool { extra_args.iter().any(|arg| { matches!( @@ -326,6 +389,72 @@ mod tests { assert_eq!(filtered[0], "-i"); } + // --- #2064: re-exclude token-bomb dirs even with --no-ignore-vcs --- + + fn has_glob_pair(globs: &[String], pat: &str) -> bool { + globs + .windows(2) + .any(|w| w[0] == "--glob" && w[1] == format!("!{pat}")) + } + + #[test] + fn test_ignore_globs_exclude_node_modules() { + // BUG #2064: rtk grep -rn descends node_modules (--no-ignore-vcs), dumping + // minified vendored code → token expansion. The default ignore list must + // produce rg exclusion globs. Fails before the fix (empty), passes after. + let globs = ignore_globs(&config::FilterConfig::default()); + assert!( + has_glob_pair(&globs, "node_modules"), + "node_modules must be excluded: {globs:?}" + ); + assert!( + has_glob_pair(&globs, "*.min.js"), + "*.min.js must be excluded: {globs:?}" + ); + assert!( + has_glob_pair(&globs, "target"), + "target must be excluded: {globs:?}" + ); + } + + #[test] + fn test_ignore_globs_empty_config_yields_nothing() { + let filt = config::FilterConfig { + ignore_dirs: vec![], + ignore_files: vec![], + }; + assert!(ignore_globs(&filt).is_empty()); + } + + #[test] + fn test_path_targets_ignored_dir() { + let dirs = config::FilterConfig::default().ignore_dirs; + // Explicit searches into an ignored dir → exclusions must be skipped. + assert!(path_targets_ignored_dir("node_modules/pkg-a", &dirs)); + assert!(path_targets_ignored_dir("./node_modules", &dirs)); + assert!(path_targets_ignored_dir("a/b/vendor/c", &dirs)); + // Normal project searches → exclusions apply. + assert!(!path_targets_ignored_dir(".", &dirs)); + assert!(!path_targets_ignored_dir("src", &dirs)); + // Substring of an ignored name must NOT trigger (component-anchored). + assert!(!path_targets_ignored_dir("my-node_modules-helper", &dirs)); + } + + #[test] + fn test_ignore_grep_excludes_for_fallback() { + // The system-grep fallback path must mirror the rg exclusions so rg-less + // machines also skip node_modules (#2064 reporter had no rg installed). + let excludes = ignore_grep_excludes(&config::FilterConfig::default()); + assert!( + excludes.iter().any(|e| e == "--exclude-dir=node_modules"), + "fallback grep must exclude node_modules dir: {excludes:?}" + ); + assert!( + excludes.iter().any(|e| e == "--exclude=*.min.js"), + "fallback grep must exclude *.min.js: {excludes:?}" + ); + } + // --- truncation accuracy --- #[test] diff --git a/src/core/config.rs b/src/core/config.rs index ed0f00c6c..e4dad9fa4 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -150,6 +150,12 @@ pub fn limits() -> LimitsConfig { Config::load().map(|c| c.limits).unwrap_or_default() } +/// Get filter config (ignore_dirs / ignore_files). Falls back to defaults if +/// config can't be loaded. +pub fn filters() -> FilterConfig { + Config::load().map(|c| c.filters).unwrap_or_default() +} + impl Config { pub fn load() -> Result { let path = get_config_path()?;