Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 131 additions & 2 deletions src/cmds/system/grep_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ pub fn run(
// Fix: convert BRE alternation \| → | for rg (which uses PCRE-style regex)
let rg_pattern = pattern.replace(r"\|", "|");

let filt = config::filters();
// Skip exclusions when the user explicitly searches inside an ignored dir
// (e.g. `rtk grep foo node_modules/pkg`) — they asked for it on purpose.
let (exclude_globs, grep_excludes) = if path_targets_ignored_dir(path, &filt.ignore_dirs) {
(Vec::new(), Vec::new())
} else {
(ignore_globs(&filt), ignore_grep_excludes(&filt))
};

let mut rg_cmd = resolved_command("rg");
// --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files).
// Without this, rg returns 0 matches for files in .gitignore, causing
Expand All @@ -36,7 +45,11 @@ pub fn run(
// -H: always emit the filename.
// -0: NUL-separate filename. Allows the parser to disambiguate filenames or
// content containing `:digits:` patterns (issue #1436).
rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]);
rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs"]);
// Re-exclude token-bomb dirs/files (node_modules, *.min.js, …) that
// --no-ignore-vcs would otherwise descend, causing token expansion (#2064).
rg_cmd.args(&exclude_globs);
rg_cmd.args([&rg_pattern, path]);

if let Some(ft) = file_type {
rg_cmd.arg("--type").arg(ft);
Expand All @@ -54,7 +67,11 @@ pub fn run(
.or_else(|_| {
let mut grep_cmd = resolved_command("grep");
// When we fall back to grep, include all args, not just -rnHZ.
grep_cmd.args(["-rnHZ", pattern, path]).args(extra_args);
// Mirror the rg exclusions so the (non-gitignore-aware) system grep
// also skips node_modules and friends (#2064).
grep_cmd.arg("-rnHZ");
grep_cmd.args(&grep_excludes);
grep_cmd.args([pattern, path]).args(extra_args);
exec_capture(&mut grep_cmd)
})
.context("grep/rg failed")?;
Expand Down Expand Up @@ -182,6 +199,52 @@ fn parse_match_line(line: &str) -> Option<(String, usize, &str)> {
})
}

/// Build `rg --glob '!PATTERN'` exclusion args from the configured ignore list
/// (`config.filters.ignore_dirs` + `ignore_files`).
///
/// `rtk grep` passes `--no-ignore-vcs` so it still searches most .gitignore'd
/// files (avoids false negatives, #1436-era behavior). But that also makes rg
/// descend dependency/build dirs like `node_modules`, dumping minified vendored
/// code and causing token *expansion* instead of reduction (#2064). These globs
/// re-exclude only the well-known token-bomb dirs/files. rg still honors an
/// explicitly-given path inside an excluded dir (e.g. `rtk grep foo node_modules/pkg`),
/// so no result the user explicitly asked for is lost.
fn ignore_globs(filt: &config::FilterConfig) -> Vec<String> {
filt.ignore_dirs
.iter()
.chain(filt.ignore_files.iter())
.flat_map(|p| ["--glob".to_string(), format!("!{p}")])
.collect()
}

/// Same exclusion list as [`ignore_globs`] but expressed as `grep`
/// `--exclude-dir=` / `--exclude=` args, for the system-grep fallback path
/// (taken when `rg` is not installed). Without this the fallback execs a
/// non-gitignore-aware grep that descends `node_modules` (#2064).
///
/// Note: unlike rg's `--glob`, `grep --exclude-dir` suppresses matches even for
/// an explicitly-given path inside the excluded dir. The caller compensates by
/// not applying any exclusions when the search path itself targets an ignored
/// dir (see [`path_targets_ignored_dir`]), keeping both paths consistent.
fn ignore_grep_excludes(filt: &config::FilterConfig) -> Vec<String> {
filt.ignore_dirs
.iter()
.map(|d| format!("--exclude-dir={d}"))
.chain(filt.ignore_files.iter().map(|f| format!("--exclude={f}")))
.collect()
}

/// True if the search `path` explicitly points at (or into) one of the ignored
/// dirs, e.g. `node_modules/pkg`. In that case the user clearly wants to search
/// there, so exclusions must be skipped entirely — otherwise `grep --exclude-dir`
/// would return zero hits for a path the user named on purpose (#2064).
fn path_targets_ignored_dir(path: &str, ignore_dirs: &[String]) -> bool {
std::path::Path::new(path).components().any(|c| {
matches!(c, std::path::Component::Normal(name)
if ignore_dirs.iter().any(|d| name == d.as_str()))
})
}

fn has_format_flag(extra_args: &[String]) -> bool {
extra_args.iter().any(|arg| {
matches!(
Expand Down Expand Up @@ -326,6 +389,72 @@ mod tests {
assert_eq!(filtered[0], "-i");
}

// --- #2064: re-exclude token-bomb dirs even with --no-ignore-vcs ---

fn has_glob_pair(globs: &[String], pat: &str) -> bool {
globs
.windows(2)
.any(|w| w[0] == "--glob" && w[1] == format!("!{pat}"))
}

#[test]
fn test_ignore_globs_exclude_node_modules() {
// BUG #2064: rtk grep -rn descends node_modules (--no-ignore-vcs), dumping
// minified vendored code → token expansion. The default ignore list must
// produce rg exclusion globs. Fails before the fix (empty), passes after.
let globs = ignore_globs(&config::FilterConfig::default());
assert!(
has_glob_pair(&globs, "node_modules"),
"node_modules must be excluded: {globs:?}"
);
assert!(
has_glob_pair(&globs, "*.min.js"),
"*.min.js must be excluded: {globs:?}"
);
assert!(
has_glob_pair(&globs, "target"),
"target must be excluded: {globs:?}"
);
}

#[test]
fn test_ignore_globs_empty_config_yields_nothing() {
let filt = config::FilterConfig {
ignore_dirs: vec![],
ignore_files: vec![],
};
assert!(ignore_globs(&filt).is_empty());
}

#[test]
fn test_path_targets_ignored_dir() {
let dirs = config::FilterConfig::default().ignore_dirs;
// Explicit searches into an ignored dir → exclusions must be skipped.
assert!(path_targets_ignored_dir("node_modules/pkg-a", &dirs));
assert!(path_targets_ignored_dir("./node_modules", &dirs));
assert!(path_targets_ignored_dir("a/b/vendor/c", &dirs));
// Normal project searches → exclusions apply.
assert!(!path_targets_ignored_dir(".", &dirs));
assert!(!path_targets_ignored_dir("src", &dirs));
// Substring of an ignored name must NOT trigger (component-anchored).
assert!(!path_targets_ignored_dir("my-node_modules-helper", &dirs));
}

#[test]
fn test_ignore_grep_excludes_for_fallback() {
// The system-grep fallback path must mirror the rg exclusions so rg-less
// machines also skip node_modules (#2064 reporter had no rg installed).
let excludes = ignore_grep_excludes(&config::FilterConfig::default());
assert!(
excludes.iter().any(|e| e == "--exclude-dir=node_modules"),
"fallback grep must exclude node_modules dir: {excludes:?}"
);
assert!(
excludes.iter().any(|e| e == "--exclude=*.min.js"),
"fallback grep must exclude *.min.js: {excludes:?}"
);
}

// --- truncation accuracy ---

#[test]
Expand Down
6 changes: 6 additions & 0 deletions src/core/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@ pub fn limits() -> LimitsConfig {
Config::load().map(|c| c.limits).unwrap_or_default()
}

/// Get filter config (ignore_dirs / ignore_files). Falls back to defaults if
/// config can't be loaded.
pub fn filters() -> FilterConfig {
Config::load().map(|c| c.filters).unwrap_or_default()
}

impl Config {
pub fn load() -> Result<Self> {
let path = get_config_path()?;
Expand Down