From b0e2b59642a7ccb4cbc7f29f92653dc0496ac4eb Mon Sep 17 00:00:00 2001 From: Nanook Claw Date: Fri, 29 May 2026 04:08:17 +0000 Subject: [PATCH] fix(grep): skip .claude/worktrees by default rtk grep passes --no-ignore-vcs so ripgrep descends into Claude Code's gitignored .claude/worktrees checkouts, surfacing out-of-scope matches (worse with --hidden). Add a default rg exclusion glob (!**/.claude/worktrees/**) matching the directory at any depth, with an --exclude-dir mirror on the grep fallback. The exclusion is inserted before user args so a later user --glob can re-include the directory. Closes #2147 Signed-off-by: Nanook Claw --- src/cmds/system/grep_cmd.rs | 390 +++++++++++++++++++++++++++++++++--- 1 file changed, 365 insertions(+), 25 deletions(-) diff --git a/src/cmds/system/grep_cmd.rs b/src/cmds/system/grep_cmd.rs index 3f7d9a327..3d93721ab 100644 --- a/src/cmds/system/grep_cmd.rs +++ b/src/cmds/system/grep_cmd.rs @@ -7,6 +7,22 @@ use crate::core::utils::resolved_command; use anyhow::{Context, Result}; use regex::Regex; use std::collections::HashMap; +use std::ffi::OsStr; +use std::path::{Component, Path}; +use std::process::Command; + +/// Glob passed to ripgrep to skip Claude Code's worktree checkouts. +/// +/// Claude Code creates throwaway branch checkouts under `.claude/worktrees`. +/// They are gitignored and out of scope for an agentic session, but because +/// `rtk grep` passes `--no-ignore-vcs` (and a user may add `--hidden`), ripgrep +/// would otherwise descend into them. The leading `**/` matches at any depth, +/// including the repository-root `.claude/worktrees`; the trailing `/**` +/// excludes the directory's contents. Applied only when the requested path is +/// not already inside `.claude/worktrees`. Added before any user-supplied args +/// so a later user `--glob` can still re-include the directory (rg honors the +/// last matching glob). +const WORKTREES_RG_GLOB: &str = "!**/.claude/worktrees/**"; #[allow(clippy::too_many_arguments)] pub fn run( @@ -28,34 +44,24 @@ pub fn run( // Fix: convert BRE alternation \| → | for rg (which uses PCRE-style regex) let rg_pattern = pattern.replace(r"\|", "|"); - let mut rg_cmd = resolved_command("rg"); - // --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files). - // Without this, rg returns 0 matches for files in .gitignore, causing - // false negatives that make AI agents draw wrong conclusions. - // Using --no-ignore-vcs (not --no-ignore) so .ignore/.rgignore are still respected. - // -H: always emit the filename. - // -0: NUL-separate filename. Allows the parser to disambiguate filenames or - // content containing `:digits:` patterns (issue #1436). - rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]); - - if let Some(ft) = file_type { - rg_cmd.arg("--type").arg(ft); - } - - for arg in extra_args { - // Fix: skip grep-ism -r flag (rg is recursive by default; rg -r means --replace) - if arg == "-r" || arg == "--recursive" { - continue; - } - rg_cmd.arg(arg); - } + let skip_worktrees = should_skip_claude_worktrees_by_default(path); + let mut rg_cmd = build_rg_command(&rg_pattern, path, file_type, extra_args); let result = exec_capture(&mut rg_cmd) .or_else(|_| { - let mut grep_cmd = resolved_command("grep"); - // When we fall back to grep, include all args, not just -rnHZ. - grep_cmd.args(["-rnHZ", pattern, path]).args(extra_args); - exec_capture(&mut grep_cmd) + let mut grep_cmd = build_grep_command(pattern, path, extra_args); + let mut result = exec_capture(&mut grep_cmd)?; + if skip_worktrees { + let original_stdout = result.stdout.clone(); + result.stdout = filter_claude_worktree_matches(&result.stdout); + if result.exit_code == 0 + && !original_stdout.trim().is_empty() + && result.stdout.trim().is_empty() + { + result.exit_code = 1; + } + } + Ok::<_, anyhow::Error>(result) }) .context("grep/rg failed")?; @@ -160,6 +166,123 @@ pub fn run( Ok(exit_code) } +/// Builds the ripgrep command used as the primary search backend. +/// +/// Extracted from [`run`] so the argument construction — notably the +/// `.claude/worktrees` exclusion — can be unit-tested via `Command::get_args`. +fn build_rg_command( + rg_pattern: &str, + path: &str, + file_type: Option<&str>, + extra_args: &[String], +) -> Command { + let mut rg_cmd = resolved_command("rg"); + // --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files). + // Without this, rg returns 0 matches for files in .gitignore, causing + // false negatives that make AI agents draw wrong conclusions. + // Using --no-ignore-vcs (not --no-ignore) so .ignore/.rgignore are still respected. + // -H: always emit the filename. + // -0: NUL-separate filename. Allows the parser to disambiguate filenames or + // content containing `:digits:` patterns (issue #1436). + rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs"]); + + // --glob: skip Claude Code worktrees (see WORKTREES_RG_GLOB) for normal + // searches, but do not exclude the subtree when the user explicitly points + // the path argument inside it. Placed before user args so an explicit user + // --glob can still override it. + if should_skip_claude_worktrees_by_default(path) { + rg_cmd.args(["--glob", WORKTREES_RG_GLOB]); + } + + rg_cmd.args([rg_pattern, path]); + + if let Some(ft) = file_type { + rg_cmd.arg("--type").arg(ft); + } + + for arg in extra_args { + // Fix: skip grep-ism -r flag (rg is recursive by default; rg -r means --replace) + if arg == "-r" || arg == "--recursive" { + continue; + } + rg_cmd.arg(arg); + } + + rg_cmd +} + +/// Builds the `grep` fallback command, used only when ripgrep is unavailable. +/// +/// GNU grep's `--exclude-dir` matches base names only, so the fallback cannot +/// express the `.claude/worktrees` subtree skip without also skipping unrelated +/// directories like `src/worktrees`. Default-scan matches from that subtree are +/// filtered from captured stdout instead. +fn build_grep_command(pattern: &str, path: &str, extra_args: &[String]) -> Command { + let mut grep_cmd = resolved_command("grep"); + // When we fall back to grep, include all args, not just -rnHZ. + grep_cmd.args(["-rnHZ", pattern, path]).args(extra_args); + grep_cmd +} + +fn should_skip_claude_worktrees_by_default(path: &str) -> bool { + !path_points_inside_claude_worktrees(path) +} + +fn path_points_inside_claude_worktrees(path: &str) -> bool { + let mut normalized = Vec::new(); + for component in Path::new(path).components() { + match component { + Component::Normal(part) => normalized.push(part), + Component::CurDir => {} + Component::ParentDir => { + normalized.pop(); + } + Component::Prefix(_) | Component::RootDir => normalized.clear(), + } + } + + let mut previous_was_claude = false; + for part in normalized { + if previous_was_claude && part == OsStr::new("worktrees") { + return true; + } + previous_was_claude = part == OsStr::new(".claude"); + } + false +} + +fn filter_claude_worktree_matches(stdout: &str) -> String { + if stdout.ends_with('\0') && !stdout.contains('\n') { + return filter_nul_separated_paths(stdout); + } + + let mut filtered = String::with_capacity(stdout.len()); + for line in stdout.split_inclusive('\n') { + if !line_points_inside_claude_worktrees(line) { + filtered.push_str(line); + } + } + filtered +} + +fn filter_nul_separated_paths(stdout: &str) -> String { + let mut filtered = String::with_capacity(stdout.len()); + for path in stdout.split_inclusive('\0') { + let path_without_nul = path.strip_suffix('\0').unwrap_or(path); + if !path_points_inside_claude_worktrees(path_without_nul) { + filtered.push_str(path); + } + } + filtered +} + +fn line_points_inside_claude_worktrees(line: &str) -> bool { + let Some((path, _)) = line.split_once('\0') else { + return false; + }; + path_points_inside_claude_worktrees(path) +} + /// Parses a single rg/grep match line of the form `file\0line_number:content`. /// /// Requires the underlying command to be invoked with `-0` (rg) or `-Z` (grep) @@ -507,4 +630,221 @@ mod tests { } // If rg is not installed, skip gracefully (test still passes) } + + // --- issue #2147: skip .claude/worktrees by default --- + + /// Collects the constructed rg argument list (excluding the program name). + fn rg_args(path: &str, file_type: Option<&str>, extra: &[String]) -> Vec { + build_rg_command("NEEDLE", path, file_type, extra) + .get_args() + .map(|a| a.to_string_lossy().into_owned()) + .collect() + } + + /// Collects the constructed grep fallback argument list. + fn grep_args(path: &str, extra: &[String]) -> Vec { + build_grep_command("NEEDLE", path, extra) + .get_args() + .map(|a| a.to_string_lossy().into_owned()) + .collect() + } + + #[test] + fn test_rg_command_excludes_claude_worktrees() { + // The rg invocation must carry the exclusion as a `--glob ` pair. + let args = rg_args(".", None, &[]); + let pos = args + .iter() + .position(|a| a == WORKTREES_RG_GLOB) + .expect("rg command must include the .claude/worktrees exclusion glob"); + assert_eq!( + args[pos - 1], "--glob", + "the exclusion must be passed as a --glob value" + ); + } + + #[test] + fn test_worktrees_glob_matches_any_depth() { + // Leading `**/` matches the repo-root .claude/worktrees as well as nested + // copies; trailing `/**` excludes the directory's contents. + assert!(WORKTREES_RG_GLOB.starts_with("!**/")); + assert!(WORKTREES_RG_GLOB.contains(".claude/worktrees")); + assert!(WORKTREES_RG_GLOB.ends_with("/**")); + } + + #[test] + fn test_rg_exclusion_precedes_user_args() { + // The default exclusion is added before user args, so a later user + // `--glob` wins (rg honors the last matching glob) and can re-include + // .claude/worktrees if explicitly desired. + let user = vec!["--glob".to_string(), "**/.claude/worktrees/**".to_string()]; + let args = rg_args(".", None, &user); + let ours = args + .iter() + .position(|a| a == WORKTREES_RG_GLOB) + .expect("default exclusion present"); + let theirs = args + .iter() + .rposition(|a| a == "**/.claude/worktrees/**") + .expect("user glob present"); + assert!(ours < theirs, "user glob must come after the default exclusion"); + } + + #[test] + fn test_rg_preserves_user_args_and_type() { + // Adding the exclusion must not drop user-supplied args or --type. + let user = vec!["-i".to_string(), "-w".to_string()]; + let args = rg_args(".", Some("rust"), &user); + let type_pos = args.iter().position(|a| a == "--type").expect("--type present"); + assert_eq!(args[type_pos + 1], "rust", "--type value preserved"); + assert!(args.iter().any(|a| a == "-i")); + assert!(args.iter().any(|a| a == "-w")); + } + + #[test] + fn test_rg_command_does_not_exclude_explicit_claude_worktrees_path() { + let args = rg_args(".claude/worktrees", None, &[]); + assert!( + !args.iter().any(|a| a == WORKTREES_RG_GLOB), + "explicit .claude/worktrees searches must not carry default exclusion, got: {:?}", + args + ); + } + + #[test] + fn test_path_points_inside_claude_worktrees() { + assert!(path_points_inside_claude_worktrees(".claude/worktrees")); + assert!(path_points_inside_claude_worktrees( + "./.claude/worktrees/session" + )); + assert!(path_points_inside_claude_worktrees( + "/tmp/repo/.claude/worktrees/wt/file.rs" + )); + assert!(!path_points_inside_claude_worktrees(".")); + assert!(!path_points_inside_claude_worktrees("src/worktrees")); + assert!(!path_points_inside_claude_worktrees( + ".claude/../src/worktrees" + )); + } + + #[test] + fn test_grep_fallback_does_not_use_basename_worktrees_exclude() { + let args = grep_args(".", &[]); + assert!( + !args.iter().any(|a| a == "--exclude-dir=worktrees"), + "grep fallback must not over-exclude unrelated worktrees dirs, got: {:?}", + args + ); + } + + #[test] + fn test_grep_preserves_user_args() { + let user = vec!["-i".to_string()]; + let args = grep_args(".", &user); + let user_pos = args.iter().position(|a| a == "-i").expect("user arg present"); + let path_pos = args.iter().position(|a| a == ".").expect("path present"); + assert!(path_pos < user_pos, "user args must still be appended"); + } + + #[test] + fn test_grep_fallback_filter_skips_only_claude_worktrees() { + let stdout = concat!( + "src/worktrees/file.rs\x001:NEEDLE\n", + ".claude/worktrees/wt/file.rs\x001:NEEDLE\n", + "nested/.claude/worktrees/wt/file.rs\x001:NEEDLE\n", + "src/other.rs\x001:NEEDLE\n", + ); + let filtered = filter_claude_worktree_matches(stdout); + + assert!(filtered.contains("src/worktrees/file.rs")); + assert!(filtered.contains("src/other.rs")); + assert!(!filtered.contains(".claude/worktrees")); + } + + #[test] + fn test_grep_fallback_filter_handles_null_separated_format_output() { + let stdout = concat!( + "src/worktrees/file.rs\x00", + ".claude/worktrees/wt/file.rs\x00", + "src/other.rs\x00", + ); + let filtered = filter_claude_worktree_matches(stdout); + + assert!(filtered.contains("src/worktrees/file.rs\x00")); + assert!(filtered.contains("src/other.rs\x00")); + assert!(!filtered.contains(".claude/worktrees")); + } + + #[test] + fn test_grep_fallback_filter_preserves_single_match_without_trailing_newline() { + let stdout = "src/worktrees/file.rs\x001:NEEDLE"; + assert_eq!(filter_claude_worktree_matches(stdout), stdout); + } + + #[test] + fn test_grep_fallback_filter_drops_single_claude_worktree_match_without_trailing_newline() { + let stdout = ".claude/worktrees/wt/file.rs\x001:NEEDLE"; + assert_eq!(filter_claude_worktree_matches(stdout), ""); + } + + #[test] + fn test_rg_worktrees_glob_accepted() { + // Verify rg accepts the worktrees exclusion glob (style of + // test_rg_no_ignore_vcs_flag_accepted). + let mut cmd = resolved_command("rg"); + cmd.args([ + "-n", + "--no-heading", + "--no-ignore-vcs", + "--glob", + WORKTREES_RG_GLOB, + "NONEXISTENT_PATTERN_12345", + ".", + ]); + if let Ok(output) = cmd.output() { + assert!( + output.status.code() == Some(1) || output.status.success(), + "rg should accept the .claude/worktrees exclusion glob" + ); + } + // If rg is not installed, skip gracefully (test still passes) + } + + #[test] + fn test_rg_behavior_skips_claude_worktrees() { + // End-to-end: run the real rg command built by build_rg_command against a + // fixture with .claude/worktrees at the repo root AND nested under a + // subdirectory. --hidden is supplied (the condition under which the bug + // surfaces, since rg skips hidden dirs by default), so the exclusion glob + // is what must do the skipping. + use std::fs; + + let dir = tempfile::tempdir().expect("create temp dir"); + let root = dir.path(); + for rel in [ + ".claude/worktrees/wt/main.rs", + "sub/.claude/worktrees/wt/lib.rs", + "keep.rs", + ] { + let file = root.join(rel); + fs::create_dir_all(file.parent().unwrap()).unwrap(); + fs::write(file, "fn NEEDLE() {}\n").unwrap(); + } + + let path = root.to_str().expect("utf-8 temp path"); + let mut cmd = build_rg_command("NEEDLE", path, None, &["--hidden".to_string()]); + let Ok(output) = cmd.output() else { + return; // rg not installed: skip gracefully (matches the rg tests above). + }; + let stdout = String::from_utf8_lossy(&output.stdout); + + assert!( + stdout.contains("keep.rs"), + "non-worktree file must still match, got: {stdout:?}" + ); + assert!( + !stdout.contains("worktrees"), + "rg must not descend into .claude/worktrees (root or nested), got: {stdout:?}" + ); + } }