From 20eedd19ef8ef75fecae2fd8821f107186ab46d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Ols=CC=8Cavsky=CC=81?= Date: Sun, 31 May 2026 12:22:02 +0200 Subject: [PATCH] =?UTF-8?q?fix(grep):=20respect=20the=20invoked=20tool=20?= =?UTF-8?q?=E2=80=94=20grep=20runs=20grep,=20rg=20runs=20ripgrep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `rg` and `grep` were both rewritten to `rtk grep`, which always ran ripgrep as its backend. That collapsed the two tools' identities and broke whichever one didn't match the backend: ripgrep-only flags (`--glob`, `-g`, `-P`, `--files`) failed because the command fell through to system grep, while grep commands silently misbehaved under rg (e.g. `-r` is recursive in grep but `--replace` in rg, so `grep -rn foo` rewrote every match). Preserve the invoked tool's identity from the rewrite onward, so each command runs its own backend with its own semantics and there is never any crossover: - rewrite: `grep ...` -> `rtk grep`, `rg ...` -> `rtk rg` (split the single rule). - `rtk rg` (new subcommand): faithful ripgrep passthrough — all args forwarded verbatim, so rg-only flags work in any order — with RTK's by-file filtering (and raw streaming for info/list flags like `--files`). - `rtk grep`: now runs system grep (grep semantics: BRE, grep flags, `-r`), parsing grep's `file:line:content` output; `-t/--file-type` maps to grep `--include`. - shared `emit_filtered` takes a per-backend line parser (rg NUL form vs grep colon form). Fixes #2167. Also addresses #2060, #1651. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/usage/FEATURES.md | 13 ++- src/cmds/system/grep_cmd.rs | 192 ++++++++++++++++++++++++++++++------ src/discover/registry.rs | 8 +- src/discover/rules.rs | 17 +++- src/main.rs | 13 +++ 5 files changed, 206 insertions(+), 37 deletions(-) diff --git a/docs/usage/FEATURES.md b/docs/usage/FEATURES.md index ed5ee0bd7..fc5afbd6f 100644 --- a/docs/usage/FEATURES.md +++ b/docs/usage/FEATURES.md @@ -200,9 +200,13 @@ Supporte a la fois la syntaxe RTK et la syntaxe native `find` (`-name`, `-type`, --- -### `rtk grep` -- Recherche dans le contenu +### `rtk grep` / `rtk rg` -- Recherche dans le contenu -**Objectif :** Remplace `grep` et `rg` avec une sortie groupee par fichier, tronquee. +**Objectif :** Sortie groupee par fichier, tronquee. `rtk grep` execute le `grep` +systeme (semantique grep) ; `rtk rg` execute ripgrep (semantique rg, flags +`--glob`/`-g`/`-P`/`--files`...). Chaque commande conserve son propre outil : pas +de bascule de l'un vers l'autre (les flags courts comme `-r` different entre les +deux). **Syntaxe :** ```bash @@ -219,7 +223,7 @@ rtk grep [chemin] [options] | `--file-type` | `-t` | tous | Filtrer par type (ts, py, rust, etc.) | | `--line-numbers` | `-n` | oui | Numeros de ligne (toujours actif) | -Les arguments supplementaires sont transmis a `rg` (ripgrep). Les flags qui changent le format de sortie (`-c`, `-l`, `-L`, `-o`, `-Z`) passent directement a `rg`/`grep` sans filtrage RTK. +Les arguments supplementaires sont transmis a l'outil sous-jacent (`grep` pour `rtk grep`, `rg` pour `rtk rg`). Les flags qui changent le format de sortie (`-c`, `-l`, `-L`, `-o`, `-Z`) passent directement a l'outil sans filtrage RTK. **Economies :** ~80% @@ -1255,7 +1259,8 @@ rtk verify | `gh pr/issue/run` | `rtk gh ...` | | `cargo test/build/clippy/check` | `rtk cargo ...` | | `cat/head/tail ` | `rtk read ` | -| `rg/grep ` | `rtk grep ` | +| `grep ` | `rtk grep ` | +| `rg ` | `rtk rg ` | | `ls` | `rtk ls` | | `tree` | `rtk tree` | | `wc` | `rtk wc` | diff --git a/src/cmds/system/grep_cmd.rs b/src/cmds/system/grep_cmd.rs index 3f7d9a327..ae49feed4 100644 --- a/src/cmds/system/grep_cmd.rs +++ b/src/cmds/system/grep_cmd.rs @@ -1,13 +1,17 @@ //! Filters grep output by grouping matches by file. use crate::core::config; -use crate::core::stream::exec_capture; +use crate::core::stream::{exec_capture, CaptureResult}; use crate::core::tracking; use crate::core::utils::resolved_command; use anyhow::{Context, Result}; use regex::Regex; use std::collections::HashMap; +/// `rtk grep` — runs **system grep** so grep's own semantics are respected (BRE +/// by default, grep flags). Ripgrep is handled separately by [`run_rg`]; the two +/// are never crossed, because grep and rg share short flags with conflicting +/// meanings (e.g. `-r` is recursive in grep but `--replace` in rg). #[allow(clippy::too_many_arguments)] pub fn run( pattern: &str, @@ -25,40 +29,111 @@ pub fn run( eprintln!("grep: '{}' in {}", pattern, path); } - // Fix: convert BRE alternation \| → | for rg (which uses PCRE-style regex) - let rg_pattern = pattern.replace(r"\|", "|"); + // -r: recurse like the common `grep -r`; -n: line numbers; -H: always print + // the filename so output is the parseable `file:line:content`. `-e` marks the + // pattern explicitly so patterns starting with `-` aren't read as flags. + let mut grep_cmd = resolved_command("grep"); + grep_cmd.arg("-rnH"); + if let Some(ft) = file_type { + grep_cmd.arg(format!("--include={}", grep_type_glob(ft))); + } + grep_cmd.arg("-e").arg(pattern).arg(path).args(extra_args); + + let result = exec_capture(&mut grep_cmd).context("grep failed")?; + + emit_filtered( + result, + extra_args, + pattern, + path, + max_line_len, + max_results, + context_only, + parse_grep_line, + timer, + ) +} - let mut rg_cmd = resolved_command("rg"); - // --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files). - // Without this, rg returns 0 matches for files in .gitignore, causing - // false negatives that make AI agents draw wrong conclusions. - // Using --no-ignore-vcs (not --no-ignore) so .ignore/.rgignore are still respected. - // -H: always emit the filename. - // -0: NUL-separate filename. Allows the parser to disambiguate filenames or - // content containing `:digits:` patterns (issue #1436). - rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]); +/// `rtk rg` — faithful ripgrep passthrough with RTK filtering. All args are +/// forwarded to ripgrep verbatim (no grep-ism translation), so ripgrep-only flags +/// like `--glob`/`-g`/`-P`/`--files` work in any order. Routing these through the +/// `grep` subcommand / system grep is what broke in #2167, #2060, #1651. +pub fn run_rg(args: &[String], verbose: u8) -> Result { + let timer = tracking::TimedExecution::start(); - if let Some(ft) = file_type { - rg_cmd.arg("--type").arg(ft); + if verbose > 0 { + eprintln!("rg {}", args.join(" ")); } - for arg in extra_args { - // Fix: skip grep-ism -r flag (rg is recursive by default; rg -r means --replace) - if arg == "-r" || arg == "--recursive" { - continue; - } - rg_cmd.arg(arg); + // Info/list flags produce output that is not `file:line:content` (or is + // already compact): run rg verbatim and stream it through untouched. + if args.iter().any(|a| is_raw_passthrough_flag(a)) { + let mut rg_cmd = resolved_command("rg"); + rg_cmd.args(args); + let status = rg_cmd.status().context("failed to execute rg")?; + let raw_command = format!("rg {}", args.join(" ")); + timer.track_passthrough( + &raw_command, + &format!("rtk rg {} (passthrough)", args.join(" ")), + ); + return Ok(crate::core::utils::exit_code_from_status( + &status, + &raw_command, + )); } - let result = exec_capture(&mut rg_cmd) - .or_else(|_| { - let mut grep_cmd = resolved_command("grep"); - // When we fall back to grep, include all args, not just -rnHZ. - grep_cmd.args(["-rnHZ", pattern, path]).args(extra_args); - exec_capture(&mut grep_cmd) - }) - .context("grep/rg failed")?; + let mut rg_cmd = resolved_command("rg"); + // -0 NUL-separates the filename so the parser is unambiguous (#1436); + // --no-ignore-vcs matches grep -r (don't skip .gitignore'd files). + rg_cmd.args(["-nH0", "--no-heading", "--no-ignore-vcs"]); + rg_cmd.args(args); + let result = exec_capture(&mut rg_cmd).context("rg failed")?; + + // Pattern is only used for context-aware truncation centring; the positional + // pattern isn't separated out here, so head-truncate instead. + emit_filtered(result, args, "", ".", 80, 200, false, parse_match_line, timer) +} + +/// Map an rg-style `--file-type` name to a system-grep `--include` glob. Most rg +/// type names are the file extension; a few common ones differ. +fn grep_type_glob(file_type: &str) -> String { + let ext = match file_type { + "rust" => "rs", + "python" => "py", + "ruby" => "rb", + "javascript" => "js", + "typescript" => "ts", + "markdown" => "md", + other => other, + }; + format!("*.{}", ext) +} + +/// rg flags whose output should be streamed through unfiltered (file listings, +/// count/format modes). Superset of [`has_format_flag`]. +fn is_raw_passthrough_flag(arg: &str) -> bool { + matches!( + arg, + "--files" | "--type-list" | "--json" | "--vimgrep" | "--count-matches" + ) || has_format_flag(std::slice::from_ref(&arg.to_string())) +} +/// Shared post-execution filtering: passthrough for format flags, an empty-result +/// message, or the by-file grouped/truncated output used by both `run` and +/// `run_rg`. `parse_line` adapts the backend's match-line format (rg's NUL form +/// vs grep's `file:line:content`). +#[allow(clippy::too_many_arguments)] +fn emit_filtered( + result: CaptureResult, + extra_args: &[String], + pattern: &str, + path: &str, + max_line_len: usize, + max_results: usize, + context_only: bool, + parse_line: fn(&str) -> Option<(String, usize, &str)>, + timer: tracking::TimedExecution, +) -> Result { // Passthrough output flags that produce output that is already small. if has_format_flag(extra_args) { print!("{}", result.stdout); @@ -87,7 +162,13 @@ pub fn run( if exit_code == 2 && !result.stderr.trim().is_empty() { eprintln!("{}", result.stderr.trim()); } - let msg = format!("0 matches for '{}'", pattern); + // In raw mode the positional pattern isn't separated out, so omit it + // rather than printing a misleading empty `0 matches for ''`. + let msg = if pattern.is_empty() { + "0 matches".to_string() + } else { + format!("0 matches for '{}'", pattern) + }; println!("{}", msg); timer.track( &format!("grep -rn '{}' {}", pattern, path), @@ -111,7 +192,7 @@ pub fn run( let mut by_file: HashMap> = HashMap::new(); for line in result.stdout.lines() { - let Some((file, line_num, content)) = parse_match_line(line) else { + let Some((file, line_num, content)) = parse_line(line) else { continue; }; let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern); @@ -182,6 +263,17 @@ fn parse_match_line(line: &str) -> Option<(String, usize, &str)> { }) } +/// Parse a system-grep match line of the form `file:line:content` (from +/// `grep -nH`). Unlike rg's NUL form there is no separator to disambiguate, so a +/// filename containing `:` can't be told apart and such lines are skipped — this +/// mirrors grep's own inherent ambiguity. Colons inside the content are kept. +fn parse_grep_line(line: &str) -> Option<(String, usize, &str)> { + let (file, rest) = line.split_once(':')?; + let (line_num, content) = rest.split_once(':')?; + let line_num: usize = line_num.parse().ok()?; + Some((file.to_string(), line_num, content)) +} + fn has_format_flag(extra_args: &[String]) -> bool { extra_args.iter().any(|arg| { matches!( @@ -290,6 +382,46 @@ mod tests { // No need to actually run - we're verifying the parameter exists } + #[test] + fn test_is_raw_passthrough_flag() { + // Info/list flags stream through unfiltered. + for flag in ["--files", "--type-list", "--json", "--count-matches"] { + assert!(is_raw_passthrough_flag(flag), "{flag} should pass through"); + } + // Format flags from has_format_flag are also passthrough. + assert!(is_raw_passthrough_flag("-c")); + assert!(is_raw_passthrough_flag("--files-with-matches")); + // Ordinary search flags are filtered, not passed through. + for flag in ["-i", "--glob", "-g", "-A", "-w", "alpha"] { + assert!(!is_raw_passthrough_flag(flag), "{flag} should be filtered"); + } + } + + #[test] + fn test_parse_grep_line() { + // Standard grep -nH line. + assert_eq!( + parse_grep_line("src/main.rs:42:fn main() {"), + Some(("src/main.rs".to_string(), 42, "fn main() {")) + ); + // Colons inside the content are preserved. + assert_eq!( + parse_grep_line("a.rs:7:Registry::init(x):y"), + Some(("a.rs".to_string(), 7, "Registry::init(x):y")) + ); + // Non-match lines (e.g. grep -A/-B context with `-` separator) are skipped. + assert_eq!(parse_grep_line("src/main.rs-43- let x = 1;"), None); + assert_eq!(parse_grep_line("no colons here"), None); + } + + #[test] + fn test_grep_type_glob() { + assert_eq!(grep_type_glob("py"), "*.py"); + assert_eq!(grep_type_glob("rust"), "*.rs"); + assert_eq!(grep_type_glob("ts"), "*.ts"); + assert_eq!(grep_type_glob("markdown"), "*.md"); + } + #[test] fn test_clean_line_multibyte() { // Thai text that exceeds max_len in bytes diff --git a/src/discover/registry.rs b/src/discover/registry.rs index 4fd716828..5babf9f8a 100644 --- a/src/discover/registry.rs +++ b/src/discover/registry.rs @@ -1415,9 +1415,15 @@ mod tests { #[test] fn test_rewrite_rg_pattern() { + // rg keeps its own identity (-> rtk rg), grep keeps its own (-> rtk grep), + // so each runs its own backend without flag-semantic crossover. assert_eq!( rewrite_command_no_prefixes("rg \"fn main\"", &[]), - Some("rtk grep \"fn main\"".into()) + Some("rtk rg \"fn main\"".into()) + ); + assert_eq!( + rewrite_command_no_prefixes("grep \"fn main\" .", &[]), + Some("rtk grep \"fn main\" .".into()) ); } diff --git a/src/discover/rules.rs b/src/discover/rules.rs index df7c72d03..6cbb1d01b 100644 --- a/src/discover/rules.rs +++ b/src/discover/rules.rs @@ -88,10 +88,23 @@ pub const RULES: &[RtkRule] = &[ subcmd_savings: &[], subcmd_status: &[], }, + // grep and rg are kept distinct so each runs its own backend: `grep` keeps + // grep semantics (system grep), `rg` keeps ripgrep semantics. Collapsing them + // breaks the other tool's flags (e.g. `-r` is recursive in grep but --replace + // in rg, `--glob`/`-P` are rg-only). RtkRule { - pattern: r"^(rg|grep)\s+", + pattern: r"^grep\s+", rtk_cmd: "rtk grep", - rewrite_prefixes: &["rg", "grep"], + rewrite_prefixes: &["grep"], + category: "Files", + savings_pct: 75.0, + subcmd_savings: &[], + subcmd_status: &[], + }, + RtkRule { + pattern: r"^rg\s+", + rtk_cmd: "rtk rg", + rewrite_prefixes: &["rg"], category: "Files", savings_pct: 75.0, subcmd_savings: &[], diff --git a/src/main.rs b/src/main.rs index 992f865a2..11320685f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -327,6 +327,16 @@ enum Commands { extra_args: Vec, }, + /// Ripgrep with token-optimized output (compact, grouped by file) + /// + /// Faithful ripgrep passthrough: all args are forwarded to `rg` verbatim, so + /// ripgrep-only flags (`--glob`, `-g`, `-P`, `--files`, ...) work in any order. + Rg { + /// Arguments passed to ripgrep (pattern, paths, and any rg flags) + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec, + }, + /// Initialize rtk instructions for assistant CLI usage Init { /// Add to global assistant config directory instead of local project file @@ -1811,6 +1821,8 @@ fn run_cli() -> Result { cli.verbose, )?, + Commands::Rg { args } => grep_cmd::run_rg(&args, cli.verbose)?, + Commands::Init { global, opencode, @@ -2509,6 +2521,7 @@ fn is_operational_command(cmd: &Commands) -> bool { | Commands::Kubectl { .. } | Commands::Summary { .. } | Commands::Grep { .. } + | Commands::Rg { .. } | Commands::Wget { .. } | Commands::Vitest { .. } | Commands::Prisma { .. }