From 74f9df7d48f6fbc22fd518a21419ede2c12ded78 Mon Sep 17 00:00:00 2001 From: snwsnwsnw Date: Sat, 30 May 2026 00:09:30 +0800 Subject: [PATCH 1/3] fix(git): disable core.quotepath so non-ASCII filenames render as UTF-8 Git escapes non-ASCII path bytes as octal \nnn by default (core.quotepath=true). rtk passed this straight through, so git status / log --name-only / diff --stat showed CJK and other non-ASCII filenames as unreadable escapes. Inject -c core.quotepath=false at the single git_cmd() chokepoint; no effect on ASCII paths. --- src/cmds/git/git.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cmds/git/git.rs b/src/cmds/git/git.rs index eaf8d8b5f..c1051ad08 100644 --- a/src/cmds/git/git.rs +++ b/src/cmds/git/git.rs @@ -31,6 +31,10 @@ pub enum GitCommand { /// prepended before any subcommand arguments. fn git_cmd(global_args: &[String]) -> Command { let mut cmd = resolved_command("git"); + // Render non-ASCII (CJK, etc.) filenames as UTF-8 instead of git's default + // octal escapes (\nnn). Must be injected with -c before the subcommand; + // global_args and the subcommand are appended after. + cmd.arg("-c").arg("core.quotepath=false"); for arg in global_args { cmd.arg(arg); } From e8fbb2deaba755e609cdc42108f36257ef833a11 Mon Sep 17 00:00:00 2001 From: snwsnwsnw Date: Sat, 30 May 2026 00:09:30 +0800 Subject: [PATCH 2/3] fix(gain): truncate command strings on char boundaries gain --history and the failure summary sliced command strings by byte index (&cmd[..47] etc). A command containing multibyte UTF-8 (e.g. a non-ASCII search pattern or commit message) could be cut mid-codepoint and panic. Use the existing char-safe utils::truncate(). 
--- src/analytics/gain.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/analytics/gain.rs b/src/analytics/gain.rs index ac61dd9b5..bdff05b65 100644 --- a/src/analytics/gain.rs +++ b/src/analytics/gain.rs @@ -246,11 +246,9 @@ pub fn run( println!("──────────────────────────────────────────────────────────"); for rec in recent { let time = rec.timestamp.with_timezone(&Local).format("%m-%d %H:%M"); - let cmd_short = if rec.rtk_cmd.len() > 25 { - format!("{}...", &rec.rtk_cmd[..22]) - } else { - rec.rtk_cmd.clone() - }; + // char-safe truncation: commands may contain multibyte UTF-8, + // and slicing at a byte index inside a codepoint would panic + let cmd_short = crate::core::utils::truncate(&rec.rtk_cmd, 25); // added: tier indicators by savings level let sign = if rec.savings_pct >= 70.0 { "▲" @@ -707,11 +705,8 @@ fn show_failures(tracker: &Tracker) -> Result<()> { println!("{}", styled("Top Commands (by frequency)", true)); println!("{}", "─".repeat(60)); for (cmd, count) in &summary.top_commands { - let cmd_display = if cmd.len() > 50 { - format!("{}...", &cmd[..47]) - } else { - cmd.clone() - }; + // char-safe truncation: avoid panicking when a command contains multibyte UTF-8 + let cmd_display = crate::core::utils::truncate(cmd, 50); println!(" {:>4}x {}", count, cmd_display); } println!(); @@ -727,11 +722,8 @@ fn show_failures(tracker: &Tracker) -> Result<()> { &rec.timestamp }; let status = if rec.fallback_succeeded { "ok" } else { "FAIL" }; - let cmd_display = if rec.raw_command.len() > 40 { - format!("{}...", &rec.raw_command[..37]) - } else { - rec.raw_command.clone() - }; + // char-safe truncation: avoid panicking when a command contains multibyte UTF-8 + let cmd_display = crate::core::utils::truncate(&rec.raw_command, 40); println!(" {} [{}] {}", ts_short, status, cmd_display); } println!(); From 69b2f92c0b479bbc463870673e94fa30e636732b Mon Sep 17 00:00:00 2001 From: snwsnwsnw Date: Sat, 30 May 2026 00:09:30 +0800 Subject: 
[PATCH 3/3] fix(proxy): avoid spurious U+FFFD when capture cap splits a codepoint The proxy streaming path caps the captured copy at 1 MiB. When the cap lands inside a multibyte UTF-8 sequence, from_utf8_lossy emitted a trailing replacement char into the tracked output. decode_captured() trims an incomplete trailing sequence while keeping lossy behavior for genuinely invalid mid-stream bytes. --- src/main.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 992f865a2..3b91fa351 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2431,8 +2431,8 @@ fn run_cli() -> Result { .join() .map_err(|_| anyhow::anyhow!("stderr streaming thread panicked"))??; - let stdout = String::from_utf8_lossy(&stdout_bytes); - let stderr = String::from_utf8_lossy(&stderr_bytes); + let stdout = decode_captured(&stdout_bytes); + let stderr = decode_captured(&stderr_bytes); let full_output = format!("{}{}", stdout, stderr); // Track usage (input = output since no filtering) @@ -2475,6 +2475,22 @@ fn run_cli() -> Result { Ok(code) } +/// Decode captured streaming bytes into a string. `bytes` is truncated at a +/// 1 MiB cap, so its tail may stop in the middle of a multibyte UTF-8 sequence; +/// in that case decode only up to the valid boundary to avoid emitting a +/// spurious trailing replacement char (U+FFFD). Input with invalid mid-stream +/// bytes (e.g. binary output) is decoded lossily whole; a cut tail then still yields U+FFFD. +fn decode_captured(bytes: &[u8]) -> std::borrow::Cow<'_, str> { + match std::str::from_utf8(bytes) { + Ok(s) => std::borrow::Cow::Borrowed(s), + // error_len() == None means the error is an unexpected end of input (an + // incomplete trailing sequence cut off by the cap), so take the valid + // prefix only; Some(_) means an invalid byte mid-stream, so stay lossy. 
+ Err(e) if e.error_len().is_none() => String::from_utf8_lossy(&bytes[..e.valid_up_to()]), + Err(_) => String::from_utf8_lossy(bytes), + } +} + /// Returns true for commands that are invoked via the hook pipeline /// (i.e., commands that process rewritten shell commands). /// Meta commands (init, gain, verify, etc.) are excluded because