diff --git a/src/analytics/gain.rs b/src/analytics/gain.rs index ac61dd9b5..bdff05b65 100644 --- a/src/analytics/gain.rs +++ b/src/analytics/gain.rs @@ -246,11 +246,9 @@ pub fn run( println!("──────────────────────────────────────────────────────────"); for rec in recent { let time = rec.timestamp.with_timezone(&Local).format("%m-%d %H:%M"); - let cmd_short = if rec.rtk_cmd.len() > 25 { - format!("{}...", &rec.rtk_cmd[..22]) - } else { - rec.rtk_cmd.clone() - }; + // char-safe truncation: commands may contain multibyte UTF-8, + // and slicing at a byte offset that is not a char boundary would panic + let cmd_short = crate::core::utils::truncate(&rec.rtk_cmd, 25); // added: tier indicators by savings level let sign = if rec.savings_pct >= 70.0 { "▲" @@ -707,11 +705,8 @@ fn show_failures(tracker: &Tracker) -> Result<()> { println!("{}", styled("Top Commands (by frequency)", true)); println!("{}", "─".repeat(60)); for (cmd, count) in &summary.top_commands { - let cmd_display = if cmd.len() > 50 { - format!("{}...", &cmd[..47]) - } else { - cmd.clone() - }; + // char-safe truncation: avoid panicking when a command contains multibyte UTF-8 + let cmd_display = crate::core::utils::truncate(cmd, 50); println!(" {:>4}x {}", count, cmd_display); } println!(); @@ -727,11 +722,8 @@ fn show_failures(tracker: &Tracker) -> Result<()> { &rec.timestamp }; let status = if rec.fallback_succeeded { "ok" } else { "FAIL" }; - let cmd_display = if rec.raw_command.len() > 40 { - format!("{}...", &rec.raw_command[..37]) - } else { - rec.raw_command.clone() - }; + // char-safe truncation: avoid panicking when a command contains multibyte UTF-8 + let cmd_display = crate::core::utils::truncate(&rec.raw_command, 40); println!(" {} [{}] {}", ts_short, status, cmd_display); } println!(); diff --git a/src/cmds/git/git.rs b/src/cmds/git/git.rs index eaf8d8b5f..c1051ad08 100644 --- a/src/cmds/git/git.rs +++ b/src/cmds/git/git.rs @@ -31,6 +31,10 @@ pub enum GitCommand { /// prepended before any subcommand arguments. 
fn git_cmd(global_args: &[String]) -> Command { let mut cmd = resolved_command("git"); + // Render non-ASCII (CJK, etc.) filenames as UTF-8 instead of git's default + // octal escapes (\nnn). Must be injected with -c before the subcommand; + // global_args and the subcommand are appended after. + cmd.arg("-c").arg("core.quotepath=false"); for arg in global_args { cmd.arg(arg); } diff --git a/src/main.rs b/src/main.rs index 992f865a2..3b91fa351 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2431,8 +2431,8 @@ fn run_cli() -> Result { .join() .map_err(|_| anyhow::anyhow!("stderr streaming thread panicked"))??; - let stdout = String::from_utf8_lossy(&stdout_bytes); - let stderr = String::from_utf8_lossy(&stderr_bytes); + let stdout = decode_captured(&stdout_bytes); + let stderr = decode_captured(&stderr_bytes); let full_output = format!("{}{}", stdout, stderr); // Track usage (input = output since no filtering) @@ -2475,6 +2475,22 @@ fn run_cli() -> Result { Ok(code) } +/// Decode captured streaming bytes into a string. `bytes` (the captured buffer) is truncated at a +/// 1 MiB cap, so its tail may stop in the middle of a multibyte UTF-8 sequence; +/// in that case decode only up to the valid boundary to avoid emitting a +/// spurious trailing replacement char (U+FFFD). Genuinely invalid mid-stream +/// bytes (e.g. binary output) keep the lossy behavior. The result borrows from `bytes` whenever no replacement character has to be inserted. +fn decode_captured(bytes: &[u8]) -> std::borrow::Cow<'_, str> { + match std::str::from_utf8(bytes) { + Ok(s) => std::borrow::Cow::Borrowed(s), + // error_len() == None means the error is an unexpected end of input (an + // incomplete trailing sequence cut off by the cap), so take the valid + // prefix only; Some(_) means an invalid byte mid-stream, so stay lossy. + Err(e) if e.error_len().is_none() => String::from_utf8_lossy(&bytes[..e.valid_up_to()]), + Err(_) => String::from_utf8_lossy(bytes), + } +} + /// Returns true for commands that are invoked via the hook pipeline /// (i.e., commands that process rewritten shell commands). 
/// Meta commands (init, gain, verify, etc.) are excluded because