From a952d5ed88a416b93a6dbc76377b46348eda2fbe Mon Sep 17 00:00:00 2001 From: Anurag Thakur Date: Sun, 5 Jul 2026 03:27:16 +0530 Subject: [PATCH] uucore: Remove ARGV cache --- src/bin/coreutils.rs | 12 ++++ src/bin/uudoc.rs | 6 ++ src/uucore/src/lib/lib.rs | 118 +++++++++++++++++++++++++------------- 3 files changed, 96 insertions(+), 40 deletions(-) diff --git a/src/bin/coreutils.rs b/src/bin/coreutils.rs index f451780250a..364b4d8d55d 100644 --- a/src/bin/coreutils.rs +++ b/src/bin/coreutils.rs @@ -121,6 +121,18 @@ fn main() { // binary to avoid the load of the flt // Could be something like: // #[cfg(not(feature = "only_english"))] + + if uucore::get_utility_is_second_arg() { + uucore::init_util_name(&util_os); + uucore::init_execution_phrase(format!( + "{} {}", + binary.to_string_lossy(), + util_os.to_string_lossy() + )); + } else { + uucore::init_util_name(&binary); + uucore::init_execution_phrase(&binary); + } validation::setup_localization_or_exit(util); process::exit(uumain(vec![util_os].into_iter().chain(args))); } diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs index 69ab0410886..cd9674c5d93 100644 --- a/src/bin/uudoc.rs +++ b/src/bin/uudoc.rs @@ -145,6 +145,7 @@ fn gen_manpage( .get_matches_from(std::iter::once(OsString::from("manpage")).chain(args)); let utility = matches.get_one::<String>("utility").unwrap(); + uucore::init_util_name(utility); let command = if utility == "coreutils" { gen_coreutils_app(util_map) } else { @@ -229,6 +230,11 @@ fn main() -> io::Result<()> { let command = args.get(1).and_then(|s| s.to_str()).unwrap_or_default(); match command { "manpage" => { + uucore::init_execution_phrase(format!( + "{} {}", + args[0].to_string_lossy(), + args[1].to_string_lossy() + )); let args_iter = args.into_iter().skip(2); gen_manpage( &mut tldr_zip, diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 4dfffed2995..44b3af5ec28 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -151,7 +151,8 @@ use
std::os::unix::ffi::{OsStrExt, OsStringExt}; use std::os::wasi::ffi::{OsStrExt, OsStringExt}; use std::str; use std::str::Utf8Chunk; -use std::sync::{LazyLock, atomic::Ordering}; +use std::sync::OnceLock; +use std::sync::atomic::Ordering; /// Disables the custom signal handlers installed by Rust for stack-overflow handling. With those custom signal handlers processes ignore the first SIGBUS and SIGSEGV signal they receive. /// See for details. @@ -211,7 +212,12 @@ macro_rules! bin_inner { }); // execute utility code - let code = $util::uumain(uucore::args_os()); + let mut args = uucore::args_os().peekable(); + if let Some(arg0) = args.peek() { + uucore::init_util_name(arg0); + uucore::init_execution_phrase(arg0); + } + let code = $util::uumain(args); $post std::process::exit(code); @@ -346,48 +352,71 @@ pub fn set_utility_is_second_arg() { macros::UTILITY_IS_SECOND_ARG.store(true, Ordering::SeqCst); } -// args_os() can be expensive to call, it copies all of argv before iterating. -// So if we want only the first arg or so it's overkill. We cache it. 
-#[cfg(windows)] -static ARGV: LazyLock<Vec<OsString>> = LazyLock::new(|| wild::args_os().collect()); -#[cfg(not(windows))] -static ARGV: LazyLock<Vec<OsString>> = LazyLock::new(|| std::env::args_os().collect()); - -static UTIL_NAME: LazyLock<String> = LazyLock::new(|| { - let base_index = usize::from(get_utility_is_second_arg()); - let is_man = usize::from(ARGV[base_index].eq("manpage")); - let argv_index = base_index + is_man; - - // Strip directory path to show only utility name - // (e.g., "mkdir" instead of "./target/debug/mkdir") - // in version output, error messages, and other user-facing output - std::path::Path::new(&ARGV[argv_index]) - .file_name() - .unwrap_or(&ARGV[argv_index]) - .to_string_lossy() - .into_owned() -}); +static UTIL_NAME: OnceLock<String> = OnceLock::new(); + +/// Set the utility name from a raw argv value, stripping any directory path +/// (e.g., "mkdir" instead of "./target/debug/mkdir") for version output, +/// error messages, and other user-facing output. +/// +/// The first call wins; a later call is ignored in release builds and panics in debug builds. +/// If never called, [`util_name`] derives the name from the process arguments on first use. +pub fn init_util_name(input: impl AsRef<OsStr>) { + let input = input.as_ref(); + let name = std::path::Path::new(input).file_name().unwrap_or(input); + let result = UTIL_NAME.set(name.to_string_lossy().into_owned()); + debug_assert!( + result.is_ok(), + "init_util_name called after UTIL_NAME was already initialized" + ); +} /// Derive the utility name. 
pub fn util_name() -> &'static str { - &UTIL_NAME + UTIL_NAME.get_or_init(|| { + let base_index = usize::from(get_utility_is_second_arg()); + let args: Vec<OsString> = args_os().skip(base_index).take(2).collect(); + let is_man = usize::from(args.first().is_some_and(|arg| arg == "manpage")); + let Some(arg) = args.get(is_man) else { + return String::new(); + }; + + // Strip directory path to show only utility name + // (e.g., "mkdir" instead of "./target/debug/mkdir") + // in version output, error messages, and other user-facing output + std::path::Path::new(arg) + .file_name() + .unwrap_or(arg) + .to_string_lossy() + .into_owned() + }) } -static EXECUTION_PHRASE: LazyLock<String> = LazyLock::new(|| { - if get_utility_is_second_arg() { - ARGV.iter() - .take(2) - .map(|os_str| os_str.to_string_lossy().into_owned()) - .collect::<Vec<_>>() - .join(" ") - } else { - ARGV[0].to_string_lossy().into_owned() - } -}); +static EXECUTION_PHRASE: OnceLock<String> = OnceLock::new(); + +/// Set the execution phrase shown in "usage" output, e.g. `mkdir` or +/// `coreutils mkdir`, from the raw argv value(s) used to invoke the utility. +/// +/// The first call wins; a later call is ignored in release builds and panics in debug builds. +/// If never called, [`execution_phrase`] derives the phrase from the process +/// arguments on first use. +pub fn init_execution_phrase(input: impl AsRef<OsStr>) { + let result = EXECUTION_PHRASE.set(input.as_ref().to_string_lossy().into_owned()); + debug_assert!( + result.is_ok(), + "init_execution_phrase called after EXECUTION_PHRASE was already initialized" + ); +} /// Derive the complete execution phrase for "usage". pub fn execution_phrase() -> &'static str { - &EXECUTION_PHRASE + EXECUTION_PHRASE.get_or_init(|| { + let n = if get_utility_is_second_arg() { 2 } else { 1 }; + args_os() + .take(n) + .map(|os_str| os_str.to_string_lossy().into_owned()) + .collect::<Vec<_>>() + .join(" ") + }) } /// Args contains arguments passed to the utility. 
@@ -409,16 +438,25 @@ pub trait Args: Iterator<Item = OsString> + Sized { impl<T: Iterator<Item = OsString> + Sized> Args for T {} /// Returns an iterator over the command line arguments as `OsString`s. -/// args_os() can be expensive to call +/// +/// Each call copies all of argv (and, on Windows, re-expands glob patterns), +/// so call it once and reuse the result rather than calling it repeatedly. pub fn args_os() -> impl Iterator<Item = OsString> { - ARGV.iter().cloned() + #[cfg(windows)] + { + wild::args_os() + } + #[cfg(not(windows))] + { + std::env::args_os() + } } /// Returns an iterator over the command line arguments as `OsString`s, filtering out empty arguments. /// This is useful for handling cases where extra whitespace or empty arguments are present. -/// args_os_filtered() can be expensive to call +/// Like [`args_os`], each call copies all of argv, so call it once and reuse the result. pub fn args_os_filtered() -> impl Iterator<Item = OsString> { - ARGV.iter().filter(|arg| !arg.is_empty()).cloned() + args_os().filter(|arg| !arg.is_empty()) } /// Read a line from stdin and check whether the first character is `'y'` or `'Y'`