From 2d9bdb068af27919fd4c7b874cd9802983c0d39d Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 1 Jul 2026 21:04:11 +0200 Subject: [PATCH 1/2] uniq: cache locale check instead of querying env vars per line is_c_locale() was called on every line inside key_end_index() when -w/--check-chars is set, doing up to 3 std::env::var_os() lookups each time. Locale env vars can't change mid-process, so this was pure per-line overhead, causing uniq -w to be ~5x slower than GNU uniq even for small -w values. Compute is_c_locale() once at startup and cache it on the Uniq struct instead. Fixes #13199 --- src/uu/uniq/src/uniq.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs index 2355da2a20..24c0b54dad 100644 --- a/src/uu/uniq/src/uniq.rs +++ b/src/uu/uniq/src/uniq.rs @@ -55,6 +55,7 @@ struct Uniq { slice_stop: Option<usize>, ignore_case: bool, zero_terminated: bool, + is_c_locale: bool, } #[derive(Default)] @@ -202,7 +203,7 @@ impl Uniq { if remainder.is_empty() { return key_start; } - if Self::is_c_locale() { + if self.is_c_locale { // for C or POSIX we count bytes key_start + remainder.len().min(limit) } else if let Ok(valid) = std::str::from_utf8(remainder) { @@ -680,6 +681,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { slice_stop: opt_parsed(options::CHECK_CHARS, &matches)?, ignore_case: matches.get_flag(options::IGNORE_CASE), zero_terminated: matches.get_flag(options::ZERO_TERMINATED), + is_c_locale: Uniq::is_c_locale(), }; if uniq.show_counts && uniq.all_repeated { From 6dc2c359227c8d3d41503cb54e65dbc55542f7a8 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 1 Jul 2026 21:11:25 +0200 Subject: [PATCH 2/2] uniq: merge per-line writes and grow input buffer write_line() issued two separate write_all() calls per output line (line content, then the terminator byte), each going through the dynamically-dispatched Box<dyn Write> from open_output_file().
Merge them into a single write via a reused scratch buffer. Also match the input BufReader's capacity to the existing 128KB output buffer (previously the 8KB std default), for consistency. Measured on a 20x-repeated /usr/share/dict/words (~80MB, pinned to one CPU core to reduce noise): -w 1 dropped from 429.7ms to 395.5ms (~8%), -w 512 from 578.9ms to 497.8ms (~14%). --- src/uu/uniq/src/uniq.rs | 45 +++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs index 24c0b54dad..1aede27d78 100644 --- a/src/uu/uniq/src/uniq.rs +++ b/src/uu/uniq/src/uniq.rs @@ -88,6 +88,7 @@ impl Uniq { let mut next_buf = Vec::with_capacity(1024); let mut next_meta = LineMeta::default(); + let mut line_out = Vec::with_capacity(1024); loop { if !Self::read_line(&mut reader, &mut next_buf, line_terminator)? { @@ -98,7 +99,13 @@ impl Uniq { if self.keys_are_equal(&current_buf, &current_meta, &next_buf, &next_meta) { if self.all_repeated { - self.write_line(writer, &current_buf, group_count, first_line_printed)?; + self.write_line( + writer, + &mut line_out, + &current_buf, + group_count, + first_line_printed, + )?; first_line_printed = true; std::mem::swap(&mut current_buf, &mut next_buf); std::mem::swap(&mut current_meta, &mut next_meta); @@ -108,7 +115,13 @@ impl Uniq { if (group_count == 1 && !self.repeats_only) || (group_count > 1 && !self.uniques_only) { - self.write_line(writer, &current_buf, group_count, first_line_printed)?; + self.write_line( + writer, + &mut line_out, + &current_buf, + group_count, + first_line_printed, + )?; first_line_printed = true; } std::mem::swap(&mut current_buf, &mut next_buf); @@ -119,7 +132,13 @@ impl Uniq { } if (group_count == 1 && !self.repeats_only) || (group_count > 1 && !self.uniques_only) { - self.write_line(writer, &current_buf, group_count, first_line_printed)?; + self.write_line( + writer, + &mut line_out, + &current_buf, + group_count, + first_line_printed, + )?; first_line_printed = true; } if
(self.delimiters == Delimiters::Append || self.delimiters == Delimiters::Both) @@ -265,6 +284,7 @@ impl Uniq { fn write_line( &self, writer: &mut impl Write, + line_out: &mut Vec<u8>, line: &[u8], count: usize, first_line_printed: bool, @@ -275,21 +295,20 @@ impl Uniq { write_line_terminator!(writer, line_terminator)?; } - let mut count_buf = [0u8; Self::COUNT_PREFIX_BUF_SIZE]; + line_out.clear(); if self.show_counts { - // Call the associated function (no &self) after the refactor above. + let mut count_buf = [0u8; Self::COUNT_PREFIX_BUF_SIZE]; let prefix = Self::build_count_prefix(count, &mut count_buf); - writer - .write_all(prefix) - .map_err_context(|| translate!("uniq-error-write-error"))?; + line_out.extend_from_slice(prefix); } - writer - .write_all(line) - .map_err_context(|| translate!("uniq-error-write-error"))?; + line_out.extend_from_slice(line); + line_out.push(line_terminator); - write_line_terminator!(writer, line_terminator) + writer + .write_all(line_out) + .map_err_context(|| translate!("uniq-error-write-error")) } const COUNT_PREFIX_WIDTH: usize = 7; @@ -827,7 +846,7 @@ fn open_input_file(in_file_name: Option<&OsStr>) -> UResult<Box<dyn BufRead>> { let in_file = File::open(path).map_err_context( || translate!("uniq-error-could-not-open", "path" => path.maybe_quote()), )?; - Box::new(BufReader::new(in_file)) + Box::new(BufReader::with_capacity(OUTPUT_BUFFER_CAPACITY, in_file)) } _ => Box::new(stdin().lock()), })