From d65969ee81560dd4c0e20237cee376c4dd177570 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Thu, 2 Jul 2026 06:24:11 -0700 Subject: [PATCH] fix: csplit --suffix-format huge field width panics instead of erroring --- src/uu/csplit/locales/en-US.ftl | 1 + src/uu/csplit/src/csplit.rs | 21 +++++------ src/uu/csplit/src/csplit_error.rs | 16 +++++++-- src/uu/csplit/src/split_name.rs | 58 ++++++++++++++++++------------- tests/by-util/test_csplit.rs | 31 +++++++++++++++++ 5 files changed, 91 insertions(+), 36 deletions(-) diff --git a/src/uu/csplit/locales/en-US.ftl b/src/uu/csplit/locales/en-US.ftl index ba0ce032e49..81fbe9aa845 100644 --- a/src/uu/csplit/locales/en-US.ftl +++ b/src/uu/csplit/locales/en-US.ftl @@ -23,6 +23,7 @@ csplit-error-invalid-number = invalid number: { $number } csplit-error-suffix-format-incorrect = incorrect conversion specification in suffix csplit-error-suffix-format-too-many-percents = too many % conversion specifications in suffix csplit-error-not-regular-file = { $file } is not a regular file +csplit-error-memory-exhausted = memory exhausted csplit-warning-line-number-same-as-previous = line number '{ $line_number }' is the same as preceding line number csplit-stream-not-utf8 = stream did not contain valid UTF-8 csplit-read-error = read error diff --git a/src/uu/csplit/src/csplit.rs b/src/uu/csplit/src/csplit.rs index 3a4b43e881e..44c2d43064e 100644 --- a/src/uu/csplit/src/csplit.rs +++ b/src/uu/csplit/src/csplit.rs @@ -242,13 +242,14 @@ struct SplitWriter<'a> { impl Drop for SplitWriter<'_> { fn drop(&mut self) { if self.options.elide_empty_files && self.size == 0 { - let file_name = self.options.split_name.get(self.counter); - // In the case of `echo a | csplit -z - %a%1`, the file - // `xx00` does not exist because the positive offset - // advanced past the end of the input. Since there is no - // file to remove in that case, `remove_file` would return - // an error, so we just ignore it. - let _ = remove_file(file_name); + if let Ok(file_name) = self.options.split_name.get(self.counter) { + // In the case of `echo a | csplit -z - %a%1`, the file + // `xx00` does not exist because the positive offset + // advanced past the end of the input. Since there is no + // file to remove in that case, `remove_file` would return + // an error, so we just ignore it. + let _ = remove_file(file_name); + } } } } @@ -270,7 +271,7 @@ impl SplitWriter<'_> { /// /// The creation of the split file may fail with some [`io::Error`]. fn new_writer(&mut self) -> io::Result<()> { - let file_name = self.options.split_name.get(self.counter); + let file_name = self.options.split_name.get(self.counter)?; let file = File::create(file_name)?; self.current_writer = Some(BufWriter::new(file)); self.counter += 1; @@ -314,7 +315,7 @@ impl SplitWriter<'_> { if !self.dev_null { // Flush the writer to ensure all data is written and errors are detected if let Some(ref mut writer) = self.current_writer { - let file_name = self.options.split_name.get(self.counter - 1); + let file_name = self.options.split_name.get(self.counter - 1)?; writer .flush() .map_err_context(|| file_name.clone()) @@ -337,7 +338,7 @@ impl SplitWriter<'_> { fn delete_all_splits(&self) -> io::Result<()> { let mut ret = Ok(()); for ith in 0..self.counter { - let file_name = self.options.split_name.get(ith); + let file_name = self.options.split_name.get(ith)?; if let Err(err) = remove_file(file_name) { ret = Err(err); } diff --git a/src/uu/csplit/src/csplit_error.rs b/src/uu/csplit/src/csplit_error.rs index d73400bd7d1..2d9c92b1dda 100644 --- a/src/uu/csplit/src/csplit_error.rs +++ b/src/uu/csplit/src/csplit_error.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use std::io; +use std::io::{self, ErrorKind}; use thiserror::Error; use uucore::display::Quotable; use uucore::error::UError; @@ -13,7 +13,9 @@ use uucore::translate; #[derive(Debug, Error)] pub enum CsplitError { #[error("IO error: {}", _0)] - IoError(#[from] io::Error), + IoError(io::Error), + #[error("{}", translate!("csplit-error-memory-exhausted"))] + MemoryExhausted, #[error("{}", translate!("csplit-error-line-out-of-range", "pattern" => _0.quote()))] LineOutOfRange(String), #[error("{}", translate!("csplit-error-line-out-of-range-on-repetition", "pattern" => _0.quote(), "repetition" => _1))] @@ -40,6 +42,16 @@ pub enum CsplitError { UError(Box), } +impl From for CsplitError { + fn from(error: io::Error) -> Self { + if error.kind() == ErrorKind::OutOfMemory { + Self::MemoryExhausted + } else { + Self::IoError(error) + } + } +} + impl From> for CsplitError { fn from(error: Box) -> Self { Self::UError(error) diff --git a/src/uu/csplit/src/split_name.rs b/src/uu/csplit/src/split_name.rs index 33c606b4888..a16d1f470ad 100644 --- a/src/uu/csplit/src/split_name.rs +++ b/src/uu/csplit/src/split_name.rs @@ -4,6 +4,8 @@ // file that was distributed with this source code. // spell-checker:ignore (regex) diuox +use std::io; + use uucore::format::{Format, FormatError, num_format::UnsignedInt}; use crate::csplit_error::CsplitError; @@ -62,10 +64,10 @@ impl SplitName { } /// Returns the filename of the i-th split. - pub fn get(&self, n: usize) -> String { + pub fn get(&self, n: usize) -> io::Result { let mut v = self.prefix.clone(); - self.format.fmt(&mut v, n as u64).unwrap(); - String::from_utf8_lossy(&v).to_string() + self.format.fmt(&mut v, n as u64)?; + Ok(String::from_utf8_lossy(&v).to_string()) } } @@ -105,31 +107,31 @@ mod tests { #[test] fn default_formatter() { let split_name = SplitName::new(None, None, None).unwrap(); - assert_eq!(split_name.get(2), "xx02"); + assert_eq!(split_name.get(2).unwrap(), "xx02"); } #[test] fn default_formatter_with_prefix() { let split_name = SplitName::new(Some(String::from("aaa")), None, None).unwrap(); - assert_eq!(split_name.get(2), "aaa02"); + assert_eq!(split_name.get(2).unwrap(), "aaa02"); } #[test] fn default_formatter_with_width() { let split_name = SplitName::new(None, None, Some(String::from("5"))).unwrap(); - assert_eq!(split_name.get(2), "xx00002"); + assert_eq!(split_name.get(2).unwrap(), "xx00002"); } #[test] fn no_padding_decimal() { let split_name = SplitName::new(None, Some(String::from("cst-%d-")), None).unwrap(); - assert_eq!(split_name.get(2), "xxcst-2-"); + assert_eq!(split_name.get(2).unwrap(), "xxcst-2-"); } #[test] fn zero_padding_decimal1() { let split_name = SplitName::new(None, Some(String::from("cst-%03d-")), None).unwrap(); - assert_eq!(split_name.get(2), "xxcst-002-"); + assert_eq!(split_name.get(2).unwrap(), "xxcst-002-"); } #[test] @@ -140,7 +142,7 @@ mod tests { None, ) .unwrap(); - assert_eq!(split_name.get(2), "pre-cst-002-post"); + assert_eq!(split_name.get(2).unwrap(), "pre-cst-002-post"); } #[test] @@ -151,91 +153,99 @@ mod tests { Some(String::from("42")), ) .unwrap(); - assert_eq!(split_name.get(2), "xxcst-002-"); + assert_eq!(split_name.get(2).unwrap(), "xxcst-002-"); } #[test] fn zero_padding_decimal4() { let split_name = SplitName::new(None, Some(String::from("cst-%03i-")), None).unwrap(); - assert_eq!(split_name.get(2), "xxcst-002-"); + assert_eq!(split_name.get(2).unwrap(), "xxcst-002-"); } #[test] fn zero_padding_decimal5() { let split_name = SplitName::new(None, Some(String::from("cst-%03u-")), None).unwrap(); - assert_eq!(split_name.get(2), "xxcst-002-"); + assert_eq!(split_name.get(2).unwrap(), "xxcst-002-"); } #[test] fn zero_padding_octal() { let split_name = SplitName::new(None, Some(String::from("cst-%03o-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-052-"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-052-"); } #[test] fn zero_padding_lower_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%03x-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-02a-"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-02a-"); } #[test] fn zero_padding_upper_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%03X-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-02A-"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-02A-"); } #[test] fn alternate_form_octal() { let split_name = SplitName::new(None, Some(String::from("cst-%#10o-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst- 052-"); + assert_eq!(split_name.get(42).unwrap(), "xxcst- 052-"); } #[test] fn alternate_form_lower_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%#10x-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst- 0x2a-"); + assert_eq!(split_name.get(42).unwrap(), "xxcst- 0x2a-"); } #[test] fn alternate_form_upper_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%#10X-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst- 0X2A-"); + assert_eq!(split_name.get(42).unwrap(), "xxcst- 0X2A-"); } #[test] fn left_adjusted_decimal1() { let split_name = SplitName::new(None, Some(String::from("cst-%-10d-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-42 -"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-42 -"); } #[test] fn left_adjusted_decimal2() { let split_name = SplitName::new(None, Some(String::from("cst-%-10i-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-42 -"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-42 -"); } #[test] fn left_adjusted_decimal3() { let split_name = SplitName::new(None, Some(String::from("cst-%-10u-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-42 -"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-42 -"); } #[test] fn left_adjusted_octal() { let split_name = SplitName::new(None, Some(String::from("cst-%-10o-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-52 -"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-52 -"); } #[test] fn left_adjusted_lower_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%-10x-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-2a -"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-2a -"); } #[test] fn left_adjusted_upper_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%-10X-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-2A -"); + assert_eq!(split_name.get(42).unwrap(), "xxcst-2A -"); + } + + #[test] + fn formatting_width_too_large_returns_error() { + let split_name = + SplitName::new(None, Some(String::from("%9999999999999999999d")), None).unwrap(); + let err = split_name.get(0).unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::OutOfMemory); } #[test] diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs index 385e2945c29..2bfb7b7aa54 100644 --- a/tests/by-util/test_csplit.rs +++ b/tests/by-util/test_csplit.rs @@ -1431,6 +1431,37 @@ fn precision_format() { } } +#[test] +fn suffix_format_huge_width_reports_memory_exhausted() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["--suffix-format", "%9999999999999999999d", "-", "1"]) + .pipe_in("a\nb\nc\n") + .fails_with_code(1) + .no_stdout() + .stderr_only("csplit: memory exhausted\n") + .stderr_does_not_contain("panicked"); + + let count = glob(&at.plus_as_string("xx*")) + .expect("counting splits") + .count(); + assert_eq!(count, 0); +} + +#[test] +fn suffix_format_huge_width_elide_empty_drop_does_not_panic() { + new_ucmd!() + .args(&[ + "-z", + "--suffix-format", + "%9999999999999999999d", + "-", + "%a%1", + ]) + .pipe_in("a\n") + .succeeds() + .no_output(); +} + #[test] fn zero_error() { let (at, mut ucmd) = at_and_ucmd!();