diff --git a/src/uu/cksum/benches/cksum_bench.rs b/src/uu/cksum/benches/cksum_bench.rs index c316ec274a0..81f70bbb3d1 100644 --- a/src/uu/cksum/benches/cksum_bench.rs +++ b/src/uu/cksum/benches/cksum_bench.rs @@ -57,7 +57,7 @@ macro_rules! bench_shake_algorithm { let data = text_data::generate_by_size(100, 80); bencher.bench(|| { - let mut shake = Shake128::new(); + let mut shake = Shake128::with_output_bits(256); shake.hash_update(&data); // SHAKE algorithms can output any length, use 256 bits (32 bytes) for meaningful comparison @@ -76,7 +76,7 @@ macro_rules! bench_shake_algorithm { let data = text_data::generate_by_size(100, 80); bencher.bench(|| { - let mut shake = Shake256::new(); + let mut shake = Shake256::with_output_bits(512); shake.hash_update(&data); // SHAKE algorithms can output any length, use 256 bits (32 bytes) for meaningful comparison diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 5c78af2c6de..51dd83d3a27 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -100,6 +100,16 @@ fn maybe_sanitize_length( sanitize_sha2_sha3_length_str(algo, s_len).map(Some) } + // SHAKE128 and SHAKE256 algorithms optionally take a bit length. No + // validation is performed on this length, any value is valid. If the + // given length is not a multiple of 8, the last byte of the output + // will have its extra bits set to zero. + (Some(AlgoKind::Shake128 | AlgoKind::Shake256), Some(len)) => match len.parse::() { + Ok(0) => Ok(None), + Ok(l) => Ok(Some(l)), + Err(_) => Err(ChecksumError::InvalidLength(len.into()).into()), + }, + // For BLAKE2b, if a length is provided, validate it. (Some(AlgoKind::Blake2b), Some(len)) => calculate_blake2b_length_str(len), diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index a2fba209c34..13b0680dfcf 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -114,6 +114,9 @@ impl AlgoKind { ALGORITHM_OPTIONS_SHA256 => Sha256, ALGORITHM_OPTIONS_SHA384 => Sha384, ALGORITHM_OPTIONS_SHA512 => Sha512, + + ALGORITHM_OPTIONS_SHAKE128 => Shake128, + ALGORITHM_OPTIONS_SHAKE256 => Shake256, _ => return Err(ChecksumError::UnknownAlgorithm(algo.as_ref().to_string()).into()), }) } @@ -247,8 +250,8 @@ pub enum SizedAlgoKind { Sha3(ShaLength), // Note: we store Blake2b's length as BYTES. Blake2b(Option), - Shake128(usize), - Shake256(usize), + Shake128(Option), + Shake256(Option), } impl SizedAlgoKind { @@ -280,8 +283,8 @@ impl SizedAlgoKind { (ak::Sha1, _) => Ok(Self::Sha1), (ak::Blake3, _) => Ok(Self::Blake3), - (ak::Shake128, Some(l)) => Ok(Self::Shake128(l)), - (ak::Shake256, Some(l)) => Ok(Self::Shake256(l)), + (ak::Shake128, l) => Ok(Self::Shake128(l)), + (ak::Shake256, l) => Ok(Self::Shake256(l)), (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), (algo @ (ak::Sha2 | ak::Sha3), None) => { @@ -298,7 +301,6 @@ impl SizedAlgoKind { (ak::Sha256, None) => Ok(Self::Sha2(ShaLength::Len256)), (ak::Sha384, None) => Ok(Self::Sha2(ShaLength::Len384)), (ak::Sha512, None) => Ok(Self::Sha2(ShaLength::Len512)), - (_, None) => Err(ChecksumError::LengthRequired(kind.to_uppercase().into()).into()), } } @@ -322,45 +324,49 @@ impl SizedAlgoKind { pub fn create_digest(&self) -> Box { use ShaLength::*; match self { - Self::Sysv => Box::new(SysV::new()), - Self::Bsd => Box::new(Bsd::new()), - Self::Crc => Box::new(Crc::new()), - Self::Crc32b => Box::new(CRC32B::new()), - Self::Md5 => Box::new(Md5::new()), - Self::Sm3 => Box::new(Sm3::new()), - Self::Sha1 => Box::new(Sha1::new()), - Self::Blake3 => Box::new(Blake3::new()), - Self::Sha2(Len224) => Box::new(Sha224::new()), - Self::Sha2(Len256) => Box::new(Sha256::new()), - Self::Sha2(Len384) => Box::new(Sha384::new()), - Self::Sha2(Len512) => Box::new(Sha512::new()), - Self::Sha3(Len224) => Box::new(Sha3_224::new()), - Self::Sha3(Len256) => Box::new(Sha3_256::new()), - Self::Sha3(Len384) => Box::new(Sha3_384::new()), - Self::Sha3(Len512) => Box::new(Sha3_512::new()), - Self::Blake2b(Some(byte_len)) => Box::new(Blake2b::with_output_bytes(*byte_len)), - Self::Blake2b(None) => Box::new(Blake2b::new()), - Self::Shake128(_) => Box::new(Shake128::new()), - Self::Shake256(_) => Box::new(Shake256::new()), + Self::Sysv => Box::new(SysV::default()), + Self::Bsd => Box::new(Bsd::default()), + Self::Crc => Box::new(Crc::default()), + Self::Crc32b => Box::new(CRC32B::default()), + Self::Md5 => Box::new(Md5::default()), + Self::Sm3 => Box::new(Sm3::default()), + Self::Sha1 => Box::new(Sha1::default()), + Self::Blake3 => Box::new(Blake3::default()), + Self::Sha2(Len224) => Box::new(Sha224::default()), + Self::Sha2(Len256) => Box::new(Sha256::default()), + Self::Sha2(Len384) => Box::new(Sha384::default()), + Self::Sha2(Len512) => Box::new(Sha512::default()), + Self::Sha3(Len224) => Box::new(Sha3_224::default()), + Self::Sha3(Len256) => Box::new(Sha3_256::default()), + Self::Sha3(Len384) => Box::new(Sha3_384::default()), + Self::Sha3(Len512) => Box::new(Sha3_512::default()), + Self::Blake2b(len_opt) => { + Box::new(len_opt.map(Blake2b::with_output_bytes).unwrap_or_default()) + } + Self::Shake128(len_opt) => { + Box::new(len_opt.map(Shake128::with_output_bits).unwrap_or_default()) + } + Self::Shake256(len_opt) => { + Box::new(len_opt.map(Shake256::with_output_bits).unwrap_or_default()) + } } } pub fn bitlen(&self) -> usize { - use SizedAlgoKind::*; match self { - Sysv => 512, - Bsd => 1024, - Crc => 256, - Crc32b => 32, - Md5 => 128, - Sm3 => 512, - Sha1 => 160, - Blake3 => 256, - Sha2(len) => len.as_usize(), - Sha3(len) => len.as_usize(), - Blake2b(len) => len.unwrap_or(512), - Shake128(len) => *len, - Shake256(len) => *len, + Self::Sysv => 512, + Self::Bsd => 1024, + Self::Crc => 256, + Self::Crc32b => 32, + Self::Md5 => 128, + Self::Sm3 => 512, + Self::Sha1 => 160, + Self::Blake3 => 256, + Self::Sha2(len) => len.as_usize(), + Self::Sha3(len) => len.as_usize(), + Self::Blake2b(len) => len.unwrap_or(Blake2b::DEFAULT_BYTE_SIZE * 8), + Self::Shake128(len) => len.unwrap_or(Shake128::DEFAULT_BIT_SIZE), + Self::Shake256(len) => len.unwrap_or(Shake256::DEFAULT_BIT_SIZE), } } pub fn is_legacy(&self) -> bool { diff --git a/src/uucore/src/lib/features/sum.rs b/src/uucore/src/lib/features/sum.rs index 5272ab3c139..279643a962b 100644 --- a/src/uucore/src/lib/features/sum.rs +++ b/src/uucore/src/lib/features/sum.rs @@ -59,9 +59,6 @@ impl DigestOutput { } pub trait Digest { - fn new() -> Self - where - Self: Sized; fn hash_update(&mut self, input: &[u8]); fn hash_finalize(&mut self, out: &mut [u8]); fn reset(&mut self); @@ -79,31 +76,40 @@ pub trait Digest { /// first element of the tuple is the blake2b state /// second is the number of output bits -pub struct Blake2b(blake2b_simd::State, usize); +pub struct Blake2b { + digest: blake2b_simd::State, + bit_size: usize, +} impl Blake2b { + pub const DEFAULT_BYTE_SIZE: usize = 64; + /// Return a new Blake2b instance with a custom output bytes length pub fn with_output_bytes(output_bytes: usize) -> Self { let mut params = blake2b_simd::Params::new(); params.hash_length(output_bytes); let state = params.to_state(); - Self(state, output_bytes * 8) + Self { + digest: state, + bit_size: output_bytes * 8, + } } } -impl Digest for Blake2b { - fn new() -> Self { - // by default, Blake2b output is 512 bits long (= 64B) - Self::with_output_bytes(64) +impl Default for Blake2b { + fn default() -> Self { + Self::with_output_bytes(Self::DEFAULT_BYTE_SIZE) } +} +impl Digest for Blake2b { fn hash_update(&mut self, input: &[u8]) { - self.0.update(input); + self.digest.update(input); } fn hash_finalize(&mut self, out: &mut [u8]) { - let hash_result = &self.0.finalize(); + let hash_result = &self.digest.finalize(); out.copy_from_slice(hash_result.as_bytes()); } @@ -112,16 +118,14 @@ impl Digest for Blake2b { } fn output_bits(&self) -> usize { - self.1 + self.bit_size } } +#[derive(Default)] pub struct Blake3(blake3::Hasher); -impl Digest for Blake3 { - fn new() -> Self { - Self(blake3::Hasher::new()) - } +impl Digest for Blake3 { fn hash_update(&mut self, input: &[u8]) { self.0.update(input); } @@ -132,7 +136,7 @@ impl Digest for Blake3 { } fn reset(&mut self) { - *self = Self::new(); + *self = Self::default(); } fn output_bits(&self) -> usize { @@ -140,12 +144,10 @@ impl Digest for Blake3 { } } +#[derive(Default)] pub struct Sm3(sm3::Sm3); -impl Digest for Sm3 { - fn new() -> Self { - Self(::new()) - } +impl Digest for Sm3 { fn hash_update(&mut self, input: &[u8]) { ::update(&mut self.0, input); } @@ -155,7 +157,7 @@ impl Digest for Sm3 { } fn reset(&mut self) { - *self = Self::new(); + *self = Self::default(); } fn output_bits(&self) -> usize { @@ -184,14 +186,16 @@ impl Crc { } } -impl Digest for Crc { - fn new() -> Self { +impl Default for Crc { + fn default() -> Self { Self { digest: crc_fast::Digest::new_with_params(Self::get_posix_cksum_params()), size: 0, } } +} +impl Digest for Crc { fn hash_update(&mut self, input: &[u8]) { self.digest.update(input); self.size += input.len(); @@ -230,13 +234,15 @@ pub struct CRC32B { digest: crc_fast::Digest, } -impl Digest for CRC32B { - fn new() -> Self { +impl Default for CRC32B { + fn default() -> Self { Self { digest: crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc), } } +} +impl Digest for CRC32B { fn hash_update(&mut self, input: &[u8]) { self.digest.update(input); } @@ -264,14 +270,12 @@ impl Digest for CRC32B { } } +#[derive(Default)] pub struct Bsd { state: u16, } -impl Digest for Bsd { - fn new() -> Self { - Self { state: 0 } - } +impl Digest for Bsd { fn hash_update(&mut self, input: &[u8]) { for &byte in input { self.state = (self.state >> 1) + ((self.state & 1) << 15); @@ -290,7 +294,7 @@ impl Digest for Bsd { } fn reset(&mut self) { - *self = Self::new(); + *self = Self::default(); } fn output_bits(&self) -> usize { @@ -298,14 +302,12 @@ impl Digest for Bsd { } } +#[derive(Default)] pub struct SysV { state: u32, } -impl Digest for SysV { - fn new() -> Self { - Self { state: 0 } - } +impl Digest for SysV { fn hash_update(&mut self, input: &[u8]) { for &byte in input { self.state = self.state.wrapping_add(u32::from(byte)); @@ -325,7 +327,7 @@ impl Digest for SysV { } fn reset(&mut self) { - *self = Self::new(); + *self = Self::default(); } fn output_bits(&self) -> usize { @@ -336,11 +338,12 @@ impl Digest for SysV { // Implements the Digest trait for sha2 / sha3 algorithms with fixed output macro_rules! impl_digest_common { ($algo_type: ty, $size: literal) => { - impl Digest for $algo_type { - fn new() -> Self { + impl Default for $algo_type { + fn default() -> Self { Self(Default::default()) } - + } + impl Digest for $algo_type { fn hash_update(&mut self, input: &[u8]) { digest::Digest::update(&mut self.0, input); } @@ -350,7 +353,7 @@ macro_rules! impl_digest_common { } fn reset(&mut self) { - *self = Self::new(); + *self = Self::default(); } fn output_bits(&self) -> usize { @@ -362,26 +365,43 @@ macro_rules! impl_digest_common { // Implements the Digest trait for sha2 / sha3 algorithms with variable output macro_rules! impl_digest_shake { - ($algo_type: ty, $output_bits: literal) => { - impl Digest for $algo_type { - fn new() -> Self { - Self(Default::default()) + ($algo_type: ty, $default_output_bits: literal) => { + impl $algo_type { + pub const DEFAULT_BIT_SIZE: usize = $default_output_bits; + + pub fn with_output_bits(bits: usize) -> Self { + Self { + digest: Default::default(), + bit_size: bits, + } } - + } + impl Default for $algo_type { + fn default() -> Self { + Self::with_output_bits(Self::DEFAULT_BIT_SIZE) + } + } + impl Digest for $algo_type { fn hash_update(&mut self, input: &[u8]) { - digest::Update::update(&mut self.0, input); + digest::Update::update(&mut self.digest, input); } fn hash_finalize(&mut self, out: &mut [u8]) { - digest::ExtendableOutputReset::finalize_xof_reset_into(&mut self.0, out); + digest::ExtendableOutputReset::finalize_xof_reset_into(&mut self.digest, out); + + // Remove the last bits if the requested length is not a multiple of 8. + let extra = self.output_bits() % 8; + if extra != 0 { + out[out.len() - 1] &= (1 << extra) - 1; + } } fn reset(&mut self) { - *self = Self::new(); + *self = Self::with_output_bits(self.bit_size); } fn output_bits(&self) -> usize { - $output_bits + self.bit_size } fn result(&mut self) -> DigestOutput { @@ -415,8 +435,14 @@ impl_digest_common!(Sha3_256, 256); impl_digest_common!(Sha3_384, 384); impl_digest_common!(Sha3_512, 512); -pub struct Shake128(sha3::Shake128); -pub struct Shake256(sha3::Shake256); +pub struct Shake128 { + digest: sha3::Shake128, + bit_size: usize, +} +pub struct Shake256 { + digest: sha3::Shake256, + bit_size: usize, +} impl_digest_shake!(Shake128, 256); impl_digest_shake!(Shake256, 512); @@ -567,8 +593,8 @@ mod tests { #[test] fn test_crc_basic_functionality() { // Test that our CRC implementation works with basic functionality - let mut crc1 = Crc::new(); - let mut crc2 = Crc::new(); + let mut crc1 = Crc::default(); + let mut crc2 = Crc::default(); // Same input should give same output crc1.hash_update(b"test"); @@ -584,7 +610,7 @@ mod tests { #[test] fn test_crc_digest_basic() { - let mut crc = Crc::new(); + let mut crc = Crc::default(); // Test empty input let mut output = [0u8; 8]; @@ -592,7 +618,7 @@ mod tests { let empty_result = u64::from_ne_bytes(output); // Reset and test with "test" string - let mut crc = Crc::new(); + let mut crc = Crc::default(); crc.hash_update(b"test"); crc.hash_finalize(&mut output); let test_result = u64::from_ne_bytes(output); @@ -606,8 +632,8 @@ mod tests { #[test] fn test_crc_digest_incremental() { - let mut crc1 = Crc::new(); - let mut crc2 = Crc::new(); + let mut crc1 = Crc::default(); + let mut crc2 = Crc::default(); // Test that processing in chunks gives same result as all at once let data = b"Hello, World! This is a test string for CRC computation."; @@ -632,13 +658,13 @@ mod tests { // Test that our optimized slice-by-8 gives same results as byte-by-byte let test_data = b"This is a longer test string to verify slice-by-8 optimization works correctly with various data sizes including remainders."; - let mut crc_optimized = Crc::new(); + let mut crc_optimized = Crc::default(); crc_optimized.hash_update(test_data); let mut output_opt = [0u8; 8]; crc_optimized.hash_finalize(&mut output_opt); // Create a reference implementation using hash_update - let mut crc_reference = Crc::new(); + let mut crc_reference = Crc::default(); for &byte in test_data { crc_reference.hash_update(&[byte]); } @@ -659,7 +685,7 @@ mod tests { ]; for (input, expected) in test_cases { - let mut crc = Crc::new(); + let mut crc = Crc::default(); crc.hash_update(input.as_bytes()); let mut output = [0u8; 8]; crc.hash_finalize(&mut output); @@ -671,14 +697,14 @@ mod tests { #[test] fn test_crc_hash_update_edge_cases() { - let mut crc = Crc::new(); + let mut crc = Crc::default(); // Test with data that's not a multiple of 8 bytes let data7 = b"1234567"; // 7 bytes crc.hash_update(data7); let data9 = b"123456789"; // 9 bytes - let mut crc2 = Crc::new(); + let mut crc2 = Crc::default(); crc2.hash_update(data9); // Should not panic and should produce valid results diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index daa69047f00..fac6ab67965 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -4,6 +4,8 @@ // file that was distributed with this source code. // spell-checker:ignore (words) asdf algo algos asha mgmt xffname hexa GFYEQ HYQK Yqxb dont checkfile +use rstest::rstest; + use uutests::at_and_ucmd; use uutests::new_ucmd; use uutests::util::TestScenario; @@ -3098,3 +3100,129 @@ fn test_check_checkfile_with_io_error() { .stderr_contains("/proc/self/mem: read error") .no_stdout(); } + +#[rstest] +#[case::default_length( + &[], + "ac8549b2861a151896ab721bd29d7a20c1a3d1f75b31266f786f20d963fb0fdf" +)] +#[case::pass_default_length( + &["-l", "256"], + "ac8549b2861a151896ab721bd29d7a20c1a3d1f75b31266f786f20d963fb0fdf" +)] +#[case::smaller_length( + &["-l", "128"], + "ac8549b2861a151896ab721bd29d7a20" +)] +#[case::bigger_length( + &["-l", "264"], + "ac8549b2861a151896ab721bd29d7a20c1a3d1f75b31266f786f20d963fb0fdfc2" +)] +#[case::length_0( + &["-l", "0"], + "ac8549b2861a151896ab721bd29d7a20c1a3d1f75b31266f786f20d963fb0fdf" +)] +#[case::length_1( + &["-l", "1"], + "00" +)] +#[case::length_2( + &["-l", "2"], + "00" +)] +#[case::length_3( + &["-l", "3"], + "04" +)] +#[case::length_4( + &["-l", "4"], + "0c" +)] +#[case::length_5( + &["-l", "5"], + "0c" +)] +#[case::length_6( + &["-l", "6"], + "2c" +)] +#[case::length_7( + &["-l", "7"], + "2c" +)] +#[case::length_8( + &["-l", "8"], + "ac" +)] +fn test_shake128(#[case] args: &[&str], #[case] expected: &str) { + new_ucmd!() + .arg("-a") + .arg("shake128") + .args(args) + .pipe_in("xxx") + .succeeds() + .stdout_only(format!("SHAKE128 (-) = {expected}\n")); +} + +#[rstest] +#[case::default_length( + &[], + "2fa631503c3ea5fe85131dbfa24805185474740e6dcb5f2a64f69d932bcb55f7b24958f3e3c4cc0e71f1fe6f054cd3fb28b9efb62b4f8f3fbe6d50d90f5c6eba" +)] +#[case::pass_default_length( + &["-l", "512"], + "2fa631503c3ea5fe85131dbfa24805185474740e6dcb5f2a64f69d932bcb55f7b24958f3e3c4cc0e71f1fe6f054cd3fb28b9efb62b4f8f3fbe6d50d90f5c6eba" +)] +#[case::smaller_length( + &["-l", "128"], + "2fa631503c3ea5fe85131dbfa2480518" +)] +#[case::bigger_length( + &["-l", "1024"], + "2fa631503c3ea5fe85131dbfa24805185474740e6dcb5f2a64f69d932bcb55f7b24958f3e3c4cc0e71f1fe6f054cd3fb28b9efb62b4f8f3fbe6d50d90f5c6eba18783d25f8b36d92b8607f016352b5c405945a7859a8339201728f680647324d1b8ea93a01d2ef965dadf4a1bee3ff044ed2b4bd95e4311f5e3f2cd5bae0b7c6" +)] +#[case::length_0( + &["-l", "0"], + "2fa631503c3ea5fe85131dbfa24805185474740e6dcb5f2a64f69d932bcb55f7b24958f3e3c4cc0e71f1fe6f054cd3fb28b9efb62b4f8f3fbe6d50d90f5c6eba" +)] +#[case::length_1( + &["-l", "1"], + "01" +)] +#[case::length_2( + &["-l", "2"], + "03" +)] +#[case::length_3( + &["-l", "3"], + "07" +)] +#[case::length_4( + &["-l", "4"], + "0f" +)] +#[case::length_5( + &["-l", "5"], + "0f" +)] +#[case::length_6( + &["-l", "6"], + "2f" +)] +#[case::length_7( + &["-l", "7"], + "2f" +)] +#[case::length_8( + &["-l", "8"], + "2f" +)] +fn test_shake256(#[case] args: &[&str], #[case] expected: &str) { + new_ucmd!() + .arg("-a") + .arg("shake256") + .args(args) + .pipe_in("xxx") + .succeeds() + .stdout_only(format!("SHAKE256 (-) = {expected}\n")); +}