From ce5362dd9afef35a46514fbc70dea5317f2b46a9 Mon Sep 17 00:00:00 2001 From: oech3 <79379754+oech3@users.noreply.github.com> Date: Sat, 7 Feb 2026 20:22:46 +0900 Subject: [PATCH] sort: Use ahash --- Cargo.lock | 15 +++++++- Cargo.toml | 4 +-- fuzz/Cargo.lock | 77 ++++++++++++++++++++++------------------- src/uu/sort/Cargo.toml | 4 +-- src/uu/sort/src/sort.rs | 25 +++++-------- 5 files changed, 69 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0436afde666..4135bb5143e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,19 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy 0.8.39", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -4076,6 +4089,7 @@ dependencies = [ name = "uu_sort" version = "0.6.0" dependencies = [ + "ahash", "bigdecimal", "binary-heap-plus", "clap", @@ -4083,7 +4097,6 @@ dependencies = [ "compare", "ctrlc", "fluent", - "fnv", "itertools 0.14.0", "memchr", "nix", diff --git a/Cargo.toml b/Cargo.toml index bf6a17121bf..385a3bb7469 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ # coreutils (uutils) # * see the repository LICENSE, README, and CONTRIBUTING files for more information -# spell-checker:ignore (libs) bigdecimal datetime serde wincode gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl unnested logind cfgs interner +# spell-checker:ignore (libs) ahash bigdecimal datetime serde wincode gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl unnested logind cfgs interner [package] name = "coreutils" @@ -311,6 +311,7 @@ readme = "README.package.md" version = "0.6.0" [workspace.dependencies] +ahash = "0.8.12" ansi-width = "0.1.0" bigdecimal = "0.4" binary-heap-plus = "0.5.0" @@ -329,7 +330,6 @@ dns-lookup = { version = "3.0.0" } exacl = "0.12.0" file_diff = "1.0.0" filetime = "0.2.23" -fnv = "1.0.7" fs_extra = "1.3.0" fts-sys = "0.2.16" gcd = "2.3" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index c4d05638def..da07297ec46 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -8,6 +8,19 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -207,9 +220,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -242,18 +255,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.54" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" dependencies = [ "anstream", "anstyle", @@ -518,9 +531,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixed_decimal" @@ -535,9 +548,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -588,12 +601,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - [[package]] name = "generic-array" version = "0.14.7" @@ -647,9 +654,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -974,9 +981,9 @@ checksum = "84de9d95a6d2547d9b77ee3f25fa0ee32e3c3a6484d47a55adebc0439c077992" [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -1000,9 +1007,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" dependencies = [ "proc-macro2", "quote", @@ -1105,9 +1112,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "miniz_oxide" @@ -1261,15 +1268,15 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" dependencies = [ "portable-atomic", ] @@ -1393,9 +1400,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" [[package]] name = "rust-ini" @@ -1820,13 +1827,13 @@ dependencies = [ name = "uu_sort" version = "0.6.0" dependencies = [ + "ahash", "bigdecimal", "binary-heap-plus", "clap", "compare", "ctrlc", "fluent", - "fnv", "itertools", "memchr", "nix", @@ -2271,18 +2278,18 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.8.34" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71ddd76bcebeed25db614f82bf31a9f4222d3fbba300e6fb6c00afa26cbd4d9d" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.34" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8187381b52e32220d50b255276aa16a084ec0a9017a0ca2152a1f55c539758d" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 2aba9c73b83..92bca5884ec 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -1,4 +1,4 @@ -# spell-checker:ignore bigdecimal +# spell-checker:ignore ahash bigdecimal [package] name = "uu_sort" @@ -29,7 +29,6 @@ bigdecimal = { workspace = true } binary-heap-plus = { workspace = true } clap = { workspace = true } compare = { workspace = true } -fnv = { workspace = true } itertools = { workspace = true } memchr = { workspace = true } rand = { workspace = true } @@ -45,6 +44,7 @@ uucore = { workspace = true, features = [ "i18n-collator", ] } fluent = { workspace = true } +ahash = { workspace = true } [target.'cfg(not(target_os = "redox"))'.dependencies] ctrlc = { workspace = true } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 26b060a45c2..52247c4feff 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -18,14 +18,13 @@ mod merge; mod numeric_str_cmp; mod tmp_dir; +use ahash::AHashMap; use bigdecimal::BigDecimal; use chunks::LineData; use clap::builder::ValueParser; use clap::{Arg, ArgAction, ArgMatches, Command}; use custom_str_cmp::custom_str_cmp; - use ext_sort::ext_sort; -use fnv::FnvHasher; use numeric_str_cmp::{NumInfo, NumInfoParseSettings, human_numeric_str_cmp, numeric_str_cmp}; use rand::{Rng, rng}; use rayon::prelude::*; @@ -33,7 +32,7 @@ use std::cmp::Ordering; use std::env; use std::ffi::{OsStr, OsString}; use std::fs::{File, OpenOptions}; -use std::hash::{Hash, Hasher}; +use std::hash::{BuildHasher, Hash, Hasher}; use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout}; use std::num::{IntErrorKind, NonZero}; use std::ops::Range; @@ -1681,7 +1680,7 @@ fn index_legacy_warnings(processed_args: &[OsString], legacy_warnings: &mut [Leg return; } - let mut index_by_arg = std::collections::HashMap::new(); + let mut index_by_arg = AHashMap::default(); for (warning_idx, warning) in legacy_warnings.iter().enumerate() { index_by_arg.insert(warning.arg_index, warning_idx); } @@ -2909,7 +2908,8 @@ fn salt_from_random_source(path: &Path) -> UResult<[u8; SALT_LEN]> { let mut reader = open_with_open_failed_error(path)?; let mut buf = [0u8; BUF_LEN]; let mut total = 0usize; - let mut hasher = FnvHasher::default(); + // freeze seed for --random-source + let mut hasher = ahash::RandomState::with_seeds(1, 1, 1, 1).build_hasher(); loop { let n = reader @@ -2934,7 +2934,8 @@ fn salt_from_random_source(path: &Path) -> UResult<[u8; SALT_LEN]> { } let first = hasher.finish(); - let mut second_hasher = FnvHasher::default(); + // freeze seed for --random-source + let mut second_hasher = ahash::RandomState::with_seeds(2, 2, 2, 2).build_hasher(); second_hasher.write(RANDOM_SOURCE_TAG); second_hasher.write_u64(first); let second = second_hasher.finish(); @@ -2946,9 +2947,8 @@ fn salt_from_random_source(path: &Path) -> UResult<[u8; SALT_LEN]> { } fn get_hash(t: &T) -> u64 { - let mut s = FnvHasher::default(); - t.hash(&mut s); - s.finish() + // Is reproducibility of get_hash itself needed for --random-source ? + ahash::RandomState::with_seeds(0, 0, 0, 0).hash_one(t) } fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering { @@ -3086,13 +3086,6 @@ mod tests { buffer } - #[test] - fn test_get_hash() { - let a = "Ted".to_string(); - - assert_eq!(2_646_829_031_758_483_623, get_hash(&a)); - } - #[test] fn test_random_shuffle() { let a = b"Ted";