From db4aa4ddddc787d734103bc5edddbc5f7757b731 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Mon, 2 Mar 2026 15:18:21 +0100 Subject: [PATCH 1/4] feat: Criterion Benchmark --- Cargo.toml | 11 +- benches/benchmarks-criterion.rs | 284 ++++++++++++++++++++++++++++++++ 2 files changed, 293 insertions(+), 2 deletions(-) create mode 100644 benches/benchmarks-criterion.rs diff --git a/Cargo.toml b/Cargo.toml index 29331c3..6ebfe07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,10 +23,12 @@ same-file = "1.0.6" unicode-width = "0.2.0" [dev-dependencies] -pretty_assertions = "1.4.0" assert_cmd = "2.0.14" +criterion = { version = "0.8.2", features = ["html_reports"] } +pretty_assertions = "1.4.0" predicates = "3.1.0" -tempfile = "3.10.1" +rand = "0.10.0" +tempfile = "3.26.0" [profile.release] lto = "thin" @@ -40,3 +42,8 @@ panic = "abort" [profile.dist] inherits = "release" lto = "thin" + +[[bench]] +name = "bench_cmp" +path = "benches/benchmarks-criterion.rs" +harness = false diff --git a/benches/benchmarks-criterion.rs b/benches/benchmarks-criterion.rs new file mode 100644 index 0000000..442b89a --- /dev/null +++ b/benches/benchmarks-criterion.rs @@ -0,0 +1,284 @@ +/// Benchmarks, currently only for cmp +/// +/// Provides some general functions, e.g. to create files to compare in different sizes. +/// +/// use hyperfine to benchmark against cmp +/// * hyperfine -i "target/release/diffutils cmp from_file_10000000.txt to_file_10000000.txt" +/// * hyperfine -i "cmp from_file_10000000.txt to_file_10000000.txt" +/// +/// The Rust version seems twice as slow. +use criterion::{criterion_group, criterion_main, Criterion}; +// use std::env; +// use std::hint::black_box; +use rand::RngExt; +use std::io::{BufWriter, Write}; +use std::path::Path; +use std::process::Command; +use std::{ffi::OsString, fs::File, time::Duration}; + +const WARM_UP_TIME_MS: u64 = 500; +#[allow(unused)] +const MEASUREMENT_TIME_MS: u64 = 2000; + +// file lines and .txt will be added +const FROM_FILE: &str = "from_file"; +const TO_FILE: &str = "to_file"; + +criterion_group!( + benches, + bench_parser, + bench_cmp // , bench_diff +); +criterion_main!(benches); + +// All results are a few microseconds, so negligible. +fn bench_parser(c: &mut Criterion) { + let mut group = c.benchmark_group("Bench parser"); + + group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS)); + // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS)); + // group.sample_size(10); + + group.bench_function("Parse cmp", |b| { + b.iter(|| { + cmp_parse_only( + "cmd file_1.txt file_2.txt -bl --bytes=2048 --ignore-initial=100KiB:1MiB", + ) + }) + }); + + group.bench_function("Parse diff", |b| { + b.iter(|| diff_parse_only("diff file_1.txt file_2.txt")) + }); + // group.bench_function("Parse error", |b| { + // b.iter(|| parse_single_arg("cmd file_1.txt file_2.txt --something-unknown")) + // }); + // group.bench_function("Parse help", |b| b.iter(|| parse_single_arg("cmd --help"))); + + group.finish(); +} + +// This is the interesting part. +fn bench_cmp(c: &mut Criterion) { + let mut group = c.benchmark_group("Bench cmp"); + // uses tmp + // let dir_path = tempfile::tempdir().unwrap().path(); + // uses current directory, the generated files are kept + let dir_path = Path::new(""); + // let curr = env::current_dir().unwrap(); + // let dir_path = curr.as_path(); + let num_lines = 100_000; + // The more differences, the faster cmp returns, as it stops after the first difference is found. + let num_differences = 1; + + group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS)); + // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS)); + // group.sample_size(10); + + let (from, to) = + generate_test_files(num_lines, 0, dir_path).expect("generate_test_files failed"); + let cmd = format!("cmp {from} {to}"); + let opts = str_to_args(&cmd).into_iter().peekable(); + let params = diffutilslib::cmp::parse_params(opts).unwrap(); + + group.bench_function(format!("cmp files unchanged, lines: {num_lines}"), |b| { + b.iter(|| diffutilslib::cmp::cmp(¶ms).unwrap()) + }); + + let (from, to) = generate_test_files(num_lines, num_differences, dir_path) + .expect("generate_test_files failed"); + let cmd = format!("cmp {from} {to} -s"); + let opts = str_to_args(&cmd).into_iter().peekable(); + let params = diffutilslib::cmp::parse_params(opts).unwrap(); + + group.bench_function(format!("cmp files changed, lines: {num_lines}"), |b| { + b.iter(|| diffutilslib::cmp::cmp(¶ms).unwrap()) + }); + + group.finish(); + + // Optional bench by executing the file as cmd + bench_binary_execution_cmp(c); +} + +// // This is the interesting part. +// fn bench_diff(c: &mut Criterion) { +// let mut group = c.benchmark_group("Bench cmp"); +// // uses tmp +// // let dir_path = tempfile::tempdir().unwrap().path(); +// // uses current directory, the generated files are kept +// let dir_path = Path::new(""); +// // let curr = env::current_dir().unwrap(); +// // let dir_path = curr.as_path(); +// let num_lines = 100_000; +// // The more differences, the faster cmp returns, as it stops after the first difference is found. +// let num_differences = 1; +// +// group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS)); +// // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS)); +// // group.sample_size(10); +// +// let (from, to) = +// generate_test_files(num_lines, 0, dir_path).expect("generate_test_files failed"); +// let cmd = format!("diff {from} {to}"); +// let opts = str_to_args(&cmd).into_iter().peekable(); +// let params = diffutilslib::params::parse_params(opts).unwrap(); +// +// // TODO need function because main cannot be called. +// group.bench_function(format!("diff files unchanged, lines: {num_lines}"), |b| { +// b.iter(|| diffutilslib::::cmp(¶ms).unwrap()) +// }); +// +// let (from, to) = generate_test_files(num_lines, num_differences, dir_path) +// .expect("generate_test_files failed"); +// let cmd = format!("diff {from} {to} -s"); +// let opts = str_to_args(&cmd).into_iter().peekable(); +// let params = diffutilslib::params::parse_params(opts).unwrap(); +// +// // TODO need function because main cannot be called. +// group.bench_function(format!("diff files changed, lines: {num_lines}"), |b| { +// b.iter(|| diffutilslib::::cmp(¶ms).unwrap()) +// }); +// +// group.finish(); +// } + +fn cmp_parse_only(cmd: &str) -> String { + let args = str_to_args(cmd).into_iter().peekable(); + let _params = match diffutilslib::cmp::parse_params(args) { + Ok(params) => params, + Err(e) => { + return e.to_string(); + } + }; + return "ok".to_string(); +} + +fn diff_parse_only(cmd: &str) -> String { + let args = str_to_args(cmd).into_iter().peekable(); + let _params = match diffutilslib::params::parse_params(args) { + Ok(params) => params, + Err(e) => { + return e.to_string(); + } + }; + return "ok".to_string(); +} + +fn str_to_args(opt: &str) -> Vec { + let s: Vec = opt + .split(" ") + .into_iter() + .map(|s| OsString::from(s)) + .collect(); + + s +} + +/// Generates two test files for comparison. +/// +/// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. +/// If num_differences is set, '*' will be inserted between the first two words of a line, +/// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. +fn generate_test_files( + lines: usize, + num_differences: usize, + dir: &Path, +) -> std::io::Result<(String, String)> { + let f1 = format!("{FROM_FILE}_{lines}.txt"); + let f2 = format!("{TO_FILE}_{lines}.txt"); + let from_path = dir.join(f1); + let to_path = dir.join(f2); + + generate_file_fast(&from_path, &to_path, lines, num_differences)?; + + Ok(( + from_path.to_string_lossy().to_string(), + to_path.to_string_lossy().to_string(), + )) +} + +// Largely Gemini AI Generated +fn generate_file_fast( + from_name: &Path, + to_name: &Path, + line_count: usize, + num_differences: usize, +) -> std::io::Result<()> { + let file_from = File::create(from_name)?; + let file_to = File::create(to_name)?; + let change = if num_differences == 0 { + 0 + } else { + line_count / num_differences + }; + // Use a larger 128KB buffer for massive files + let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); + let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); + let mut rng = rand::rng(); + + // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes + let mut line_buffer = [b' '; 60]; + line_buffer[59] = b'\n'; // Set the newline once at the end + + for i in (0..line_count).rev() { + // Fill only the letter positions, skipping spaces and the newline + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + + // Write the raw bytes directly to both files + writer_from.write_all(&line_buffer)?; + // make changes in the file + if num_differences == 0 { + writer_to.write_all(&line_buffer)?; + } else { + if i % change == 0 { + line_buffer[5] = b'*'; + } + writer_to.write_all(&line_buffer)?; + line_buffer[5] = b' '; + } + } + + writer_from.flush()?; + writer_to.flush()?; + + Ok(()) +} + +#[allow(unused)] +// fn bench_binary_execution(c: &mut BenchmarkGroup<'_, WallTime>) { +fn bench_binary_execution_cmp(c: &mut Criterion) { + c.bench_function("GNU cmp", |b| { + b.iter(|| { + let _status = Command::new("cmp") + .arg("from_file_100000.txt") + .arg("to_file_100000.txt") + .arg("-s") + .status() + .expect("Failed to execute binary"); + + // assert!(status.success()); + }) + }); + + c.bench_function("cmp binary", |b| { + b.iter(|| { + let _status = Command::new("target/release/diffutils") + .arg("cmp") + .arg("from_file_100000.txt") + .arg("to_file_100000.txt") + .arg("-s") + // .arg("--lines") + // .arg(black_box("10000")) + .status() + .expect("Failed to execute binary"); + + // assert!(status.success()); + }) + }); +} From 6d01c7653dc377e15c9c20464a987ba410f0fcf5 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Tue, 3 Mar 2026 22:35:53 +0100 Subject: [PATCH 2/4] fix: Replaced Criterion with codspeed drop-in replacement --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6ebfe07..566e48a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ unicode-width = "0.2.0" [dev-dependencies] assert_cmd = "2.0.14" -criterion = { version = "0.8.2", features = ["html_reports"] } +criterion = { version = "4.3.0", package = "codspeed-criterion-compat" } pretty_assertions = "1.4.0" predicates = "3.1.0" rand = "0.10.0" From fe9b622e25d434ca6e5a8bf2fbb99c935a198d56 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Wed, 4 Mar 2026 15:32:37 +0100 Subject: [PATCH 3/4] feat: uses Divan instead of Criterion --- Cargo.lock | 690 ++++++++++++++++++++++++++++++-- Cargo.toml | 6 +- benches/bench-diffutils.rs | 342 ++++++++++++++++ benches/benchmarks-criterion.rs | 284 ------------- 4 files changed, 1003 insertions(+), 319 deletions(-) create mode 100644 benches/bench-diffutils.rs delete mode 100644 benches/benchmarks-criterion.rs diff --git a/Cargo.lock b/Cargo.lock index e5461b0..c28f80b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,21 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "assert_cmd" version = "2.1.2" @@ -82,6 +97,23 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures", + "rand_core", +] + [[package]] name = "chrono" version = "0.4.44" @@ -95,12 +127,123 @@ dependencies = [ "windows-link", ] +[[package]] +name = "clap" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +dependencies = [ + "anstyle", + "clap_lex", + "terminal_size", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "codspeed" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c2eb3388ebe26b5a0ab6bf4969d9c4840143d7f6df07caa3cc851b0606cef6" +dependencies = [ + "anyhow", + "cc", + "colored", + "getrandom 0.2.17", + "glob", + "libc", + "nix", + "serde", + "serde_json", + "statrs", +] + +[[package]] +name = "codspeed-divan-compat" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2de65b7489a59709724d489070c6d05b7744039e4bf751d0a2006b90bb5593d" +dependencies = [ + "clap", + "codspeed", + "codspeed-divan-compat-macros", + "codspeed-divan-compat-walltime", + "regex", +] + +[[package]] +name = "codspeed-divan-compat-macros" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56ca01ce4fd22b8dcc6c770dcd6b74343642e842482b94e8920d14e10c57638d" +dependencies = [ + "divan-macros", + "itertools", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "codspeed-divan-compat-walltime" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "720ab9d0714718afe5f5832be6e5f5eb5ce97836e24ca7bf7042eea4308b9fb8" +dependencies = [ + "cfg-if", + "clap", + "codspeed", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "core-foundation-sys" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "diff" version = "0.1.13" @@ -119,16 +262,41 @@ version = "0.5.0" dependencies = [ "assert_cmd", "chrono", + "codspeed-divan-compat", "diff", "itoa", "predicates", "pretty_assertions", + "rand", "regex", "same-file", "tempfile", "unicode-width", ] +[[package]] +name = "divan-macros" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.10" @@ -136,7 +304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -154,18 +322,64 @@ dependencies = [ "num-traits", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "getrandom" -version = "0.3.1" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", "wasi", - "windows-targets", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "rand_core", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -189,6 +403,33 @@ dependencies = [ "cc", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -204,12 +445,30 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -228,6 +487,18 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -289,11 +560,30 @@ dependencies = [ "yansi", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -307,6 +597,29 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + [[package]] name = "regex" version = "1.12.3" @@ -330,12 +643,31 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.4" @@ -345,8 +677,8 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", - "windows-sys", + "linux-raw-sys 0.12.1", + "windows-sys 0.59.0", ] [[package]] @@ -358,31 +690,70 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" -version = "1.0.197" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits", +] + [[package]] name = "syn" -version = "2.0.50" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -396,10 +767,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.4.2", "once_cell", - "rustix", - "windows-sys", + "rustix 1.1.4", + "windows-sys 0.59.0", +] + +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix 0.38.44", + "windows-sys 0.48.0", ] [[package]] @@ -408,6 +789,36 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "toml_datetime" +version = "1.0.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.3+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0a07913e63758bc95142d9863a5a45173b71515e68b690cad70cf99c3255ce1" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -420,6 +831,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "wait-timeout" version = "0.2.0" @@ -431,11 +848,26 @@ dependencies = [ [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] @@ -492,6 +924,40 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "winapi" version = "0.3.9" @@ -529,7 +995,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -538,13 +1004,37 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -553,28 +1043,46 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -587,24 +1095,48 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -612,12 +1144,100 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "wit-bindgen-rt" -version = "0.33.0" +name = "winnow" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", ] [[package]] @@ -625,3 +1245,9 @@ name = "yansi" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 566e48a..4561e6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ unicode-width = "0.2.0" [dev-dependencies] assert_cmd = "2.0.14" -criterion = { version = "4.3.0", package = "codspeed-criterion-compat" } +divan = { version = "4.3.0", package = "codspeed-divan-compat" } pretty_assertions = "1.4.0" predicates = "3.1.0" rand = "0.10.0" @@ -44,6 +44,6 @@ inherits = "release" lto = "thin" [[bench]] -name = "bench_cmp" -path = "benches/benchmarks-criterion.rs" +name = "bench_diffutils" +path = "benches/bench-diffutils.rs" harness = false diff --git a/benches/bench-diffutils.rs b/benches/bench-diffutils.rs new file mode 100644 index 0000000..72d0241 --- /dev/null +++ b/benches/bench-diffutils.rs @@ -0,0 +1,342 @@ +//! Benches for all utils in diffutils. +const FILE_LINES: [usize; 3] = [10_000, 100_000, 500_000]; +// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files +const TEMP_DIR: &str = ""; +const NUM_DIFF: usize = 4; + +mod diffutils_cmp { + use std::hint::black_box; + + use diffutilslib::cmp; + use divan::Bencher; + + use crate::{binary, prepare::*, FILE_LINES}; + + // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) + // #[divan::bench] + // fn parser_cmp_no_prepare() { + // let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; + // let args = str_to_options(&cmd).into_iter().peekable(); + // let _ = cmp::parse_params(args); + // } + + #[divan::bench(args = FILE_LINES)] + fn diff_compare_files_equal(bencher: Bencher, lines: usize) { + let (from, to) = get_context().get_test_files_equal(lines); + let cmd = format!("cmp {from} {to}"); + let opts = str_to_options(&cmd).into_iter().peekable(); + let params = cmp::parse_params(opts).unwrap(); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| params.clone()) + .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); + } + + // bench the actual compare; cmp exits on first difference + #[divan::bench(args = FILE_LINES)] + fn diff_compare_files_diff(bencher: Bencher, lines: usize) { + let (from, to) = get_context().get_test_files_different(lines); + let cmd = format!("cmp {from} {to} -s"); + let opts = str_to_options(&cmd).into_iter().peekable(); + let params = cmp::parse_params(opts).unwrap(); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| params.clone()) + .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); + } + + // bench original GNU cmp + #[divan::bench(args = FILE_LINES)] + fn cmd_cmp_gnu_equal(bencher: Bencher, num_lines: usize) { + let (from, to) = get_context().get_test_files_equal(num_lines); + let args_str = format!("{from} {to}"); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| binary::bench_binary("cmp", cmd_args)); + } + + // bench the compiled release version + #[divan::bench(args = FILE_LINES)] + fn cmd_cmp_release_equal(bencher: Bencher, num_lines: usize) { + let (from, to) = get_context().get_test_files_equal(num_lines); + let args_str = format!("cmp {from} {to}"); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); + } +} + +mod diffutils_diff { + // use std::hint::black_box; + + use crate::{binary, prepare::*, FILE_LINES}; + // use diffutilslib::params; + use divan::Bencher; + + // bench the actual compare + // TODO diff does not have a diff function + // #[divan::bench(args = [100_000,10_000])] + // fn diff_compare_files(bencher: Bencher, lines: usize) { + // let (from, to) = gen_testfiles(lines, 0, "id"); + // let cmd = format!("cmp {from} {to}"); + // let opts = str_to_options(&cmd).into_iter().peekable(); + // let params = params::parse_params(opts).unwrap(); + // + // bencher + // // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + // .with_inputs(|| params.clone()) + // .bench_refs(|params| diff::diff(¶ms).unwrap()); + // } + + // bench original GNU diff + #[divan::bench(args = FILE_LINES)] + fn cmd_diff_gnu_equal(bencher: Bencher, num_lines: usize) { + let (from, to) = get_context().get_test_files_equal(num_lines); + let args_str = format!("{from} {to}"); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| binary::bench_binary("diff", cmd_args)); + } + + // bench the compiled release version + #[divan::bench(args = FILE_LINES)] + fn cmd_diff_release_equal(bencher: Bencher, num_lines: usize) { + let (from, to) = get_context().get_test_files_equal(num_lines); + let args_str = format!("diff {from} {to}"); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); + } +} + +mod parser { + use std::hint::black_box; + + use diffutilslib::{cmp, params}; + use divan::Bencher; + + use crate::prepare::str_to_options; + + // bench the time it takes to parse the command line arguments + #[divan::bench] + fn cmp_parser(bencher: Bencher) { + let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; + let args = str_to_options(&cmd).into_iter().peekable(); + bencher + .with_inputs(|| args.clone()) + .bench_values(|data| black_box(cmp::parse_params(data))); + } + + // bench the time it takes to parse the command line arguments + #[divan::bench] + fn diff_parser(bencher: Bencher) { + let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; + let args = str_to_options(&cmd).into_iter().peekable(); + bencher + .with_inputs(|| args.clone()) + .bench_values(|data| black_box(params::parse_params(data))); + } +} + +mod prepare { + use std::{ + ffi::OsString, + fs::{self, File}, + io::{BufWriter, Write}, + path::Path, + sync::OnceLock, + }; + + use rand::RngExt; + use tempfile::TempDir; + + use crate::{FILE_LINES, NUM_DIFF, TEMP_DIR}; + + // file lines and .txt will be added + const FROM_FILE: &str = "from_file"; + const TO_FILE: &str = "to_file"; + + #[derive(Debug, Default)] + pub struct BenchContext { + pub tmp_dir: Option, + pub dir: String, + pub files_equal: Vec<(String, String)>, + pub files_different: Vec<(String, String)>, + } + + impl BenchContext { + pub fn get_path(&self) -> &Path { + match &self.tmp_dir { + Some(tmp) => tmp.path(), + None => Path::new(&self.dir), + } + } + + pub fn get_test_files_equal(&self, num_lines: usize) -> &(String, String) { + let p = FILE_LINES.iter().position(|f| *f == num_lines).unwrap(); + &self.files_equal[p] + } + + pub fn get_test_files_different(&self, num_lines: usize) -> &(String, String) { + let p = FILE_LINES.iter().position(|f| *f == num_lines).unwrap(); + &self.files_different[p] + } + } + + // Since each bench function is separate in Divan it is more difficult to dynamically create test data. + // This keeps the TempDir alive until the program exits and generates the files only once. + static SHARED_CONTEXT: OnceLock = OnceLock::new(); + pub fn get_context() -> &'static BenchContext { + SHARED_CONTEXT.get_or_init(|| { + let mut ctx = BenchContext::default(); + if TEMP_DIR.is_empty() { + let tmp_dir = TempDir::new().expect("Failed to create temp dir"); + ctx.tmp_dir = Some(tmp_dir); + } else { + // uses current directory, the generated files are kept + let path = Path::new(TEMP_DIR); + if !path.exists() { + fs::create_dir_all(path).expect("Path {path} could not be created"); + } + ctx.dir = TEMP_DIR.to_string(); + }; + + // generate test files + for num_lines in FILE_LINES { + let f = generate_test_files(ctx.get_path(), num_lines, 0, "eq") + .expect("generate_test_files failed"); + ctx.files_equal.push(f); + let f = generate_test_files(ctx.get_path(), num_lines, NUM_DIFF, "df") + .expect("generate_test_files failed"); + ctx.files_different.push(f); + } + + ctx + }) + } + + pub fn str_to_options(opt: &str) -> Vec { + let s: Vec = opt + .split(" ") + .into_iter() + .filter(|s| !s.is_empty()) + .map(|s| OsString::from(s)) + .collect(); + + s + } + + // Generates the test files and returns the from and to file names. + #[allow(unused)] + pub fn gen_testfiles(num_lines: usize, num_diff: usize, id: &str) -> (String, String) { + let dir = get_context().get_path(); + generate_test_files(dir, num_lines, num_diff, id).expect("generate_test_files failed") + } + + /// Generates two test files for comparison. + /// + /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. + /// If num_differences is set, '*' will be inserted between the first two words of a line, + /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. + fn generate_test_files( + dir: &Path, + num_lines: usize, + num_differences: usize, + id: &str, + ) -> std::io::Result<(String, String)> { + let id = if id.is_empty() { + "".to_string() + } else { + format!("{id}_") + }; + let f1 = format!("{id}{FROM_FILE}_{num_lines}.txt"); + let f2 = format!("{id}{TO_FILE}_{num_lines}.txt"); + let from_path = dir.join(f1); + let to_path = dir.join(f2); + + generate_file_fast(&from_path, &to_path, num_lines, num_differences)?; + + Ok(( + from_path.to_string_lossy().to_string(), + to_path.to_string_lossy().to_string(), + )) + } + + // Largely Gemini AI + fn generate_file_fast( + from_name: &Path, + to_name: &Path, + line_count: usize, + num_differences: usize, + ) -> std::io::Result<()> { + let file_from = File::create(from_name)?; + let file_to = File::create(to_name)?; + let change = if num_differences == 0 { + 0 + } else { + line_count / num_differences + }; + // Use a larger 128KB buffer for massive files + let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); + let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); + let mut rng = rand::rng(); + + // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes + let mut line_buffer = [b' '; 60]; + line_buffer[59] = b'\n'; // Set the newline once at the end + + for i in (0..line_count).rev() { + // Fill only the letter positions, skipping spaces and the newline + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + + // Write the raw bytes directly to both files + writer_from.write_all(&line_buffer)?; + // make changes in the file + if num_differences == 0 { + writer_to.write_all(&line_buffer)?; + } else { + if i % change == 0 { + line_buffer[5] = b'*'; + } + writer_to.write_all(&line_buffer)?; + line_buffer[5] = b' '; + } + } + + writer_from.flush()?; + writer_to.flush()?; + + Ok(()) + } +} + +mod binary { + use std::process::Command; + + use crate::prepare::str_to_options; + + pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { + let args = str_to_options(cmd_args); + Command::new(program) + .args(args) + .status() + .expect("Failed to execute binary") + } +} + +fn main() { + // Run registered benchmarks. + divan::main(); +} diff --git a/benches/benchmarks-criterion.rs b/benches/benchmarks-criterion.rs deleted file mode 100644 index 442b89a..0000000 --- a/benches/benchmarks-criterion.rs +++ /dev/null @@ -1,284 +0,0 @@ -/// Benchmarks, currently only for cmp -/// -/// Provides some general functions, e.g. to create files to compare in different sizes. -/// -/// use hyperfine to benchmark against cmp -/// * hyperfine -i "target/release/diffutils cmp from_file_10000000.txt to_file_10000000.txt" -/// * hyperfine -i "cmp from_file_10000000.txt to_file_10000000.txt" -/// -/// The Rust version seems twice as slow. -use criterion::{criterion_group, criterion_main, Criterion}; -// use std::env; -// use std::hint::black_box; -use rand::RngExt; -use std::io::{BufWriter, Write}; -use std::path::Path; -use std::process::Command; -use std::{ffi::OsString, fs::File, time::Duration}; - -const WARM_UP_TIME_MS: u64 = 500; -#[allow(unused)] -const MEASUREMENT_TIME_MS: u64 = 2000; - -// file lines and .txt will be added -const FROM_FILE: &str = "from_file"; -const TO_FILE: &str = "to_file"; - -criterion_group!( - benches, - bench_parser, - bench_cmp // , bench_diff -); -criterion_main!(benches); - -// All results are a few microseconds, so negligible. -fn bench_parser(c: &mut Criterion) { - let mut group = c.benchmark_group("Bench parser"); - - group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS)); - // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS)); - // group.sample_size(10); - - group.bench_function("Parse cmp", |b| { - b.iter(|| { - cmp_parse_only( - "cmd file_1.txt file_2.txt -bl --bytes=2048 --ignore-initial=100KiB:1MiB", - ) - }) - }); - - group.bench_function("Parse diff", |b| { - b.iter(|| diff_parse_only("diff file_1.txt file_2.txt")) - }); - // group.bench_function("Parse error", |b| { - // b.iter(|| parse_single_arg("cmd file_1.txt file_2.txt --something-unknown")) - // }); - // group.bench_function("Parse help", |b| b.iter(|| parse_single_arg("cmd --help"))); - - group.finish(); -} - -// This is the interesting part. -fn bench_cmp(c: &mut Criterion) { - let mut group = c.benchmark_group("Bench cmp"); - // uses tmp - // let dir_path = tempfile::tempdir().unwrap().path(); - // uses current directory, the generated files are kept - let dir_path = Path::new(""); - // let curr = env::current_dir().unwrap(); - // let dir_path = curr.as_path(); - let num_lines = 100_000; - // The more differences, the faster cmp returns, as it stops after the first difference is found. - let num_differences = 1; - - group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS)); - // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS)); - // group.sample_size(10); - - let (from, to) = - generate_test_files(num_lines, 0, dir_path).expect("generate_test_files failed"); - let cmd = format!("cmp {from} {to}"); - let opts = str_to_args(&cmd).into_iter().peekable(); - let params = diffutilslib::cmp::parse_params(opts).unwrap(); - - group.bench_function(format!("cmp files unchanged, lines: {num_lines}"), |b| { - b.iter(|| diffutilslib::cmp::cmp(¶ms).unwrap()) - }); - - let (from, to) = generate_test_files(num_lines, num_differences, dir_path) - .expect("generate_test_files failed"); - let cmd = format!("cmp {from} {to} -s"); - let opts = str_to_args(&cmd).into_iter().peekable(); - let params = diffutilslib::cmp::parse_params(opts).unwrap(); - - group.bench_function(format!("cmp files changed, lines: {num_lines}"), |b| { - b.iter(|| diffutilslib::cmp::cmp(¶ms).unwrap()) - }); - - group.finish(); - - // Optional bench by executing the file as cmd - bench_binary_execution_cmp(c); -} - -// // This is the interesting part. -// fn bench_diff(c: &mut Criterion) { -// let mut group = c.benchmark_group("Bench cmp"); -// // uses tmp -// // let dir_path = tempfile::tempdir().unwrap().path(); -// // uses current directory, the generated files are kept -// let dir_path = Path::new(""); -// // let curr = env::current_dir().unwrap(); -// // let dir_path = curr.as_path(); -// let num_lines = 100_000; -// // The more differences, the faster cmp returns, as it stops after the first difference is found. -// let num_differences = 1; -// -// group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS)); -// // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS)); -// // group.sample_size(10); -// -// let (from, to) = -// generate_test_files(num_lines, 0, dir_path).expect("generate_test_files failed"); -// let cmd = format!("diff {from} {to}"); -// let opts = str_to_args(&cmd).into_iter().peekable(); -// let params = diffutilslib::params::parse_params(opts).unwrap(); -// -// // TODO need function because main cannot be called. -// group.bench_function(format!("diff files unchanged, lines: {num_lines}"), |b| { -// b.iter(|| diffutilslib::::cmp(¶ms).unwrap()) -// }); -// -// let (from, to) = generate_test_files(num_lines, num_differences, dir_path) -// .expect("generate_test_files failed"); -// let cmd = format!("diff {from} {to} -s"); -// let opts = str_to_args(&cmd).into_iter().peekable(); -// let params = diffutilslib::params::parse_params(opts).unwrap(); -// -// // TODO need function because main cannot be called. -// group.bench_function(format!("diff files changed, lines: {num_lines}"), |b| { -// b.iter(|| diffutilslib::::cmp(¶ms).unwrap()) -// }); -// -// group.finish(); -// } - -fn cmp_parse_only(cmd: &str) -> String { - let args = str_to_args(cmd).into_iter().peekable(); - let _params = match diffutilslib::cmp::parse_params(args) { - Ok(params) => params, - Err(e) => { - return e.to_string(); - } - }; - return "ok".to_string(); -} - -fn diff_parse_only(cmd: &str) -> String { - let args = str_to_args(cmd).into_iter().peekable(); - let _params = match diffutilslib::params::parse_params(args) { - Ok(params) => params, - Err(e) => { - return e.to_string(); - } - }; - return "ok".to_string(); -} - -fn str_to_args(opt: &str) -> Vec { - let s: Vec = opt - .split(" ") - .into_iter() - .map(|s| OsString::from(s)) - .collect(); - - s -} - -/// Generates two test files for comparison. -/// -/// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. -/// If num_differences is set, '*' will be inserted between the first two words of a line, -/// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. -fn generate_test_files( - lines: usize, - num_differences: usize, - dir: &Path, -) -> std::io::Result<(String, String)> { - let f1 = format!("{FROM_FILE}_{lines}.txt"); - let f2 = format!("{TO_FILE}_{lines}.txt"); - let from_path = dir.join(f1); - let to_path = dir.join(f2); - - generate_file_fast(&from_path, &to_path, lines, num_differences)?; - - Ok(( - from_path.to_string_lossy().to_string(), - to_path.to_string_lossy().to_string(), - )) -} - -// Largely Gemini AI Generated -fn generate_file_fast( - from_name: &Path, - to_name: &Path, - line_count: usize, - num_differences: usize, -) -> std::io::Result<()> { - let file_from = File::create(from_name)?; - let file_to = File::create(to_name)?; - let change = if num_differences == 0 { - 0 - } else { - line_count / num_differences - }; - // Use a larger 128KB buffer for massive files - let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); - let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); - let mut rng = rand::rng(); - - // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes - let mut line_buffer = [b' '; 60]; - line_buffer[59] = b'\n'; // Set the newline once at the end - - for i in (0..line_count).rev() { - // Fill only the letter positions, skipping spaces and the newline - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - - // Write the raw bytes directly to both files - writer_from.write_all(&line_buffer)?; - // make changes in the file - if num_differences == 0 { - writer_to.write_all(&line_buffer)?; - } else { - if i % change == 0 { - line_buffer[5] = b'*'; - } - writer_to.write_all(&line_buffer)?; - line_buffer[5] = b' '; - } - } - - writer_from.flush()?; - writer_to.flush()?; - - Ok(()) -} - -#[allow(unused)] -// fn bench_binary_execution(c: &mut BenchmarkGroup<'_, WallTime>) { -fn bench_binary_execution_cmp(c: &mut Criterion) { - c.bench_function("GNU cmp", |b| { - b.iter(|| { - let _status = Command::new("cmp") - .arg("from_file_100000.txt") - .arg("to_file_100000.txt") - .arg("-s") - .status() - .expect("Failed to execute binary"); - - // assert!(status.success()); - }) - }); - - c.bench_function("cmp binary", |b| { - b.iter(|| { - let _status = Command::new("target/release/diffutils") - .arg("cmp") - .arg("from_file_100000.txt") - .arg("to_file_100000.txt") - .arg("-s") - // .arg("--lines") - // .arg(black_box("10000")) - .status() - .expect("Failed to execute binary"); - - // assert!(status.success()); - }) - }); -} From 725b89f9626a7e591b4ebcfce1a1787c6c724b44 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Thu, 5 Mar 2026 13:06:15 +0100 Subject: [PATCH 4/4] changed file num lines to file size in kb --- Cargo.toml | 7 ++ benches/bench-diffutils.rs | 163 ++++++++++++++++++++++--------------- fuzz/Cargo.lock | 27 ++++++ 3 files changed, 133 insertions(+), 64 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4561e6b..fe96436 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,3 +47,10 @@ lto = "thin" name = "bench_diffutils" path = "benches/bench-diffutils.rs" harness = false + +[features] +# default = ["feat_bench_not_diff"] +# Turn bench for diffutils cmp off +feat_bench_not_cmp = [] +# Turn bench for diffutils diff off +feat_bench_not_diff = [] diff --git a/benches/bench-diffutils.rs b/benches/bench-diffutils.rs index 72d0241..e506b3f 100644 --- a/benches/bench-diffutils.rs +++ b/benches/bench-diffutils.rs @@ -1,31 +1,40 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + //! Benches for all utils in diffutils. -const FILE_LINES: [usize; 3] = [10_000, 100_000, 500_000]; +//! +//! There is a file generator included to create files of different sizes for comparison. \ +//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ +//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. + +/// Generate test files with these sizes in KB. +const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; +// const FILE_SIZE_KILO_BYTES: [u64; 3] = [100, 1 * MB, 5 * MB]; // Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files const TEMP_DIR: &str = ""; -const NUM_DIFF: usize = 4; +const NUM_DIFF: u64 = 4; +// just for FILE_SIZE_KILO_BYTES +const MB: u64 = 1_000; +const CHANGE_CHAR: u8 = b'#'; +#[cfg(not(feature = "feat_bench_not_cmp"))] mod diffutils_cmp { use std::hint::black_box; use diffutilslib::cmp; use divan::Bencher; - use crate::{binary, prepare::*, FILE_LINES}; - - // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) - // #[divan::bench] - // fn parser_cmp_no_prepare() { - // let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - // let args = str_to_options(&cmd).into_iter().peekable(); - // let _ = cmp::parse_params(args); - // } + use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - #[divan::bench(args = FILE_LINES)] - fn diff_compare_files_equal(bencher: Bencher, lines: usize) { - let (from, to) = get_context().get_test_files_equal(lines); + #[divan::bench(args = FILE_SIZE_KILO_BYTES)] + fn cmp_compare_files_equal(bencher: Bencher, kb: u64) { + let (from, to) = get_context().get_test_files_equal(kb); let cmd = format!("cmp {from} {to}"); let opts = str_to_options(&cmd).into_iter().peekable(); let params = cmp::parse_params(opts).unwrap(); + bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) .with_inputs(|| params.clone()) @@ -33,9 +42,9 @@ mod diffutils_cmp { } // bench the actual compare; cmp exits on first difference - #[divan::bench(args = FILE_LINES)] - fn diff_compare_files_diff(bencher: Bencher, lines: usize) { - let (from, to) = get_context().get_test_files_different(lines); + #[divan::bench(args = FILE_SIZE_KILO_BYTES)] + fn cmp_compare_files_different(bencher: Bencher, bytes: u64) { + let (from, to) = get_context().get_test_files_different(bytes); let cmd = format!("cmp {from} {to} -s"); let opts = str_to_options(&cmd).into_iter().peekable(); let params = cmp::parse_params(opts).unwrap(); @@ -47,9 +56,9 @@ mod diffutils_cmp { } // bench original GNU cmp - #[divan::bench(args = FILE_LINES)] - fn cmd_cmp_gnu_equal(bencher: Bencher, num_lines: usize) { - let (from, to) = get_context().get_test_files_equal(num_lines); + #[divan::bench(args = FILE_SIZE_KILO_BYTES)] + fn cmd_cmp_gnu_equal(bencher: Bencher, bytes: u64) { + let (from, to) = get_context().get_test_files_equal(bytes); let args_str = format!("{from} {to}"); bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) @@ -58,9 +67,9 @@ mod diffutils_cmp { } // bench the compiled release version - #[divan::bench(args = FILE_LINES)] - fn cmd_cmp_release_equal(bencher: Bencher, num_lines: usize) { - let (from, to) = get_context().get_test_files_equal(num_lines); + #[divan::bench(args = FILE_SIZE_KILO_BYTES)] + fn cmd_cmp_release_equal(bencher: Bencher, bytes: u64) { + let (from, to) = get_context().get_test_files_equal(bytes); let args_str = format!("cmp {from} {to}"); bencher @@ -70,17 +79,18 @@ mod diffutils_cmp { } } +#[cfg(not(feature = "feat_bench_not_diff"))] mod diffutils_diff { // use std::hint::black_box; - use crate::{binary, prepare::*, FILE_LINES}; + use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; // use diffutilslib::params; use divan::Bencher; // bench the actual compare // TODO diff does not have a diff function // #[divan::bench(args = [100_000,10_000])] - // fn diff_compare_files(bencher: Bencher, lines: usize) { + // fn diff_compare_files(bencher: Bencher, bytes: u64) { // let (from, to) = gen_testfiles(lines, 0, "id"); // let cmd = format!("cmp {from} {to}"); // let opts = str_to_options(&cmd).into_iter().peekable(); @@ -93,9 +103,9 @@ mod diffutils_diff { // } // bench original GNU diff - #[divan::bench(args = FILE_LINES)] - fn cmd_diff_gnu_equal(bencher: Bencher, num_lines: usize) { - let (from, to) = get_context().get_test_files_equal(num_lines); + #[divan::bench(args = FILE_SIZE_KILO_BYTES)] + fn cmd_diff_gnu_equal(bencher: Bencher, bytes: u64) { + let (from, to) = get_context().get_test_files_equal(bytes); let args_str = format!("{from} {to}"); bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) @@ -104,9 +114,9 @@ mod diffutils_diff { } // bench the compiled release version - #[divan::bench(args = FILE_LINES)] - fn cmd_diff_release_equal(bencher: Bencher, num_lines: usize) { - let (from, to) = get_context().get_test_files_equal(num_lines); + #[divan::bench(args = FILE_SIZE_KILO_BYTES)] + fn cmd_diff_release_equal(bencher: Bencher, bytes: u64) { + let (from, to) = get_context().get_test_files_equal(bytes); let args_str = format!("diff {from} {to}"); bencher @@ -134,6 +144,14 @@ mod parser { .bench_values(|data| black_box(cmp::parse_params(data))); } + // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) + // #[divan::bench] + // fn cmp_parser_no_prepare() { + // let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; + // let args = str_to_options(&cmd).into_iter().peekable(); + // let _ = cmp::parse_params(args); + // } + // bench the time it takes to parse the command line arguments #[divan::bench] fn diff_parser(bencher: Bencher) { @@ -157,12 +175,14 @@ mod prepare { use rand::RngExt; use tempfile::TempDir; - use crate::{FILE_LINES, NUM_DIFF, TEMP_DIR}; + use crate::{CHANGE_CHAR, FILE_SIZE_KILO_BYTES, NUM_DIFF, TEMP_DIR}; // file lines and .txt will be added const FROM_FILE: &str = "from_file"; const TO_FILE: &str = "to_file"; + const LINE_LENGTH: usize = 60; + /// Contains test data (file names) which only needs to be created once. #[derive(Debug, Default)] pub struct BenchContext { pub tmp_dir: Option, @@ -179,13 +199,14 @@ mod prepare { } } - pub fn get_test_files_equal(&self, num_lines: usize) -> &(String, String) { - let p = FILE_LINES.iter().position(|f| *f == num_lines).unwrap(); + pub fn get_test_files_equal(&self, kb: u64) -> &(String, String) { + let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); &self.files_equal[p] } - pub fn get_test_files_different(&self, num_lines: usize) -> &(String, String) { - let p = FILE_LINES.iter().position(|f| *f == num_lines).unwrap(); + #[allow(unused)] + pub fn get_test_files_different(&self, kb: u64) -> &(String, String) { + let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); &self.files_different[p] } } @@ -193,6 +214,7 @@ mod prepare { // Since each bench function is separate in Divan it is more difficult to dynamically create test data. // This keeps the TempDir alive until the program exits and generates the files only once. static SHARED_CONTEXT: OnceLock = OnceLock::new(); + /// Creates the test files once and provides them to all tests. pub fn get_context() -> &'static BenchContext { SHARED_CONTEXT.get_or_init(|| { let mut ctx = BenchContext::default(); @@ -208,12 +230,12 @@ mod prepare { ctx.dir = TEMP_DIR.to_string(); }; - // generate test files - for num_lines in FILE_LINES { - let f = generate_test_files(ctx.get_path(), num_lines, 0, "eq") + // generate test bytes + for kb in FILE_SIZE_KILO_BYTES { + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") .expect("generate_test_files failed"); ctx.files_equal.push(f); - let f = generate_test_files(ctx.get_path(), num_lines, NUM_DIFF, "df") + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") .expect("generate_test_files failed"); ctx.files_different.push(f); } @@ -233,22 +255,15 @@ mod prepare { s } - // Generates the test files and returns the from and to file names. - #[allow(unused)] - pub fn gen_testfiles(num_lines: usize, num_diff: usize, id: &str) -> (String, String) { - let dir = get_context().get_path(); - generate_test_files(dir, num_lines, num_diff, id).expect("generate_test_files failed") - } - - /// Generates two test files for comparison. + /// Generates two test files for comparison with size. /// /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. - /// If num_differences is set, '*' will be inserted between the first two words of a line, + /// If num_differences is set, '#' will be inserted between the first two words of a line, /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. - fn generate_test_files( + fn generate_test_files_bytes( dir: &Path, - num_lines: usize, - num_differences: usize, + bytes: u64, + num_differences: u64, id: &str, ) -> std::io::Result<(String, String)> { let id = if id.is_empty() { @@ -256,12 +271,12 @@ mod prepare { } else { format!("{id}_") }; - let f1 = format!("{id}{FROM_FILE}_{num_lines}.txt"); - let f2 = format!("{id}{TO_FILE}_{num_lines}.txt"); + let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); + let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); let from_path = dir.join(f1); let to_path = dir.join(f2); - generate_file_fast(&from_path, &to_path, num_lines, num_differences)?; + generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; Ok(( from_path.to_string_lossy().to_string(), @@ -269,19 +284,25 @@ mod prepare { )) } - // Largely Gemini AI - fn generate_file_fast( + fn generate_file_bytes( from_name: &Path, to_name: &Path, - line_count: usize, - num_differences: usize, + bytes: u64, + num_differences: u64, ) -> std::io::Result<()> { let file_from = File::create(from_name)?; let file_to = File::create(to_name)?; - let change = if num_differences == 0 { + // for int division, lines will be smaller than requested bytes + let n_lines = bytes / LINE_LENGTH as u64; + let change_every_n_lines = if num_differences == 0 { 0 } else { - line_count / num_differences + let c = n_lines / num_differences; + if c == 0 { + 1 + } else { + c + } }; // Use a larger 128KB buffer for massive files let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); @@ -292,7 +313,7 @@ mod prepare { let mut line_buffer = [b' '; 60]; line_buffer[59] = b'\n'; // Set the newline once at the end - for i in (0..line_count).rev() { + for i in (0..n_lines).rev() { // Fill only the letter positions, skipping spaces and the newline for word_idx in 0..10 { let start = word_idx * 6; // Each word + space block is 6 bytes @@ -307,14 +328,28 @@ mod prepare { if num_differences == 0 { writer_to.write_all(&line_buffer)?; } else { - if i % change == 0 { - line_buffer[5] = b'*'; + if i % change_every_n_lines == 0 && n_lines - i > 2 { + line_buffer[5] = CHANGE_CHAR; } writer_to.write_all(&line_buffer)?; line_buffer[5] = b' '; } } + // create last line + let missing = (bytes - n_lines as u64 * LINE_LENGTH as u64) as usize; + if missing > 0 { + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + line_buffer[missing - 1] = b'\n'; + writer_from.write_all(&line_buffer[0..missing])?; + writer_to.write_all(&line_buffer[0..missing])?; + } + writer_from.flush()?; writer_to.flush()?; diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index c9b6f68..545c6ec 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -69,6 +69,26 @@ dependencies = [ "windows-link", ] +[[package]] +name = "const_format" +version = "0.2.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -86,6 +106,7 @@ name = "diffutils" version = "0.5.0" dependencies = [ "chrono", + "const_format", "diff", "itoa", "regex", @@ -301,6 +322,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unified-diff-fuzz" version = "0.0.0"