diff --git a/.gitignore b/.gitignore index 8d1533f..f459d1b 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ genome.fasta *.jpg *.png + +*.swp diff --git a/Cargo.lock b/Cargo.lock index c337c95..b543d22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,6 +119,21 @@ dependencies = [ "syn", ] +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bit_field" version = "0.10.3" @@ -161,6 +176,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + [[package]] name = "cc" version = "1.2.40" @@ -421,10 +442,16 @@ dependencies = [ name = "eurorust-2025-workshop" version = "0.1.0" dependencies = [ + "bit-set", + "bytes", "codspeed-divan-compat", "image", "image-compare", + "itertools 0.14.0", + "memchr", + "memmap2", "rand", + "rayon", ] [[package]] @@ -749,6 +776,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memmap2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +dependencies = [ + "libc", +] + [[package]] name = "minimal-lexical" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index f4172be..9e9ca79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,12 @@ path = "src/lib.rs" rand = "0.8" image = "0.25" image-compare = "0.5.0" +bit-set = "0.8.0" +itertools = "0.14.0" +rayon = "1.11.0" +memchr = "2.7.6" +memmap2 = "0.9.8" +bytes = "1.10.1" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/README.md b/README.md index 8e9c5be..e5a3313 100644 --- a/README.md +++ b/README.md @@ -34,3 +34,5 @@ cargo codspeed run -m walltime ``` Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time. + +Participant: gendx diff --git a/benches/bfs.rs b/benches/bfs.rs index eaadd7a..d796914 100644 --- a/benches/bfs.rs +++ b/benches/bfs.rs @@ -1,5 +1,5 @@ use divan::Bencher; -use eurorust_2025_workshop::bfs::{bfs_naive, generate_graph}; +use eurorust_2025_workshop::bfs::{bfs_optimized, generate_graph}; fn main() { divan::main(); @@ -10,7 +10,7 @@ fn bfs_small_graph(bencher: Bencher) { let graph = generate_graph(100); bencher.bench_local(|| { - let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0))); + let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0))); assert!(!result.is_empty(), "BFS result should not be empty"); assert!( @@ -28,7 +28,7 @@ fn bfs_medium_graph(bencher: Bencher) { let graph = generate_graph(1000); bencher.bench_local(|| { - let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0))); + let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0))); assert!(!result.is_empty(), "BFS result should not be empty"); assert!( @@ -46,7 +46,7 @@ fn bfs_large_graph(bencher: Bencher) { let graph = generate_graph(10000); bencher.bench_local(|| { - let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0))); + let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0))); assert!(!result.is_empty(), "BFS result should not be empty"); assert!( diff --git a/benches/blob_corruption_checker.rs b/benches/blob_corruption_checker.rs index 3ad54d6..45084c0 100644 --- a/benches/blob_corruption_checker.rs +++ b/benches/blob_corruption_checker.rs @@ -18,14 +18,25 @@ fn corruption_check(bencher: Bencher) { // All corruptions should be 1KB aligned for corruption in &corruptions { - assert_eq!(corruption.offset % 1024, 0, "Corruption offset should be 1KB aligned"); - assert_eq!(corruption.length % 1024, 0, "Corruption length should be multiple of 1KB"); + assert_eq!( + corruption.offset % 1024, + 0, + "Corruption offset should be 1KB aligned" + ); + assert_eq!( + corruption.length % 1024, + 0, + "Corruption length should be multiple of 1KB" + ); } // Check specific corruptions assert_eq!(corruptions[0].offset, 14801920, "First corruption offset"); assert_eq!(corruptions[0].length, 2048, "First corruption length"); - assert_eq!(corruptions[25].offset, 243891200, "Middle corruption offset"); + assert_eq!( + corruptions[25].offset, 243891200, + "Middle corruption offset" + ); assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset"); assert_eq!(corruptions[49].length, 5120, "Last corruption length"); diff --git a/benches/dna_matcher.rs b/benches/dna_matcher.rs index c955168..4d5a302 100644 --- a/benches/dna_matcher.rs +++ b/benches/dna_matcher.rs @@ -6,13 +6,20 @@ fn main() { #[divan::bench(sample_count = 2, sample_size = 3)] fn dna_matcher() { - let genome = std::fs::read_to_string("genome.fasta").expect( + use bytes::Bytes; + use memmap2::Mmap; + use std::fs::File; + use std::ops::Deref; + + let file = File::open("genome.fasta").expect( "Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'", ); + let mmap = unsafe { Mmap::map(&file).unwrap() }; + let genome = Bytes::from_owner(mmap); let pattern = "AGTCCGTA"; - let matches = divan::black_box(naive_dna_matcher( - divan::black_box(&genome), + let matches = divan::black_box(dna_matcher_api( + divan::black_box(genome.deref()), divan::black_box(pattern), )); diff --git a/benches/lut_grayscale_bench.rs b/benches/lut_grayscale_bench.rs index 5816569..64add8b 100644 --- a/benches/lut_grayscale_bench.rs +++ b/benches/lut_grayscale_bench.rs @@ -1,5 +1,5 @@ use eurorust_2025_workshop::lut_grayscale::*; -use image::{RgbImage}; +use image::RgbImage; fn main() { divan::main(); diff --git a/src/bfs.rs b/src/bfs.rs index 487fddc..4c98a75 100644 --- a/src/bfs.rs +++ b/src/bfs.rs @@ -1,4 +1,5 @@ -use std::collections::HashSet; +use bit_set::BitSet; +use std::collections::{HashSet, VecDeque}; /// A simple graph represented as an adjacency list #[derive(Debug, Clone)] @@ -50,6 +51,29 @@ pub fn bfs_naive(graph: &Graph, start: usize) -> Vec { result } +pub fn bfs_optimized(graph: &Graph, start: usize) -> Vec { + let mut visited = BitSet::new(); + let mut queue = VecDeque::new(); + let mut result = Vec::new(); + + queue.push_back(start); + visited.insert(start); + + while let Some(node) = queue.pop_front() { + result.push(node); + + if let Some(neighbors) = graph.adjacency.get(node) { + for &neighbor in neighbors { + if visited.insert(neighbor) { + queue.push_back(neighbor); + } + } + } + } + + result +} + /// Helper function to generate a random graph for benchmarking pub fn generate_graph(nodes: usize) -> Graph { use rand::{Rng, SeedableRng}; diff --git a/src/blob_corruption_checker.rs b/src/blob_corruption_checker.rs index 2515c20..3df243c 100644 --- a/src/blob_corruption_checker.rs +++ b/src/blob_corruption_checker.rs @@ -92,10 +92,7 @@ mod tests { "Middle corruption offset" ); assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); - assert_eq!( - corruptions[49].offset, 507871232, - "Last corruption offset" - ); + assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset"); assert_eq!(corruptions[49].length, 5120, "Last corruption length"); } } diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index d99c90e..307d270 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,5 +1,10 @@ +pub fn dna_matcher_api(genome: &[u8], pattern: &str) -> Vec { + optimized_dna_matcher_impl(genome, pattern.as_bytes()) +} + /// Naive approach: Read the entire file as a string and filter lines -pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec { +#[allow(dead_code)] +fn naive_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { genome .lines() .filter(|line| !line.starts_with('>')) // Skip headers @@ -8,27 +13,78 @@ pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec { .collect() } +#[allow(dead_code)] +fn itertools_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { + use itertools::*; + + std::iter::once(usize::MAX) + .chain(genome.as_bytes().iter().positions(|&c| c == b'\n')) + .chain(std::iter::once(genome.len())) + .tuple_windows() + .filter_map(|(start, end)| { + let line = if start == usize::MAX { + &genome[..end] + } else { + &genome[start + 1..end] + }; + if line.len() == 0 || line.as_bytes()[0] == b'>' { + None + } else { + Some(line) + } + }) + .filter(|line| line.contains(pattern)) + .map(|s| s.to_string()) + .collect() +} + +#[allow(dead_code)] +fn rayon_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { + use rayon::prelude::*; + + genome + .par_lines() + .filter(|line| !line.starts_with('>')) // Skip headers + .filter(|line| line.contains(pattern)) + .map(|s| s.to_string()) + .collect() +} + +fn optimized_dna_matcher_impl(genome: &[u8], pattern: &[u8]) -> Vec { + use memchr::memmem; + use rayon::prelude::*; + + let finder = memmem::Finder::new(pattern); + + genome + .par_split(|&c| c == b'\n') + .filter(|line| line.first().map_or(false, |&c| c != b'>')) // Skip headers and empty lines + .filter(|line| finder.find(line).is_some()) + .map(|s| std::str::from_utf8(s).unwrap().to_string()) + .collect() +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_naive_matcher() { - let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG"; + fn test_matcher() { + let test_genome = b">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG"; let pattern = "AGTCCGTA"; - let matches = naive_dna_matcher(test_genome, pattern); + let matches = dna_matcher_api(test_genome, pattern); assert_eq!(matches.len(), 1); assert_eq!(matches[0], "AGTCCGTAAA"); } #[test] - fn test_naive_matcher_on_genome_file() { + fn test_matcher_on_genome_file() { // Read the actual genome.fasta file let genome = std::fs::read_to_string("genome.fasta") .expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'"); let pattern = "AGTCCGTA"; - let matches = naive_dna_matcher(&genome, pattern); + let matches = dna_matcher_api(genome.as_bytes(), pattern); // With fixed seed (42), we should always get exactly 4927 matches assert_eq!( diff --git a/src/lut_filters.rs b/src/lut_filters.rs index a73068c..b8b472c 100644 --- a/src/lut_filters.rs +++ b/src/lut_filters.rs @@ -18,11 +18,11 @@ use image::{ImageBuffer, Rgb, RgbImage}; pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { - naive::apply_brightness_contrast(img, brightness, contrast) + optimized::apply_brightness_contrast(img, brightness, contrast) } pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { - naive::apply_gamma(img, gamma) + optimized::apply_gamma(img, gamma) } pub fn apply_brightness_contrast_gamma( @@ -31,10 +31,10 @@ pub fn apply_brightness_contrast_gamma( contrast: f32, gamma: f32, ) -> RgbImage { - let temp_img = apply_brightness_contrast(img, brightness, contrast); - naive::apply_gamma(&temp_img, gamma) + optimized::apply_brightness_contrast_gamma(img, brightness, contrast, gamma) } +#[allow(dead_code)] mod naive { use super::*; @@ -86,6 +86,78 @@ mod naive { } } +mod optimized { + use super::*; + + /// Apply brightness and contrast with floating-point math per pixel + pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + let lut: [u8; 256] = std::array::from_fn(|x| { + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32).clamp(0.0, 255.0) + as u8 + }); + + for (x, y, pixel) in img.enumerate_pixels() { + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output + } + + /// Naive implementation: Apply gamma correction + /// This is VERY slow because powf() is expensive! + pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + let lut: [u8; 256] = + std::array::from_fn(|x| ((x as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8); + + for (x, y, pixel) in img.enumerate_pixels() { + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output + } + + pub fn apply_brightness_contrast_gamma( + img: &RgbImage, + brightness: i16, + contrast: f32, + gamma: f32, + ) -> RgbImage { + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + let lut: [u8; 256] = std::array::from_fn(|x| { + let brightness_contrast = + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32) + .clamp(0.0, 255.0) as u8; + ((brightness_contrast as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8 + }); + + for (x, y, pixel) in img.enumerate_pixels() { + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/simd_filters.rs b/src/simd_filters.rs index 6675943..56f3887 100644 --- a/src/simd_filters.rs +++ b/src/simd_filters.rs @@ -1,11 +1,11 @@ use image::{ImageBuffer, Rgb, RgbImage}; pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { - naive::apply_brightness_contrast(img, brightness, contrast) + optimized::apply_brightness_contrast(img, brightness, contrast) } pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { - naive::apply_gamma(img, gamma) + optimized::apply_gamma(img, gamma) } pub fn apply_brightness_contrast_gamma( @@ -14,10 +14,10 @@ pub fn apply_brightness_contrast_gamma( contrast: f32, gamma: f32, ) -> RgbImage { - let temp_img = apply_brightness_contrast(img, brightness, contrast); - naive::apply_gamma(&temp_img, gamma) + optimized::apply_brightness_contrast_gamma(img, brightness, contrast, gamma) } +#[allow(dead_code)] mod naive { use super::*; @@ -69,6 +69,96 @@ mod naive { } } +mod optimized { + use super::*; + + use std::simd::num::{SimdFloat, SimdUint}; + use std::simd::{Simd, u8x16, usizex16}; + + pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { + let lut: [u8; 256] = std::array::from_fn(|x| { + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32).clamp(0.0, 255.0) + as u8 + }); + + let (width, height) = img.dimensions(); + + let input = img.as_raw(); + let mut output = vec![0u8; input.len()]; + + // Process LANES bytes at a time + const LANES: usize = 8; + + let chunks = input.chunks_exact(LANES); + let remainder = chunks.remainder(); + + for (i, chunk) in chunks.enumerate() { + let pixels: Simd = Simd::from_slice(chunk); + let pixels: Simd = pixels.cast(); + let adjusted = (pixels - Simd::splat(128.0)) * Simd::splat(1.0 + contrast) + + Simd::splat(128.0 + brightness as f32); + let clamped = adjusted.simd_clamp(Simd::splat(0.0), Simd::splat(255.0)); + let result: Simd = clamped.cast(); + result.copy_to_slice(&mut output[i * LANES..(i + 1) * LANES]); + } + + // Handle remaining bytes + for (i, &byte) in remainder.iter().enumerate() { + output[input.len() - remainder.len() + i] = lut[byte as usize]; + } + + ImageBuffer::from_raw(width, height, output).unwrap() + } + + pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { + let lut: [u8; 256] = + std::array::from_fn(|x| ((x as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8); + + apply_lut(img, &lut) + } + + pub fn apply_brightness_contrast_gamma( + img: &RgbImage, + brightness: i16, + contrast: f32, + gamma: f32, + ) -> RgbImage { + let lut: [u8; 256] = std::array::from_fn(|x| { + let brightness_contrast = + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32) + .clamp(0.0, 255.0) as u8; + ((brightness_contrast as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8 + }); + + apply_lut(img, &lut) + } + + fn apply_lut(img: &RgbImage, lut: &[u8; 256]) -> RgbImage { + let (width, height) = img.dimensions(); + + let input = img.as_raw(); + let mut output = vec![0u8; input.len()]; + + // Process 16 bytes at a time + let chunks = input.chunks_exact(16); + let remainder = chunks.remainder(); + + for (i, chunk) in chunks.enumerate() { + let pixels = u8x16::from_slice(chunk); + let indices: usizex16 = pixels.cast(); + let result: u8x16 = Simd::gather_or_default(lut, indices); + result.copy_to_slice(&mut output[i * 16..(i + 1) * 16]); + } + + // Handle remaining bytes + for (i, &byte) in remainder.iter().enumerate() { + output[input.len() - remainder.len() + i] = lut[byte as usize]; + } + + ImageBuffer::from_raw(width, height, output).unwrap() + } +} + #[cfg(test)] mod tests { use super::*;