CodSpeedHQ · gendx · Oct 8, 2025 · Oct 8, 2025 · Oct 8, 2025 · Oct 8, 2025
diff --git a/.gitignore b/.gitignore
@@ -31,3 +31,5 @@ genome.fasta
 
 *.jpg
 *.png
+
+*.swp
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,6 +11,12 @@ path = "src/lib.rs"
 rand = "0.8"
 image = "0.25"
 image-compare = "0.5.0"
+bit-set = "0.8.0"
+itertools = "0.14.0"
+rayon = "1.11.0"
+memchr = "2.7.6"
+memmap2 = "0.9.8"
+bytes = "1.10.1"
 
 [dev-dependencies]
 divan = { version = "4.0.2", package = "codspeed-divan-compat" }

diff --git a/README.md b/README.md
@@ -34,3 +34,5 @@ cargo codspeed run -m walltime
 ```
 
 Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time.
+
+Participant: gendx
diff --git a/benches/bfs.rs b/benches/bfs.rs
@@ -1,5 +1,5 @@
 use divan::Bencher;
-use eurorust_2025_workshop::bfs::{bfs_naive, generate_graph};
+use eurorust_2025_workshop::bfs::{bfs_optimized, generate_graph};
 
 fn main() {
     divan::main();
@@ -10,7 +10,7 @@ fn bfs_small_graph(bencher: Bencher) {
     let graph = generate_graph(100);
 
     bencher.bench_local(|| {
-        let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0)));
+        let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0)));
 
         assert!(!result.is_empty(), "BFS result should not be empty");
         assert!(
@@ -28,7 +28,7 @@ fn bfs_medium_graph(bencher: Bencher) {
     let graph = generate_graph(1000);
 
     bencher.bench_local(|| {
-        let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0)));
+        let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0)));
 
         assert!(!result.is_empty(), "BFS result should not be empty");
         assert!(
@@ -46,7 +46,7 @@ fn bfs_large_graph(bencher: Bencher) {
     let graph = generate_graph(10000);
 
     bencher.bench_local(|| {
-        let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0)));
+        let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0)));
 
         assert!(!result.is_empty(), "BFS result should not be empty");
         assert!(

diff --git a/benches/blob_corruption_checker.rs b/benches/blob_corruption_checker.rs
@@ -18,14 +18,25 @@ fn corruption_check(bencher: Bencher) {
 
         // All corruptions should be 1KB aligned
         for corruption in &corruptions {
-            assert_eq!(corruption.offset % 1024, 0, "Corruption offset should be 1KB aligned");
-            assert_eq!(corruption.length % 1024, 0, "Corruption length should be multiple of 1KB");
+            assert_eq!(
+                corruption.offset % 1024,
+                0,
+                "Corruption offset should be 1KB aligned"
+            );
+            assert_eq!(
+                corruption.length % 1024,
+                0,
+                "Corruption length should be multiple of 1KB"
+            );
         }
 
         // Check specific corruptions
         assert_eq!(corruptions[0].offset, 14801920, "First corruption offset");
         assert_eq!(corruptions[0].length, 2048, "First corruption length");
-        assert_eq!(corruptions[25].offset, 243891200, "Middle corruption offset");
+        assert_eq!(
+            corruptions[25].offset, 243891200,
+            "Middle corruption offset"
+        );
         assert_eq!(corruptions[25].length, 4096, "Middle corruption length");
         assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset");
         assert_eq!(corruptions[49].length, 5120, "Last corruption length");

diff --git a/benches/dna_matcher.rs b/benches/dna_matcher.rs
@@ -6,13 +6,20 @@ fn main() {
 
 #[divan::bench(sample_count = 2, sample_size = 3)]
 fn dna_matcher() {
-    let genome = std::fs::read_to_string("genome.fasta").expect(
+    use bytes::Bytes;
+    use memmap2::Mmap;
+    use std::fs::File;
+    use std::ops::Deref;
+
+    let file = File::open("genome.fasta").expect(
         "Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'",
     );
+    let mmap = unsafe { Mmap::map(&file).unwrap() };
+    let genome = Bytes::from_owner(mmap);
     let pattern = "AGTCCGTA";
 
-    let matches = divan::black_box(naive_dna_matcher(
-        divan::black_box(&genome),
+    let matches = divan::black_box(dna_matcher_api(
+        divan::black_box(genome.deref()),
         divan::black_box(pattern),
     ));
 

diff --git a/benches/lut_grayscale_bench.rs b/benches/lut_grayscale_bench.rs
@@ -1,5 +1,5 @@
 use eurorust_2025_workshop::lut_grayscale::*;
-use image::{RgbImage};
+use image::RgbImage;
 
 fn main() {
     divan::main();

diff --git a/src/bfs.rs b/src/bfs.rs
@@ -1,4 +1,5 @@
-use std::collections::HashSet;
+use bit_set::BitSet;
+use std::collections::{HashSet, VecDeque};
 
 /// A simple graph represented as an adjacency list
 #[derive(Debug, Clone)]
@@ -50,6 +51,29 @@ pub fn bfs_naive(graph: &Graph, start: usize) -> Vec<usize> {
     result
 }
 
+/// Breadth-first traversal of `graph` beginning at `start`.
+///
+/// Returns the nodes in the order they are taken off the queue; `start` is
+/// always the first element. A node id with no entry in `graph.adjacency`
+/// (including an out-of-range `start`) is still reported but contributes no
+/// neighbors.
+pub fn bfs_optimized(graph: &Graph, start: usize) -> Vec<usize> {
+    // Each node is enqueued at most once, so when all ids index into the
+    // adjacency list its length is the natural size for the visited set and
+    // the output. Reserving up front avoids regrowing both during traversal;
+    // ids beyond that size still work, they just trigger a normal grow.
+    let node_count = graph.adjacency.len();
+    let mut visited = BitSet::with_capacity(node_count);
+    let mut queue = VecDeque::new();
+    let mut result = Vec::with_capacity(node_count);
+
+    queue.push_back(start);
+    visited.insert(start);
+
+    while let Some(node) = queue.pop_front() {
+        result.push(node);
+
+        if let Some(neighbors) = graph.adjacency.get(node) {
+            for &neighbor in neighbors {
+                // `insert` returns true only the first time a node is seen,
+                // so marking and the "already visited?" test are one operation.
+                if visited.insert(neighbor) {
+                    queue.push_back(neighbor);
+                }
+            }
+        }
+    }
+
+    result
+}
+
 /// Helper function to generate a random graph for benchmarking
 pub fn generate_graph(nodes: usize) -> Graph {
     use rand::{Rng, SeedableRng};

diff --git a/src/blob_corruption_checker.rs b/src/blob_corruption_checker.rs
@@ -92,10 +92,7 @@ mod tests {
             "Middle corruption offset"
         );
         assert_eq!(corruptions[25].length, 4096, "Middle corruption length");
-        assert_eq!(
-            corruptions[49].offset, 507871232,
-            "Last corruption offset"
-        );
+        assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset");
         assert_eq!(corruptions[49].length, 5120, "Last corruption length");
     }
 }
diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs
@@ -1,5 +1,10 @@
+/// Public entry point of the DNA matcher.
+///
+/// Takes the raw bytes of a FASTA-style `genome` and returns, as owned
+/// strings, the non-empty lines that do not start with `>` and that contain
+/// `pattern`. The work is delegated to `optimized_dna_matcher_impl`.
+pub fn dna_matcher_api(genome: &[u8], pattern: &str) -> Vec<String> {
+    let needle = pattern.as_bytes();
+    optimized_dna_matcher_impl(genome, needle)
+}
+
 /// Naive approach: Read the entire file as a string and filter lines
-pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> {
+#[allow(dead_code)]
+fn naive_dna_matcher_impl(genome: &str, pattern: &str) -> Vec<String> {
     genome
         .lines()
         .filter(|line| !line.starts_with('>')) // Skip headers
@@ -8,27 +13,78 @@ pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> {
         .collect()
 }
 
+/// Single-threaded matcher that locates line boundaries by scanning for `\n`
+/// bytes and slices `genome` between them.
+///
+/// Returns every non-empty line that does not start with `>` and contains
+/// `pattern`. One trailing `\r` is removed from each line so that CRLF input
+/// produces the same strings as the `lines()`-based variants.
+// Not called by `dna_matcher_api`; kept as an alternative implementation.
+#[allow(dead_code)]
+fn itertools_dna_matcher_impl(genome: &str, pattern: &str) -> Vec<String> {
+    use itertools::Itertools;
+
+    // Sequence of line start offsets: 0, then one past each '\n'. The final
+    // entry `len + 1` is where a line would start after an imaginary newline
+    // at the very end, so every window `(start, next_start)` describes the
+    // line `genome[start..next_start - 1]` without needing a sentinel value.
+    std::iter::once(0)
+        .chain(genome.bytes().positions(|c| c == b'\n').map(|nl| nl + 1))
+        .chain(std::iter::once(genome.len() + 1))
+        .tuple_windows()
+        .map(|(start, next_start)| &genome[start..next_start - 1])
+        // Drop the carriage return left behind by a CRLF line ending.
+        .map(|line| line.strip_suffix('\r').unwrap_or(line))
+        // Skip empty lines and headers.
+        .filter(|line| !line.is_empty() && !line.starts_with('>'))
+        .filter(|line| line.contains(pattern))
+        .map(str::to_owned)
+        .collect()
+}
+
+/// Parallel variant of the naive matcher built on rayon's `par_lines`.
+///
+/// Returns an owned copy of every line of `genome` that does not start with
+/// `>` and contains `pattern`.
+#[allow(dead_code)]
+fn rayon_dna_matcher_impl(genome: &str, pattern: &str) -> Vec<String> {
+    use rayon::prelude::*;
+
+    // A line is kept when it is not a header and the pattern occurs in it.
+    let is_match = |line: &&str| !line.starts_with('>') && line.contains(pattern);
+
+    genome
+        .par_lines()
+        .filter(is_match)
+        .map(str::to_string)
+        .collect()
+}
+
+/// Byte-level matcher: splits `genome` on `\n` in parallel with rayon and
+/// searches each line for `pattern` with a prebuilt `memmem::Finder`.
+///
+/// Returns the matching lines as owned strings. Empty lines and header lines
+/// (first byte `>`) are skipped. One trailing `\r` is removed from each line
+/// so CRLF input yields the same strings as the `lines()`-based variants.
+/// Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
+/// panicking, since `genome` is arbitrary file content.
+fn optimized_dna_matcher_impl(genome: &[u8], pattern: &[u8]) -> Vec<String> {
+    use memchr::memmem;
+    use rayon::prelude::*;
+
+    // Build the searcher once; every line reuses it by reference.
+    let finder = memmem::Finder::new(pattern);
+
+    genome
+        .par_split(|&c| c == b'\n')
+        // Drop the carriage return left behind by a CRLF line ending.
+        .map(|line| line.strip_suffix(b"\r").unwrap_or(line))
+        .filter(|line| line.first().is_some_and(|&c| c != b'>')) // Skip headers and empty lines
+        .filter(|line| finder.find(line).is_some())
+        // Lossy conversion: identical to `from_utf8` for valid input, but a
+        // stray invalid byte in the file no longer aborts the whole search.
+        .map(|line| String::from_utf8_lossy(line).into_owned())
+        .collect()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[test]
-    fn test_naive_matcher() {
-        let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG";
+    fn test_matcher() {
+        let test_genome = b">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG";
         let pattern = "AGTCCGTA";
-        let matches = naive_dna_matcher(test_genome, pattern);
+        let matches = dna_matcher_api(test_genome, pattern);
         assert_eq!(matches.len(), 1);
         assert_eq!(matches[0], "AGTCCGTAAA");
     }
 
     #[test]
-    fn test_naive_matcher_on_genome_file() {
+    fn test_matcher_on_genome_file() {
         // Read the actual genome.fasta file
         let genome = std::fs::read_to_string("genome.fasta")
             .expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'");
         let pattern = "AGTCCGTA";
 
-        let matches = naive_dna_matcher(&genome, pattern);
+        let matches = dna_matcher_api(genome.as_bytes(), pattern);
 
         // With fixed seed (42), we should always get exactly 4927 matches
         assert_eq!(
Original file line number	Diff line number	Diff line change
Expand Up		@@ -34,3 +34,5 @@ cargo codspeed run -m walltime
		```

		Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time.

		Participant: gendx