CodSpeedHQ · sukhmel · Oct 8, 2025 · Oct 8, 2025 · Oct 8, 2025 · Oct 8, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,6 +11,10 @@ path = "src/lib.rs"
 rand = "0.8"
 image = "0.25"
 image-compare = "0.5.0"
+fnv = "1.0.7"
+jetscii = { version = "0.5.3", features = [] }
+memchr = "2.7.6"
+rayon = "1.11.0"
 
 [dev-dependencies]
 divan = { version = "4.0.2", package = "codspeed-divan-compat" }

diff --git a/README.md b/README.md
@@ -34,3 +34,5 @@ cargo codspeed run -m walltime
 ```
 
 Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time.
+
+Vladislav.Sukhmel
diff --git a/benches/dna_matcher.rs b/benches/dna_matcher.rs
@@ -6,10 +6,10 @@ fn main() {
 
 #[divan::bench(sample_count = 2, sample_size = 3)]
 fn dna_matcher() {
-    let genome = std::fs::read_to_string("genome.fasta").expect(
+    let genome = std::fs::read("genome.fasta").expect(
         "Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'",
     );
-    let pattern = "AGTCCGTA";
+    let pattern = b"AGTCCGTA";
 
     let matches = divan::black_box(naive_dna_matcher(
         divan::black_box(&genome),

diff --git a/src/bfs.rs b/src/bfs.rs
@@ -1,4 +1,6 @@
-use std::collections::HashSet;
+use std::collections::{VecDeque};
+
+use fnv::FnvHashSet;
 
 /// A simple graph represented as an adjacency list
 #[derive(Debug, Clone)]
@@ -23,25 +25,24 @@ impl Graph {
     }
 }
 
-/// Naive BFS implementation using Vec as a queue (intentionally slow)
+/// Naive BFS implementation using VecDeque as a queue
 /// Returns the order in which nodes were visited
 pub fn bfs_naive(graph: &Graph, start: usize) -> Vec<usize> {
-    let mut visited = HashSet::new();
-    let mut queue = Vec::new(); // Using Vec instead of VecDeque - intentionally inefficient!
-    let mut result = Vec::new();
+    let mut visited = FnvHashSet::with_capacity_and_hasher(graph.num_nodes(), Default::default());
+    let mut queue = VecDeque::new();
+    let mut result = Vec::with_capacity(graph.num_nodes());
 
-    queue.push(start);
+    queue.push_back(start);
     visited.insert(start);
 
     while !queue.is_empty() {
-        // remove(0) is O(n) - this makes BFS slow!
-        let node = queue.remove(0);
+        let node = queue.pop_front().unwrap();
         result.push(node);
 
         if let Some(neighbors) = graph.adjacency.get(node) {
             for &neighbor in neighbors {
                 if visited.insert(neighbor) {
-                    queue.push(neighbor);
+                    queue.push_back(neighbor);
                 }
             }
         }

diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs
@@ -1,10 +1,59 @@
+use memchr::Memchr;
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
+
+struct ByteSplitImpl<'a> { // iterator state for splitting `slice` on a single separator byte
+    iter: Memchr<'a>, // yields the index of each separator byte found in `slice`
+    slice: &'a [u8], // the full input being split
+    position: usize, // start index of the next piece to yield
+    add_next: bool, // true while the piece after the last separator is still owed
+}
+
+trait ByteSplit<'a> { // types that can be split on one byte into a `ByteSplitImpl`
+    fn byte_split(self, separator: u8) -> ByteSplitImpl<'a>; // takes `self` by value
+}
+
+impl<'a> ByteSplit<'a> for &'a [u8] {
+    fn byte_split(self, separator: u8) -> ByteSplitImpl<'a> { // pieces borrow from `self`
+        ByteSplitImpl {
+            iter: memchr::memchr_iter(separator, self), // scans `self` for `separator`
+            slice: self,
+            position: 0, // the first piece starts at the beginning of the slice
+            add_next: true, // a final piece is always yielded, even with no separator present
+        }
+    }
+}
+
+impl<'a> Iterator for ByteSplitImpl<'a> {
+    type Item = &'a [u8];
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(next_position) = self.iter.next() {
+            let slice = self.slice.get(self.position..next_position); // separator excluded
+            self.position = next_position + 1; // resume just past the separator byte
+            self.add_next = true;
+            return slice;
+        }
+
+        // No separators remain: yield the trailing piece exactly once,
+        // then report the end of iteration on every later call.
+        if !self.add_next {
+            None
+        } else {
+            // Trailing piece: from `position` to the end of the slice (possibly empty).
+            let slice = self.slice.get(self.position..);
+            self.position = self.slice.len();
+            self.add_next = false;
+            slice
+        }
+    }
+}
+
 /// Naive approach: Read the entire file as a string and filter lines
-pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> {
-    genome
-        .lines()
-        .filter(|line| !line.starts_with('>')) // Skip headers
-        .filter(|line| line.contains(pattern))
-        .map(|s| s.to_string())
+pub fn naive_dna_matcher<'a>(genome: &'a [u8], pattern: &[u8]) -> Vec<&'a [u8]> {
+    let matcher = jetscii::ByteSubstring::new(pattern);
+    let lines = genome.byte_split(b'\n').collect::<Vec<_>>();
+    lines
+        .into_par_iter()
+        .filter(|b| b.len() > 1 && b[0] != b'>' && matcher.find(b).is_some())
         .collect()
 }
 
@@ -14,21 +63,23 @@ mod tests {
 
     #[test]
     fn test_naive_matcher() {
-        let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG";
-        let pattern = "AGTCCGTA";
+        let test_genome = b">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG";
+        let pattern = b"AGTCCGTA"; // occurs only in the line following `>seq2`
         let matches = naive_dna_matcher(test_genome, pattern);
+        println!("{:?}", matches); // NOTE(review): debug print — consider removing
         assert_eq!(matches.len(), 1);
-        assert_eq!(matches[0], "AGTCCGTAAA");
+        assert_eq!(matches[0], b"AGTCCGTAAA");
     }
 
     #[test]
     fn test_naive_matcher_on_genome_file() {
         // Read the actual genome.fasta file
-        let genome = std::fs::read_to_string("genome.fasta")
+        let genome = std::fs::read("genome.fasta")
             .expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'");
-        let pattern = "AGTCCGTA";
+        let pattern = b"AGTCCGTA";
 
         let matches = naive_dna_matcher(&genome, pattern);
+        // println!("{:?}", matches);
 
         // With fixed seed (42), we should always get exactly 4927 matches
         assert_eq!(
@@ -39,7 +90,7 @@ mod tests {
         );
 
         println!(
-            "✓ Found {} sequences containing pattern '{}'",
+            "✓ Found {} sequences containing pattern '{:?}'",
             matches.len(),
             pattern
         );
Original file line number	Diff line number	Diff line change
Expand Up		@@ -34,3 +34,5 @@ cargo codspeed run -m walltime
		```

		Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time.

		Vladislav.Sukhmel