Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ genome.fasta

*.jpg
*.png

*.swp
36 changes: 36 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ path = "src/lib.rs"
rand = "0.8"
image = "0.25"
image-compare = "0.5.0"
bit-set = "0.8.0"
itertools = "0.14.0"
rayon = "1.11.0"
memchr = "2.7.6"
memmap2 = "0.9.8"
bytes = "1.10.1"

[dev-dependencies]
divan = { version = "4.0.2", package = "codspeed-divan-compat" }
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ cargo codspeed run -m walltime
```

Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time.

Participant: gendx
8 changes: 4 additions & 4 deletions benches/bfs.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use divan::Bencher;
use eurorust_2025_workshop::bfs::{bfs_naive, generate_graph};
use eurorust_2025_workshop::bfs::{bfs_optimized, generate_graph};

fn main() {
divan::main();
Expand All @@ -10,7 +10,7 @@ fn bfs_small_graph(bencher: Bencher) {
let graph = generate_graph(100);

bencher.bench_local(|| {
let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0)));
let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0)));

assert!(!result.is_empty(), "BFS result should not be empty");
assert!(
Expand All @@ -28,7 +28,7 @@ fn bfs_medium_graph(bencher: Bencher) {
let graph = generate_graph(1000);

bencher.bench_local(|| {
let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0)));
let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0)));

assert!(!result.is_empty(), "BFS result should not be empty");
assert!(
Expand All @@ -46,7 +46,7 @@ fn bfs_large_graph(bencher: Bencher) {
let graph = generate_graph(10000);

bencher.bench_local(|| {
let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0)));
let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0)));

assert!(!result.is_empty(), "BFS result should not be empty");
assert!(
Expand Down
17 changes: 14 additions & 3 deletions benches/blob_corruption_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,25 @@ fn corruption_check(bencher: Bencher) {

// All corruptions should be 1KB aligned
for corruption in &corruptions {
assert_eq!(corruption.offset % 1024, 0, "Corruption offset should be 1KB aligned");
assert_eq!(corruption.length % 1024, 0, "Corruption length should be multiple of 1KB");
assert_eq!(
corruption.offset % 1024,
0,
"Corruption offset should be 1KB aligned"
);
assert_eq!(
corruption.length % 1024,
0,
"Corruption length should be multiple of 1KB"
);
}

// Check specific corruptions
assert_eq!(corruptions[0].offset, 14801920, "First corruption offset");
assert_eq!(corruptions[0].length, 2048, "First corruption length");
assert_eq!(corruptions[25].offset, 243891200, "Middle corruption offset");
assert_eq!(
corruptions[25].offset, 243891200,
"Middle corruption offset"
);
assert_eq!(corruptions[25].length, 4096, "Middle corruption length");
assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset");
assert_eq!(corruptions[49].length, 5120, "Last corruption length");
Expand Down
13 changes: 10 additions & 3 deletions benches/dna_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@ fn main() {

#[divan::bench(sample_count = 2, sample_size = 3)]
fn dna_matcher() {
let genome = std::fs::read_to_string("genome.fasta").expect(
use bytes::Bytes;
use memmap2::Mmap;
use std::fs::File;
use std::ops::Deref;

let file = File::open("genome.fasta").expect(
"Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'",
);
let mmap = unsafe { Mmap::map(&file).unwrap() };
let genome = Bytes::from_owner(mmap);
let pattern = "AGTCCGTA";

let matches = divan::black_box(naive_dna_matcher(
divan::black_box(&genome),
let matches = divan::black_box(dna_matcher_api(
divan::black_box(genome.deref()),
divan::black_box(pattern),
));

Expand Down
2 changes: 1 addition & 1 deletion benches/lut_grayscale_bench.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use eurorust_2025_workshop::lut_grayscale::*;
use image::{RgbImage};
use image::RgbImage;

fn main() {
divan::main();
Expand Down
26 changes: 25 additions & 1 deletion src/bfs.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::collections::HashSet;
use bit_set::BitSet;
use std::collections::{HashSet, VecDeque};

/// A simple graph represented as an adjacency list
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -50,6 +51,29 @@ pub fn bfs_naive(graph: &Graph, start: usize) -> Vec<usize> {
result
}

/// Breadth-first traversal of `graph` beginning at `start`, using a bit set
/// to remember which nodes have already been enqueued.
///
/// Returns the nodes in the order they are dequeued; `start` is always the
/// first element. A node index with no entry in `graph.adjacency` is still
/// reported but contributes no neighbors.
pub fn bfs_optimized(graph: &Graph, start: usize) -> Vec<usize> {
    // Reserve one bit per adjacency entry up front so marking a node does not
    // have to regrow the bit vector during the traversal. The set still grows
    // on demand if an index beyond this capacity is ever inserted.
    let mut visited = BitSet::with_capacity(graph.adjacency.len());
    let mut queue = VecDeque::new();
    let mut result = Vec::new();

    queue.push_back(start);
    visited.insert(start);

    while let Some(node) = queue.pop_front() {
        result.push(node);

        if let Some(neighbors) = graph.adjacency.get(node) {
            for &neighbor in neighbors {
                // `insert` reports `true` only the first time a value is
                // added, so each node is enqueued at most once.
                if visited.insert(neighbor) {
                    queue.push_back(neighbor);
                }
            }
        }
    }

    result
}

/// Helper function to generate a random graph for benchmarking
pub fn generate_graph(nodes: usize) -> Graph {
use rand::{Rng, SeedableRng};
Expand Down
5 changes: 1 addition & 4 deletions src/blob_corruption_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,7 @@ mod tests {
"Middle corruption offset"
);
assert_eq!(corruptions[25].length, 4096, "Middle corruption length");
assert_eq!(
corruptions[49].offset, 507871232,
"Last corruption offset"
);
assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset");
assert_eq!(corruptions[49].length, 5120, "Last corruption length");
}
}
68 changes: 62 additions & 6 deletions src/dna_matcher.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
/// Public entry point for matching `pattern` against raw genome bytes.
///
/// Converts `pattern` to its byte representation and forwards both arguments
/// to `optimized_dna_matcher_impl`, returning its result unchanged.
pub fn dna_matcher_api(genome: &[u8], pattern: &str) -> Vec<String> {
    let needle = pattern.as_bytes();
    optimized_dna_matcher_impl(genome, needle)
}

/// Naive approach: Read the entire file as a string and filter lines
pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> {
#[allow(dead_code)]
fn naive_dna_matcher_impl(genome: &str, pattern: &str) -> Vec<String> {
genome
.lines()
.filter(|line| !line.starts_with('>')) // Skip headers
Expand All @@ -8,27 +13,78 @@ pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> {
.collect()
}

/// Line matcher built on `itertools`: splits `genome` at `\n` by byte offset
/// and returns every non-empty, non-header line that contains `pattern`.
///
/// Header lines are those starting with `>`. Returned lines are owned copies
/// and exclude the `\n` terminator.
// Alternative implementation kept alongside the others; `dna_matcher_api`
// does not call it, hence the lint allowance.
#[allow(dead_code)]
fn itertools_dna_matcher_impl(genome: &str, pattern: &str) -> Vec<String> {
    use itertools::Itertools;

    // Offsets at which a line starts: 0, one past every newline, and finally a
    // virtual start one past the end of the input so the last line also gets
    // a window. Each window `(start, next_start)` then delimits the line
    // `genome[start..next_start - 1]`, with no sentinel value required.
    let newline_successors = genome
        .as_bytes()
        .iter()
        .positions(|&c| c == b'\n')
        .map(|newline| newline + 1);

    std::iter::once(0)
        .chain(newline_successors)
        .chain(std::iter::once(genome.len() + 1))
        .tuple_windows::<(_, _)>()
        .map(|(start, next_start)| &genome[start..next_start - 1])
        .filter(|line| !line.is_empty() && !line.starts_with('>'))
        .filter(|line| line.contains(pattern))
        .map(|line| line.to_string())
        .collect()
}

/// Parallel line matcher: iterates the lines of `genome` with rayon's
/// `par_lines`, drops header lines (those starting with `>`), and returns an
/// owned copy of every remaining line that contains `pattern`.
#[allow(dead_code)]
fn rayon_dna_matcher_impl(genome: &str, pattern: &str) -> Vec<String> {
    use rayon::prelude::*;

    genome
        .par_lines()
        // Keep only sequence lines (not headers) that contain the pattern.
        .filter(|line| !line.starts_with('>') && line.contains(pattern))
        .map(String::from)
        .collect()
}

/// Byte-oriented parallel matcher: splits `genome` at `\n`, skips empty lines
/// and header lines (first byte `>`), and returns every remaining line that
/// contains `pattern`.
///
/// Matching lines are converted to `String` lossily: any byte sequence that
/// is not valid UTF-8 is replaced with U+FFFD instead of causing a panic, so
/// arbitrary file contents can be passed in safely. Valid UTF-8 lines are
/// returned unchanged.
fn optimized_dna_matcher_impl(genome: &[u8], pattern: &[u8]) -> Vec<String> {
    use memchr::memmem;
    use rayon::prelude::*;

    // Build the substring searcher once and reuse it for every line.
    let finder = memmem::Finder::new(pattern);

    genome
        .par_split(|&c| c == b'\n')
        // Skip headers and empty lines.
        .filter(|line| matches!(line.first(), Some(&c) if c != b'>'))
        .filter(|line| finder.find(line).is_some())
        .map(|line| String::from_utf8_lossy(line).into_owned())
        .collect()
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_naive_matcher() {
let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG";
fn test_matcher() {
let test_genome = b">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG";
let pattern = "AGTCCGTA";
let matches = naive_dna_matcher(test_genome, pattern);
let matches = dna_matcher_api(test_genome, pattern);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0], "AGTCCGTAAA");
}

#[test]
fn test_naive_matcher_on_genome_file() {
fn test_matcher_on_genome_file() {
// Read the actual genome.fasta file
let genome = std::fs::read_to_string("genome.fasta")
.expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'");
let pattern = "AGTCCGTA";

let matches = naive_dna_matcher(&genome, pattern);
let matches = dna_matcher_api(genome.as_bytes(), pattern);

// With fixed seed (42), we should always get exactly 4927 matches
assert_eq!(
Expand Down
Loading