From 4e2130db4111cbabc41a7819253d045ed2e82873 Mon Sep 17 00:00:00 2001 From: NicolasBuchin Date: Wed, 1 Apr 2026 08:17:36 +0200 Subject: [PATCH 1/8] Replace shuffle_top_nams with a generic shuffle_best function and move to shuffle.rs --- src/lib.rs | 1 + src/nam.rs | 18 ++---------------- src/shuffle.rs | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 src/shuffle.rs diff --git a/src/lib.rs b/src/lib.rs index f92b32fa..5407810f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,4 +17,5 @@ pub mod piecewisealigner; pub mod read; pub mod revcomp; pub mod seeding; +pub mod shuffle; pub mod ssw; diff --git a/src/nam.rs b/src/nam.rs index 334f6fb6..53951122 100644 --- a/src/nam.rs +++ b/src/nam.rs @@ -13,6 +13,7 @@ use crate::io::fasta::RefSequence; use crate::mcsstrategy::McsStrategy; use crate::read::Read; use crate::seeding::randstrobes_query; +use crate::shuffle::shuffle_best; /// Non-overlapping approximate match #[derive(Clone, Debug, Default)] @@ -155,7 +156,7 @@ pub fn get_nams_by_chaining( pub fn sort_nams(nams: &mut [Nam], rng: &mut Rng) -> f64 { let timer = Instant::now(); nams.sort_by(|a, b| b.score.total_cmp(&a.score)); - shuffle_top_nams(nams, rng); + shuffle_best(nams, |nam| nam.score, rng); if log::log_enabled!(Trace) { trace!("Found {} NAMs", nams.len()); @@ -173,18 +174,3 @@ pub fn sort_nams(nams: &mut [Nam], rng: &mut Rng) -> f64 { timer.elapsed().as_secs_f64() } - -/// Shuffle the top-scoring NAMs. Input must be sorted by score. -/// This helps to ensure we pick a random location in case there are multiple -/// equally good ones. -fn shuffle_top_nams(nams: &mut [Nam], rng: &mut Rng) { - if let Some(best) = nams.first() { - let best_score = best.score; - - let pos = nams.iter().position(|nam| nam.score != best_score); - let end = pos.unwrap_or(nams.len()); - if end > 1 { - rng.shuffle(&mut nams[0..end]); - } - } -} diff --git a/src/shuffle.rs b/src/shuffle.rs new file mode 100644 index 00000000..0bd4b2fb --- /dev/null +++ b/src/shuffle.rs @@ -0,0 +1,25 @@ +use fastrand::Rng; + +/// Shuffles the best scoring items in a vector, assuming the data is sorted +/// This helps to ensure we pick a random best item in case there are multiple +/// equally good ones. +/// Returns the number of best items +pub fn shuffle_best(items: &mut [T], score: F, rng: &mut Rng) -> usize +where + F: Fn(&T) -> S, + S: PartialEq + Copy, +{ + let Some(best) = items.first() else { + return 0; + }; + + let best_score = score(best); + + let n_best = items.iter().take_while(|x| score(x) == best_score).count(); + + if n_best > 1 { + rng.shuffle(&mut items[0..n_best]); + } + + n_best +} From 8a00e9e5217f5a2c6f3192bfeb176f4357220ab0 Mon Sep 17 00:00:00 2001 From: Nicolas Buchin Date: Tue, 5 May 2026 12:57:32 +0200 Subject: [PATCH 2/8] Add more tests for has_shared_substring --- src/mapper.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/mapper.rs b/src/mapper.rs index 9fc08e4b..926582fd 100644 --- a/src/mapper.rs +++ b/src/mapper.rs @@ -1468,6 +1468,26 @@ mod tests { assert_eq!(alignment_pairs.len(), 1); } + #[test] + fn shared_substring_found() { + assert!(has_shared_substring(b"ACGTACGTACGT", b"NNNNACGTACNNNN", 9)); + } + + #[test] + fn shared_substring_not_found() { + assert!(!has_shared_substring(b"ACGTACGTACGT", b"TTTTTTTTTTTTTT", 9)); + } + + #[test] + fn shared_substring_empty() { + assert!(!has_shared_substring(b"", b"ACGT", 9)); + } + + #[test] + fn shared_substring_shorter_than_submer() { + assert!(!has_shared_substring(b"ACG", b"ACGACGACG", 9)); + } + #[test] fn test_has_shared_substring() { assert!(!has_shared_substring( From 12f48c9b4a055b78e983c3ae06bb016f82a888d7 Mon Sep 17 00:00:00 2001 From: Nicolas Buchin Date: Tue, 5 May 2026 13:01:54 +0200 Subject: [PATCH 3/8] Let is_proper_pair accept alignments directly (no Option) --- src/mapper.rs | 95 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/src/mapper.rs b/src/mapper.rs index 926582fd..ddad1134 100644 --- a/src/mapper.rs +++ b/src/mapper.rs @@ -635,8 +635,8 @@ pub fn align_paired_end_read( // Then the mapping quality is computed based on the NAMs. AlignedPairs::Proper((alignment1, alignment2)) => { let is_proper = is_proper_pair( - Some(&alignment1), - Some(&alignment2), + &alignment1, + &alignment2, insert_size_distribution.mu, insert_size_distribution.sigma, ); @@ -1099,7 +1099,7 @@ fn has_shared_substring(read_seq: &[u8], ref_seq: &[u8], k: usize) -> bool { false } -fn is_proper_pair( +fn is_proper_pair_opt( alignment1: Option<&Alignment>, alignment2: Option<&Alignment>, mu: f32, @@ -1109,19 +1109,22 @@ fn is_proper_pair( (None, None) => false, (Some(_), None) => false, (None, Some(_)) => false, - (Some(a1), Some(a2)) => { - let dist = a2.ref_start as isize - a1.ref_start as isize; - let same_reference = a1.reference_id == a2.reference_id; - let r1_r2 = !a1.is_revcomp && a2.is_revcomp && dist >= 0; // r1 ---> <---- r2 - let r2_r1 = !a2.is_revcomp && a1.is_revcomp && dist <= 0; // r2 ---> <---- r1 - let rel_orientation_good = r1_r2 || r2_r1; - let insert_good = dist.unsigned_abs() <= (mu + sigma * 6.0) as usize; - - same_reference && insert_good && rel_orientation_good - } + (Some(a1), Some(a2)) => is_proper_pair(a1, a2, mu, sigma), } } +/// Return true if the two alignments form a proper pair: +/// on the same reference, in opposite orientations, within the expected insert size. +fn is_proper_pair(a1: &Alignment, a2: &Alignment, mu: f32, sigma: f32) -> bool { + let dist = a2.ref_start as isize - a1.ref_start as isize; + let same_reference = a1.reference_id == a2.reference_id; + let r1_r2 = !a1.is_revcomp && a2.is_revcomp && dist >= 0; // r1 ---> <---- r2 + let r2_r1 = !a2.is_revcomp && a1.is_revcomp && dist <= 0; // r2 ---> <---- r1 + let rel_orientation_good = r1_r2 || r2_r1; + let insert_good = dist.unsigned_abs() <= (mu + sigma * 6.0) as usize; + same_reference && insert_good && rel_orientation_good +} + pub fn is_proper_nam_pair(nam1: &Nam, nam2: &Nam, mu: f32, sigma: f32) -> bool { if nam1.ref_id != nam2.ref_id || nam1.is_revcomp == nam2.is_revcomp { return false; @@ -1322,7 +1325,7 @@ fn aligned_pairs_to_sam( let alignment1 = &best_aln_pair.alignment1; let alignment2 = &best_aln_pair.alignment2; - let is_proper = is_proper_pair(alignment1.as_ref(), alignment2.as_ref(), mu, sigma); + let is_proper = is_proper_pair_opt(alignment1.as_ref(), alignment2.as_ref(), mu, sigma); let is_primary = true; records.extend(sam_output.make_paired_records( [alignment1.as_ref(), alignment2.as_ref()], @@ -1341,7 +1344,8 @@ fn aligned_pairs_to_sam( let alignment2 = &aln_pair.alignment2; let s_score = aln_pair.score; if s_max - s_score < secondary_dropoff { - let is_proper = is_proper_pair(alignment1.as_ref(), alignment2.as_ref(), mu, sigma); + let is_proper = + is_proper_pair_opt(alignment1.as_ref(), alignment2.as_ref(), mu, sigma); let mapq = if is_primary { mapq } else { 0 }; records.extend(sam_output.make_paired_records( [alignment1.as_ref(), alignment2.as_ref()], @@ -1394,11 +1398,28 @@ fn top_dropoff(nams: &[Nam]) -> f32 { #[cfg(test)] mod tests { + use super::*; use crate::cigar::Cigar; - use crate::mapper::{ - Alignment, ScoredAlignmentPair, count_best_alignment_pairs, deduplicate_scored_pairs, - has_shared_substring, - }; + + pub fn make_alignment( + reference_id: usize, + ref_start: usize, + score: u32, + is_revcomp: bool, + ) -> Alignment { + Alignment { + reference_id, + ref_start, + score, + is_revcomp, + edit_distance: 0, + soft_clip_left: 0, + soft_clip_right: 0, + length: 50, + cigar: Default::default(), + gapped: false, + } + } fn dummy_alignment() -> Alignment { Alignment { @@ -1496,4 +1517,40 @@ mod tests { 20 )); } + + #[test] + fn is_proper_pair_within_distance() { + let mu = 300.0_f32; + let sigma = 50.0_f32; + let a1 = make_alignment(0, 100, 40, false); + let a2 = make_alignment(0, 350, 40, true); + assert!(is_proper_pair(&a1, &a2, mu, sigma)); + } + + #[test] + fn is_proper_pair_rev_within_distance() { + let mu = 300.0_f32; + let sigma = 50.0_f32; + let a1 = make_alignment(0, 350, 40, true); + let a2 = make_alignment(0, 100, 40, false); + assert!(is_proper_pair(&a1, &a2, mu, sigma)); + } + + #[test] + fn is_proper_pair_incompatible() { + let mu = 300.0_f32; + let sigma = 50.0_f32; + let a1 = make_alignment(0, 100, 40, false); + let a2 = make_alignment(0, 350, 40, false); + assert!(!is_proper_pair(&a1, &a2, mu, sigma)); + } + + #[test] + fn is_proper_pair_too_far() { + let mu = 300.0_f32; + let sigma = 50.0_f32; + let a1 = make_alignment(0, 100, 40, false); + let a2 = make_alignment(0, 10_000, 40, true); + assert!(!is_proper_pair(&a1, &a2, mu, sigma)); + } } From 14b978adbcdc330f657d745eacf7995fc7902199 Mon Sep 17 00:00:00 2001 From: NicolasBuchin Date: Tue, 5 May 2026 13:36:04 +0200 Subject: [PATCH 4/8] Remove n_matches attribute from Nam struct It is redundant information we already have access to with nam.anchors.len() --- src/chainer.rs | 5 +---- src/mapper.rs | 22 +++++++++++----------- src/nam.rs | 3 +-- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/chainer.rs b/src/chainer.rs index 4d43b9a9..49fedc02 100644 --- a/src/chainer.rs +++ b/src/chainer.rs @@ -392,7 +392,6 @@ fn extract_chains_from_dp( } let mut j = i; - let mut c = 1; let mut overlaps = false; let mut chain_anchors = vec![anchors[i]]; @@ -407,7 +406,6 @@ fn extract_chains_from_dp( } chain_anchors.push(anchors[j]); used[j] = true; - c += 1; matching_bases += ref_coverage.saturating_sub(anchors[j].ref_start).min(k); ref_coverage = anchors[j].ref_start; @@ -426,10 +424,9 @@ fn extract_chains_from_dp( query_end: last.query_start + k, ref_start: first.ref_start, ref_end: last.ref_start + k, - n_matches: c, matching_bases, ref_id: last.ref_id, - score: score + c as f32 * chaining_parameters.matches_weight, + score: score + chain_anchors.len() as f32 * chaining_parameters.matches_weight, is_revcomp, anchors: chain_anchors, }); diff --git a/src/mapper.rs b/src/mapper.rs index ddad1134..e618ee1c 100644 --- a/src/mapper.rs +++ b/src/mapper.rs @@ -364,7 +364,7 @@ pub fn align_single_end_read( .take(mapping_parameters.max_tries) .enumerate() { - let score_dropoff = nam.n_matches as f32 / nam_max.n_matches as f32; + let score_dropoff = nam.anchors.len() as f32 / nam_max.anchors.len() as f32; if (tries > 1 && best_edit_distance == 0) || score_dropoff < mapping_parameters.dropoff_threshold @@ -963,12 +963,12 @@ fn rescue_read( mu: f32, sigma: f32, ) -> Vec { - let n_max1_hits = nams1[0].n_matches; + let n_max1_hits = nams1[0].anchors.len(); let mut alignments1 = vec![]; let mut alignments2 = vec![]; for nam in nams1.iter_mut().take(max_tries) { - let score_dropoff1 = nam.n_matches as f32 / n_max1_hits as f32; + let score_dropoff1 = nam.anchors.len() as f32 / n_max1_hits as f32; // only consider top hits (as minimap2 does) and break if below dropoff cutoff. if score_dropoff1 < dropoff { break; @@ -1165,7 +1165,7 @@ pub fn get_best_scoring_nam_pairs( let mut best_joint_matches = 0; for nam1 in nams1.iter().take(MAX_PAIR_NAMS) { for nam2 in nams2.iter().take(MAX_PAIR_NAMS) { - let joint_matches = nam1.n_matches + nam2.n_matches; + let joint_matches = nam1.anchors.len() + nam2.anchors.len(); if joint_matches < best_joint_matches / 2 { break; } @@ -1187,10 +1187,10 @@ pub fn get_best_scoring_nam_pairs( let best_joint_hits1 = if best_joint_matches > 0 { best_joint_matches } else { - nams1[0].n_matches + nams1[0].anchors.len() }; for nam1 in nams1 { - if nam1.n_matches < best_joint_hits1 / 2 { + if nam1.anchors.len() < best_joint_hits1 / 2 { break; } if added_n1.contains(&nam1.nam_id) { @@ -1209,10 +1209,10 @@ pub fn get_best_scoring_nam_pairs( let best_joint_hits2 = if best_joint_matches > 0 { best_joint_matches } else { - nams2[0].n_matches + nams2[0].anchors.len() }; for nam2 in nams2 { - if nam2.n_matches < best_joint_hits2 / 2 { + if nam2.anchors.len() < best_joint_hits2 / 2 { break; } if added_n2.contains(&nam2.nam_id) { @@ -1238,7 +1238,7 @@ pub fn mapping_quality(nams: &[Nam]) -> u8 { let s1 = nams[0].score; let s2 = nams[1].score; // from minimap2: MAPQ = 40(1−s2/s1) ·min{1,|M|/10} · log s1 - let min_matches = min(nams[0].n_matches, 10) as f32 / 10.0; + let min_matches = min(nams[0].anchors.len(), 10) as f32 / 10.0; let uncapped_mapq = 40.0 * (1.0 - s2 / s1) * min_matches * s1.ln(); uncapped_mapq.min(60.0) as u8 @@ -1387,10 +1387,10 @@ fn joint_mapq_from_high_scores(pairs: &[ScoredAlignmentPair]) -> u8 { /// compute dropoff of the first (top) NAM fn top_dropoff(nams: &[Nam]) -> f32 { let n_max = &nams[0]; - if n_max.n_matches <= 2 { + if n_max.anchors.len() <= 2 { 1.0 } else if nams.len() > 1 { - nams[1].n_matches as f32 / n_max.n_matches as f32 + nams[1].anchors.len() as f32 / n_max.anchors.len() as f32 } else { 0.0 } diff --git a/src/nam.rs b/src/nam.rs index 53951122..f1dff076 100644 --- a/src/nam.rs +++ b/src/nam.rs @@ -23,7 +23,6 @@ pub struct Nam { pub ref_end: usize, pub query_start: usize, pub query_end: usize, - pub n_matches: usize, pub matching_bases: usize, pub ref_id: usize, pub score: f32, @@ -162,7 +161,7 @@ pub fn sort_nams(nams: &mut [Nam], rng: &mut Rng) -> f64 { trace!("Found {} NAMs", nams.len()); let mut printed = 0; for nam in nams.iter() { - if nam.n_matches > 1 || printed < 10 { + if nam.anchors.len() > 1 || printed < 10 { trace!("- {}", nam); printed += 1; } From d1328ff7a98e5ca7f089ccd1cf3269d2e12edbc6 Mon Sep 17 00:00:00 2001 From: NicolasBuchin Date: Wed, 1 Apr 2026 08:17:36 +0200 Subject: [PATCH 5/8] Replace joint_mapq_from_high_scores with generic alignment_quality function --- src/mapper.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/mapper.rs b/src/mapper.rs index e618ee1c..081d46bc 100644 --- a/src/mapper.rs +++ b/src/mapper.rs @@ -1318,7 +1318,7 @@ fn aligned_pairs_to_sam( return records; } - let mapq = joint_mapq_from_high_scores(high_scores); + let mapq = alignment_quality(high_scores, |ap| ap.score as f32); let best_aln_pair = &high_scores[0]; if max_secondary == 0 { @@ -1366,15 +1366,17 @@ fn aligned_pairs_to_sam( records } -fn joint_mapq_from_high_scores(pairs: &[ScoredAlignmentPair]) -> u8 { - if pairs.len() <= 1 { +fn alignment_quality(items: &[T], score: F) -> u8 +where + F: Fn(&T) -> f32, +{ + if items.len() <= 1 { return 60; } - let score1 = pairs[0].score; - let score2 = pairs[1].score; - if score1 == score2 { - return 0; - } + + let score1 = score(&items[0]); + let score2 = score(&items[1]); + if score1 > 0.0 && score2 > 0.0 { (score1 - score2).min(60.0) as u8 } else if score1 > 0.0 && score2 <= 0.0 { From ab3fda3ab8c9b675a17f76e00887c82dbe0b6e3f Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Thu, 16 Apr 2026 15:01:02 +0200 Subject: [PATCH 6/8] Refactor: Let colinear_chaining return a ChainingResult instead of a tuple --- src/chainer.rs | 156 ++++++++++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 72 deletions(-) diff --git a/src/chainer.rs b/src/chainer.rs index 49fedc02..40148a1a 100644 --- a/src/chainer.rs +++ b/src/chainer.rs @@ -41,6 +41,13 @@ impl Default for ChainingParameters { } } +#[derive(Debug, Default)] +pub struct ChainingResult { + best_score: f32, + dp: Vec, + predecessors: Vec, +} + #[derive(Debug)] pub struct Chainer { k: usize, @@ -70,10 +77,10 @@ impl Chainer { } } - fn collinear_chaining(&self, anchors: &[Anchor]) -> (f32, Vec, Vec) { + fn collinear_chaining(&self, anchors: &[Anchor]) -> ChainingResult { let n = anchors.len(); if n == 0 { - return (0.0, vec![], vec![]); + return ChainingResult::default(); } let mut dp = vec![self.k as f32; n]; @@ -146,7 +153,11 @@ impl Chainer { } } - (best_score, dp, predecessors) + ChainingResult { + best_score, + dp, + predecessors, + } } pub fn get_chains( @@ -191,9 +202,6 @@ impl Chainer { let hits_timer = Instant::now(); let mut anchors = hits_to_anchors(&hits[is_revcomp], index); time_find_hits += hits_timer.elapsed().as_secs_f64(); - /*for anchor in anchors.iter().take(100) { - trace!("{:?}", anchor); - }*/ n_anchors += anchors.len(); let chaining_timer = Instant::now(); trace!("Chaining {} anchors", anchors.len()); @@ -214,13 +222,10 @@ impl Chainer { // .join(",") // ); - let (best_score, dp, predecessors) = self.collinear_chaining(&anchors); + let chaining_result = self.collinear_chaining(&anchors); - extract_chains_from_dp( + chaining_result.extract_chains( &anchors, - &dp, - &predecessors, - best_score, index.k(), is_revcomp == 1, &mut chains, @@ -363,73 +368,73 @@ fn hits_to_anchors(hits: &Vec, index: &StrobemerIndex) -> Vec { anchors } -fn extract_chains_from_dp( - anchors: &[Anchor], - dp: &[f32], - predecessors: &[usize], - best_score: f32, - k: usize, - is_revcomp: bool, - chains: &mut Vec, - chaining_parameters: &ChainingParameters, -) { - let n = anchors.len(); - let valid_score = best_score * chaining_parameters.valid_score_threshold; +impl ChainingResult { + fn extract_chains( + &self, + anchors: &[Anchor], + k: usize, + is_revcomp: bool, + chains: &mut Vec, + chaining_parameters: &ChainingParameters, + ) { + let n = anchors.len(); + let valid_score = self.best_score * chaining_parameters.valid_score_threshold; - let mut candidates = vec![]; - for i in 0..n { - if dp[i] >= valid_score { - candidates.push((i, dp[i])); + let mut candidates = vec![]; + for i in 0..n { + if self.dp[i] >= valid_score { + candidates.push((i, self.dp[i])); + } } - } - candidates.sort_by(|a, b| b.1.total_cmp(&a.1)); + candidates.sort_by(|a, b| b.1.total_cmp(&a.1)); - let mut used = vec![false; n]; - for (i, score) in candidates { - if used[i] { - continue; - } + let mut used = vec![false; n]; + for (i, score) in candidates { + if used[i] { + continue; + } - let mut j = i; - let mut overlaps = false; - let mut chain_anchors = vec![anchors[i]]; + let mut j = i; + let mut overlaps = false; + let mut chain_anchors = vec![anchors[i]]; - let mut matching_bases = k; - let mut ref_coverage = anchors[i].ref_start; + let mut matching_bases = k; + let mut ref_coverage = anchors[i].ref_start; + + while self.predecessors[j] != usize::MAX { + j = self.predecessors[j]; + if used[j] { + overlaps = true; + break; + } + chain_anchors.push(anchors[j]); + used[j] = true; - while predecessors[j] != usize::MAX { - j = predecessors[j]; - if used[j] { - overlaps = true; - break; + matching_bases += ref_coverage.saturating_sub(anchors[j].ref_start).min(k); + ref_coverage = anchors[j].ref_start; } - chain_anchors.push(anchors[j]); - used[j] = true; - matching_bases += ref_coverage.saturating_sub(anchors[j].ref_start).min(k); - ref_coverage = anchors[j].ref_start; - } + if overlaps { + continue; + } - if overlaps { - continue; + let first = &anchors[j]; + let last = &anchors[i]; + + chains.push(Nam { + nam_id: chains.len(), + query_start: first.query_start, + query_end: last.query_start + k, + ref_start: first.ref_start, + ref_end: last.ref_start + k, + matching_bases, + ref_id: last.ref_id, + score: score + chain_anchors.len() as f32 * chaining_parameters.matches_weight, + is_revcomp, + anchors: chain_anchors, + }); } - - let first = &anchors[j]; - let last = &anchors[i]; - - chains.push(Nam { - nam_id: chains.len(), - query_start: first.query_start, - query_end: last.query_start + k, - ref_start: first.ref_start, - ref_end: last.ref_start + k, - matching_bases, - ref_id: last.ref_id, - score: score + chain_anchors.len() as f32 * chaining_parameters.matches_weight, - is_revcomp, - anchors: chain_anchors, - }); } } @@ -448,13 +453,13 @@ mod test { Anchor { ref_id: 0, ref_start: 95, query_start: 35, }, ]; - let (best_score, _dp, _predecessors) = chainer.collinear_chaining(&anchors); + let chaining_result = chainer.collinear_chaining(&anchors); // The best chain has score 42.342842 and uses anchors 0, 1, 3. // When using the heuristic that breaks early if the predecessor is on the // same diagonal, a suboptimal chain is found that has score 38.2 and // consists of anchors 2 and 3. - assert!(best_score > 42.0); + assert!(chaining_result.best_score > 42.0); } #[test] @@ -466,8 +471,15 @@ mod test { Anchor { ref_id: 0, ref_start: 20, query_start: 20, }, Anchor { ref_id: 0, ref_start: 40, query_start: 40, }, ]; - let score1 = chainer.collinear_chaining(&anchors[0..1]).0; - assert_eq!(chainer.collinear_chaining(&anchors[0..2]).0, score1 * 2.0); - assert_eq!(chainer.collinear_chaining(&anchors[0..3]).0, score1 * 3.0); + let chaining_result = chainer.collinear_chaining(&anchors[0..1]); + let score1 = chaining_result.best_score; + assert_eq!( + chainer.collinear_chaining(&anchors[0..2]).best_score, + score1 * 2.0 + ); + assert_eq!( + chainer.collinear_chaining(&anchors[0..3]).best_score, + score1 * 3.0 + ); } } From 623d62e30cfa8eaa3a383973b59641ad726b9c61 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Thu, 16 Apr 2026 15:14:46 +0200 Subject: [PATCH 7/8] Refactor: Let ChainingResult take ownership of the anchors --- src/chainer.rs | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/chainer.rs b/src/chainer.rs index 40148a1a..14bc1252 100644 --- a/src/chainer.rs +++ b/src/chainer.rs @@ -46,6 +46,7 @@ pub struct ChainingResult { best_score: f32, dp: Vec, predecessors: Vec, + anchors: Vec, } #[derive(Debug)] @@ -77,7 +78,7 @@ impl Chainer { } } - fn collinear_chaining(&self, anchors: &[Anchor]) -> ChainingResult { + fn collinear_chaining(&self, anchors: Vec) -> ChainingResult { let n = anchors.len(); if n == 0 { return ChainingResult::default(); @@ -157,6 +158,7 @@ impl Chainer { best_score, dp, predecessors, + anchors, } } @@ -222,10 +224,9 @@ impl Chainer { // .join(",") // ); - let chaining_result = self.collinear_chaining(&anchors); + let chaining_result = self.collinear_chaining(anchors); chaining_result.extract_chains( - &anchors, index.k(), is_revcomp == 1, &mut chains, @@ -371,13 +372,12 @@ fn hits_to_anchors(hits: &Vec, index: &StrobemerIndex) -> Vec { impl ChainingResult { fn extract_chains( &self, - anchors: &[Anchor], k: usize, is_revcomp: bool, chains: &mut Vec, chaining_parameters: &ChainingParameters, ) { - let n = anchors.len(); + let n = self.anchors.len(); let valid_score = self.best_score * chaining_parameters.valid_score_threshold; let mut candidates = vec![]; @@ -397,10 +397,10 @@ impl ChainingResult { let mut j = i; let mut overlaps = false; - let mut chain_anchors = vec![anchors[i]]; + let mut chain_anchors = vec![self.anchors[i]]; let mut matching_bases = k; - let mut ref_coverage = anchors[i].ref_start; + let mut ref_coverage = self.anchors[i].ref_start; while self.predecessors[j] != usize::MAX { j = self.predecessors[j]; @@ -408,19 +408,21 @@ impl ChainingResult { overlaps = true; break; } - chain_anchors.push(anchors[j]); + chain_anchors.push(self.anchors[j]); used[j] = true; - matching_bases += ref_coverage.saturating_sub(anchors[j].ref_start).min(k); - ref_coverage = anchors[j].ref_start; + matching_bases += ref_coverage + .saturating_sub(self.anchors[j].ref_start) + .min(k); + ref_coverage = self.anchors[j].ref_start; } if overlaps { continue; } - let first = &anchors[j]; - let last = &anchors[i]; + let first = &self.anchors[j]; + let last = &self.anchors[i]; chains.push(Nam { nam_id: chains.len(), @@ -446,14 +448,14 @@ mod test { fn test_chainer_early_break() { let chainer = Chainer::new(20, ChainingParameters::default()); #[rustfmt::skip] - let anchors = [ + let anchors = vec![ Anchor { ref_id: 0, ref_start: 0, query_start: 0, }, Anchor { ref_id: 0, ref_start: 30, query_start: 20, }, Anchor { ref_id: 0, ref_start: 60, query_start: 0, }, Anchor { ref_id: 0, ref_start: 95, query_start: 35, }, ]; - let chaining_result = chainer.collinear_chaining(&anchors); + let chaining_result = chainer.collinear_chaining(anchors); // The best chain has score 42.342842 and uses anchors 0, 1, 3. // When using the heuristic that breaks early if the predecessor is on the @@ -471,14 +473,18 @@ mod test { Anchor { ref_id: 0, ref_start: 20, query_start: 20, }, Anchor { ref_id: 0, ref_start: 40, query_start: 40, }, ]; - let chaining_result = chainer.collinear_chaining(&anchors[0..1]); + let chaining_result = chainer.collinear_chaining(anchors[0..1].to_vec()); let score1 = chaining_result.best_score; assert_eq!( - chainer.collinear_chaining(&anchors[0..2]).best_score, + chainer + .collinear_chaining(anchors[0..2].to_vec()) + .best_score, score1 * 2.0 ); assert_eq!( - chainer.collinear_chaining(&anchors[0..3]).best_score, + chainer + .collinear_chaining(anchors[0..3].to_vec()) + .best_score, score1 * 3.0 ); } From e3917fa362b67e4800b3cd979df385cfc132630e Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Thu, 16 Apr 2026 15:24:06 +0200 Subject: [PATCH 8/8] Refactor: Add parameters field to ChainingResult --- src/chainer.rs | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/chainer.rs b/src/chainer.rs index 14bc1252..bfa1f9cb 100644 --- a/src/chainer.rs +++ b/src/chainer.rs @@ -18,7 +18,7 @@ pub struct Anchor { pub query_start: usize, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ChainingParameters { pub max_lookback: usize, pub diag_diff_penalty: f32, @@ -47,6 +47,7 @@ pub struct ChainingResult { dp: Vec, predecessors: Vec, anchors: Vec, + parameters: ChainingParameters, } #[derive(Debug)] @@ -159,6 +160,7 @@ impl Chainer { dp, predecessors, anchors, + parameters: self.parameters.clone(), } } @@ -226,12 +228,7 @@ impl Chainer { let chaining_result = self.collinear_chaining(anchors); - chaining_result.extract_chains( - index.k(), - is_revcomp == 1, - &mut chains, - &self.parameters, - ); + chaining_result.extract_chains(index.k(), is_revcomp == 1, &mut chains); time_chaining += chaining_timer.elapsed().as_secs_f64(); } let mut hits_details12 = hits_details[0].clone(); @@ -370,15 +367,9 @@ fn hits_to_anchors(hits: &Vec, index: &StrobemerIndex) -> Vec { } impl ChainingResult { - fn extract_chains( - &self, - k: usize, - is_revcomp: bool, - chains: &mut Vec, - chaining_parameters: &ChainingParameters, - ) { + fn extract_chains(&self, k: usize, is_revcomp: bool, chains: &mut Vec) { let n = self.anchors.len(); - let valid_score = self.best_score * chaining_parameters.valid_score_threshold; + let valid_score = self.best_score * self.parameters.valid_score_threshold; let mut candidates = vec![]; for i in 0..n { @@ -432,7 +423,7 @@ impl ChainingResult { ref_end: last.ref_start + k, matching_bases, ref_id: last.ref_id, - score: score + chain_anchors.len() as f32 * chaining_parameters.matches_weight, + score: score + chain_anchors.len() as f32 * self.parameters.matches_weight, is_revcomp, anchors: chain_anchors, });