Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 90 additions & 84 deletions src/chainer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub struct Anchor {
pub query_start: usize,
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct ChainingParameters {
pub max_lookback: usize,
pub diag_diff_penalty: f32,
Expand All @@ -41,6 +41,15 @@ impl Default for ChainingParameters {
}
}

#[derive(Debug, Default)]
pub struct ChainingResult {
best_score: f32,
dp: Vec<f32>,
predecessors: Vec<usize>,
anchors: Vec<Anchor>,
parameters: ChainingParameters,
}

#[derive(Debug)]
pub struct Chainer {
k: usize,
Expand Down Expand Up @@ -70,10 +79,10 @@ impl Chainer {
}
}

fn collinear_chaining(&self, anchors: &[Anchor]) -> (f32, Vec<f32>, Vec<usize>) {
fn collinear_chaining(&self, anchors: Vec<Anchor>) -> ChainingResult {
let n = anchors.len();
if n == 0 {
return (0.0, vec![], vec![]);
return ChainingResult::default();
}

let mut dp = vec![self.k as f32; n];
Expand Down Expand Up @@ -146,7 +155,13 @@ impl Chainer {
}
}

(best_score, dp, predecessors)
ChainingResult {
best_score,
dp,
predecessors,
anchors,
parameters: self.parameters.clone(),
}
}

pub fn get_chains(
Expand Down Expand Up @@ -191,9 +206,6 @@ impl Chainer {
let hits_timer = Instant::now();
let mut anchors = hits_to_anchors(&hits[is_revcomp], index);
time_find_hits += hits_timer.elapsed().as_secs_f64();
/*for anchor in anchors.iter().take(100) {
trace!("{:?}", anchor);
}*/
n_anchors += anchors.len();
let chaining_timer = Instant::now();
trace!("Chaining {} anchors", anchors.len());
Expand All @@ -214,18 +226,9 @@ impl Chainer {
// .join(",")
// );

let (best_score, dp, predecessors) = self.collinear_chaining(&anchors);

extract_chains_from_dp(
&anchors,
&dp,
&predecessors,
best_score,
index.k(),
is_revcomp == 1,
&mut chains,
&self.parameters,
);
let chaining_result = self.collinear_chaining(anchors);

chaining_result.extract_chains(index.k(), is_revcomp == 1, &mut chains);
time_chaining += chaining_timer.elapsed().as_secs_f64();
}
let mut hits_details12 = hits_details[0].clone();
Expand Down Expand Up @@ -363,76 +366,68 @@ fn hits_to_anchors(hits: &Vec<Hit>, index: &StrobemerIndex) -> Vec<Anchor> {
anchors
}

fn extract_chains_from_dp(
anchors: &[Anchor],
dp: &[f32],
predecessors: &[usize],
best_score: f32,
k: usize,
is_revcomp: bool,
chains: &mut Vec<Nam>,
chaining_parameters: &ChainingParameters,
) {
let n = anchors.len();
let valid_score = best_score * chaining_parameters.valid_score_threshold;
impl ChainingResult {
fn extract_chains(&self, k: usize, is_revcomp: bool, chains: &mut Vec<Nam>) {
let n = self.anchors.len();
let valid_score = self.best_score * self.parameters.valid_score_threshold;

let mut candidates = vec![];
for i in 0..n {
if dp[i] >= valid_score {
candidates.push((i, dp[i]));
let mut candidates = vec![];
for i in 0..n {
if self.dp[i] >= valid_score {
candidates.push((i, self.dp[i]));
}
}
}

candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
candidates.sort_by(|a, b| b.1.total_cmp(&a.1));

let mut used = vec![false; n];
for (i, score) in candidates {
if used[i] {
continue;
}
let mut used = vec![false; n];
for (i, score) in candidates {
if used[i] {
continue;
}

let mut j = i;
let mut c = 1;
let mut overlaps = false;
let mut chain_anchors = vec![anchors[i]];
let mut j = i;
let mut overlaps = false;
let mut chain_anchors = vec![self.anchors[i]];

let mut matching_bases = k;
let mut ref_coverage = anchors[i].ref_start;
let mut matching_bases = k;
let mut ref_coverage = self.anchors[i].ref_start;

while predecessors[j] != usize::MAX {
j = predecessors[j];
if used[j] {
overlaps = true;
break;
while self.predecessors[j] != usize::MAX {
j = self.predecessors[j];
if used[j] {
overlaps = true;
break;
}
chain_anchors.push(self.anchors[j]);
used[j] = true;

matching_bases += ref_coverage
.saturating_sub(self.anchors[j].ref_start)
.min(k);
ref_coverage = self.anchors[j].ref_start;
}
chain_anchors.push(anchors[j]);
used[j] = true;
c += 1;

matching_bases += ref_coverage.saturating_sub(anchors[j].ref_start).min(k);
ref_coverage = anchors[j].ref_start;
}
if overlaps {
continue;
}

if overlaps {
continue;
let first = &self.anchors[j];
let last = &self.anchors[i];

chains.push(Nam {
nam_id: chains.len(),
query_start: first.query_start,
query_end: last.query_start + k,
ref_start: first.ref_start,
ref_end: last.ref_start + k,
matching_bases,
ref_id: last.ref_id,
score: score + chain_anchors.len() as f32 * self.parameters.matches_weight,
is_revcomp,
anchors: chain_anchors,
});
}

let first = &anchors[j];
let last = &anchors[i];

chains.push(Nam {
nam_id: chains.len(),
query_start: first.query_start,
query_end: last.query_start + k,
ref_start: first.ref_start,
ref_end: last.ref_start + k,
n_matches: c,
matching_bases,
ref_id: last.ref_id,
score: score + c as f32 * chaining_parameters.matches_weight,
is_revcomp,
anchors: chain_anchors,
});
}
}

Expand All @@ -444,20 +439,20 @@ mod test {
fn test_chainer_early_break() {
let chainer = Chainer::new(20, ChainingParameters::default());
#[rustfmt::skip]
let anchors = [
let anchors = vec![
Anchor { ref_id: 0, ref_start: 0, query_start: 0, },
Anchor { ref_id: 0, ref_start: 30, query_start: 20, },
Anchor { ref_id: 0, ref_start: 60, query_start: 0, },
Anchor { ref_id: 0, ref_start: 95, query_start: 35, },
];

let (best_score, _dp, _predecessors) = chainer.collinear_chaining(&anchors);
let chaining_result = chainer.collinear_chaining(anchors);

// The best chain has score 42.342842 and uses anchors 0, 1, 3.
// When using the heuristic that breaks early if the predecessor is on the
// same diagonal, a suboptimal chain is found that has score 38.2 and
// consists of anchors 2 and 3.
assert!(best_score > 42.0);
assert!(chaining_result.best_score > 42.0);
}

#[test]
Expand All @@ -469,8 +464,19 @@ mod test {
Anchor { ref_id: 0, ref_start: 20, query_start: 20, },
Anchor { ref_id: 0, ref_start: 40, query_start: 40, },
];
let score1 = chainer.collinear_chaining(&anchors[0..1]).0;
assert_eq!(chainer.collinear_chaining(&anchors[0..2]).0, score1 * 2.0);
assert_eq!(chainer.collinear_chaining(&anchors[0..3]).0, score1 * 3.0);
let chaining_result = chainer.collinear_chaining(anchors[0..1].to_vec());
let score1 = chaining_result.best_score;
assert_eq!(
chainer
.collinear_chaining(anchors[0..2].to_vec())
.best_score,
score1 * 2.0
);
assert_eq!(
chainer
.collinear_chaining(anchors[0..3].to_vec())
.best_score,
score1 * 3.0
);
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ pub mod piecewisealigner;
pub mod read;
pub mod revcomp;
pub mod seeding;
pub mod shuffle;
pub mod ssw;
Loading
Loading