Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
647de61
Break out chain scoring related values into a parameter struct
adamnovak May 12, 2026
748b814
Split --rec-penalty-chain into --rec-penalty, --rec-penalty-aln, and …
adamnovak May 12, 2026
931d9fb
Merge remote-tracking branch 'origin/master' into split-penalty
adamnovak May 13, 2026
a012c37
Merge remote-tracking branch 'origin/master' into split-penalty
adamnovak May 14, 2026
d8a8146
Move rec mode option to option parser system
adamnovak May 14, 2026
2401b0f
Start on logic to decide the real default recombination mode value
adamnovak May 14, 2026
78ebc23
Revert "Move rec mode option to option parser system"
adamnovak May 14, 2026
23b32b1
Fix spelling
adamnovak May 14, 2026
91c2817
Rename variable
adamnovak May 14, 2026
3e32c11
Implement part of defaulting the recombination-aware mapping to a sen…
adamnovak May 14, 2026
c187b46
Rip out PathMinimizer distinction and decide if we want to be in reco…
adamnovak May 14, 2026
d9091af
Ignore path payloads if we're not meant to be using them
adamnovak May 14, 2026
d8e9be8
Set up recombination penalty values for presets and reorder options
adamnovak May 14, 2026
f09b52f
Remove build errors I added
adamnovak May 14, 2026
09f2ad4
Make the old vg minimizer --rec-mode now enforce that path payloads w…
adamnovak May 14, 2026
4ea138c
Set optimized R10 recombination parameters
adamnovak May 15, 2026
fe16fd8
Set optimized R10 min-chain-score-per-base
adamnovak May 15, 2026
fbde742
Put min chain score per base back up and write down why we shouldn't …
adamnovak May 19, 2026
1b7a1bf
Stop mapping reads with negative scores and move responsibility for u…
adamnovak May 19, 2026
41f7494
Add test file
adamnovak May 19, 2026
6d989b2
Set up testing for rec-aware mapping by default and remove test for r…
adamnovak May 19, 2026
93df42e
Add missing include
adamnovak May 19, 2026
9d17b71
Wrap help text
adamnovak May 19, 2026
2816a85
Merge remote-tracking branch 'origin/split-penalty' into split-penalty
adamnovak May 19, 2026
01c3646
Merge remote-tracking branch 'origin/master' into split-penalty
adamnovak May 19, 2026
554a612
Use GBWTGraph's idea of max samples, and sniff for path cover samples
adamnovak May 20, 2026
d094989
Add test for reporting on path covers and fix error message generation
adamnovak May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 25 additions & 44 deletions src/algorithms/chain_items.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,13 +640,9 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
const HandleGraph& graph,
int gap_open,
int gap_extension,
const ChainScoringScheme& scheme,
const transition_iterator& for_each_transition,
int item_bonus,
double item_scale,
double gap_scale,
double points_per_possible_match,
size_t max_indel_bases,
int recomb_penalty,
bool show_work) {

DiagramExplainer diagram(show_work);
Expand All @@ -659,7 +655,9 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
cerr << "Chaining group of " << to_chain.size() << " items" << endl;
}

crash_unless(recomb_penalty >= 0);
crash_unless(scheme.recombination_penalty >= 0);
crash_unless(scheme.consistency_bonus >= 0);


// Compute a base seed average length.
// TODO: Weight anchors differently?
Expand All @@ -683,11 +681,11 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
// We store the bonus used to select the current winning predecessor for
// each seed in this vector, which runs alongside the DP table.
//
// Starting from nowhere means full path conservation, so bonus = recomb_penalty.
std::vector<int> eval_bonuses(to_chain.size(), recomb_penalty);
// Starting from nowhere means full path conservation, so bonus = scheme.consistency_bonus.
std::vector<int> eval_bonuses(to_chain.size(), scheme.consistency_bonus);
for (size_t i = 0; i < to_chain.size(); i++) {
// Set up DP table so we can start anywhere with that item's score, scaled and with bonus applied.
chain_scores[i] = {(int)(to_chain[i].score() * item_scale + item_bonus), TracedScore::nowhere(), to_chain[i].anchor_end_paths()};
chain_scores[i] = {(int)(to_chain[i].score() * scheme.item_scale + scheme.item_bonus), TracedScore::nowhere(), to_chain[i].anchor_end_paths()};
}

// We will run this over every transition in a good DP order.
Expand All @@ -706,18 +704,18 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
auto& here = to_chain[transition.to_anchor];

// How many points is it worth to collect?
auto item_points = here.score() * item_scale + item_bonus;
auto item_points = here.score() * scheme.item_scale + scheme.item_bonus;

std::string here_gvnode;
if (diagram) {
here_gvnode = "i" + std::to_string(transition.to_anchor);
}

// If we come from nowhere, we get those points.
// This also has full path conservation (bonus = recomb_penalty).
// This also has full path conservation (bonus = scheme.consistency_bonus).
{
TracedScore from_nowhere = {(int)item_points, TracedScore::nowhere(), here.anchor_end_paths()};
int nowhere_bonus = recomb_penalty;
int nowhere_bonus = scheme.consistency_bonus;
int eval_nowhere = from_nowhere.score + nowhere_bonus;
int eval_current = chain_scores[transition.to_anchor].score + eval_bonuses[transition.to_anchor];
if (eval_nowhere > eval_current) {
Expand Down Expand Up @@ -787,13 +785,13 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
//
// But we account for anchor length in the item points, so don't use it
// here.
jump_points = -score_chain_gap(indel_length, base_seed_length) * gap_scale;
jump_points = -score_chain_gap(indel_length, base_seed_length) * scheme.gap_scale;

// add recombination penalty if necessary
jump_points -= check_recombination(chain_scores[transition.from_anchor], here) * recomb_penalty;
jump_points -= check_recombination(chain_scores[transition.from_anchor], here) * scheme.recombination_penalty;

// We can also account for the non-indel material, which we assume will have some identity in it.
jump_points += possible_match_length * points_per_possible_match;
jump_points += possible_match_length * scheme.points_per_possible_match;
}

if (jump_points != numeric_limits<int>::min()) {
Expand All @@ -805,15 +803,15 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
.set_shared_paths(here.anchor_paths());

// Evaluate heuristic to preserve path flexibility without inflating actual scoring DP.
// Bonus = fraction of conserved paths * recomb_penalty.
// Bonus = fraction of conserved paths * scheme.consistency_bonus.
// Bonus is 0 when recombination occurs (no shared paths).
int eval_bonus_from = 0;
if (recomb_penalty > 0) {
if (scheme.consistency_bonus > 0) {
int pre_count = __builtin_popcountll(source_score.paths);
if (pre_count > 0 && (source_score.paths & here.anchor_start_paths()) != 0) {
// No recombination: bonus = fraction of paths conserved * penalty
int post_count = __builtin_popcountll(from_source_score.paths);
eval_bonus_from = (recomb_penalty * post_count) / pre_count;
eval_bonus_from = (scheme.consistency_bonus * post_count) / pre_count;
}
// Recombination case (no shared paths): bonus stays 0
}
Expand Down Expand Up @@ -895,7 +893,7 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,

if (diagram) {
// Draw the item in the diagram
auto item_points = here.score() * item_scale + item_bonus;
auto item_points = here.score() * scheme.item_scale + scheme.item_bonus;
std::string here_gvnode = "i" + std::to_string(to_anchor);
std::stringstream label_stream;
label_stream << "#" << to_anchor << " " << here << " = " << item_points
Expand Down Expand Up @@ -941,8 +939,7 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
vector<pair<vector<size_t>, int>> chain_items_traceback(const vector<TracedScore>& chain_scores,
const VectorView<Anchor>& to_chain,
const TracedScore& best_past_ending_score_ever,
int item_bonus,
double item_scale,
const ChainScoringScheme& scheme,
size_t max_tracebacks) {

// We will fill this in with all the tracebacks, and then sort and truncate.
Expand Down Expand Up @@ -990,7 +987,7 @@ vector<pair<vector<size_t>, int>> chain_items_traceback(const vector<TracedScore
// Take away all the points we got for coming from there and being ourselves.
penalty += chain_scores[here].score;
// But then re-add our score for just us
penalty -= (to_chain[here].score() * item_scale + item_bonus);
penalty -= (to_chain[here].score() * scheme.item_scale + scheme.item_bonus);
// TODO: Score this more simply.
// TODO: find the edge to nowhere???
break;
Expand Down Expand Up @@ -1029,13 +1026,9 @@ ChainsResult find_best_chains(const VectorView<Anchor>& to_chain,
const HandleGraph& graph,
int gap_open,
int gap_extension,
int recomb_penalty,
const ChainScoringScheme& scheme,
size_t max_chains,
const transition_iterator& for_each_transition,
int item_bonus,
double item_scale,
double gap_scale,
double points_per_possible_match,
size_t max_indel_bases,
bool show_work) {

Expand All @@ -1057,20 +1050,16 @@ ChainsResult find_best_chains(const VectorView<Anchor>& to_chain,
graph,
gap_open,
gap_extension,
scheme,
for_each_transition,
item_bonus,
item_scale,
gap_scale,
points_per_possible_match,
max_indel_bases,
recomb_penalty,
show_work);
show_work);
#ifdef debug_chaining
std::cerr << "[REC INFO] Recombination number for chain: " << best_past_ending_score_ever.rec_num << "\tscore: " << best_past_ending_score_ever.score << "\tpaths: " << best_past_ending_score_ever.paths << std::endl;
#endif
// Then do the tracebacks
vector<pair<vector<size_t>, int>> tracebacks = chain_items_traceback(
chain_scores, to_chain, best_past_ending_score_ever, item_bonus, item_scale, max_chains);
chain_scores, to_chain, best_past_ending_score_ever, scheme, max_chains);

if (tracebacks.empty()) {
// Somehow we got nothing
Expand Down Expand Up @@ -1181,12 +1170,8 @@ pair<int, vector<size_t>> find_best_chain(const VectorView<Anchor>& to_chain,
const HandleGraph& graph,
int gap_open,
int gap_extension,
int recomb_penalty,
const ChainScoringScheme& scheme,
const transition_iterator& for_each_transition,
int item_bonus,
double item_scale,
double gap_scale,
double points_per_possible_match,
size_t max_indel_bases) {

ChainsResult cr = find_best_chains(
Expand All @@ -1195,13 +1180,9 @@ pair<int, vector<size_t>> find_best_chain(const VectorView<Anchor>& to_chain,
graph,
gap_open,
gap_extension,
recomb_penalty,
scheme,
1,
for_each_transition,
item_bonus,
item_scale,
gap_scale,
points_per_possible_match,
max_indel_bases
);
return cr.chains.front().scored_chain;
Expand Down
53 changes: 33 additions & 20 deletions src/algorithms/chain_items.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ ostream& operator<<(ostream& out, const TracedScore& value);
*/
void sort_anchor_indexes(const std::vector<Anchor>& items, std::vector<size_t>& indexes);

/// Represents a possible transition between anchors
struct transition_info {
// Index of the source anchor within the list of anchors
size_t from_anchor;
Expand All @@ -392,14 +393,37 @@ struct transition_info {
// Distance between anchors in the read
size_t read_distance;

// Constructor; read_distance defaults to max if not given
/// Build transition info from loose values
inline transition_info(size_t from, size_t to, size_t graph_dist, size_t read_dist = std::numeric_limits<size_t>::max())
: from_anchor(from), to_anchor(to), graph_distance(graph_dist), read_distance(read_dist) {}
};


/// Represents the scoring scheme for chains, to determine which are best.
/// Doesn't cover the parameters that really belong to an alignment scoring
/// scheme (like gap open and extend).
struct ChainScoringScheme {
/// Score bonus for each item collected
int item_bonus = 0;
/// Scale to apply to each item's own score
double item_scale = 1.0;
/// Scale to apply to the scores of gaps
double gap_scale = 1.0;
/// Add this many points per potential match between two seeds
double points_per_possible_match = 0;
/// Penalize this many points per recombination
int recombination_penalty = 0;
/// Apply a bonus during alternative selection (but not to actual DP
/// scores) of this many points when matching haplotype paths are
/// preserved, scaled by fraction of haplotypes preserved.
int consistency_bonus = 0;
};

/// A single chain result: scored chain plus the recombination count observed
/// on its endpoint.
/// TODO: Is there a better name for the abstraction this is getting at?
struct ChainWithRec {
// TODO: Shouldn't we split this into 2 fields?
std::pair<int, std::vector<size_t>> scored_chain;
// Positions (anchor indices) in the chain that introduce a recombination
// event between anchors. These correspond to anchors where we had to
Expand All @@ -418,6 +442,8 @@ struct ChainWithRec {

/// Result of finding best chains: a list of chains each paired with the
/// recombination count observed at that chain's endpoint.
/// TODO: Can we get rid of this once we're sure it won't need more fields?
/// TODO: Is there a better name for this?
struct ChainsResult {
std::vector<ChainWithRec> chains;
};
Expand Down Expand Up @@ -525,8 +551,7 @@ std::vector<transition_info> calculate_transition_read_distances(
*
* Input items must be sorted by start position in the read.
*
* Takes the given per-item bonus for each item collected, and scales item
* scores by the given scale.
* Uses the given scoring scheme to score chains.
*
* Uses a transition iterator to enumerate where we can come from to reach an
* item.
Expand All @@ -538,15 +563,12 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
const VectorView<Anchor>& to_chain,
const SnarlDistanceIndex& distance_index,
const HandleGraph& graph,
// TODO: We should maybe just take an EditAlignmentScorer here.
int gap_open,
int gap_extension,
const ChainScoringScheme& scheme = ChainScoringScheme(),
const transition_iterator& for_each_transition = lookback_transition_iterator(150, 0, 100),
int item_bonus = 0,
double item_scale = 1.0,
double gap_scale = 1.0,
double points_per_possible_match = 0,
size_t max_indel_bases = 100,
int recomb_penalty = 0,
bool show_work = false
);

Expand All @@ -567,8 +589,7 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
vector<pair<vector<size_t>, int>> chain_items_traceback(const vector<TracedScore>& chain_scores,
const VectorView<Anchor>& to_chain,
const TracedScore& best_past_ending_score_ever,
int item_bonus = 0,
double item_scale = 1.0,
const ChainScoringScheme& scheme = ChainScoringScheme(),
size_t max_tracebacks = 1);


Expand All @@ -586,13 +607,9 @@ ChainsResult find_best_chains(const VectorView<Anchor>& to_chain,
const HandleGraph& graph,
int gap_open,
int gap_extension,
int recomb_penalty = 0,
const ChainScoringScheme& scheme = ChainScoringScheme(),
size_t max_chains = 1,
const transition_iterator& for_each_transition = lookback_transition_iterator(150, 0, 100),
int item_bonus = 0,
double item_scale = 1.0,
double gap_scale = 1.0,
double points_per_possible_match = 0,
size_t max_indel_bases = 100,
bool show_work = false);

Expand All @@ -610,12 +627,8 @@ pair<int, vector<size_t>> find_best_chain(const VectorView<Anchor>& to_chain,
const HandleGraph& graph,
int gap_open,
int gap_extension,
int recomb_penalty = 0,
const ChainScoringScheme& scheme = ChainScoringScheme(),
const transition_iterator& for_each_transition = lookback_transition_iterator(150, 0, 100),
int item_bonus = 0,
double item_scale = 1.0,
double gap_scale = 1.0,
double points_per_possible_match = 0,
size_t max_indel_bases = 100);

/**
Expand Down
Loading
Loading